Repository: DoctorWkt/acwj Branch: master Commit: ba1d3ba77fe8 Files: 9835 Total size: 8.7 MB Directory structure: gitextract_us1gy_yo/ ├── 00_Introduction/ │ ├── Figs/ │ │ └── parsing_steps.dia │ └── Readme.md ├── 01_Scanner/ │ ├── Makefile │ ├── Readme.md │ ├── data.h │ ├── decl.h │ ├── defs.h │ ├── input01 │ ├── input02 │ ├── input03 │ ├── input04 │ ├── input05 │ ├── main.c │ └── scan.c ├── 02_Parser/ │ ├── Makefile │ ├── Readme.md │ ├── data.h │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── input01 │ ├── input02 │ ├── input03 │ ├── input04 │ ├── input05 │ ├── interp.c │ ├── main.c │ ├── scan.c │ └── tree.c ├── 03_Precedence/ │ ├── Makefile │ ├── Readme.md │ ├── data.h │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── expr2.c │ ├── input01 │ ├── input02 │ ├── input03 │ ├── input04 │ ├── input05 │ ├── interp.c │ ├── main.c │ ├── scan.c │ └── tree.c ├── 04_Assembly/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cgn.c │ ├── data.h │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── input01 │ ├── input02 │ ├── interp.c │ ├── main.c │ ├── scan.c │ └── tree.c ├── 05_Statements/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cgn.c │ ├── data.h │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── input01 │ ├── main.c │ ├── misc.c │ ├── scan.c │ ├── stmt.c │ └── tree.c ├── 06_Variables/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── input01 │ ├── input02 │ ├── input03 │ ├── main.c │ ├── misc.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ └── tree.c ├── 07_Comparisons/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── input01 │ ├── input02 │ ├── input03 │ ├── input04 │ ├── main.c │ ├── misc.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ └── tree.c ├── 08_If_Statements/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── 
input01 │ ├── input02 │ ├── input03 │ ├── input04 │ ├── input05 │ ├── main.c │ ├── misc.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ └── tree.c ├── 09_While_Loops/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── main.c │ ├── misc.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── input01 │ │ ├── input02 │ │ ├── input03 │ │ ├── input04 │ │ ├── input05 │ │ ├── input06 │ │ ├── mktests │ │ ├── mktestsn │ │ ├── out.input01 │ │ ├── out.input02 │ │ ├── out.input03 │ │ ├── out.input04 │ │ ├── out.input05 │ │ ├── out.input06 │ │ ├── runtests │ │ └── runtestsn │ └── tree.c ├── 10_For_Loops/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── main.c │ ├── misc.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── input01 │ │ ├── input02 │ │ ├── input03 │ │ ├── input04 │ │ ├── input05 │ │ ├── input06 │ │ ├── input07 │ │ ├── mktests │ │ ├── mktestsn │ │ ├── out.input01 │ │ ├── out.input02 │ │ ├── out.input03 │ │ ├── out.input04 │ │ ├── out.input05 │ │ ├── out.input06 │ │ ├── out.input07 │ │ ├── runtests │ │ └── runtestsn │ └── tree.c ├── 11_Functions_pt1/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── main.c │ ├── misc.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── input01 │ │ ├── input02 │ │ ├── input03 │ │ ├── input04 │ │ ├── input05 │ │ ├── input06 │ │ ├── input07 │ │ ├── input08 │ │ ├── input09 │ │ ├── mktests │ │ ├── mktestsn │ │ ├── out.input01 │ │ ├── out.input02 │ │ ├── out.input03 │ │ ├── out.input04 │ │ ├── out.input05 │ │ ├── out.input06 │ │ ├── out.input07 │ │ ├── out.input08 │ │ ├── out.input09 │ │ ├── runtests │ │ └── runtestsn │ └── tree.c ├── 12_Types_pt1/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ 
├── gen.c │ ├── main.c │ ├── misc.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── input01 │ │ ├── input02 │ │ ├── input03 │ │ ├── input04 │ │ ├── input05 │ │ ├── input06 │ │ ├── input07 │ │ ├── input08 │ │ ├── input09 │ │ ├── input10 │ │ ├── mktests │ │ ├── mktestsn │ │ ├── out.input01 │ │ ├── out.input02 │ │ ├── out.input03 │ │ ├── out.input04 │ │ ├── out.input05 │ │ ├── out.input06 │ │ ├── out.input07 │ │ ├── out.input08 │ │ ├── out.input09 │ │ ├── out.input10 │ │ ├── runtests │ │ └── runtestsn │ ├── tree.c │ └── types.c ├── 13_Functions_pt2/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── lib/ │ │ └── printint.c │ ├── main.c │ ├── misc.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── input01 │ │ ├── input02 │ │ ├── input03 │ │ ├── input04 │ │ ├── input05 │ │ ├── input06 │ │ ├── input07 │ │ ├── input08 │ │ ├── input09 │ │ ├── input10 │ │ ├── input11 │ │ ├── input12 │ │ ├── input13 │ │ ├── input14 │ │ ├── mktests │ │ ├── mktestsn │ │ ├── out.input01 │ │ ├── out.input02 │ │ ├── out.input03 │ │ ├── out.input04 │ │ ├── out.input05 │ │ ├── out.input06 │ │ ├── out.input07 │ │ ├── out.input08 │ │ ├── out.input09 │ │ ├── out.input10 │ │ ├── out.input11 │ │ ├── out.input12 │ │ ├── out.input13 │ │ ├── out.input14 │ │ ├── runtests │ │ └── runtestsn │ ├── tree.c │ └── types.c ├── 14_ARM_Platform/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cg_arm.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── lib/ │ │ └── printint.c │ ├── main.c │ ├── misc.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── input01 │ │ ├── input02 │ │ ├── input03 │ │ ├── input04 │ │ ├── input05 │ │ ├── input06 │ │ ├── input07 │ │ ├── input08 │ │ ├── input09 │ │ ├── input10 │ │ ├── input11 │ │ ├── input12 │ │ ├── input13 │ │ ├── input14 │ │ ├── mktests │ │ ├── mktestsn │ │ ├── out.input01 │ │ ├── out.input02 │ │ ├── out.input03 │ │ ├── 
out.input04 │ │ ├── out.input05 │ │ ├── out.input06 │ │ ├── out.input07 │ │ ├── out.input08 │ │ ├── out.input09 │ │ ├── out.input10 │ │ ├── out.input11 │ │ ├── out.input12 │ │ ├── out.input13 │ │ ├── out.input14 │ │ ├── runtests │ │ └── runtestsn │ ├── tree.c │ └── types.c ├── 15_Pointers_pt1/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cg_arm.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── lib/ │ │ └── printint.c │ ├── main.c │ ├── misc.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── input01.c │ │ ├── input02.c │ │ ├── input03.c │ │ ├── input04.c │ │ ├── input05.c │ │ ├── input06.c │ │ ├── input07.c │ │ ├── input08.c │ │ ├── input09.c │ │ ├── input10.c │ │ ├── input11.c │ │ ├── input12.c │ │ ├── input13.c │ │ ├── input14.c │ │ ├── input15.c │ │ ├── mktests │ │ ├── out.input01.c │ │ ├── out.input02.c │ │ ├── out.input03.c │ │ ├── out.input04.c │ │ ├── out.input05.c │ │ ├── out.input06.c │ │ ├── out.input07.c │ │ ├── out.input08.c │ │ ├── out.input09.c │ │ ├── out.input10.c │ │ ├── out.input11.c │ │ ├── out.input12.c │ │ ├── out.input13.c │ │ ├── out.input14.c │ │ ├── out.input15.c │ │ ├── runtests │ │ └── runtestsn │ ├── tree.c │ └── types.c ├── 16_Global_Vars/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cg_arm.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── lib/ │ │ └── printint.c │ ├── main.c │ ├── misc.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── input01.c │ │ ├── input02.c │ │ ├── input03.c │ │ ├── input04.c │ │ ├── input05.c │ │ ├── input06.c │ │ ├── input07.c │ │ ├── input08.c │ │ ├── input09.c │ │ ├── input10.c │ │ ├── input11.c │ │ ├── input12.c │ │ ├── input13.c │ │ ├── input14.c │ │ ├── input15.c │ │ ├── input16.c │ │ ├── mktests │ │ ├── out.input01.c │ │ ├── out.input02.c │ │ ├── out.input03.c │ │ ├── out.input04.c │ │ ├── out.input05.c │ │ ├── out.input06.c │ │ ├── out.input07.c │ │ ├── out.input08.c │ │ ├── 
out.input09.c │ │ ├── out.input10.c │ │ ├── out.input11.c │ │ ├── out.input12.c │ │ ├── out.input13.c │ │ ├── out.input14.c │ │ ├── out.input15.c │ │ ├── out.input16.c │ │ ├── runtests │ │ └── runtestsn │ ├── tree.c │ └── types.c ├── 17_Scaling_Offsets/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cg_arm.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── lib/ │ │ └── printint.c │ ├── main.c │ ├── misc.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── input01.c │ │ ├── input02.c │ │ ├── input03.c │ │ ├── input04.c │ │ ├── input05.c │ │ ├── input06.c │ │ ├── input07.c │ │ ├── input08.c │ │ ├── input09.c │ │ ├── input10.c │ │ ├── input11.c │ │ ├── input12.c │ │ ├── input13.c │ │ ├── input14.c │ │ ├── input15.c │ │ ├── input16.c │ │ ├── mktests │ │ ├── out.input01.c │ │ ├── out.input02.c │ │ ├── out.input03.c │ │ ├── out.input04.c │ │ ├── out.input05.c │ │ ├── out.input06.c │ │ ├── out.input07.c │ │ ├── out.input08.c │ │ ├── out.input09.c │ │ ├── out.input10.c │ │ ├── out.input11.c │ │ ├── out.input12.c │ │ ├── out.input13.c │ │ ├── out.input14.c │ │ ├── out.input15.c │ │ ├── out.input16.c │ │ ├── runtests │ │ └── runtestsn │ ├── tree.c │ └── types.c ├── 18_Lvalues_Revisited/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cg_arm.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── lib/ │ │ └── printint.c │ ├── main.c │ ├── misc.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── input01.c │ │ ├── input02.c │ │ ├── input03.c │ │ ├── input04.c │ │ ├── input05.c │ │ ├── input06.c │ │ ├── input07.c │ │ ├── input08.c │ │ ├── input09.c │ │ ├── input10.c │ │ ├── input11.c │ │ ├── input12.c │ │ ├── input13.c │ │ ├── input14.c │ │ ├── input15.c │ │ ├── input16.c │ │ ├── input17.c │ │ ├── input18.c │ │ ├── input18a.c │ │ ├── mktests │ │ ├── out.input01.c │ │ ├── out.input02.c │ │ ├── out.input03.c │ │ ├── out.input04.c │ │ ├── out.input05.c │ │ ├── out.input06.c │ │ 
├── out.input07.c │ │ ├── out.input08.c │ │ ├── out.input09.c │ │ ├── out.input10.c │ │ ├── out.input11.c │ │ ├── out.input12.c │ │ ├── out.input13.c │ │ ├── out.input14.c │ │ ├── out.input15.c │ │ ├── out.input16.c │ │ ├── out.input17.c │ │ ├── out.input18.c │ │ ├── out.input18a.c │ │ ├── runtests │ │ └── runtestsn │ ├── tree.c │ └── types.c ├── 19_Arrays_pt1/ │ ├── Makefile │ ├── Notes │ ├── Readme.md │ ├── cg.c │ ├── cg_arm.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── lib/ │ │ └── printint.c │ ├── main.c │ ├── misc.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── input01.c │ │ ├── input02.c │ │ ├── input03.c │ │ ├── input04.c │ │ ├── input05.c │ │ ├── input06.c │ │ ├── input07.c │ │ ├── input08.c │ │ ├── input09.c │ │ ├── input10.c │ │ ├── input11.c │ │ ├── input12.c │ │ ├── input13.c │ │ ├── input14.c │ │ ├── input15.c │ │ ├── input16.c │ │ ├── input17.c │ │ ├── input18.c │ │ ├── input18a.c │ │ ├── input19.c │ │ ├── input20.c │ │ ├── mktests │ │ ├── out.input01.c │ │ ├── out.input02.c │ │ ├── out.input03.c │ │ ├── out.input04.c │ │ ├── out.input05.c │ │ ├── out.input06.c │ │ ├── out.input07.c │ │ ├── out.input08.c │ │ ├── out.input09.c │ │ ├── out.input10.c │ │ ├── out.input11.c │ │ ├── out.input12.c │ │ ├── out.input13.c │ │ ├── out.input14.c │ │ ├── out.input15.c │ │ ├── out.input16.c │ │ ├── out.input17.c │ │ ├── out.input18.c │ │ ├── out.input18a.c │ │ ├── out.input19.c │ │ ├── out.input20.c │ │ ├── runtests │ │ └── runtestsn │ ├── tree.c │ └── types.c ├── 20_Char_Str_Literals/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cg_arm.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── lib/ │ │ └── printint.c │ ├── main.c │ ├── misc.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── input01.c │ │ ├── input02.c │ │ ├── input03.c │ │ ├── input04.c │ │ ├── input05.c │ │ ├── input06.c │ │ ├── input07.c │ │ ├── input08.c │ │ ├── input09.c │ │ ├── 
input10.c │ │ ├── input11.c │ │ ├── input12.c │ │ ├── input13.c │ │ ├── input14.c │ │ ├── input15.c │ │ ├── input16.c │ │ ├── input17.c │ │ ├── input18.c │ │ ├── input18a.c │ │ ├── input19.c │ │ ├── input20.c │ │ ├── input21.c │ │ ├── mktests │ │ ├── out.input01.c │ │ ├── out.input02.c │ │ ├── out.input03.c │ │ ├── out.input04.c │ │ ├── out.input05.c │ │ ├── out.input06.c │ │ ├── out.input07.c │ │ ├── out.input08.c │ │ ├── out.input09.c │ │ ├── out.input10.c │ │ ├── out.input11.c │ │ ├── out.input12.c │ │ ├── out.input13.c │ │ ├── out.input14.c │ │ ├── out.input15.c │ │ ├── out.input16.c │ │ ├── out.input17.c │ │ ├── out.input18.c │ │ ├── out.input18a.c │ │ ├── out.input19.c │ │ ├── out.input20.c │ │ ├── out.input21.c │ │ ├── runtests │ │ └── runtestsn │ ├── tree.c │ └── types.c ├── 21_More_Operators/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cg_arm.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── lib/ │ │ └── printint.c │ ├── main.c │ ├── misc.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── input01.c │ │ ├── input02.c │ │ ├── input03.c │ │ ├── input04.c │ │ ├── input05.c │ │ ├── input06.c │ │ ├── input07.c │ │ ├── input08.c │ │ ├── input09.c │ │ ├── input10.c │ │ ├── input11.c │ │ ├── input12.c │ │ ├── input13.c │ │ ├── input14.c │ │ ├── input15.c │ │ ├── input16.c │ │ ├── input17.c │ │ ├── input18.c │ │ ├── input18a.c │ │ ├── input19.c │ │ ├── input20.c │ │ ├── input21.c │ │ ├── input22.c │ │ ├── input23.c │ │ ├── input24.c │ │ ├── mktests │ │ ├── out.input01.c │ │ ├── out.input02.c │ │ ├── out.input03.c │ │ ├── out.input04.c │ │ ├── out.input05.c │ │ ├── out.input06.c │ │ ├── out.input07.c │ │ ├── out.input08.c │ │ ├── out.input09.c │ │ ├── out.input10.c │ │ ├── out.input11.c │ │ ├── out.input12.c │ │ ├── out.input13.c │ │ ├── out.input14.c │ │ ├── out.input15.c │ │ ├── out.input16.c │ │ ├── out.input17.c │ │ ├── out.input18.c │ │ ├── out.input18a.c │ │ ├── out.input19.c │ │ ├── out.input20.c │ │ 
├── out.input21.c │ │ ├── out.input22.c │ │ ├── out.input23.c │ │ ├── out.input24.c │ │ ├── runtests │ │ └── runtestsn │ ├── tree.c │ └── types.c ├── 22_Design_Locals/ │ └── Readme.md ├── 23_Local_Variables/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cg_arm.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── lib/ │ │ └── printint.c │ ├── main.c │ ├── misc.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── input01.c │ │ ├── input02.c │ │ ├── input03.c │ │ ├── input04.c │ │ ├── input05.c │ │ ├── input06.c │ │ ├── input07.c │ │ ├── input08.c │ │ ├── input09.c │ │ ├── input10.c │ │ ├── input11.c │ │ ├── input12.c │ │ ├── input13.c │ │ ├── input14.c │ │ ├── input15.c │ │ ├── input16.c │ │ ├── input17.c │ │ ├── input18.c │ │ ├── input18a.c │ │ ├── input19.c │ │ ├── input20.c │ │ ├── input21.c │ │ ├── input22.c │ │ ├── input23.c │ │ ├── input24.c │ │ ├── input25.c │ │ ├── mktests │ │ ├── out.input01.c │ │ ├── out.input02.c │ │ ├── out.input03.c │ │ ├── out.input04.c │ │ ├── out.input05.c │ │ ├── out.input06.c │ │ ├── out.input07.c │ │ ├── out.input08.c │ │ ├── out.input09.c │ │ ├── out.input10.c │ │ ├── out.input11.c │ │ ├── out.input12.c │ │ ├── out.input13.c │ │ ├── out.input14.c │ │ ├── out.input15.c │ │ ├── out.input16.c │ │ ├── out.input17.c │ │ ├── out.input18.c │ │ ├── out.input18a.c │ │ ├── out.input19.c │ │ ├── out.input20.c │ │ ├── out.input21.c │ │ ├── out.input22.c │ │ ├── out.input23.c │ │ ├── out.input24.c │ │ ├── out.input25.c │ │ ├── runtests │ │ └── runtestsn │ ├── tree.c │ └── types.c ├── 24_Function_Params/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cg_arm.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── input27a.c │ ├── input27b.c │ ├── lib/ │ │ └── printint.c │ ├── main.c │ ├── misc.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── input01.c │ │ ├── input02.c │ │ ├── input03.c │ │ ├── input04.c │ │ ├── input05.c │ │ ├── input06.c 
│ │ ├── input07.c │ │ ├── input08.c │ │ ├── input09.c │ │ ├── input10.c │ │ ├── input11.c │ │ ├── input12.c │ │ ├── input13.c │ │ ├── input14.c │ │ ├── input15.c │ │ ├── input16.c │ │ ├── input17.c │ │ ├── input18.c │ │ ├── input18a.c │ │ ├── input19.c │ │ ├── input20.c │ │ ├── input21.c │ │ ├── input22.c │ │ ├── input23.c │ │ ├── input24.c │ │ ├── input25.c │ │ ├── input26.c │ │ ├── mktests │ │ ├── out.input01.c │ │ ├── out.input02.c │ │ ├── out.input03.c │ │ ├── out.input04.c │ │ ├── out.input05.c │ │ ├── out.input06.c │ │ ├── out.input07.c │ │ ├── out.input08.c │ │ ├── out.input09.c │ │ ├── out.input10.c │ │ ├── out.input11.c │ │ ├── out.input12.c │ │ ├── out.input13.c │ │ ├── out.input14.c │ │ ├── out.input15.c │ │ ├── out.input16.c │ │ ├── out.input17.c │ │ ├── out.input18.c │ │ ├── out.input18a.c │ │ ├── out.input19.c │ │ ├── out.input20.c │ │ ├── out.input21.c │ │ ├── out.input22.c │ │ ├── out.input23.c │ │ ├── out.input24.c │ │ ├── out.input25.c │ │ ├── out.input26.c │ │ ├── runtests │ │ └── runtestsn │ ├── tree.c │ └── types.c ├── 25_Function_Arguments/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cg_arm.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── lib/ │ │ └── printint.c │ ├── main.c │ ├── misc.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── input01.c │ │ ├── input02.c │ │ ├── input03.c │ │ ├── input04.c │ │ ├── input05.c │ │ ├── input06.c │ │ ├── input07.c │ │ ├── input08.c │ │ ├── input09.c │ │ ├── input10.c │ │ ├── input11.c │ │ ├── input12.c │ │ ├── input13.c │ │ ├── input14.c │ │ ├── input15.c │ │ ├── input16.c │ │ ├── input17.c │ │ ├── input18.c │ │ ├── input18a.c │ │ ├── input19.c │ │ ├── input20.c │ │ ├── input21.c │ │ ├── input22.c │ │ ├── input23.c │ │ ├── input24.c │ │ ├── input25.c │ │ ├── input26.c │ │ ├── input27.c │ │ ├── input28.c │ │ ├── mktests │ │ ├── out.input01.c │ │ ├── out.input02.c │ │ ├── out.input03.c │ │ ├── out.input04.c │ │ ├── out.input05.c │ │ ├── 
out.input06.c │ │ ├── out.input07.c │ │ ├── out.input08.c │ │ ├── out.input09.c │ │ ├── out.input10.c │ │ ├── out.input11.c │ │ ├── out.input12.c │ │ ├── out.input13.c │ │ ├── out.input14.c │ │ ├── out.input15.c │ │ ├── out.input16.c │ │ ├── out.input17.c │ │ ├── out.input18.c │ │ ├── out.input18a.c │ │ ├── out.input19.c │ │ ├── out.input20.c │ │ ├── out.input21.c │ │ ├── out.input22.c │ │ ├── out.input23.c │ │ ├── out.input24.c │ │ ├── out.input25.c │ │ ├── out.input26.c │ │ ├── out.input27.c │ │ ├── out.input28.c │ │ ├── runtests │ │ └── runtestsn │ ├── tree.c │ └── types.c ├── 26_Prototypes/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cg_arm.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── lib/ │ │ └── printint.c │ ├── main.c │ ├── misc.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── input01.c │ │ ├── input02.c │ │ ├── input03.c │ │ ├── input04.c │ │ ├── input05.c │ │ ├── input06.c │ │ ├── input07.c │ │ ├── input08.c │ │ ├── input09.c │ │ ├── input10.c │ │ ├── input11.c │ │ ├── input12.c │ │ ├── input13.c │ │ ├── input14.c │ │ ├── input15.c │ │ ├── input16.c │ │ ├── input17.c │ │ ├── input18.c │ │ ├── input18a.c │ │ ├── input19.c │ │ ├── input20.c │ │ ├── input21.c │ │ ├── input22.c │ │ ├── input23.c │ │ ├── input24.c │ │ ├── input25.c │ │ ├── input26.c │ │ ├── input27.c │ │ ├── input28.c │ │ ├── input29.c │ │ ├── input30.c │ │ ├── mktests │ │ ├── out.input01.c │ │ ├── out.input02.c │ │ ├── out.input03.c │ │ ├── out.input04.c │ │ ├── out.input05.c │ │ ├── out.input06.c │ │ ├── out.input07.c │ │ ├── out.input08.c │ │ ├── out.input09.c │ │ ├── out.input10.c │ │ ├── out.input11.c │ │ ├── out.input12.c │ │ ├── out.input13.c │ │ ├── out.input14.c │ │ ├── out.input15.c │ │ ├── out.input16.c │ │ ├── out.input17.c │ │ ├── out.input18.c │ │ ├── out.input18a.c │ │ ├── out.input19.c │ │ ├── out.input20.c │ │ ├── out.input21.c │ │ ├── out.input22.c │ │ ├── out.input23.c │ │ ├── out.input24.c │ │ ├── 
out.input25.c │ │ ├── out.input26.c │ │ ├── out.input27.c │ │ ├── out.input28.c │ │ ├── out.input29.c │ │ ├── out.input30.c │ │ ├── runtests │ │ └── runtestsn │ ├── tree.c │ └── types.c ├── 27_Testing_Errors/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cg_arm.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── lib/ │ │ └── printint.c │ ├── main.c │ ├── misc.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── err.input31.c │ │ ├── err.input32.c │ │ ├── err.input33.c │ │ ├── err.input34.c │ │ ├── err.input35.c │ │ ├── err.input36.c │ │ ├── err.input37.c │ │ ├── err.input38.c │ │ ├── err.input39.c │ │ ├── err.input40.c │ │ ├── err.input41.c │ │ ├── err.input42.c │ │ ├── err.input43.c │ │ ├── err.input44.c │ │ ├── err.input45.c │ │ ├── err.input46.c │ │ ├── err.input47.c │ │ ├── err.input48.c │ │ ├── err.input49.c │ │ ├── err.input50.c │ │ ├── err.input51.c │ │ ├── err.input52.c │ │ ├── input01.c │ │ ├── input02.c │ │ ├── input03.c │ │ ├── input04.c │ │ ├── input05.c │ │ ├── input06.c │ │ ├── input07.c │ │ ├── input08.c │ │ ├── input09.c │ │ ├── input10.c │ │ ├── input11.c │ │ ├── input12.c │ │ ├── input13.c │ │ ├── input14.c │ │ ├── input15.c │ │ ├── input16.c │ │ ├── input17.c │ │ ├── input18.c │ │ ├── input18a.c │ │ ├── input19.c │ │ ├── input20.c │ │ ├── input21.c │ │ ├── input22.c │ │ ├── input23.c │ │ ├── input24.c │ │ ├── input25.c │ │ ├── input26.c │ │ ├── input27.c │ │ ├── input28.c │ │ ├── input29.c │ │ ├── input30.c │ │ ├── input31.c │ │ ├── input32.c │ │ ├── input33.c │ │ ├── input34.c │ │ ├── input35.c │ │ ├── input36.c │ │ ├── input37.c │ │ ├── input38.c │ │ ├── input39.c │ │ ├── input40.c │ │ ├── input41.c │ │ ├── input42.c │ │ ├── input43.c │ │ ├── input44.c │ │ ├── input45.c │ │ ├── input46.c │ │ ├── input47.c │ │ ├── input48.c │ │ ├── input49.c │ │ ├── input50.c │ │ ├── input51.c │ │ ├── input52.c │ │ ├── input53.c │ │ ├── input54.c │ │ ├── mktests │ │ ├── out.input01.c │ │ ├── out.input02.c │ 
│ ├── out.input03.c │ │ ├── out.input04.c │ │ ├── out.input05.c │ │ ├── out.input06.c │ │ ├── out.input07.c │ │ ├── out.input08.c │ │ ├── out.input09.c │ │ ├── out.input10.c │ │ ├── out.input11.c │ │ ├── out.input12.c │ │ ├── out.input13.c │ │ ├── out.input14.c │ │ ├── out.input15.c │ │ ├── out.input16.c │ │ ├── out.input17.c │ │ ├── out.input18.c │ │ ├── out.input18a.c │ │ ├── out.input19.c │ │ ├── out.input20.c │ │ ├── out.input21.c │ │ ├── out.input22.c │ │ ├── out.input23.c │ │ ├── out.input24.c │ │ ├── out.input25.c │ │ ├── out.input26.c │ │ ├── out.input27.c │ │ ├── out.input28.c │ │ ├── out.input29.c │ │ ├── out.input30.c │ │ ├── out.input53.c │ │ ├── out.input54.c │ │ ├── runtests │ │ └── runtestsn │ ├── tree.c │ └── types.c ├── 28_Runtime_Flags/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cg_arm.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── main.c │ ├── misc.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── err.input31.c │ │ ├── err.input32.c │ │ ├── err.input33.c │ │ ├── err.input34.c │ │ ├── err.input35.c │ │ ├── err.input36.c │ │ ├── err.input37.c │ │ ├── err.input38.c │ │ ├── err.input39.c │ │ ├── err.input40.c │ │ ├── err.input41.c │ │ ├── err.input42.c │ │ ├── err.input43.c │ │ ├── err.input44.c │ │ ├── err.input45.c │ │ ├── err.input46.c │ │ ├── err.input47.c │ │ ├── err.input48.c │ │ ├── err.input49.c │ │ ├── err.input50.c │ │ ├── err.input51.c │ │ ├── err.input52.c │ │ ├── input01.c │ │ ├── input02.c │ │ ├── input03.c │ │ ├── input04.c │ │ ├── input05.c │ │ ├── input06.c │ │ ├── input07.c │ │ ├── input08.c │ │ ├── input09.c │ │ ├── input10.c │ │ ├── input11.c │ │ ├── input12.c │ │ ├── input13.c │ │ ├── input14.c │ │ ├── input15.c │ │ ├── input16.c │ │ ├── input17.c │ │ ├── input18.c │ │ ├── input18a.c │ │ ├── input19.c │ │ ├── input20.c │ │ ├── input21.c │ │ ├── input22.c │ │ ├── input23.c │ │ ├── input24.c │ │ ├── input25.c │ │ ├── input26.c │ │ ├── input27.c │ │ ├── input28.c │ │ 
├── input29.c │ │ ├── input30.c │ │ ├── input31.c │ │ ├── input32.c │ │ ├── input33.c │ │ ├── input34.c │ │ ├── input35.c │ │ ├── input36.c │ │ ├── input37.c │ │ ├── input38.c │ │ ├── input39.c │ │ ├── input40.c │ │ ├── input41.c │ │ ├── input42.c │ │ ├── input43.c │ │ ├── input44.c │ │ ├── input45.c │ │ ├── input46.c │ │ ├── input47.c │ │ ├── input48.c │ │ ├── input49.c │ │ ├── input50.c │ │ ├── input51.c │ │ ├── input52.c │ │ ├── input53.c │ │ ├── input54.c │ │ ├── mktests │ │ ├── out.input01.c │ │ ├── out.input02.c │ │ ├── out.input03.c │ │ ├── out.input04.c │ │ ├── out.input05.c │ │ ├── out.input06.c │ │ ├── out.input07.c │ │ ├── out.input08.c │ │ ├── out.input09.c │ │ ├── out.input10.c │ │ ├── out.input11.c │ │ ├── out.input12.c │ │ ├── out.input13.c │ │ ├── out.input14.c │ │ ├── out.input15.c │ │ ├── out.input16.c │ │ ├── out.input17.c │ │ ├── out.input18.c │ │ ├── out.input18a.c │ │ ├── out.input19.c │ │ ├── out.input20.c │ │ ├── out.input21.c │ │ ├── out.input22.c │ │ ├── out.input23.c │ │ ├── out.input24.c │ │ ├── out.input25.c │ │ ├── out.input26.c │ │ ├── out.input27.c │ │ ├── out.input28.c │ │ ├── out.input29.c │ │ ├── out.input30.c │ │ ├── out.input53.c │ │ ├── out.input54.c │ │ ├── runtests │ │ └── runtestsn │ ├── tree.c │ └── types.c ├── 29_Refactoring/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cg_arm.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── main.c │ ├── misc.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── err.input31.c │ │ ├── err.input32.c │ │ ├── err.input33.c │ │ ├── err.input34.c │ │ ├── err.input35.c │ │ ├── err.input36.c │ │ ├── err.input37.c │ │ ├── err.input38.c │ │ ├── err.input39.c │ │ ├── err.input40.c │ │ ├── err.input41.c │ │ ├── err.input42.c │ │ ├── err.input43.c │ │ ├── err.input44.c │ │ ├── err.input45.c │ │ ├── err.input46.c │ │ ├── err.input47.c │ │ ├── err.input48.c │ │ ├── err.input49.c │ │ ├── err.input50.c │ │ ├── err.input51.c │ │ ├── err.input52.c │ │ 
├── input01.c │ │ ├── input02.c │ │ ├── input03.c │ │ ├── input04.c │ │ ├── input05.c │ │ ├── input06.c │ │ ├── input07.c │ │ ├── input08.c │ │ ├── input09.c │ │ ├── input10.c │ │ ├── input11.c │ │ ├── input12.c │ │ ├── input13.c │ │ ├── input14.c │ │ ├── input15.c │ │ ├── input16.c │ │ ├── input17.c │ │ ├── input18.c │ │ ├── input18a.c │ │ ├── input19.c │ │ ├── input20.c │ │ ├── input21.c │ │ ├── input22.c │ │ ├── input23.c │ │ ├── input24.c │ │ ├── input25.c │ │ ├── input26.c │ │ ├── input27.c │ │ ├── input28.c │ │ ├── input29.c │ │ ├── input30.c │ │ ├── input31.c │ │ ├── input32.c │ │ ├── input33.c │ │ ├── input34.c │ │ ├── input35.c │ │ ├── input36.c │ │ ├── input37.c │ │ ├── input38.c │ │ ├── input39.c │ │ ├── input40.c │ │ ├── input41.c │ │ ├── input42.c │ │ ├── input43.c │ │ ├── input44.c │ │ ├── input45.c │ │ ├── input46.c │ │ ├── input47.c │ │ ├── input48.c │ │ ├── input49.c │ │ ├── input50.c │ │ ├── input51.c │ │ ├── input52.c │ │ ├── input53.c │ │ ├── input54.c │ │ ├── input55.c │ │ ├── mktests │ │ ├── out.input01.c │ │ ├── out.input02.c │ │ ├── out.input03.c │ │ ├── out.input04.c │ │ ├── out.input05.c │ │ ├── out.input06.c │ │ ├── out.input07.c │ │ ├── out.input08.c │ │ ├── out.input09.c │ │ ├── out.input10.c │ │ ├── out.input11.c │ │ ├── out.input12.c │ │ ├── out.input13.c │ │ ├── out.input14.c │ │ ├── out.input15.c │ │ ├── out.input16.c │ │ ├── out.input17.c │ │ ├── out.input18.c │ │ ├── out.input18a.c │ │ ├── out.input19.c │ │ ├── out.input20.c │ │ ├── out.input21.c │ │ ├── out.input22.c │ │ ├── out.input23.c │ │ ├── out.input24.c │ │ ├── out.input25.c │ │ ├── out.input26.c │ │ ├── out.input27.c │ │ ├── out.input28.c │ │ ├── out.input29.c │ │ ├── out.input30.c │ │ ├── out.input53.c │ │ ├── out.input54.c │ │ ├── out.input55.c │ │ ├── runtests │ │ └── runtestsn │ ├── tree.c │ └── types.c ├── 30_Design_Composites/ │ ├── Figs/ │ │ ├── newsymlists.dia │ │ └── structunion1.dia │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cg_arm.c │ ├── cgn.c │ ├── 
data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── main.c │ ├── misc.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── err.input31.c │ │ ├── err.input32.c │ │ ├── err.input33.c │ │ ├── err.input34.c │ │ ├── err.input35.c │ │ ├── err.input36.c │ │ ├── err.input37.c │ │ ├── err.input38.c │ │ ├── err.input39.c │ │ ├── err.input40.c │ │ ├── err.input41.c │ │ ├── err.input42.c │ │ ├── err.input43.c │ │ ├── err.input44.c │ │ ├── err.input45.c │ │ ├── err.input46.c │ │ ├── err.input47.c │ │ ├── err.input48.c │ │ ├── err.input49.c │ │ ├── err.input50.c │ │ ├── err.input51.c │ │ ├── err.input52.c │ │ ├── input01.c │ │ ├── input02.c │ │ ├── input03.c │ │ ├── input04.c │ │ ├── input05.c │ │ ├── input06.c │ │ ├── input07.c │ │ ├── input08.c │ │ ├── input09.c │ │ ├── input10.c │ │ ├── input11.c │ │ ├── input12.c │ │ ├── input13.c │ │ ├── input14.c │ │ ├── input15.c │ │ ├── input16.c │ │ ├── input17.c │ │ ├── input18.c │ │ ├── input18a.c │ │ ├── input19.c │ │ ├── input20.c │ │ ├── input21.c │ │ ├── input22.c │ │ ├── input23.c │ │ ├── input24.c │ │ ├── input25.c │ │ ├── input26.c │ │ ├── input27.c │ │ ├── input28.c │ │ ├── input29.c │ │ ├── input30.c │ │ ├── input31.c │ │ ├── input32.c │ │ ├── input33.c │ │ ├── input34.c │ │ ├── input35.c │ │ ├── input36.c │ │ ├── input37.c │ │ ├── input38.c │ │ ├── input39.c │ │ ├── input40.c │ │ ├── input41.c │ │ ├── input42.c │ │ ├── input43.c │ │ ├── input44.c │ │ ├── input45.c │ │ ├── input46.c │ │ ├── input47.c │ │ ├── input48.c │ │ ├── input49.c │ │ ├── input50.c │ │ ├── input51.c │ │ ├── input52.c │ │ ├── input53.c │ │ ├── input54.c │ │ ├── input55.c │ │ ├── mktests │ │ ├── out.input01.c │ │ ├── out.input02.c │ │ ├── out.input03.c │ │ ├── out.input04.c │ │ ├── out.input05.c │ │ ├── out.input06.c │ │ ├── out.input07.c │ │ ├── out.input08.c │ │ ├── out.input09.c │ │ ├── out.input10.c │ │ ├── out.input11.c │ │ ├── out.input12.c │ │ ├── out.input13.c │ │ ├── out.input14.c │ │ ├── out.input15.c │ │ ├── 
out.input16.c │ │ ├── out.input17.c │ │ ├── out.input18.c │ │ ├── out.input18a.c │ │ ├── out.input19.c │ │ ├── out.input20.c │ │ ├── out.input21.c │ │ ├── out.input22.c │ │ ├── out.input23.c │ │ ├── out.input24.c │ │ ├── out.input25.c │ │ ├── out.input26.c │ │ ├── out.input27.c │ │ ├── out.input28.c │ │ ├── out.input29.c │ │ ├── out.input30.c │ │ ├── out.input53.c │ │ ├── out.input54.c │ │ ├── out.input55.c │ │ ├── runtests │ │ └── runtestsn │ ├── tree.c │ └── types.c ├── 31_Struct_Declarations/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cg_arm.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── main.c │ ├── misc.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── err.input31.c │ │ ├── err.input32.c │ │ ├── err.input33.c │ │ ├── err.input34.c │ │ ├── err.input35.c │ │ ├── err.input36.c │ │ ├── err.input37.c │ │ ├── err.input38.c │ │ ├── err.input39.c │ │ ├── err.input40.c │ │ ├── err.input41.c │ │ ├── err.input42.c │ │ ├── err.input43.c │ │ ├── err.input44.c │ │ ├── err.input45.c │ │ ├── err.input46.c │ │ ├── err.input47.c │ │ ├── err.input48.c │ │ ├── err.input49.c │ │ ├── err.input50.c │ │ ├── err.input51.c │ │ ├── err.input52.c │ │ ├── err.input56.c │ │ ├── err.input57.c │ │ ├── input01.c │ │ ├── input02.c │ │ ├── input03.c │ │ ├── input04.c │ │ ├── input05.c │ │ ├── input06.c │ │ ├── input07.c │ │ ├── input08.c │ │ ├── input09.c │ │ ├── input10.c │ │ ├── input11.c │ │ ├── input12.c │ │ ├── input13.c │ │ ├── input14.c │ │ ├── input15.c │ │ ├── input16.c │ │ ├── input17.c │ │ ├── input18.c │ │ ├── input18a.c │ │ ├── input19.c │ │ ├── input20.c │ │ ├── input21.c │ │ ├── input22.c │ │ ├── input23.c │ │ ├── input24.c │ │ ├── input25.c │ │ ├── input26.c │ │ ├── input27.c │ │ ├── input28.c │ │ ├── input29.c │ │ ├── input30.c │ │ ├── input31.c │ │ ├── input32.c │ │ ├── input33.c │ │ ├── input34.c │ │ ├── input35.c │ │ ├── input36.c │ │ ├── input37.c │ │ ├── input38.c │ │ ├── input39.c │ │ ├── input40.c │ │ ├── 
input41.c │ │ ├── input42.c │ │ ├── input43.c │ │ ├── input44.c │ │ ├── input45.c │ │ ├── input46.c │ │ ├── input47.c │ │ ├── input48.c │ │ ├── input49.c │ │ ├── input50.c │ │ ├── input51.c │ │ ├── input52.c │ │ ├── input53.c │ │ ├── input54.c │ │ ├── input55.c │ │ ├── input56.c │ │ ├── input57.c │ │ ├── mktests │ │ ├── out.input01.c │ │ ├── out.input02.c │ │ ├── out.input03.c │ │ ├── out.input04.c │ │ ├── out.input05.c │ │ ├── out.input06.c │ │ ├── out.input07.c │ │ ├── out.input08.c │ │ ├── out.input09.c │ │ ├── out.input10.c │ │ ├── out.input11.c │ │ ├── out.input12.c │ │ ├── out.input13.c │ │ ├── out.input14.c │ │ ├── out.input15.c │ │ ├── out.input16.c │ │ ├── out.input17.c │ │ ├── out.input18.c │ │ ├── out.input18a.c │ │ ├── out.input19.c │ │ ├── out.input20.c │ │ ├── out.input21.c │ │ ├── out.input22.c │ │ ├── out.input23.c │ │ ├── out.input24.c │ │ ├── out.input25.c │ │ ├── out.input26.c │ │ ├── out.input27.c │ │ ├── out.input28.c │ │ ├── out.input29.c │ │ ├── out.input30.c │ │ ├── out.input53.c │ │ ├── out.input54.c │ │ ├── out.input55.c │ │ ├── runtests │ │ └── runtestsn │ ├── tree.c │ └── types.c ├── 32_Struct_Access_pt1/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cg_arm.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── main.c │ ├── misc.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── err.input31.c │ │ ├── err.input32.c │ │ ├── err.input33.c │ │ ├── err.input34.c │ │ ├── err.input35.c │ │ ├── err.input36.c │ │ ├── err.input37.c │ │ ├── err.input38.c │ │ ├── err.input39.c │ │ ├── err.input40.c │ │ ├── err.input41.c │ │ ├── err.input42.c │ │ ├── err.input43.c │ │ ├── err.input44.c │ │ ├── err.input45.c │ │ ├── err.input46.c │ │ ├── err.input47.c │ │ ├── err.input48.c │ │ ├── err.input49.c │ │ ├── err.input50.c │ │ ├── err.input51.c │ │ ├── err.input52.c │ │ ├── err.input56.c │ │ ├── err.input57.c │ │ ├── err.input59.c │ │ ├── err.input60.c │ │ ├── err.input61.c │ │ ├── input01.c │ │ ├── 
input02.c │ │ ├── input03.c │ │ ├── input04.c │ │ ├── input05.c │ │ ├── input06.c │ │ ├── input07.c │ │ ├── input08.c │ │ ├── input09.c │ │ ├── input10.c │ │ ├── input11.c │ │ ├── input12.c │ │ ├── input13.c │ │ ├── input14.c │ │ ├── input15.c │ │ ├── input16.c │ │ ├── input17.c │ │ ├── input18.c │ │ ├── input18a.c │ │ ├── input19.c │ │ ├── input20.c │ │ ├── input21.c │ │ ├── input22.c │ │ ├── input23.c │ │ ├── input24.c │ │ ├── input25.c │ │ ├── input26.c │ │ ├── input27.c │ │ ├── input28.c │ │ ├── input29.c │ │ ├── input30.c │ │ ├── input31.c │ │ ├── input32.c │ │ ├── input33.c │ │ ├── input34.c │ │ ├── input35.c │ │ ├── input36.c │ │ ├── input37.c │ │ ├── input38.c │ │ ├── input39.c │ │ ├── input40.c │ │ ├── input41.c │ │ ├── input42.c │ │ ├── input43.c │ │ ├── input44.c │ │ ├── input45.c │ │ ├── input46.c │ │ ├── input47.c │ │ ├── input48.c │ │ ├── input49.c │ │ ├── input50.c │ │ ├── input51.c │ │ ├── input52.c │ │ ├── input53.c │ │ ├── input54.c │ │ ├── input55.c │ │ ├── input56.c │ │ ├── input57.c │ │ ├── input58.c │ │ ├── input59.c │ │ ├── input60.c │ │ ├── input61.c │ │ ├── mktests │ │ ├── out.input01.c │ │ ├── out.input02.c │ │ ├── out.input03.c │ │ ├── out.input04.c │ │ ├── out.input05.c │ │ ├── out.input06.c │ │ ├── out.input07.c │ │ ├── out.input08.c │ │ ├── out.input09.c │ │ ├── out.input10.c │ │ ├── out.input11.c │ │ ├── out.input12.c │ │ ├── out.input13.c │ │ ├── out.input14.c │ │ ├── out.input15.c │ │ ├── out.input16.c │ │ ├── out.input17.c │ │ ├── out.input18.c │ │ ├── out.input18a.c │ │ ├── out.input19.c │ │ ├── out.input20.c │ │ ├── out.input21.c │ │ ├── out.input22.c │ │ ├── out.input23.c │ │ ├── out.input24.c │ │ ├── out.input25.c │ │ ├── out.input26.c │ │ ├── out.input27.c │ │ ├── out.input28.c │ │ ├── out.input29.c │ │ ├── out.input30.c │ │ ├── out.input53.c │ │ ├── out.input54.c │ │ ├── out.input55.c │ │ ├── out.input58.c │ │ ├── runtests │ │ └── runtestsn │ ├── tree.c │ └── types.c ├── 33_Unions/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ 
├── cg_arm.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── main.c │ ├── misc.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── err.input31.c │ │ ├── err.input32.c │ │ ├── err.input33.c │ │ ├── err.input34.c │ │ ├── err.input35.c │ │ ├── err.input36.c │ │ ├── err.input37.c │ │ ├── err.input38.c │ │ ├── err.input39.c │ │ ├── err.input40.c │ │ ├── err.input41.c │ │ ├── err.input42.c │ │ ├── err.input43.c │ │ ├── err.input44.c │ │ ├── err.input45.c │ │ ├── err.input46.c │ │ ├── err.input47.c │ │ ├── err.input48.c │ │ ├── err.input49.c │ │ ├── err.input50.c │ │ ├── err.input51.c │ │ ├── err.input52.c │ │ ├── err.input56.c │ │ ├── err.input57.c │ │ ├── err.input59.c │ │ ├── err.input60.c │ │ ├── err.input61.c │ │ ├── input01.c │ │ ├── input02.c │ │ ├── input03.c │ │ ├── input04.c │ │ ├── input05.c │ │ ├── input06.c │ │ ├── input07.c │ │ ├── input08.c │ │ ├── input09.c │ │ ├── input10.c │ │ ├── input11.c │ │ ├── input12.c │ │ ├── input13.c │ │ ├── input14.c │ │ ├── input15.c │ │ ├── input16.c │ │ ├── input17.c │ │ ├── input18.c │ │ ├── input18a.c │ │ ├── input19.c │ │ ├── input20.c │ │ ├── input21.c │ │ ├── input22.c │ │ ├── input23.c │ │ ├── input24.c │ │ ├── input25.c │ │ ├── input26.c │ │ ├── input27.c │ │ ├── input28.c │ │ ├── input29.c │ │ ├── input30.c │ │ ├── input31.c │ │ ├── input32.c │ │ ├── input33.c │ │ ├── input34.c │ │ ├── input35.c │ │ ├── input36.c │ │ ├── input37.c │ │ ├── input38.c │ │ ├── input39.c │ │ ├── input40.c │ │ ├── input41.c │ │ ├── input42.c │ │ ├── input43.c │ │ ├── input44.c │ │ ├── input45.c │ │ ├── input46.c │ │ ├── input47.c │ │ ├── input48.c │ │ ├── input49.c │ │ ├── input50.c │ │ ├── input51.c │ │ ├── input52.c │ │ ├── input53.c │ │ ├── input54.c │ │ ├── input55.c │ │ ├── input56.c │ │ ├── input57.c │ │ ├── input58.c │ │ ├── input59.c │ │ ├── input60.c │ │ ├── input61.c │ │ ├── input62.c │ │ ├── mktests │ │ ├── out.input01.c │ │ ├── out.input02.c │ │ ├── out.input03.c │ │ ├── 
out.input04.c │ │ ├── out.input05.c │ │ ├── out.input06.c │ │ ├── out.input07.c │ │ ├── out.input08.c │ │ ├── out.input09.c │ │ ├── out.input10.c │ │ ├── out.input11.c │ │ ├── out.input12.c │ │ ├── out.input13.c │ │ ├── out.input14.c │ │ ├── out.input15.c │ │ ├── out.input16.c │ │ ├── out.input17.c │ │ ├── out.input18.c │ │ ├── out.input18a.c │ │ ├── out.input19.c │ │ ├── out.input20.c │ │ ├── out.input21.c │ │ ├── out.input22.c │ │ ├── out.input23.c │ │ ├── out.input24.c │ │ ├── out.input25.c │ │ ├── out.input26.c │ │ ├── out.input27.c │ │ ├── out.input28.c │ │ ├── out.input29.c │ │ ├── out.input30.c │ │ ├── out.input53.c │ │ ├── out.input54.c │ │ ├── out.input55.c │ │ ├── out.input58.c │ │ ├── out.input62.c │ │ ├── runtests │ │ └── runtestsn │ ├── tree.c │ └── types.c ├── 34_Enums_and_Typedefs/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cg_arm.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── main.c │ ├── misc.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── err.input31.c │ │ ├── err.input32.c │ │ ├── err.input33.c │ │ ├── err.input34.c │ │ ├── err.input35.c │ │ ├── err.input36.c │ │ ├── err.input37.c │ │ ├── err.input38.c │ │ ├── err.input39.c │ │ ├── err.input40.c │ │ ├── err.input41.c │ │ ├── err.input42.c │ │ ├── err.input43.c │ │ ├── err.input44.c │ │ ├── err.input45.c │ │ ├── err.input46.c │ │ ├── err.input47.c │ │ ├── err.input48.c │ │ ├── err.input49.c │ │ ├── err.input50.c │ │ ├── err.input51.c │ │ ├── err.input52.c │ │ ├── err.input56.c │ │ ├── err.input57.c │ │ ├── err.input59.c │ │ ├── err.input60.c │ │ ├── err.input61.c │ │ ├── err.input64.c │ │ ├── err.input65.c │ │ ├── err.input66.c │ │ ├── err.input68.c │ │ ├── err.input69.c │ │ ├── input01.c │ │ ├── input02.c │ │ ├── input03.c │ │ ├── input04.c │ │ ├── input05.c │ │ ├── input06.c │ │ ├── input07.c │ │ ├── input08.c │ │ ├── input09.c │ │ ├── input10.c │ │ ├── input11.c │ │ ├── input12.c │ │ ├── input13.c │ │ ├── input14.c │ │ ├── 
input15.c │ │ ├── input16.c │ │ ├── input17.c │ │ ├── input18.c │ │ ├── input18a.c │ │ ├── input19.c │ │ ├── input20.c │ │ ├── input21.c │ │ ├── input22.c │ │ ├── input23.c │ │ ├── input24.c │ │ ├── input25.c │ │ ├── input26.c │ │ ├── input27.c │ │ ├── input28.c │ │ ├── input29.c │ │ ├── input30.c │ │ ├── input31.c │ │ ├── input32.c │ │ ├── input33.c │ │ ├── input34.c │ │ ├── input35.c │ │ ├── input36.c │ │ ├── input37.c │ │ ├── input38.c │ │ ├── input39.c │ │ ├── input40.c │ │ ├── input41.c │ │ ├── input42.c │ │ ├── input43.c │ │ ├── input44.c │ │ ├── input45.c │ │ ├── input46.c │ │ ├── input47.c │ │ ├── input48.c │ │ ├── input49.c │ │ ├── input50.c │ │ ├── input51.c │ │ ├── input52.c │ │ ├── input53.c │ │ ├── input54.c │ │ ├── input55.c │ │ ├── input56.c │ │ ├── input57.c │ │ ├── input58.c │ │ ├── input59.c │ │ ├── input60.c │ │ ├── input61.c │ │ ├── input62.c │ │ ├── input63.c │ │ ├── input64.c │ │ ├── input65.c │ │ ├── input66.c │ │ ├── input67.c │ │ ├── input68.c │ │ ├── input69.c │ │ ├── mktests │ │ ├── out.input01.c │ │ ├── out.input02.c │ │ ├── out.input03.c │ │ ├── out.input04.c │ │ ├── out.input05.c │ │ ├── out.input06.c │ │ ├── out.input07.c │ │ ├── out.input08.c │ │ ├── out.input09.c │ │ ├── out.input10.c │ │ ├── out.input11.c │ │ ├── out.input12.c │ │ ├── out.input13.c │ │ ├── out.input14.c │ │ ├── out.input15.c │ │ ├── out.input16.c │ │ ├── out.input17.c │ │ ├── out.input18.c │ │ ├── out.input18a.c │ │ ├── out.input19.c │ │ ├── out.input20.c │ │ ├── out.input21.c │ │ ├── out.input22.c │ │ ├── out.input23.c │ │ ├── out.input24.c │ │ ├── out.input25.c │ │ ├── out.input26.c │ │ ├── out.input27.c │ │ ├── out.input28.c │ │ ├── out.input29.c │ │ ├── out.input30.c │ │ ├── out.input53.c │ │ ├── out.input54.c │ │ ├── out.input55.c │ │ ├── out.input58.c │ │ ├── out.input62.c │ │ ├── out.input63.c │ │ ├── out.input67.c │ │ ├── runtests │ │ └── runtestsn │ ├── tree.c │ └── types.c ├── 35_Preprocessor/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cg_arm.c │ ├── 
cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── include/ │ │ ├── fcntl.h │ │ ├── stddef.h │ │ └── stdio.h │ ├── main.c │ ├── misc.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── err.input31.c │ │ ├── err.input32.c │ │ ├── err.input33.c │ │ ├── err.input34.c │ │ ├── err.input35.c │ │ ├── err.input36.c │ │ ├── err.input37.c │ │ ├── err.input38.c │ │ ├── err.input39.c │ │ ├── err.input40.c │ │ ├── err.input41.c │ │ ├── err.input42.c │ │ ├── err.input43.c │ │ ├── err.input44.c │ │ ├── err.input45.c │ │ ├── err.input46.c │ │ ├── err.input47.c │ │ ├── err.input48.c │ │ ├── err.input49.c │ │ ├── err.input50.c │ │ ├── err.input51.c │ │ ├── err.input52.c │ │ ├── err.input56.c │ │ ├── err.input57.c │ │ ├── err.input59.c │ │ ├── err.input60.c │ │ ├── err.input61.c │ │ ├── err.input64.c │ │ ├── err.input65.c │ │ ├── err.input66.c │ │ ├── err.input68.c │ │ ├── err.input69.c │ │ ├── input01.c │ │ ├── input02.c │ │ ├── input03.c │ │ ├── input04.c │ │ ├── input05.c │ │ ├── input06.c │ │ ├── input07.c │ │ ├── input08.c │ │ ├── input09.c │ │ ├── input10.c │ │ ├── input11.c │ │ ├── input12.c │ │ ├── input13.c │ │ ├── input14.c │ │ ├── input15.c │ │ ├── input16.c │ │ ├── input17.c │ │ ├── input18.c │ │ ├── input18a.c │ │ ├── input19.c │ │ ├── input20.c │ │ ├── input21.c │ │ ├── input22.c │ │ ├── input23.c │ │ ├── input24.c │ │ ├── input25.c │ │ ├── input26.c │ │ ├── input27.c │ │ ├── input28.c │ │ ├── input29.c │ │ ├── input30.c │ │ ├── input31.c │ │ ├── input32.c │ │ ├── input33.c │ │ ├── input34.c │ │ ├── input35.c │ │ ├── input36.c │ │ ├── input37.c │ │ ├── input38.c │ │ ├── input39.c │ │ ├── input40.c │ │ ├── input41.c │ │ ├── input42.c │ │ ├── input43.c │ │ ├── input44.c │ │ ├── input45.c │ │ ├── input46.c │ │ ├── input47.c │ │ ├── input48.c │ │ ├── input49.c │ │ ├── input50.c │ │ ├── input51.c │ │ ├── input52.c │ │ ├── input53.c │ │ ├── input54.c │ │ ├── input55.c │ │ ├── input56.c │ │ ├── input57.c │ │ ├── input58.c │ │ ├── 
input59.c │ │ ├── input60.c │ │ ├── input61.c │ │ ├── input62.c │ │ ├── input63.c │ │ ├── input64.c │ │ ├── input65.c │ │ ├── input66.c │ │ ├── input67.c │ │ ├── input68.c │ │ ├── input69.c │ │ ├── input70.c │ │ ├── mktests │ │ ├── out.input01.c │ │ ├── out.input02.c │ │ ├── out.input03.c │ │ ├── out.input04.c │ │ ├── out.input05.c │ │ ├── out.input06.c │ │ ├── out.input07.c │ │ ├── out.input08.c │ │ ├── out.input09.c │ │ ├── out.input10.c │ │ ├── out.input11.c │ │ ├── out.input12.c │ │ ├── out.input13.c │ │ ├── out.input14.c │ │ ├── out.input15.c │ │ ├── out.input16.c │ │ ├── out.input17.c │ │ ├── out.input18.c │ │ ├── out.input18a.c │ │ ├── out.input19.c │ │ ├── out.input20.c │ │ ├── out.input21.c │ │ ├── out.input22.c │ │ ├── out.input23.c │ │ ├── out.input24.c │ │ ├── out.input25.c │ │ ├── out.input26.c │ │ ├── out.input27.c │ │ ├── out.input28.c │ │ ├── out.input29.c │ │ ├── out.input30.c │ │ ├── out.input53.c │ │ ├── out.input54.c │ │ ├── out.input55.c │ │ ├── out.input58.c │ │ ├── out.input62.c │ │ ├── out.input63.c │ │ ├── out.input67.c │ │ ├── out.input70.c │ │ ├── runtests │ │ └── runtestsn │ ├── tree.c │ └── types.c ├── 36_Break_Continue/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cg_arm.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── include/ │ │ ├── fcntl.h │ │ ├── stddef.h │ │ └── stdio.h │ ├── main.c │ ├── misc.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── err.input31.c │ │ ├── err.input32.c │ │ ├── err.input33.c │ │ ├── err.input34.c │ │ ├── err.input35.c │ │ ├── err.input36.c │ │ ├── err.input37.c │ │ ├── err.input38.c │ │ ├── err.input39.c │ │ ├── err.input40.c │ │ ├── err.input41.c │ │ ├── err.input42.c │ │ ├── err.input43.c │ │ ├── err.input44.c │ │ ├── err.input45.c │ │ ├── err.input46.c │ │ ├── err.input47.c │ │ ├── err.input48.c │ │ ├── err.input49.c │ │ ├── err.input50.c │ │ ├── err.input51.c │ │ ├── err.input52.c │ │ ├── err.input56.c │ │ ├── err.input57.c │ │ ├── 
err.input59.c │ │ ├── err.input60.c │ │ ├── err.input61.c │ │ ├── err.input64.c │ │ ├── err.input65.c │ │ ├── err.input66.c │ │ ├── err.input68.c │ │ ├── err.input69.c │ │ ├── err.input72.c │ │ ├── err.input73.c │ │ ├── input01.c │ │ ├── input02.c │ │ ├── input03.c │ │ ├── input04.c │ │ ├── input05.c │ │ ├── input06.c │ │ ├── input07.c │ │ ├── input08.c │ │ ├── input09.c │ │ ├── input10.c │ │ ├── input11.c │ │ ├── input12.c │ │ ├── input13.c │ │ ├── input14.c │ │ ├── input15.c │ │ ├── input16.c │ │ ├── input17.c │ │ ├── input18.c │ │ ├── input18a.c │ │ ├── input19.c │ │ ├── input20.c │ │ ├── input21.c │ │ ├── input22.c │ │ ├── input23.c │ │ ├── input24.c │ │ ├── input25.c │ │ ├── input26.c │ │ ├── input27.c │ │ ├── input28.c │ │ ├── input29.c │ │ ├── input30.c │ │ ├── input31.c │ │ ├── input32.c │ │ ├── input33.c │ │ ├── input34.c │ │ ├── input35.c │ │ ├── input36.c │ │ ├── input37.c │ │ ├── input38.c │ │ ├── input39.c │ │ ├── input40.c │ │ ├── input41.c │ │ ├── input42.c │ │ ├── input43.c │ │ ├── input44.c │ │ ├── input45.c │ │ ├── input46.c │ │ ├── input47.c │ │ ├── input48.c │ │ ├── input49.c │ │ ├── input50.c │ │ ├── input51.c │ │ ├── input52.c │ │ ├── input53.c │ │ ├── input54.c │ │ ├── input55.c │ │ ├── input56.c │ │ ├── input57.c │ │ ├── input58.c │ │ ├── input59.c │ │ ├── input60.c │ │ ├── input61.c │ │ ├── input62.c │ │ ├── input63.c │ │ ├── input64.c │ │ ├── input65.c │ │ ├── input66.c │ │ ├── input67.c │ │ ├── input68.c │ │ ├── input69.c │ │ ├── input70.c │ │ ├── input71.c │ │ ├── input72.c │ │ ├── input73.c │ │ ├── mktests │ │ ├── out.input01.c │ │ ├── out.input02.c │ │ ├── out.input03.c │ │ ├── out.input04.c │ │ ├── out.input05.c │ │ ├── out.input06.c │ │ ├── out.input07.c │ │ ├── out.input08.c │ │ ├── out.input09.c │ │ ├── out.input10.c │ │ ├── out.input11.c │ │ ├── out.input12.c │ │ ├── out.input13.c │ │ ├── out.input14.c │ │ ├── out.input15.c │ │ ├── out.input16.c │ │ ├── out.input17.c │ │ ├── out.input18.c │ │ ├── out.input18a.c │ │ ├── 
out.input19.c │ │ ├── out.input20.c │ │ ├── out.input21.c │ │ ├── out.input22.c │ │ ├── out.input23.c │ │ ├── out.input24.c │ │ ├── out.input25.c │ │ ├── out.input26.c │ │ ├── out.input27.c │ │ ├── out.input28.c │ │ ├── out.input29.c │ │ ├── out.input30.c │ │ ├── out.input53.c │ │ ├── out.input54.c │ │ ├── out.input55.c │ │ ├── out.input58.c │ │ ├── out.input62.c │ │ ├── out.input63.c │ │ ├── out.input67.c │ │ ├── out.input70.c │ │ ├── out.input71.c │ │ ├── runtests │ │ └── runtestsn │ ├── tree.c │ └── types.c ├── 37_Switch/ │ ├── Figs/ │ │ ├── switch_ast.dia │ │ ├── switch_logic.dia │ │ └── switch_logic2.dia │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cg_arm.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── include/ │ │ ├── fcntl.h │ │ ├── stddef.h │ │ └── stdio.h │ ├── main.c │ ├── misc.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── err.input31.c │ │ ├── err.input32.c │ │ ├── err.input33.c │ │ ├── err.input34.c │ │ ├── err.input35.c │ │ ├── err.input36.c │ │ ├── err.input37.c │ │ ├── err.input38.c │ │ ├── err.input39.c │ │ ├── err.input40.c │ │ ├── err.input41.c │ │ ├── err.input42.c │ │ ├── err.input43.c │ │ ├── err.input44.c │ │ ├── err.input45.c │ │ ├── err.input46.c │ │ ├── err.input47.c │ │ ├── err.input48.c │ │ ├── err.input49.c │ │ ├── err.input50.c │ │ ├── err.input51.c │ │ ├── err.input52.c │ │ ├── err.input56.c │ │ ├── err.input57.c │ │ ├── err.input59.c │ │ ├── err.input60.c │ │ ├── err.input61.c │ │ ├── err.input64.c │ │ ├── err.input65.c │ │ ├── err.input66.c │ │ ├── err.input68.c │ │ ├── err.input69.c │ │ ├── err.input72.c │ │ ├── err.input73.c │ │ ├── err.input75.c │ │ ├── err.input76.c │ │ ├── err.input77.c │ │ ├── err.input78.c │ │ ├── err.input79.c │ │ ├── input01.c │ │ ├── input02.c │ │ ├── input03.c │ │ ├── input04.c │ │ ├── input05.c │ │ ├── input06.c │ │ ├── input07.c │ │ ├── input08.c │ │ ├── input09.c │ │ ├── input10.c │ │ ├── input11.c │ │ ├── input12.c │ │ ├── input13.c │ │ 
├── input14.c │ │ ├── input15.c │ │ ├── input16.c │ │ ├── input17.c │ │ ├── input18.c │ │ ├── input18a.c │ │ ├── input19.c │ │ ├── input20.c │ │ ├── input21.c │ │ ├── input22.c │ │ ├── input23.c │ │ ├── input24.c │ │ ├── input25.c │ │ ├── input26.c │ │ ├── input27.c │ │ ├── input28.c │ │ ├── input29.c │ │ ├── input30.c │ │ ├── input31.c │ │ ├── input32.c │ │ ├── input33.c │ │ ├── input34.c │ │ ├── input35.c │ │ ├── input36.c │ │ ├── input37.c │ │ ├── input38.c │ │ ├── input39.c │ │ ├── input40.c │ │ ├── input41.c │ │ ├── input42.c │ │ ├── input43.c │ │ ├── input44.c │ │ ├── input45.c │ │ ├── input46.c │ │ ├── input47.c │ │ ├── input48.c │ │ ├── input49.c │ │ ├── input50.c │ │ ├── input51.c │ │ ├── input52.c │ │ ├── input53.c │ │ ├── input54.c │ │ ├── input55.c │ │ ├── input56.c │ │ ├── input57.c │ │ ├── input58.c │ │ ├── input59.c │ │ ├── input60.c │ │ ├── input61.c │ │ ├── input62.c │ │ ├── input63.c │ │ ├── input64.c │ │ ├── input65.c │ │ ├── input66.c │ │ ├── input67.c │ │ ├── input68.c │ │ ├── input69.c │ │ ├── input70.c │ │ ├── input71.c │ │ ├── input72.c │ │ ├── input73.c │ │ ├── input74.c │ │ ├── input75.c │ │ ├── input76.c │ │ ├── input77.c │ │ ├── input78.c │ │ ├── input79.c │ │ ├── mktests │ │ ├── out.input01.c │ │ ├── out.input02.c │ │ ├── out.input03.c │ │ ├── out.input04.c │ │ ├── out.input05.c │ │ ├── out.input06.c │ │ ├── out.input07.c │ │ ├── out.input08.c │ │ ├── out.input09.c │ │ ├── out.input10.c │ │ ├── out.input11.c │ │ ├── out.input12.c │ │ ├── out.input13.c │ │ ├── out.input14.c │ │ ├── out.input15.c │ │ ├── out.input16.c │ │ ├── out.input17.c │ │ ├── out.input18.c │ │ ├── out.input18a.c │ │ ├── out.input19.c │ │ ├── out.input20.c │ │ ├── out.input21.c │ │ ├── out.input22.c │ │ ├── out.input23.c │ │ ├── out.input24.c │ │ ├── out.input25.c │ │ ├── out.input26.c │ │ ├── out.input27.c │ │ ├── out.input28.c │ │ ├── out.input29.c │ │ ├── out.input30.c │ │ ├── out.input53.c │ │ ├── out.input54.c │ │ ├── out.input55.c │ │ ├── out.input58.c │ │ ├── 
out.input62.c │ │ ├── out.input63.c │ │ ├── out.input67.c │ │ ├── out.input70.c │ │ ├── out.input71.c │ │ ├── out.input74.c │ │ ├── runtests │ │ └── runtestsn │ ├── tree.c │ └── types.c ├── 38_Dangling_Else/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cg_arm.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── include/ │ │ ├── fcntl.h │ │ ├── stddef.h │ │ └── stdio.h │ ├── main.c │ ├── misc.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── err.input31.c │ │ ├── err.input32.c │ │ ├── err.input33.c │ │ ├── err.input34.c │ │ ├── err.input35.c │ │ ├── err.input36.c │ │ ├── err.input37.c │ │ ├── err.input38.c │ │ ├── err.input39.c │ │ ├── err.input40.c │ │ ├── err.input41.c │ │ ├── err.input42.c │ │ ├── err.input43.c │ │ ├── err.input44.c │ │ ├── err.input45.c │ │ ├── err.input46.c │ │ ├── err.input47.c │ │ ├── err.input48.c │ │ ├── err.input49.c │ │ ├── err.input50.c │ │ ├── err.input51.c │ │ ├── err.input52.c │ │ ├── err.input56.c │ │ ├── err.input57.c │ │ ├── err.input59.c │ │ ├── err.input60.c │ │ ├── err.input61.c │ │ ├── err.input64.c │ │ ├── err.input65.c │ │ ├── err.input66.c │ │ ├── err.input68.c │ │ ├── err.input69.c │ │ ├── err.input72.c │ │ ├── err.input73.c │ │ ├── err.input75.c │ │ ├── err.input76.c │ │ ├── err.input77.c │ │ ├── err.input78.c │ │ ├── err.input79.c │ │ ├── input01.c │ │ ├── input02.c │ │ ├── input03.c │ │ ├── input04.c │ │ ├── input05.c │ │ ├── input06.c │ │ ├── input07.c │ │ ├── input08.c │ │ ├── input09.c │ │ ├── input10.c │ │ ├── input11.c │ │ ├── input12.c │ │ ├── input13.c │ │ ├── input14.c │ │ ├── input15.c │ │ ├── input16.c │ │ ├── input17.c │ │ ├── input18.c │ │ ├── input18a.c │ │ ├── input19.c │ │ ├── input20.c │ │ ├── input21.c │ │ ├── input22.c │ │ ├── input23.c │ │ ├── input24.c │ │ ├── input25.c │ │ ├── input26.c │ │ ├── input27.c │ │ ├── input28.c │ │ ├── input29.c │ │ ├── input30.c │ │ ├── input31.c │ │ ├── input32.c │ │ ├── input33.c │ │ ├── input34.c │ │ ├── 
input35.c │ │ ├── input36.c │ │ ├── input37.c │ │ ├── input38.c │ │ ├── input39.c │ │ ├── input40.c │ │ ├── input41.c │ │ ├── input42.c │ │ ├── input43.c │ │ ├── input44.c │ │ ├── input45.c │ │ ├── input46.c │ │ ├── input47.c │ │ ├── input48.c │ │ ├── input49.c │ │ ├── input50.c │ │ ├── input51.c │ │ ├── input52.c │ │ ├── input53.c │ │ ├── input54.c │ │ ├── input55.c │ │ ├── input56.c │ │ ├── input57.c │ │ ├── input58.c │ │ ├── input59.c │ │ ├── input60.c │ │ ├── input61.c │ │ ├── input62.c │ │ ├── input63.c │ │ ├── input64.c │ │ ├── input65.c │ │ ├── input66.c │ │ ├── input67.c │ │ ├── input68.c │ │ ├── input69.c │ │ ├── input70.c │ │ ├── input71.c │ │ ├── input72.c │ │ ├── input73.c │ │ ├── input74.c │ │ ├── input75.c │ │ ├── input76.c │ │ ├── input77.c │ │ ├── input78.c │ │ ├── input79.c │ │ ├── input80.c │ │ ├── input81.c │ │ ├── input82.c │ │ ├── input83.c │ │ ├── mktests │ │ ├── out.input01.c │ │ ├── out.input02.c │ │ ├── out.input03.c │ │ ├── out.input04.c │ │ ├── out.input05.c │ │ ├── out.input06.c │ │ ├── out.input07.c │ │ ├── out.input08.c │ │ ├── out.input09.c │ │ ├── out.input10.c │ │ ├── out.input11.c │ │ ├── out.input12.c │ │ ├── out.input13.c │ │ ├── out.input14.c │ │ ├── out.input15.c │ │ ├── out.input16.c │ │ ├── out.input17.c │ │ ├── out.input18.c │ │ ├── out.input18a.c │ │ ├── out.input19.c │ │ ├── out.input20.c │ │ ├── out.input21.c │ │ ├── out.input22.c │ │ ├── out.input23.c │ │ ├── out.input24.c │ │ ├── out.input25.c │ │ ├── out.input26.c │ │ ├── out.input27.c │ │ ├── out.input28.c │ │ ├── out.input29.c │ │ ├── out.input30.c │ │ ├── out.input53.c │ │ ├── out.input54.c │ │ ├── out.input55.c │ │ ├── out.input58.c │ │ ├── out.input62.c │ │ ├── out.input63.c │ │ ├── out.input67.c │ │ ├── out.input70.c │ │ ├── out.input71.c │ │ ├── out.input74.c │ │ ├── out.input80.c │ │ ├── out.input81.c │ │ ├── out.input82.c │ │ ├── out.input83.c │ │ ├── runtests │ │ └── runtestsn │ ├── tree.c │ └── types.c ├── 39_Var_Initialisation_pt1/ │ ├── Figs/ │ │ └── 
decl_call_graph.dia │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cg_arm.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── include/ │ │ ├── fcntl.h │ │ ├── stddef.h │ │ └── stdio.h │ ├── main.c │ ├── misc.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── err.input31.c │ │ ├── err.input32.c │ │ ├── err.input33.c │ │ ├── err.input34.c │ │ ├── err.input35.c │ │ ├── err.input36.c │ │ ├── err.input37.c │ │ ├── err.input38.c │ │ ├── err.input39.c │ │ ├── err.input40.c │ │ ├── err.input41.c │ │ ├── err.input42.c │ │ ├── err.input43.c │ │ ├── err.input44.c │ │ ├── err.input45.c │ │ ├── err.input46.c │ │ ├── err.input47.c │ │ ├── err.input48.c │ │ ├── err.input49.c │ │ ├── err.input50.c │ │ ├── err.input51.c │ │ ├── err.input52.c │ │ ├── err.input56.c │ │ ├── err.input57.c │ │ ├── err.input59.c │ │ ├── err.input60.c │ │ ├── err.input61.c │ │ ├── err.input64.c │ │ ├── err.input65.c │ │ ├── err.input66.c │ │ ├── err.input68.c │ │ ├── err.input69.c │ │ ├── err.input72.c │ │ ├── err.input73.c │ │ ├── err.input75.c │ │ ├── err.input76.c │ │ ├── err.input77.c │ │ ├── err.input78.c │ │ ├── err.input79.c │ │ ├── err.input85.c │ │ ├── err.input86.c │ │ ├── err.input87.c │ │ ├── input01.c │ │ ├── input02.c │ │ ├── input03.c │ │ ├── input04.c │ │ ├── input05.c │ │ ├── input06.c │ │ ├── input07.c │ │ ├── input08.c │ │ ├── input09.c │ │ ├── input10.c │ │ ├── input11.c │ │ ├── input12.c │ │ ├── input13.c │ │ ├── input14.c │ │ ├── input15.c │ │ ├── input16.c │ │ ├── input17.c │ │ ├── input18.c │ │ ├── input18a.c │ │ ├── input19.c │ │ ├── input20.c │ │ ├── input21.c │ │ ├── input22.c │ │ ├── input23.c │ │ ├── input24.c │ │ ├── input25.c │ │ ├── input26.c │ │ ├── input27.c │ │ ├── input28.c │ │ ├── input29.c │ │ ├── input30.c │ │ ├── input31.c │ │ ├── input32.c │ │ ├── input33.c │ │ ├── input34.c │ │ ├── input35.c │ │ ├── input36.c │ │ ├── input37.c │ │ ├── input38.c │ │ ├── input39.c │ │ ├── input40.c │ │ ├── input41.c │ │ ├── 
input42.c │ │ ├── input43.c │ │ ├── input44.c │ │ ├── input45.c │ │ ├── input46.c │ │ ├── input47.c │ │ ├── input48.c │ │ ├── input49.c │ │ ├── input50.c │ │ ├── input51.c │ │ ├── input52.c │ │ ├── input53.c │ │ ├── input54.c │ │ ├── input55.c │ │ ├── input56.c │ │ ├── input57.c │ │ ├── input58.c │ │ ├── input59.c │ │ ├── input60.c │ │ ├── input61.c │ │ ├── input62.c │ │ ├── input63.c │ │ ├── input64.c │ │ ├── input65.c │ │ ├── input66.c │ │ ├── input67.c │ │ ├── input68.c │ │ ├── input69.c │ │ ├── input70.c │ │ ├── input71.c │ │ ├── input72.c │ │ ├── input73.c │ │ ├── input74.c │ │ ├── input75.c │ │ ├── input76.c │ │ ├── input77.c │ │ ├── input78.c │ │ ├── input79.c │ │ ├── input80.c │ │ ├── input81.c │ │ ├── input82.c │ │ ├── input83.c │ │ ├── input84.c │ │ ├── input85.c │ │ ├── input86.c │ │ ├── input87.c │ │ ├── input88.c │ │ ├── mktests │ │ ├── out.input01.c │ │ ├── out.input02.c │ │ ├── out.input03.c │ │ ├── out.input04.c │ │ ├── out.input05.c │ │ ├── out.input06.c │ │ ├── out.input07.c │ │ ├── out.input08.c │ │ ├── out.input09.c │ │ ├── out.input10.c │ │ ├── out.input11.c │ │ ├── out.input12.c │ │ ├── out.input13.c │ │ ├── out.input14.c │ │ ├── out.input15.c │ │ ├── out.input16.c │ │ ├── out.input17.c │ │ ├── out.input18.c │ │ ├── out.input18a.c │ │ ├── out.input19.c │ │ ├── out.input20.c │ │ ├── out.input21.c │ │ ├── out.input22.c │ │ ├── out.input23.c │ │ ├── out.input24.c │ │ ├── out.input25.c │ │ ├── out.input26.c │ │ ├── out.input27.c │ │ ├── out.input28.c │ │ ├── out.input29.c │ │ ├── out.input30.c │ │ ├── out.input53.c │ │ ├── out.input54.c │ │ ├── out.input55.c │ │ ├── out.input58.c │ │ ├── out.input62.c │ │ ├── out.input63.c │ │ ├── out.input67.c │ │ ├── out.input70.c │ │ ├── out.input71.c │ │ ├── out.input74.c │ │ ├── out.input80.c │ │ ├── out.input81.c │ │ ├── out.input82.c │ │ ├── out.input83.c │ │ ├── out.input84.c │ │ ├── out.input88.c │ │ ├── runtests │ │ └── runtestsn │ ├── tree.c │ └── types.c ├── 40_Var_Initialisation_pt2/ │ ├── Makefile │ 
├── Readme.md │ ├── cg.c │ ├── cg_arm.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── include/ │ │ ├── fcntl.h │ │ ├── stddef.h │ │ └── stdio.h │ ├── main.c │ ├── misc.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── err.input31.c │ │ ├── err.input32.c │ │ ├── err.input33.c │ │ ├── err.input34.c │ │ ├── err.input35.c │ │ ├── err.input36.c │ │ ├── err.input37.c │ │ ├── err.input38.c │ │ ├── err.input39.c │ │ ├── err.input40.c │ │ ├── err.input41.c │ │ ├── err.input42.c │ │ ├── err.input43.c │ │ ├── err.input44.c │ │ ├── err.input45.c │ │ ├── err.input46.c │ │ ├── err.input47.c │ │ ├── err.input48.c │ │ ├── err.input49.c │ │ ├── err.input50.c │ │ ├── err.input51.c │ │ ├── err.input52.c │ │ ├── err.input56.c │ │ ├── err.input57.c │ │ ├── err.input59.c │ │ ├── err.input60.c │ │ ├── err.input61.c │ │ ├── err.input64.c │ │ ├── err.input65.c │ │ ├── err.input66.c │ │ ├── err.input68.c │ │ ├── err.input69.c │ │ ├── err.input72.c │ │ ├── err.input73.c │ │ ├── err.input75.c │ │ ├── err.input76.c │ │ ├── err.input77.c │ │ ├── err.input78.c │ │ ├── err.input79.c │ │ ├── err.input85.c │ │ ├── err.input86.c │ │ ├── err.input87.c │ │ ├── err.input92.c │ │ ├── err.input93.c │ │ ├── err.input94.c │ │ ├── err.input95.c │ │ ├── err.input96.c │ │ ├── err.input97.c │ │ ├── err.input98.c │ │ ├── input01.c │ │ ├── input02.c │ │ ├── input03.c │ │ ├── input04.c │ │ ├── input05.c │ │ ├── input06.c │ │ ├── input07.c │ │ ├── input08.c │ │ ├── input09.c │ │ ├── input10.c │ │ ├── input11.c │ │ ├── input12.c │ │ ├── input13.c │ │ ├── input14.c │ │ ├── input15.c │ │ ├── input16.c │ │ ├── input17.c │ │ ├── input18.c │ │ ├── input18a.c │ │ ├── input19.c │ │ ├── input20.c │ │ ├── input21.c │ │ ├── input22.c │ │ ├── input23.c │ │ ├── input24.c │ │ ├── input25.c │ │ ├── input26.c │ │ ├── input27.c │ │ ├── input28.c │ │ ├── input29.c │ │ ├── input30.c │ │ ├── input31.c │ │ ├── input32.c │ │ ├── input33.c │ │ ├── input34.c │ │ ├── input35.c │ 
│ ├── input36.c │ │ ├── input37.c │ │ ├── input38.c │ │ ├── input39.c │ │ ├── input40.c │ │ ├── input41.c │ │ ├── input42.c │ │ ├── input43.c │ │ ├── input44.c │ │ ├── input45.c │ │ ├── input46.c │ │ ├── input47.c │ │ ├── input48.c │ │ ├── input49.c │ │ ├── input50.c │ │ ├── input51.c │ │ ├── input52.c │ │ ├── input53.c │ │ ├── input54.c │ │ ├── input55.c │ │ ├── input56.c │ │ ├── input57.c │ │ ├── input58.c │ │ ├── input59.c │ │ ├── input60.c │ │ ├── input61.c │ │ ├── input62.c │ │ ├── input63.c │ │ ├── input64.c │ │ ├── input65.c │ │ ├── input66.c │ │ ├── input67.c │ │ ├── input68.c │ │ ├── input69.c │ │ ├── input70.c │ │ ├── input71.c │ │ ├── input72.c │ │ ├── input73.c │ │ ├── input74.c │ │ ├── input75.c │ │ ├── input76.c │ │ ├── input77.c │ │ ├── input78.c │ │ ├── input79.c │ │ ├── input80.c │ │ ├── input81.c │ │ ├── input82.c │ │ ├── input83.c │ │ ├── input84.c │ │ ├── input85.c │ │ ├── input86.c │ │ ├── input87.c │ │ ├── input88.c │ │ ├── input89.c │ │ ├── input90.c │ │ ├── input91.c │ │ ├── input92.c │ │ ├── input93.c │ │ ├── input94.c │ │ ├── input95.c │ │ ├── input96.c │ │ ├── input97.c │ │ ├── input98.c │ │ ├── input99.c │ │ ├── mktests │ │ ├── out.input01.c │ │ ├── out.input02.c │ │ ├── out.input03.c │ │ ├── out.input04.c │ │ ├── out.input05.c │ │ ├── out.input06.c │ │ ├── out.input07.c │ │ ├── out.input08.c │ │ ├── out.input09.c │ │ ├── out.input10.c │ │ ├── out.input11.c │ │ ├── out.input12.c │ │ ├── out.input13.c │ │ ├── out.input14.c │ │ ├── out.input15.c │ │ ├── out.input16.c │ │ ├── out.input17.c │ │ ├── out.input18.c │ │ ├── out.input18a.c │ │ ├── out.input19.c │ │ ├── out.input20.c │ │ ├── out.input21.c │ │ ├── out.input22.c │ │ ├── out.input23.c │ │ ├── out.input24.c │ │ ├── out.input25.c │ │ ├── out.input26.c │ │ ├── out.input27.c │ │ ├── out.input28.c │ │ ├── out.input29.c │ │ ├── out.input30.c │ │ ├── out.input53.c │ │ ├── out.input54.c │ │ ├── out.input55.c │ │ ├── out.input58.c │ │ ├── out.input62.c │ │ ├── out.input63.c │ │ ├── 
out.input67.c │ │ ├── out.input70.c │ │ ├── out.input71.c │ │ ├── out.input74.c │ │ ├── out.input80.c │ │ ├── out.input81.c │ │ ├── out.input82.c │ │ ├── out.input83.c │ │ ├── out.input84.c │ │ ├── out.input88.c │ │ ├── out.input89.c │ │ ├── out.input90.c │ │ ├── out.input91.c │ │ ├── out.input99.c │ │ ├── runtests │ │ └── runtestsn │ ├── tree.c │ └── types.c ├── 41_Local_Var_Init/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cg_arm.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── include/ │ │ ├── fcntl.h │ │ ├── stddef.h │ │ └── stdio.h │ ├── main.c │ ├── misc.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── err.input031.c │ │ ├── err.input032.c │ │ ├── err.input033.c │ │ ├── err.input034.c │ │ ├── err.input035.c │ │ ├── err.input036.c │ │ ├── err.input037.c │ │ ├── err.input038.c │ │ ├── err.input039.c │ │ ├── err.input040.c │ │ ├── err.input041.c │ │ ├── err.input042.c │ │ ├── err.input043.c │ │ ├── err.input044.c │ │ ├── err.input045.c │ │ ├── err.input046.c │ │ ├── err.input047.c │ │ ├── err.input048.c │ │ ├── err.input049.c │ │ ├── err.input050.c │ │ ├── err.input051.c │ │ ├── err.input052.c │ │ ├── err.input056.c │ │ ├── err.input057.c │ │ ├── err.input059.c │ │ ├── err.input060.c │ │ ├── err.input061.c │ │ ├── err.input064.c │ │ ├── err.input065.c │ │ ├── err.input066.c │ │ ├── err.input068.c │ │ ├── err.input069.c │ │ ├── err.input072.c │ │ ├── err.input073.c │ │ ├── err.input075.c │ │ ├── err.input076.c │ │ ├── err.input077.c │ │ ├── err.input078.c │ │ ├── err.input079.c │ │ ├── err.input085.c │ │ ├── err.input086.c │ │ ├── err.input087.c │ │ ├── err.input092.c │ │ ├── err.input093.c │ │ ├── err.input094.c │ │ ├── err.input095.c │ │ ├── err.input096.c │ │ ├── err.input097.c │ │ ├── err.input098.c │ │ ├── input001.c │ │ ├── input002.c │ │ ├── input003.c │ │ ├── input004.c │ │ ├── input005.c │ │ ├── input006.c │ │ ├── input007.c │ │ ├── input008.c │ │ ├── input009.c │ │ ├── input010.c │ │ ├── 
input011.c │ │ ├── input012.c │ │ ├── input013.c │ │ ├── input014.c │ │ ├── input015.c │ │ ├── input016.c │ │ ├── input017.c │ │ ├── input018.c │ │ ├── input018a.c │ │ ├── input019.c │ │ ├── input020.c │ │ ├── input021.c │ │ ├── input022.c │ │ ├── input023.c │ │ ├── input024.c │ │ ├── input025.c │ │ ├── input026.c │ │ ├── input027.c │ │ ├── input028.c │ │ ├── input029.c │ │ ├── input030.c │ │ ├── input031.c │ │ ├── input032.c │ │ ├── input033.c │ │ ├── input034.c │ │ ├── input035.c │ │ ├── input036.c │ │ ├── input037.c │ │ ├── input038.c │ │ ├── input039.c │ │ ├── input040.c │ │ ├── input041.c │ │ ├── input042.c │ │ ├── input043.c │ │ ├── input044.c │ │ ├── input045.c │ │ ├── input046.c │ │ ├── input047.c │ │ ├── input048.c │ │ ├── input049.c │ │ ├── input050.c │ │ ├── input051.c │ │ ├── input052.c │ │ ├── input053.c │ │ ├── input054.c │ │ ├── input055.c │ │ ├── input056.c │ │ ├── input057.c │ │ ├── input058.c │ │ ├── input059.c │ │ ├── input060.c │ │ ├── input061.c │ │ ├── input062.c │ │ ├── input063.c │ │ ├── input064.c │ │ ├── input065.c │ │ ├── input066.c │ │ ├── input067.c │ │ ├── input068.c │ │ ├── input069.c │ │ ├── input070.c │ │ ├── input071.c │ │ ├── input072.c │ │ ├── input073.c │ │ ├── input074.c │ │ ├── input075.c │ │ ├── input076.c │ │ ├── input077.c │ │ ├── input078.c │ │ ├── input079.c │ │ ├── input080.c │ │ ├── input081.c │ │ ├── input082.c │ │ ├── input083.c │ │ ├── input084.c │ │ ├── input085.c │ │ ├── input086.c │ │ ├── input087.c │ │ ├── input088.c │ │ ├── input089.c │ │ ├── input090.c │ │ ├── input091.c │ │ ├── input092.c │ │ ├── input093.c │ │ ├── input094.c │ │ ├── input095.c │ │ ├── input096.c │ │ ├── input097.c │ │ ├── input098.c │ │ ├── input099.c │ │ ├── input100.c │ │ ├── mktests │ │ ├── out.input001.c │ │ ├── out.input002.c │ │ ├── out.input003.c │ │ ├── out.input004.c │ │ ├── out.input005.c │ │ ├── out.input006.c │ │ ├── out.input007.c │ │ ├── out.input008.c │ │ ├── out.input009.c │ │ ├── out.input010.c │ │ ├── out.input011.c │ │ ├── 
out.input012.c │ │ ├── out.input013.c │ │ ├── out.input014.c │ │ ├── out.input015.c │ │ ├── out.input016.c │ │ ├── out.input017.c │ │ ├── out.input018.c │ │ ├── out.input018a.c │ │ ├── out.input019.c │ │ ├── out.input020.c │ │ ├── out.input021.c │ │ ├── out.input022.c │ │ ├── out.input023.c │ │ ├── out.input024.c │ │ ├── out.input025.c │ │ ├── out.input026.c │ │ ├── out.input027.c │ │ ├── out.input028.c │ │ ├── out.input029.c │ │ ├── out.input030.c │ │ ├── out.input053.c │ │ ├── out.input054.c │ │ ├── out.input055.c │ │ ├── out.input058.c │ │ ├── out.input062.c │ │ ├── out.input063.c │ │ ├── out.input067.c │ │ ├── out.input070.c │ │ ├── out.input071.c │ │ ├── out.input074.c │ │ ├── out.input080.c │ │ ├── out.input081.c │ │ ├── out.input082.c │ │ ├── out.input083.c │ │ ├── out.input084.c │ │ ├── out.input088.c │ │ ├── out.input089.c │ │ ├── out.input090.c │ │ ├── out.input091.c │ │ ├── out.input099.c │ │ ├── out.input100.c │ │ ├── runtests │ │ └── runtestsn │ ├── tree.c │ └── types.c ├── 42_Casting/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cg_arm.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── include/ │ │ ├── ctype.h │ │ ├── fcntl.h │ │ ├── stddef.h │ │ ├── stdio.h │ │ ├── stdlib.h │ │ ├── string.h │ │ └── unistd.h │ ├── main.c │ ├── misc.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── err.input031.c │ │ ├── err.input032.c │ │ ├── err.input033.c │ │ ├── err.input034.c │ │ ├── err.input035.c │ │ ├── err.input036.c │ │ ├── err.input037.c │ │ ├── err.input038.c │ │ ├── err.input039.c │ │ ├── err.input040.c │ │ ├── err.input041.c │ │ ├── err.input042.c │ │ ├── err.input043.c │ │ ├── err.input044.c │ │ ├── err.input045.c │ │ ├── err.input046.c │ │ ├── err.input047.c │ │ ├── err.input048.c │ │ ├── err.input049.c │ │ ├── err.input050.c │ │ ├── err.input051.c │ │ ├── err.input052.c │ │ ├── err.input056.c │ │ ├── err.input057.c │ │ ├── err.input059.c │ │ ├── err.input060.c │ │ ├── err.input061.c │ │ ├── 
err.input064.c │ │ ├── err.input065.c │ │ ├── err.input066.c │ │ ├── err.input068.c │ │ ├── err.input069.c │ │ ├── err.input072.c │ │ ├── err.input073.c │ │ ├── err.input075.c │ │ ├── err.input076.c │ │ ├── err.input077.c │ │ ├── err.input078.c │ │ ├── err.input079.c │ │ ├── err.input085.c │ │ ├── err.input086.c │ │ ├── err.input087.c │ │ ├── err.input092.c │ │ ├── err.input093.c │ │ ├── err.input094.c │ │ ├── err.input095.c │ │ ├── err.input096.c │ │ ├── err.input097.c │ │ ├── err.input098.c │ │ ├── err.input102.c │ │ ├── err.input103.c │ │ ├── err.input104.c │ │ ├── err.input105.c │ │ ├── input001.c │ │ ├── input002.c │ │ ├── input003.c │ │ ├── input004.c │ │ ├── input005.c │ │ ├── input006.c │ │ ├── input007.c │ │ ├── input008.c │ │ ├── input009.c │ │ ├── input010.c │ │ ├── input011.c │ │ ├── input012.c │ │ ├── input013.c │ │ ├── input014.c │ │ ├── input015.c │ │ ├── input016.c │ │ ├── input017.c │ │ ├── input018.c │ │ ├── input018a.c │ │ ├── input019.c │ │ ├── input020.c │ │ ├── input021.c │ │ ├── input022.c │ │ ├── input023.c │ │ ├── input024.c │ │ ├── input025.c │ │ ├── input026.c │ │ ├── input027.c │ │ ├── input028.c │ │ ├── input029.c │ │ ├── input030.c │ │ ├── input031.c │ │ ├── input032.c │ │ ├── input033.c │ │ ├── input034.c │ │ ├── input035.c │ │ ├── input036.c │ │ ├── input037.c │ │ ├── input038.c │ │ ├── input039.c │ │ ├── input040.c │ │ ├── input041.c │ │ ├── input042.c │ │ ├── input043.c │ │ ├── input044.c │ │ ├── input045.c │ │ ├── input046.c │ │ ├── input047.c │ │ ├── input048.c │ │ ├── input049.c │ │ ├── input050.c │ │ ├── input051.c │ │ ├── input052.c │ │ ├── input053.c │ │ ├── input054.c │ │ ├── input055.c │ │ ├── input056.c │ │ ├── input057.c │ │ ├── input058.c │ │ ├── input059.c │ │ ├── input060.c │ │ ├── input061.c │ │ ├── input062.c │ │ ├── input063.c │ │ ├── input064.c │ │ ├── input065.c │ │ ├── input066.c │ │ ├── input067.c │ │ ├── input068.c │ │ ├── input069.c │ │ ├── input070.c │ │ ├── input071.c │ │ ├── input072.c │ │ ├── input073.c │ 
│ ├── input074.c │ │ ├── input075.c │ │ ├── input076.c │ │ ├── input077.c │ │ ├── input078.c │ │ ├── input079.c │ │ ├── input080.c │ │ ├── input081.c │ │ ├── input082.c │ │ ├── input083.c │ │ ├── input084.c │ │ ├── input085.c │ │ ├── input086.c │ │ ├── input087.c │ │ ├── input088.c │ │ ├── input089.c │ │ ├── input090.c │ │ ├── input091.c │ │ ├── input092.c │ │ ├── input093.c │ │ ├── input094.c │ │ ├── input095.c │ │ ├── input096.c │ │ ├── input097.c │ │ ├── input098.c │ │ ├── input099.c │ │ ├── input100.c │ │ ├── input101.c │ │ ├── input102.c │ │ ├── input103.c │ │ ├── input104.c │ │ ├── input105.c │ │ ├── input106.c │ │ ├── input107.c │ │ ├── input108.c │ │ ├── mktests │ │ ├── out.input001.c │ │ ├── out.input002.c │ │ ├── out.input003.c │ │ ├── out.input004.c │ │ ├── out.input005.c │ │ ├── out.input006.c │ │ ├── out.input007.c │ │ ├── out.input008.c │ │ ├── out.input009.c │ │ ├── out.input010.c │ │ ├── out.input011.c │ │ ├── out.input012.c │ │ ├── out.input013.c │ │ ├── out.input014.c │ │ ├── out.input015.c │ │ ├── out.input016.c │ │ ├── out.input017.c │ │ ├── out.input018.c │ │ ├── out.input018a.c │ │ ├── out.input019.c │ │ ├── out.input020.c │ │ ├── out.input021.c │ │ ├── out.input022.c │ │ ├── out.input023.c │ │ ├── out.input024.c │ │ ├── out.input025.c │ │ ├── out.input026.c │ │ ├── out.input027.c │ │ ├── out.input028.c │ │ ├── out.input029.c │ │ ├── out.input030.c │ │ ├── out.input053.c │ │ ├── out.input054.c │ │ ├── out.input055.c │ │ ├── out.input058.c │ │ ├── out.input062.c │ │ ├── out.input063.c │ │ ├── out.input067.c │ │ ├── out.input070.c │ │ ├── out.input071.c │ │ ├── out.input074.c │ │ ├── out.input080.c │ │ ├── out.input081.c │ │ ├── out.input082.c │ │ ├── out.input083.c │ │ ├── out.input084.c │ │ ├── out.input088.c │ │ ├── out.input089.c │ │ ├── out.input090.c │ │ ├── out.input091.c │ │ ├── out.input099.c │ │ ├── out.input100.c │ │ ├── out.input101.c │ │ ├── out.input106.c │ │ ├── out.input107.c │ │ ├── out.input108.c │ │ ├── runtests │ │ └── 
runtestsn │ ├── tree.c │ └── types.c ├── 43_More_Operators/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cg_arm.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── include/ │ │ ├── ctype.h │ │ ├── fcntl.h │ │ ├── stddef.h │ │ ├── stdio.h │ │ ├── stdlib.h │ │ ├── string.h │ │ └── unistd.h │ ├── main.c │ ├── misc.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── err.input031.c │ │ ├── err.input032.c │ │ ├── err.input033.c │ │ ├── err.input034.c │ │ ├── err.input035.c │ │ ├── err.input036.c │ │ ├── err.input037.c │ │ ├── err.input038.c │ │ ├── err.input039.c │ │ ├── err.input040.c │ │ ├── err.input041.c │ │ ├── err.input042.c │ │ ├── err.input043.c │ │ ├── err.input044.c │ │ ├── err.input045.c │ │ ├── err.input046.c │ │ ├── err.input047.c │ │ ├── err.input048.c │ │ ├── err.input049.c │ │ ├── err.input050.c │ │ ├── err.input051.c │ │ ├── err.input052.c │ │ ├── err.input056.c │ │ ├── err.input057.c │ │ ├── err.input059.c │ │ ├── err.input060.c │ │ ├── err.input061.c │ │ ├── err.input064.c │ │ ├── err.input065.c │ │ ├── err.input066.c │ │ ├── err.input068.c │ │ ├── err.input069.c │ │ ├── err.input072.c │ │ ├── err.input073.c │ │ ├── err.input075.c │ │ ├── err.input076.c │ │ ├── err.input077.c │ │ ├── err.input078.c │ │ ├── err.input079.c │ │ ├── err.input085.c │ │ ├── err.input086.c │ │ ├── err.input087.c │ │ ├── err.input092.c │ │ ├── err.input093.c │ │ ├── err.input094.c │ │ ├── err.input095.c │ │ ├── err.input096.c │ │ ├── err.input097.c │ │ ├── err.input098.c │ │ ├── err.input102.c │ │ ├── err.input103.c │ │ ├── err.input104.c │ │ ├── err.input105.c │ │ ├── input001.c │ │ ├── input002.c │ │ ├── input003.c │ │ ├── input004.c │ │ ├── input005.c │ │ ├── input006.c │ │ ├── input007.c │ │ ├── input008.c │ │ ├── input009.c │ │ ├── input010.c │ │ ├── input011.c │ │ ├── input012.c │ │ ├── input013.c │ │ ├── input014.c │ │ ├── input015.c │ │ ├── input016.c │ │ ├── input017.c │ │ ├── input018.c │ │ ├── input018a.c │ │ 
├── input019.c │ │ ├── input020.c │ │ ├── input021.c │ │ ├── input022.c │ │ ├── input023.c │ │ ├── input024.c │ │ ├── input025.c │ │ ├── input026.c │ │ ├── input027.c │ │ ├── input028.c │ │ ├── input029.c │ │ ├── input030.c │ │ ├── input031.c │ │ ├── input032.c │ │ ├── input033.c │ │ ├── input034.c │ │ ├── input035.c │ │ ├── input036.c │ │ ├── input037.c │ │ ├── input038.c │ │ ├── input039.c │ │ ├── input040.c │ │ ├── input041.c │ │ ├── input042.c │ │ ├── input043.c │ │ ├── input044.c │ │ ├── input045.c │ │ ├── input046.c │ │ ├── input047.c │ │ ├── input048.c │ │ ├── input049.c │ │ ├── input050.c │ │ ├── input051.c │ │ ├── input052.c │ │ ├── input053.c │ │ ├── input054.c │ │ ├── input055.c │ │ ├── input056.c │ │ ├── input057.c │ │ ├── input058.c │ │ ├── input059.c │ │ ├── input060.c │ │ ├── input061.c │ │ ├── input062.c │ │ ├── input063.c │ │ ├── input064.c │ │ ├── input065.c │ │ ├── input066.c │ │ ├── input067.c │ │ ├── input068.c │ │ ├── input069.c │ │ ├── input070.c │ │ ├── input071.c │ │ ├── input072.c │ │ ├── input073.c │ │ ├── input074.c │ │ ├── input075.c │ │ ├── input076.c │ │ ├── input077.c │ │ ├── input078.c │ │ ├── input079.c │ │ ├── input080.c │ │ ├── input081.c │ │ ├── input082.c │ │ ├── input083.c │ │ ├── input084.c │ │ ├── input085.c │ │ ├── input086.c │ │ ├── input087.c │ │ ├── input088.c │ │ ├── input089.c │ │ ├── input090.c │ │ ├── input091.c │ │ ├── input092.c │ │ ├── input093.c │ │ ├── input094.c │ │ ├── input095.c │ │ ├── input096.c │ │ ├── input097.c │ │ ├── input098.c │ │ ├── input099.c │ │ ├── input100.c │ │ ├── input101.c │ │ ├── input102.c │ │ ├── input103.c │ │ ├── input104.c │ │ ├── input105.c │ │ ├── input106.c │ │ ├── input107.c │ │ ├── input108.c │ │ ├── input109.c │ │ ├── input110.c │ │ ├── mktests │ │ ├── out.input001.c │ │ ├── out.input002.c │ │ ├── out.input003.c │ │ ├── out.input004.c │ │ ├── out.input005.c │ │ ├── out.input006.c │ │ ├── out.input007.c │ │ ├── out.input008.c │ │ ├── out.input009.c │ │ ├── out.input010.c │ │ ├── 
out.input011.c │ │ ├── out.input012.c │ │ ├── out.input013.c │ │ ├── out.input014.c │ │ ├── out.input015.c │ │ ├── out.input016.c │ │ ├── out.input017.c │ │ ├── out.input018.c │ │ ├── out.input018a.c │ │ ├── out.input019.c │ │ ├── out.input020.c │ │ ├── out.input021.c │ │ ├── out.input022.c │ │ ├── out.input023.c │ │ ├── out.input024.c │ │ ├── out.input025.c │ │ ├── out.input026.c │ │ ├── out.input027.c │ │ ├── out.input028.c │ │ ├── out.input029.c │ │ ├── out.input030.c │ │ ├── out.input053.c │ │ ├── out.input054.c │ │ ├── out.input055.c │ │ ├── out.input058.c │ │ ├── out.input062.c │ │ ├── out.input063.c │ │ ├── out.input067.c │ │ ├── out.input070.c │ │ ├── out.input071.c │ │ ├── out.input074.c │ │ ├── out.input080.c │ │ ├── out.input081.c │ │ ├── out.input082.c │ │ ├── out.input083.c │ │ ├── out.input084.c │ │ ├── out.input088.c │ │ ├── out.input089.c │ │ ├── out.input090.c │ │ ├── out.input091.c │ │ ├── out.input099.c │ │ ├── out.input100.c │ │ ├── out.input101.c │ │ ├── out.input106.c │ │ ├── out.input107.c │ │ ├── out.input108.c │ │ ├── out.input109.c │ │ ├── out.input110.c │ │ ├── runtests │ │ └── runtestsn │ ├── tree.c │ └── types.c ├── 44_Fold_Optimisation/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cg_arm.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── include/ │ │ ├── ctype.h │ │ ├── fcntl.h │ │ ├── stddef.h │ │ ├── stdio.h │ │ ├── stdlib.h │ │ ├── string.h │ │ └── unistd.h │ ├── main.c │ ├── misc.c │ ├── opt.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── err.input031.c │ │ ├── err.input032.c │ │ ├── err.input033.c │ │ ├── err.input034.c │ │ ├── err.input035.c │ │ ├── err.input036.c │ │ ├── err.input037.c │ │ ├── err.input038.c │ │ ├── err.input039.c │ │ ├── err.input040.c │ │ ├── err.input041.c │ │ ├── err.input042.c │ │ ├── err.input043.c │ │ ├── err.input044.c │ │ ├── err.input045.c │ │ ├── err.input046.c │ │ ├── err.input047.c │ │ ├── err.input048.c │ │ ├── err.input049.c │ │ ├── 
err.input050.c │ │ ├── err.input051.c │ │ ├── err.input052.c │ │ ├── err.input056.c │ │ ├── err.input057.c │ │ ├── err.input059.c │ │ ├── err.input060.c │ │ ├── err.input061.c │ │ ├── err.input064.c │ │ ├── err.input065.c │ │ ├── err.input066.c │ │ ├── err.input068.c │ │ ├── err.input069.c │ │ ├── err.input072.c │ │ ├── err.input073.c │ │ ├── err.input075.c │ │ ├── err.input076.c │ │ ├── err.input077.c │ │ ├── err.input078.c │ │ ├── err.input079.c │ │ ├── err.input085.c │ │ ├── err.input086.c │ │ ├── err.input087.c │ │ ├── err.input092.c │ │ ├── err.input093.c │ │ ├── err.input094.c │ │ ├── err.input095.c │ │ ├── err.input096.c │ │ ├── err.input097.c │ │ ├── err.input098.c │ │ ├── err.input102.c │ │ ├── err.input103.c │ │ ├── err.input104.c │ │ ├── err.input105.c │ │ ├── input001.c │ │ ├── input002.c │ │ ├── input003.c │ │ ├── input004.c │ │ ├── input005.c │ │ ├── input006.c │ │ ├── input007.c │ │ ├── input008.c │ │ ├── input009.c │ │ ├── input010.c │ │ ├── input011.c │ │ ├── input012.c │ │ ├── input013.c │ │ ├── input014.c │ │ ├── input015.c │ │ ├── input016.c │ │ ├── input017.c │ │ ├── input018.c │ │ ├── input018a.c │ │ ├── input019.c │ │ ├── input020.c │ │ ├── input021.c │ │ ├── input022.c │ │ ├── input023.c │ │ ├── input024.c │ │ ├── input025.c │ │ ├── input026.c │ │ ├── input027.c │ │ ├── input028.c │ │ ├── input029.c │ │ ├── input030.c │ │ ├── input031.c │ │ ├── input032.c │ │ ├── input033.c │ │ ├── input034.c │ │ ├── input035.c │ │ ├── input036.c │ │ ├── input037.c │ │ ├── input038.c │ │ ├── input039.c │ │ ├── input040.c │ │ ├── input041.c │ │ ├── input042.c │ │ ├── input043.c │ │ ├── input044.c │ │ ├── input045.c │ │ ├── input046.c │ │ ├── input047.c │ │ ├── input048.c │ │ ├── input049.c │ │ ├── input050.c │ │ ├── input051.c │ │ ├── input052.c │ │ ├── input053.c │ │ ├── input054.c │ │ ├── input055.c │ │ ├── input056.c │ │ ├── input057.c │ │ ├── input058.c │ │ ├── input059.c │ │ ├── input060.c │ │ ├── input061.c │ │ ├── input062.c │ │ ├── input063.c │ │ ├── 
input064.c │ │ ├── input065.c │ │ ├── input066.c │ │ ├── input067.c │ │ ├── input068.c │ │ ├── input069.c │ │ ├── input070.c │ │ ├── input071.c │ │ ├── input072.c │ │ ├── input073.c │ │ ├── input074.c │ │ ├── input075.c │ │ ├── input076.c │ │ ├── input077.c │ │ ├── input078.c │ │ ├── input079.c │ │ ├── input080.c │ │ ├── input081.c │ │ ├── input082.c │ │ ├── input083.c │ │ ├── input084.c │ │ ├── input085.c │ │ ├── input086.c │ │ ├── input087.c │ │ ├── input088.c │ │ ├── input089.c │ │ ├── input090.c │ │ ├── input091.c │ │ ├── input092.c │ │ ├── input093.c │ │ ├── input094.c │ │ ├── input095.c │ │ ├── input096.c │ │ ├── input097.c │ │ ├── input098.c │ │ ├── input099.c │ │ ├── input100.c │ │ ├── input101.c │ │ ├── input102.c │ │ ├── input103.c │ │ ├── input104.c │ │ ├── input105.c │ │ ├── input106.c │ │ ├── input107.c │ │ ├── input108.c │ │ ├── input109.c │ │ ├── input110.c │ │ ├── input111.c │ │ ├── mktests │ │ ├── out.input001.c │ │ ├── out.input002.c │ │ ├── out.input003.c │ │ ├── out.input004.c │ │ ├── out.input005.c │ │ ├── out.input006.c │ │ ├── out.input007.c │ │ ├── out.input008.c │ │ ├── out.input009.c │ │ ├── out.input010.c │ │ ├── out.input011.c │ │ ├── out.input012.c │ │ ├── out.input013.c │ │ ├── out.input014.c │ │ ├── out.input015.c │ │ ├── out.input016.c │ │ ├── out.input017.c │ │ ├── out.input018.c │ │ ├── out.input018a.c │ │ ├── out.input019.c │ │ ├── out.input020.c │ │ ├── out.input021.c │ │ ├── out.input022.c │ │ ├── out.input023.c │ │ ├── out.input024.c │ │ ├── out.input025.c │ │ ├── out.input026.c │ │ ├── out.input027.c │ │ ├── out.input028.c │ │ ├── out.input029.c │ │ ├── out.input030.c │ │ ├── out.input053.c │ │ ├── out.input054.c │ │ ├── out.input055.c │ │ ├── out.input058.c │ │ ├── out.input062.c │ │ ├── out.input063.c │ │ ├── out.input067.c │ │ ├── out.input070.c │ │ ├── out.input071.c │ │ ├── out.input074.c │ │ ├── out.input080.c │ │ ├── out.input081.c │ │ ├── out.input082.c │ │ ├── out.input083.c │ │ ├── out.input084.c │ │ ├── 
out.input088.c │ │ ├── out.input089.c │ │ ├── out.input090.c │ │ ├── out.input091.c │ │ ├── out.input099.c │ │ ├── out.input100.c │ │ ├── out.input101.c │ │ ├── out.input106.c │ │ ├── out.input107.c │ │ ├── out.input108.c │ │ ├── out.input109.c │ │ ├── out.input110.c │ │ ├── out.input111.c │ │ ├── runtests │ │ └── runtestsn │ ├── tree.c │ └── types.c ├── 45_Globals_Again/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cg_arm.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── include/ │ │ ├── ctype.h │ │ ├── fcntl.h │ │ ├── stddef.h │ │ ├── stdio.h │ │ ├── stdlib.h │ │ ├── string.h │ │ └── unistd.h │ ├── main.c │ ├── misc.c │ ├── opt.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── err.input031.c │ │ ├── err.input032.c │ │ ├── err.input033.c │ │ ├── err.input034.c │ │ ├── err.input035.c │ │ ├── err.input036.c │ │ ├── err.input037.c │ │ ├── err.input038.c │ │ ├── err.input039.c │ │ ├── err.input040.c │ │ ├── err.input041.c │ │ ├── err.input042.c │ │ ├── err.input043.c │ │ ├── err.input044.c │ │ ├── err.input045.c │ │ ├── err.input046.c │ │ ├── err.input047.c │ │ ├── err.input048.c │ │ ├── err.input049.c │ │ ├── err.input050.c │ │ ├── err.input051.c │ │ ├── err.input052.c │ │ ├── err.input056.c │ │ ├── err.input057.c │ │ ├── err.input059.c │ │ ├── err.input060.c │ │ ├── err.input061.c │ │ ├── err.input064.c │ │ ├── err.input065.c │ │ ├── err.input066.c │ │ ├── err.input068.c │ │ ├── err.input069.c │ │ ├── err.input072.c │ │ ├── err.input073.c │ │ ├── err.input075.c │ │ ├── err.input076.c │ │ ├── err.input077.c │ │ ├── err.input078.c │ │ ├── err.input079.c │ │ ├── err.input085.c │ │ ├── err.input086.c │ │ ├── err.input087.c │ │ ├── err.input092.c │ │ ├── err.input093.c │ │ ├── err.input094.c │ │ ├── err.input095.c │ │ ├── err.input096.c │ │ ├── err.input097.c │ │ ├── err.input098.c │ │ ├── err.input102.c │ │ ├── err.input103.c │ │ ├── err.input104.c │ │ ├── err.input105.c │ │ ├── input001.c │ │ ├── input002.c │ 
│ ├── input003.c │ │ ├── input004.c │ │ ├── input005.c │ │ ├── input006.c │ │ ├── input007.c │ │ ├── input008.c │ │ ├── input009.c │ │ ├── input010.c │ │ ├── input011.c │ │ ├── input012.c │ │ ├── input013.c │ │ ├── input014.c │ │ ├── input015.c │ │ ├── input016.c │ │ ├── input017.c │ │ ├── input018.c │ │ ├── input018a.c │ │ ├── input019.c │ │ ├── input020.c │ │ ├── input021.c │ │ ├── input022.c │ │ ├── input023.c │ │ ├── input024.c │ │ ├── input025.c │ │ ├── input026.c │ │ ├── input027.c │ │ ├── input028.c │ │ ├── input029.c │ │ ├── input030.c │ │ ├── input031.c │ │ ├── input032.c │ │ ├── input033.c │ │ ├── input034.c │ │ ├── input035.c │ │ ├── input036.c │ │ ├── input037.c │ │ ├── input038.c │ │ ├── input039.c │ │ ├── input040.c │ │ ├── input041.c │ │ ├── input042.c │ │ ├── input043.c │ │ ├── input044.c │ │ ├── input045.c │ │ ├── input046.c │ │ ├── input047.c │ │ ├── input048.c │ │ ├── input049.c │ │ ├── input050.c │ │ ├── input051.c │ │ ├── input052.c │ │ ├── input053.c │ │ ├── input054.c │ │ ├── input055.c │ │ ├── input056.c │ │ ├── input057.c │ │ ├── input058.c │ │ ├── input059.c │ │ ├── input060.c │ │ ├── input061.c │ │ ├── input062.c │ │ ├── input063.c │ │ ├── input064.c │ │ ├── input065.c │ │ ├── input066.c │ │ ├── input067.c │ │ ├── input068.c │ │ ├── input069.c │ │ ├── input070.c │ │ ├── input071.c │ │ ├── input072.c │ │ ├── input073.c │ │ ├── input074.c │ │ ├── input075.c │ │ ├── input076.c │ │ ├── input077.c │ │ ├── input078.c │ │ ├── input079.c │ │ ├── input080.c │ │ ├── input081.c │ │ ├── input082.c │ │ ├── input083.c │ │ ├── input084.c │ │ ├── input085.c │ │ ├── input086.c │ │ ├── input087.c │ │ ├── input088.c │ │ ├── input089.c │ │ ├── input090.c │ │ ├── input091.c │ │ ├── input092.c │ │ ├── input093.c │ │ ├── input094.c │ │ ├── input095.c │ │ ├── input096.c │ │ ├── input097.c │ │ ├── input098.c │ │ ├── input099.c │ │ ├── input100.c │ │ ├── input101.c │ │ ├── input102.c │ │ ├── input103.c │ │ ├── input104.c │ │ ├── input105.c │ │ ├── input106.c │ │ 
├── input107.c │ │ ├── input108.c │ │ ├── input109.c │ │ ├── input110.c │ │ ├── input111.c │ │ ├── input112.c │ │ ├── mktests │ │ ├── out.input001.c │ │ ├── out.input002.c │ │ ├── out.input003.c │ │ ├── out.input004.c │ │ ├── out.input005.c │ │ ├── out.input006.c │ │ ├── out.input007.c │ │ ├── out.input008.c │ │ ├── out.input009.c │ │ ├── out.input010.c │ │ ├── out.input011.c │ │ ├── out.input012.c │ │ ├── out.input013.c │ │ ├── out.input014.c │ │ ├── out.input015.c │ │ ├── out.input016.c │ │ ├── out.input017.c │ │ ├── out.input018.c │ │ ├── out.input018a.c │ │ ├── out.input019.c │ │ ├── out.input020.c │ │ ├── out.input021.c │ │ ├── out.input022.c │ │ ├── out.input023.c │ │ ├── out.input024.c │ │ ├── out.input025.c │ │ ├── out.input026.c │ │ ├── out.input027.c │ │ ├── out.input028.c │ │ ├── out.input029.c │ │ ├── out.input030.c │ │ ├── out.input053.c │ │ ├── out.input054.c │ │ ├── out.input055.c │ │ ├── out.input058.c │ │ ├── out.input062.c │ │ ├── out.input063.c │ │ ├── out.input067.c │ │ ├── out.input070.c │ │ ├── out.input071.c │ │ ├── out.input074.c │ │ ├── out.input080.c │ │ ├── out.input081.c │ │ ├── out.input082.c │ │ ├── out.input083.c │ │ ├── out.input084.c │ │ ├── out.input088.c │ │ ├── out.input089.c │ │ ├── out.input090.c │ │ ├── out.input091.c │ │ ├── out.input099.c │ │ ├── out.input100.c │ │ ├── out.input101.c │ │ ├── out.input106.c │ │ ├── out.input107.c │ │ ├── out.input108.c │ │ ├── out.input109.c │ │ ├── out.input110.c │ │ ├── out.input111.c │ │ ├── out.input112.c │ │ ├── runtests │ │ └── runtestsn │ ├── tree.c │ └── types.c ├── 46_Void_Functions/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cg_arm.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── include/ │ │ ├── ctype.h │ │ ├── errno.h │ │ ├── fcntl.h │ │ ├── stddef.h │ │ ├── stdio.h │ │ ├── stdlib.h │ │ ├── string.h │ │ └── unistd.h │ ├── main.c │ ├── misc.c │ ├── opt.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── err.input031.c │ 
│ ├── err.input032.c │ │ ├── err.input033.c │ │ ├── err.input034.c │ │ ├── err.input035.c │ │ ├── err.input036.c │ │ ├── err.input037.c │ │ ├── err.input038.c │ │ ├── err.input039.c │ │ ├── err.input040.c │ │ ├── err.input041.c │ │ ├── err.input042.c │ │ ├── err.input043.c │ │ ├── err.input044.c │ │ ├── err.input045.c │ │ ├── err.input046.c │ │ ├── err.input047.c │ │ ├── err.input048.c │ │ ├── err.input049.c │ │ ├── err.input050.c │ │ ├── err.input051.c │ │ ├── err.input052.c │ │ ├── err.input056.c │ │ ├── err.input057.c │ │ ├── err.input059.c │ │ ├── err.input060.c │ │ ├── err.input061.c │ │ ├── err.input064.c │ │ ├── err.input065.c │ │ ├── err.input066.c │ │ ├── err.input068.c │ │ ├── err.input069.c │ │ ├── err.input072.c │ │ ├── err.input073.c │ │ ├── err.input075.c │ │ ├── err.input076.c │ │ ├── err.input077.c │ │ ├── err.input078.c │ │ ├── err.input079.c │ │ ├── err.input085.c │ │ ├── err.input086.c │ │ ├── err.input087.c │ │ ├── err.input092.c │ │ ├── err.input093.c │ │ ├── err.input094.c │ │ ├── err.input095.c │ │ ├── err.input096.c │ │ ├── err.input097.c │ │ ├── err.input098.c │ │ ├── err.input102.c │ │ ├── err.input103.c │ │ ├── err.input104.c │ │ ├── err.input105.c │ │ ├── input001.c │ │ ├── input002.c │ │ ├── input003.c │ │ ├── input004.c │ │ ├── input005.c │ │ ├── input006.c │ │ ├── input007.c │ │ ├── input008.c │ │ ├── input009.c │ │ ├── input010.c │ │ ├── input011.c │ │ ├── input012.c │ │ ├── input013.c │ │ ├── input014.c │ │ ├── input015.c │ │ ├── input016.c │ │ ├── input017.c │ │ ├── input018.c │ │ ├── input018a.c │ │ ├── input019.c │ │ ├── input020.c │ │ ├── input021.c │ │ ├── input022.c │ │ ├── input023.c │ │ ├── input024.c │ │ ├── input025.c │ │ ├── input026.c │ │ ├── input027.c │ │ ├── input028.c │ │ ├── input029.c │ │ ├── input030.c │ │ ├── input031.c │ │ ├── input032.c │ │ ├── input033.c │ │ ├── input034.c │ │ ├── input035.c │ │ ├── input036.c │ │ ├── input037.c │ │ ├── input038.c │ │ ├── input039.c │ │ ├── input040.c │ │ ├── input041.c │ │ 
├── input042.c │ │ ├── input043.c │ │ ├── input044.c │ │ ├── input045.c │ │ ├── input046.c │ │ ├── input047.c │ │ ├── input048.c │ │ ├── input049.c │ │ ├── input050.c │ │ ├── input051.c │ │ ├── input052.c │ │ ├── input053.c │ │ ├── input054.c │ │ ├── input055.c │ │ ├── input056.c │ │ ├── input057.c │ │ ├── input058.c │ │ ├── input059.c │ │ ├── input060.c │ │ ├── input061.c │ │ ├── input062.c │ │ ├── input063.c │ │ ├── input064.c │ │ ├── input065.c │ │ ├── input066.c │ │ ├── input067.c │ │ ├── input068.c │ │ ├── input069.c │ │ ├── input070.c │ │ ├── input071.c │ │ ├── input072.c │ │ ├── input073.c │ │ ├── input074.c │ │ ├── input075.c │ │ ├── input076.c │ │ ├── input077.c │ │ ├── input078.c │ │ ├── input079.c │ │ ├── input080.c │ │ ├── input081.c │ │ ├── input082.c │ │ ├── input083.c │ │ ├── input084.c │ │ ├── input085.c │ │ ├── input086.c │ │ ├── input087.c │ │ ├── input088.c │ │ ├── input089.c │ │ ├── input090.c │ │ ├── input091.c │ │ ├── input092.c │ │ ├── input093.c │ │ ├── input094.c │ │ ├── input095.c │ │ ├── input096.c │ │ ├── input097.c │ │ ├── input098.c │ │ ├── input099.c │ │ ├── input100.c │ │ ├── input101.c │ │ ├── input102.c │ │ ├── input103.c │ │ ├── input104.c │ │ ├── input105.c │ │ ├── input106.c │ │ ├── input107.c │ │ ├── input108.c │ │ ├── input109.c │ │ ├── input110.c │ │ ├── input111.c │ │ ├── input112.c │ │ ├── input113.c │ │ ├── input114.c │ │ ├── mktests │ │ ├── out.input001.c │ │ ├── out.input002.c │ │ ├── out.input003.c │ │ ├── out.input004.c │ │ ├── out.input005.c │ │ ├── out.input006.c │ │ ├── out.input007.c │ │ ├── out.input008.c │ │ ├── out.input009.c │ │ ├── out.input010.c │ │ ├── out.input011.c │ │ ├── out.input012.c │ │ ├── out.input013.c │ │ ├── out.input014.c │ │ ├── out.input015.c │ │ ├── out.input016.c │ │ ├── out.input017.c │ │ ├── out.input018.c │ │ ├── out.input018a.c │ │ ├── out.input019.c │ │ ├── out.input020.c │ │ ├── out.input021.c │ │ ├── out.input022.c │ │ ├── out.input023.c │ │ ├── out.input024.c │ │ ├── out.input025.c │ 
│ ├── out.input026.c │ │ ├── out.input027.c │ │ ├── out.input028.c │ │ ├── out.input029.c │ │ ├── out.input030.c │ │ ├── out.input053.c │ │ ├── out.input054.c │ │ ├── out.input055.c │ │ ├── out.input058.c │ │ ├── out.input062.c │ │ ├── out.input063.c │ │ ├── out.input067.c │ │ ├── out.input070.c │ │ ├── out.input071.c │ │ ├── out.input074.c │ │ ├── out.input080.c │ │ ├── out.input081.c │ │ ├── out.input082.c │ │ ├── out.input083.c │ │ ├── out.input084.c │ │ ├── out.input088.c │ │ ├── out.input089.c │ │ ├── out.input090.c │ │ ├── out.input091.c │ │ ├── out.input099.c │ │ ├── out.input100.c │ │ ├── out.input101.c │ │ ├── out.input106.c │ │ ├── out.input107.c │ │ ├── out.input108.c │ │ ├── out.input109.c │ │ ├── out.input110.c │ │ ├── out.input111.c │ │ ├── out.input112.c │ │ ├── out.input113.c │ │ ├── out.input114.c │ │ ├── runtests │ │ └── runtestsn │ ├── tree.c │ └── types.c ├── 47_Sizeof/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cg_arm.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── include/ │ │ ├── ctype.h │ │ ├── errno.h │ │ ├── fcntl.h │ │ ├── stddef.h │ │ ├── stdio.h │ │ ├── stdlib.h │ │ ├── string.h │ │ └── unistd.h │ ├── main.c │ ├── misc.c │ ├── opt.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── err.input031.c │ │ ├── err.input032.c │ │ ├── err.input033.c │ │ ├── err.input034.c │ │ ├── err.input035.c │ │ ├── err.input036.c │ │ ├── err.input037.c │ │ ├── err.input038.c │ │ ├── err.input039.c │ │ ├── err.input040.c │ │ ├── err.input041.c │ │ ├── err.input042.c │ │ ├── err.input043.c │ │ ├── err.input044.c │ │ ├── err.input045.c │ │ ├── err.input046.c │ │ ├── err.input047.c │ │ ├── err.input048.c │ │ ├── err.input049.c │ │ ├── err.input050.c │ │ ├── err.input051.c │ │ ├── err.input052.c │ │ ├── err.input056.c │ │ ├── err.input057.c │ │ ├── err.input059.c │ │ ├── err.input060.c │ │ ├── err.input061.c │ │ ├── err.input064.c │ │ ├── err.input065.c │ │ ├── err.input066.c │ │ ├── err.input068.c │ 
│ ├── err.input069.c │ │ ├── err.input072.c │ │ ├── err.input073.c │ │ ├── err.input075.c │ │ ├── err.input076.c │ │ ├── err.input077.c │ │ ├── err.input078.c │ │ ├── err.input079.c │ │ ├── err.input085.c │ │ ├── err.input086.c │ │ ├── err.input087.c │ │ ├── err.input092.c │ │ ├── err.input093.c │ │ ├── err.input094.c │ │ ├── err.input095.c │ │ ├── err.input096.c │ │ ├── err.input097.c │ │ ├── err.input098.c │ │ ├── err.input102.c │ │ ├── err.input103.c │ │ ├── err.input104.c │ │ ├── err.input105.c │ │ ├── input001.c │ │ ├── input002.c │ │ ├── input003.c │ │ ├── input004.c │ │ ├── input005.c │ │ ├── input006.c │ │ ├── input007.c │ │ ├── input008.c │ │ ├── input009.c │ │ ├── input010.c │ │ ├── input011.c │ │ ├── input012.c │ │ ├── input013.c │ │ ├── input014.c │ │ ├── input015.c │ │ ├── input016.c │ │ ├── input017.c │ │ ├── input018.c │ │ ├── input018a.c │ │ ├── input019.c │ │ ├── input020.c │ │ ├── input021.c │ │ ├── input022.c │ │ ├── input023.c │ │ ├── input024.c │ │ ├── input025.c │ │ ├── input026.c │ │ ├── input027.c │ │ ├── input028.c │ │ ├── input029.c │ │ ├── input030.c │ │ ├── input031.c │ │ ├── input032.c │ │ ├── input033.c │ │ ├── input034.c │ │ ├── input035.c │ │ ├── input036.c │ │ ├── input037.c │ │ ├── input038.c │ │ ├── input039.c │ │ ├── input040.c │ │ ├── input041.c │ │ ├── input042.c │ │ ├── input043.c │ │ ├── input044.c │ │ ├── input045.c │ │ ├── input046.c │ │ ├── input047.c │ │ ├── input048.c │ │ ├── input049.c │ │ ├── input050.c │ │ ├── input051.c │ │ ├── input052.c │ │ ├── input053.c │ │ ├── input054.c │ │ ├── input055.c │ │ ├── input056.c │ │ ├── input057.c │ │ ├── input058.c │ │ ├── input059.c │ │ ├── input060.c │ │ ├── input061.c │ │ ├── input062.c │ │ ├── input063.c │ │ ├── input064.c │ │ ├── input065.c │ │ ├── input066.c │ │ ├── input067.c │ │ ├── input068.c │ │ ├── input069.c │ │ ├── input070.c │ │ ├── input071.c │ │ ├── input072.c │ │ ├── input073.c │ │ ├── input074.c │ │ ├── input075.c │ │ ├── input076.c │ │ ├── input077.c │ │ ├── 
input078.c │ │ ├── input079.c │ │ ├── input080.c │ │ ├── input081.c │ │ ├── input082.c │ │ ├── input083.c │ │ ├── input084.c │ │ ├── input085.c │ │ ├── input086.c │ │ ├── input087.c │ │ ├── input088.c │ │ ├── input089.c │ │ ├── input090.c │ │ ├── input091.c │ │ ├── input092.c │ │ ├── input093.c │ │ ├── input094.c │ │ ├── input095.c │ │ ├── input096.c │ │ ├── input097.c │ │ ├── input098.c │ │ ├── input099.c │ │ ├── input100.c │ │ ├── input101.c │ │ ├── input102.c │ │ ├── input103.c │ │ ├── input104.c │ │ ├── input105.c │ │ ├── input106.c │ │ ├── input107.c │ │ ├── input108.c │ │ ├── input109.c │ │ ├── input110.c │ │ ├── input111.c │ │ ├── input112.c │ │ ├── input113.c │ │ ├── input114.c │ │ ├── input115.c │ │ ├── mktests │ │ ├── out.input001.c │ │ ├── out.input002.c │ │ ├── out.input003.c │ │ ├── out.input004.c │ │ ├── out.input005.c │ │ ├── out.input006.c │ │ ├── out.input007.c │ │ ├── out.input008.c │ │ ├── out.input009.c │ │ ├── out.input010.c │ │ ├── out.input011.c │ │ ├── out.input012.c │ │ ├── out.input013.c │ │ ├── out.input014.c │ │ ├── out.input015.c │ │ ├── out.input016.c │ │ ├── out.input017.c │ │ ├── out.input018.c │ │ ├── out.input018a.c │ │ ├── out.input019.c │ │ ├── out.input020.c │ │ ├── out.input021.c │ │ ├── out.input022.c │ │ ├── out.input023.c │ │ ├── out.input024.c │ │ ├── out.input025.c │ │ ├── out.input026.c │ │ ├── out.input027.c │ │ ├── out.input028.c │ │ ├── out.input029.c │ │ ├── out.input030.c │ │ ├── out.input053.c │ │ ├── out.input054.c │ │ ├── out.input055.c │ │ ├── out.input058.c │ │ ├── out.input062.c │ │ ├── out.input063.c │ │ ├── out.input067.c │ │ ├── out.input070.c │ │ ├── out.input071.c │ │ ├── out.input074.c │ │ ├── out.input080.c │ │ ├── out.input081.c │ │ ├── out.input082.c │ │ ├── out.input083.c │ │ ├── out.input084.c │ │ ├── out.input088.c │ │ ├── out.input089.c │ │ ├── out.input090.c │ │ ├── out.input091.c │ │ ├── out.input099.c │ │ ├── out.input100.c │ │ ├── out.input101.c │ │ ├── out.input106.c │ │ ├── out.input107.c │ │ 
├── out.input108.c │ │ ├── out.input109.c │ │ ├── out.input110.c │ │ ├── out.input111.c │ │ ├── out.input112.c │ │ ├── out.input113.c │ │ ├── out.input114.c │ │ ├── out.input115.c │ │ ├── runtests │ │ └── runtestsn │ ├── tree.c │ └── types.c ├── 48_Static/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cg_arm.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── include/ │ │ ├── ctype.h │ │ ├── errno.h │ │ ├── fcntl.h │ │ ├── stddef.h │ │ ├── stdio.h │ │ ├── stdlib.h │ │ ├── string.h │ │ └── unistd.h │ ├── main.c │ ├── misc.c │ ├── opt.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── err.input031.c │ │ ├── err.input032.c │ │ ├── err.input033.c │ │ ├── err.input034.c │ │ ├── err.input035.c │ │ ├── err.input036.c │ │ ├── err.input037.c │ │ ├── err.input038.c │ │ ├── err.input039.c │ │ ├── err.input040.c │ │ ├── err.input041.c │ │ ├── err.input042.c │ │ ├── err.input043.c │ │ ├── err.input044.c │ │ ├── err.input045.c │ │ ├── err.input046.c │ │ ├── err.input047.c │ │ ├── err.input048.c │ │ ├── err.input049.c │ │ ├── err.input050.c │ │ ├── err.input051.c │ │ ├── err.input052.c │ │ ├── err.input056.c │ │ ├── err.input057.c │ │ ├── err.input059.c │ │ ├── err.input060.c │ │ ├── err.input061.c │ │ ├── err.input064.c │ │ ├── err.input065.c │ │ ├── err.input066.c │ │ ├── err.input068.c │ │ ├── err.input069.c │ │ ├── err.input072.c │ │ ├── err.input073.c │ │ ├── err.input075.c │ │ ├── err.input076.c │ │ ├── err.input077.c │ │ ├── err.input078.c │ │ ├── err.input079.c │ │ ├── err.input085.c │ │ ├── err.input086.c │ │ ├── err.input087.c │ │ ├── err.input092.c │ │ ├── err.input093.c │ │ ├── err.input094.c │ │ ├── err.input095.c │ │ ├── err.input096.c │ │ ├── err.input097.c │ │ ├── err.input098.c │ │ ├── err.input102.c │ │ ├── err.input103.c │ │ ├── err.input104.c │ │ ├── err.input105.c │ │ ├── err.input118.c │ │ ├── input001.c │ │ ├── input002.c │ │ ├── input003.c │ │ ├── input004.c │ │ ├── input005.c │ │ ├── input006.c │ │ 
├── input007.c │ │ ├── input008.c │ │ ├── input009.c │ │ ├── input010.c │ │ ├── input011.c │ │ ├── input012.c │ │ ├── input013.c │ │ ├── input014.c │ │ ├── input015.c │ │ ├── input016.c │ │ ├── input017.c │ │ ├── input018.c │ │ ├── input018a.c │ │ ├── input019.c │ │ ├── input020.c │ │ ├── input021.c │ │ ├── input022.c │ │ ├── input023.c │ │ ├── input024.c │ │ ├── input025.c │ │ ├── input026.c │ │ ├── input027.c │ │ ├── input028.c │ │ ├── input029.c │ │ ├── input030.c │ │ ├── input031.c │ │ ├── input032.c │ │ ├── input033.c │ │ ├── input034.c │ │ ├── input035.c │ │ ├── input036.c │ │ ├── input037.c │ │ ├── input038.c │ │ ├── input039.c │ │ ├── input040.c │ │ ├── input041.c │ │ ├── input042.c │ │ ├── input043.c │ │ ├── input044.c │ │ ├── input045.c │ │ ├── input046.c │ │ ├── input047.c │ │ ├── input048.c │ │ ├── input049.c │ │ ├── input050.c │ │ ├── input051.c │ │ ├── input052.c │ │ ├── input053.c │ │ ├── input054.c │ │ ├── input055.c │ │ ├── input056.c │ │ ├── input057.c │ │ ├── input058.c │ │ ├── input059.c │ │ ├── input060.c │ │ ├── input061.c │ │ ├── input062.c │ │ ├── input063.c │ │ ├── input064.c │ │ ├── input065.c │ │ ├── input066.c │ │ ├── input067.c │ │ ├── input068.c │ │ ├── input069.c │ │ ├── input070.c │ │ ├── input071.c │ │ ├── input072.c │ │ ├── input073.c │ │ ├── input074.c │ │ ├── input075.c │ │ ├── input076.c │ │ ├── input077.c │ │ ├── input078.c │ │ ├── input079.c │ │ ├── input080.c │ │ ├── input081.c │ │ ├── input082.c │ │ ├── input083.c │ │ ├── input084.c │ │ ├── input085.c │ │ ├── input086.c │ │ ├── input087.c │ │ ├── input088.c │ │ ├── input089.c │ │ ├── input090.c │ │ ├── input091.c │ │ ├── input092.c │ │ ├── input093.c │ │ ├── input094.c │ │ ├── input095.c │ │ ├── input096.c │ │ ├── input097.c │ │ ├── input098.c │ │ ├── input099.c │ │ ├── input100.c │ │ ├── input101.c │ │ ├── input102.c │ │ ├── input103.c │ │ ├── input104.c │ │ ├── input105.c │ │ ├── input106.c │ │ ├── input107.c │ │ ├── input108.c │ │ ├── input109.c │ │ ├── input110.c │ │ ├── 
input111.c │ │ ├── input112.c │ │ ├── input113.c │ │ ├── input114.c │ │ ├── input115.c │ │ ├── input116.c │ │ ├── input117.c │ │ ├── input118.c │ │ ├── mktests │ │ ├── out.input001.c │ │ ├── out.input002.c │ │ ├── out.input003.c │ │ ├── out.input004.c │ │ ├── out.input005.c │ │ ├── out.input006.c │ │ ├── out.input007.c │ │ ├── out.input008.c │ │ ├── out.input009.c │ │ ├── out.input010.c │ │ ├── out.input011.c │ │ ├── out.input012.c │ │ ├── out.input013.c │ │ ├── out.input014.c │ │ ├── out.input015.c │ │ ├── out.input016.c │ │ ├── out.input017.c │ │ ├── out.input018.c │ │ ├── out.input018a.c │ │ ├── out.input019.c │ │ ├── out.input020.c │ │ ├── out.input021.c │ │ ├── out.input022.c │ │ ├── out.input023.c │ │ ├── out.input024.c │ │ ├── out.input025.c │ │ ├── out.input026.c │ │ ├── out.input027.c │ │ ├── out.input028.c │ │ ├── out.input029.c │ │ ├── out.input030.c │ │ ├── out.input053.c │ │ ├── out.input054.c │ │ ├── out.input055.c │ │ ├── out.input058.c │ │ ├── out.input062.c │ │ ├── out.input063.c │ │ ├── out.input067.c │ │ ├── out.input070.c │ │ ├── out.input071.c │ │ ├── out.input074.c │ │ ├── out.input080.c │ │ ├── out.input081.c │ │ ├── out.input082.c │ │ ├── out.input083.c │ │ ├── out.input084.c │ │ ├── out.input088.c │ │ ├── out.input089.c │ │ ├── out.input090.c │ │ ├── out.input091.c │ │ ├── out.input099.c │ │ ├── out.input100.c │ │ ├── out.input101.c │ │ ├── out.input106.c │ │ ├── out.input107.c │ │ ├── out.input108.c │ │ ├── out.input109.c │ │ ├── out.input110.c │ │ ├── out.input111.c │ │ ├── out.input112.c │ │ ├── out.input113.c │ │ ├── out.input114.c │ │ ├── out.input115.c │ │ ├── out.input116.c │ │ ├── out.input117.c │ │ ├── runtests │ │ └── runtestsn │ ├── tree.c │ └── types.c ├── 49_Ternary/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cg_arm.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── include/ │ │ ├── ctype.h │ │ ├── errno.h │ │ ├── fcntl.h │ │ ├── stddef.h │ │ ├── stdio.h │ │ ├── stdlib.h │ │ 
├── string.h │ │ └── unistd.h │ ├── main.c │ ├── misc.c │ ├── opt.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── err.input031.c │ │ ├── err.input032.c │ │ ├── err.input033.c │ │ ├── err.input034.c │ │ ├── err.input035.c │ │ ├── err.input036.c │ │ ├── err.input037.c │ │ ├── err.input038.c │ │ ├── err.input039.c │ │ ├── err.input040.c │ │ ├── err.input041.c │ │ ├── err.input042.c │ │ ├── err.input043.c │ │ ├── err.input044.c │ │ ├── err.input045.c │ │ ├── err.input046.c │ │ ├── err.input047.c │ │ ├── err.input048.c │ │ ├── err.input049.c │ │ ├── err.input050.c │ │ ├── err.input051.c │ │ ├── err.input052.c │ │ ├── err.input056.c │ │ ├── err.input057.c │ │ ├── err.input059.c │ │ ├── err.input060.c │ │ ├── err.input061.c │ │ ├── err.input064.c │ │ ├── err.input065.c │ │ ├── err.input066.c │ │ ├── err.input068.c │ │ ├── err.input069.c │ │ ├── err.input072.c │ │ ├── err.input073.c │ │ ├── err.input075.c │ │ ├── err.input076.c │ │ ├── err.input077.c │ │ ├── err.input078.c │ │ ├── err.input079.c │ │ ├── err.input085.c │ │ ├── err.input086.c │ │ ├── err.input087.c │ │ ├── err.input092.c │ │ ├── err.input093.c │ │ ├── err.input094.c │ │ ├── err.input095.c │ │ ├── err.input096.c │ │ ├── err.input097.c │ │ ├── err.input098.c │ │ ├── err.input102.c │ │ ├── err.input103.c │ │ ├── err.input104.c │ │ ├── err.input105.c │ │ ├── err.input118.c │ │ ├── input001.c │ │ ├── input002.c │ │ ├── input003.c │ │ ├── input004.c │ │ ├── input005.c │ │ ├── input006.c │ │ ├── input007.c │ │ ├── input008.c │ │ ├── input009.c │ │ ├── input010.c │ │ ├── input011.c │ │ ├── input012.c │ │ ├── input013.c │ │ ├── input014.c │ │ ├── input015.c │ │ ├── input016.c │ │ ├── input017.c │ │ ├── input018.c │ │ ├── input018a.c │ │ ├── input019.c │ │ ├── input020.c │ │ ├── input021.c │ │ ├── input022.c │ │ ├── input023.c │ │ ├── input024.c │ │ ├── input025.c │ │ ├── input026.c │ │ ├── input027.c │ │ ├── input028.c │ │ ├── input029.c │ │ ├── input030.c │ │ ├── input031.c │ │ ├── input032.c │ │ ├── 
input033.c │ │ ├── input034.c │ │ ├── input035.c │ │ ├── input036.c │ │ ├── input037.c │ │ ├── input038.c │ │ ├── input039.c │ │ ├── input040.c │ │ ├── input041.c │ │ ├── input042.c │ │ ├── input043.c │ │ ├── input044.c │ │ ├── input045.c │ │ ├── input046.c │ │ ├── input047.c │ │ ├── input048.c │ │ ├── input049.c │ │ ├── input050.c │ │ ├── input051.c │ │ ├── input052.c │ │ ├── input053.c │ │ ├── input054.c │ │ ├── input055.c │ │ ├── input056.c │ │ ├── input057.c │ │ ├── input058.c │ │ ├── input059.c │ │ ├── input060.c │ │ ├── input061.c │ │ ├── input062.c │ │ ├── input063.c │ │ ├── input064.c │ │ ├── input065.c │ │ ├── input066.c │ │ ├── input067.c │ │ ├── input068.c │ │ ├── input069.c │ │ ├── input070.c │ │ ├── input071.c │ │ ├── input072.c │ │ ├── input073.c │ │ ├── input074.c │ │ ├── input075.c │ │ ├── input076.c │ │ ├── input077.c │ │ ├── input078.c │ │ ├── input079.c │ │ ├── input080.c │ │ ├── input081.c │ │ ├── input082.c │ │ ├── input083.c │ │ ├── input084.c │ │ ├── input085.c │ │ ├── input086.c │ │ ├── input087.c │ │ ├── input088.c │ │ ├── input089.c │ │ ├── input090.c │ │ ├── input091.c │ │ ├── input092.c │ │ ├── input093.c │ │ ├── input094.c │ │ ├── input095.c │ │ ├── input096.c │ │ ├── input097.c │ │ ├── input098.c │ │ ├── input099.c │ │ ├── input100.c │ │ ├── input101.c │ │ ├── input102.c │ │ ├── input103.c │ │ ├── input104.c │ │ ├── input105.c │ │ ├── input106.c │ │ ├── input107.c │ │ ├── input108.c │ │ ├── input109.c │ │ ├── input110.c │ │ ├── input111.c │ │ ├── input112.c │ │ ├── input113.c │ │ ├── input114.c │ │ ├── input115.c │ │ ├── input116.c │ │ ├── input117.c │ │ ├── input118.c │ │ ├── input119.c │ │ ├── input120.c │ │ ├── input121.c │ │ ├── mktests │ │ ├── out.input001.c │ │ ├── out.input002.c │ │ ├── out.input003.c │ │ ├── out.input004.c │ │ ├── out.input005.c │ │ ├── out.input006.c │ │ ├── out.input007.c │ │ ├── out.input008.c │ │ ├── out.input009.c │ │ ├── out.input010.c │ │ ├── out.input011.c │ │ ├── out.input012.c │ │ ├── out.input013.c │ 
│ ├── out.input014.c │ │ ├── out.input015.c │ │ ├── out.input016.c │ │ ├── out.input017.c │ │ ├── out.input018.c │ │ ├── out.input018a.c │ │ ├── out.input019.c │ │ ├── out.input020.c │ │ ├── out.input021.c │ │ ├── out.input022.c │ │ ├── out.input023.c │ │ ├── out.input024.c │ │ ├── out.input025.c │ │ ├── out.input026.c │ │ ├── out.input027.c │ │ ├── out.input028.c │ │ ├── out.input029.c │ │ ├── out.input030.c │ │ ├── out.input053.c │ │ ├── out.input054.c │ │ ├── out.input055.c │ │ ├── out.input058.c │ │ ├── out.input062.c │ │ ├── out.input063.c │ │ ├── out.input067.c │ │ ├── out.input070.c │ │ ├── out.input071.c │ │ ├── out.input074.c │ │ ├── out.input080.c │ │ ├── out.input081.c │ │ ├── out.input082.c │ │ ├── out.input083.c │ │ ├── out.input084.c │ │ ├── out.input088.c │ │ ├── out.input089.c │ │ ├── out.input090.c │ │ ├── out.input091.c │ │ ├── out.input099.c │ │ ├── out.input100.c │ │ ├── out.input101.c │ │ ├── out.input106.c │ │ ├── out.input107.c │ │ ├── out.input108.c │ │ ├── out.input109.c │ │ ├── out.input110.c │ │ ├── out.input111.c │ │ ├── out.input112.c │ │ ├── out.input113.c │ │ ├── out.input114.c │ │ ├── out.input115.c │ │ ├── out.input116.c │ │ ├── out.input117.c │ │ ├── out.input119.c │ │ ├── out.input120.c │ │ ├── out.input121.c │ │ ├── runtests │ │ └── runtestsn │ ├── tree.c │ └── types.c ├── 50_Mop_up_pt1/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cg_arm.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── include/ │ │ ├── ctype.h │ │ ├── errno.h │ │ ├── fcntl.h │ │ ├── stddef.h │ │ ├── stdio.h │ │ ├── stdlib.h │ │ ├── string.h │ │ └── unistd.h │ ├── main.c │ ├── misc.c │ ├── opt.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── err.input031.c │ │ ├── err.input032.c │ │ ├── err.input033.c │ │ ├── err.input034.c │ │ ├── err.input035.c │ │ ├── err.input036.c │ │ ├── err.input037.c │ │ ├── err.input038.c │ │ ├── err.input039.c │ │ ├── err.input040.c │ │ ├── err.input041.c │ │ ├── 
err.input042.c │ │ ├── err.input043.c │ │ ├── err.input044.c │ │ ├── err.input045.c │ │ ├── err.input046.c │ │ ├── err.input047.c │ │ ├── err.input048.c │ │ ├── err.input049.c │ │ ├── err.input050.c │ │ ├── err.input051.c │ │ ├── err.input052.c │ │ ├── err.input056.c │ │ ├── err.input057.c │ │ ├── err.input059.c │ │ ├── err.input060.c │ │ ├── err.input061.c │ │ ├── err.input064.c │ │ ├── err.input065.c │ │ ├── err.input066.c │ │ ├── err.input068.c │ │ ├── err.input069.c │ │ ├── err.input072.c │ │ ├── err.input073.c │ │ ├── err.input075.c │ │ ├── err.input076.c │ │ ├── err.input077.c │ │ ├── err.input078.c │ │ ├── err.input079.c │ │ ├── err.input085.c │ │ ├── err.input086.c │ │ ├── err.input087.c │ │ ├── err.input092.c │ │ ├── err.input093.c │ │ ├── err.input094.c │ │ ├── err.input095.c │ │ ├── err.input096.c │ │ ├── err.input097.c │ │ ├── err.input098.c │ │ ├── err.input102.c │ │ ├── err.input103.c │ │ ├── err.input104.c │ │ ├── err.input105.c │ │ ├── err.input118.c │ │ ├── input001.c │ │ ├── input002.c │ │ ├── input003.c │ │ ├── input004.c │ │ ├── input005.c │ │ ├── input006.c │ │ ├── input007.c │ │ ├── input008.c │ │ ├── input009.c │ │ ├── input010.c │ │ ├── input011.c │ │ ├── input012.c │ │ ├── input013.c │ │ ├── input014.c │ │ ├── input015.c │ │ ├── input016.c │ │ ├── input017.c │ │ ├── input018.c │ │ ├── input018a.c │ │ ├── input019.c │ │ ├── input020.c │ │ ├── input021.c │ │ ├── input022.c │ │ ├── input023.c │ │ ├── input024.c │ │ ├── input025.c │ │ ├── input026.c │ │ ├── input027.c │ │ ├── input028.c │ │ ├── input029.c │ │ ├── input030.c │ │ ├── input031.c │ │ ├── input032.c │ │ ├── input033.c │ │ ├── input034.c │ │ ├── input035.c │ │ ├── input036.c │ │ ├── input037.c │ │ ├── input038.c │ │ ├── input039.c │ │ ├── input040.c │ │ ├── input041.c │ │ ├── input042.c │ │ ├── input043.c │ │ ├── input044.c │ │ ├── input045.c │ │ ├── input046.c │ │ ├── input047.c │ │ ├── input048.c │ │ ├── input049.c │ │ ├── input050.c │ │ ├── input051.c │ │ ├── input052.c │ │ ├── 
input053.c │ │ ├── input054.c │ │ ├── input055.c │ │ ├── input056.c │ │ ├── input057.c │ │ ├── input058.c │ │ ├── input059.c │ │ ├── input060.c │ │ ├── input061.c │ │ ├── input062.c │ │ ├── input063.c │ │ ├── input064.c │ │ ├── input065.c │ │ ├── input066.c │ │ ├── input067.c │ │ ├── input068.c │ │ ├── input069.c │ │ ├── input070.c │ │ ├── input071.c │ │ ├── input072.c │ │ ├── input073.c │ │ ├── input074.c │ │ ├── input075.c │ │ ├── input076.c │ │ ├── input077.c │ │ ├── input078.c │ │ ├── input079.c │ │ ├── input080.c │ │ ├── input081.c │ │ ├── input082.c │ │ ├── input083.c │ │ ├── input084.c │ │ ├── input085.c │ │ ├── input086.c │ │ ├── input087.c │ │ ├── input088.c │ │ ├── input089.c │ │ ├── input090.c │ │ ├── input091.c │ │ ├── input092.c │ │ ├── input093.c │ │ ├── input094.c │ │ ├── input095.c │ │ ├── input096.c │ │ ├── input097.c │ │ ├── input098.c │ │ ├── input099.c │ │ ├── input100.c │ │ ├── input101.c │ │ ├── input102.c │ │ ├── input103.c │ │ ├── input104.c │ │ ├── input105.c │ │ ├── input106.c │ │ ├── input107.c │ │ ├── input108.c │ │ ├── input109.c │ │ ├── input110.c │ │ ├── input111.c │ │ ├── input112.c │ │ ├── input113.c │ │ ├── input114.c │ │ ├── input115.c │ │ ├── input116.c │ │ ├── input117.c │ │ ├── input118.c │ │ ├── input119.c │ │ ├── input120.c │ │ ├── input121.c │ │ ├── input122.c │ │ ├── input123.c │ │ ├── mktests │ │ ├── out.input001.c │ │ ├── out.input002.c │ │ ├── out.input003.c │ │ ├── out.input004.c │ │ ├── out.input005.c │ │ ├── out.input006.c │ │ ├── out.input007.c │ │ ├── out.input008.c │ │ ├── out.input009.c │ │ ├── out.input010.c │ │ ├── out.input011.c │ │ ├── out.input012.c │ │ ├── out.input013.c │ │ ├── out.input014.c │ │ ├── out.input015.c │ │ ├── out.input016.c │ │ ├── out.input017.c │ │ ├── out.input018.c │ │ ├── out.input018a.c │ │ ├── out.input019.c │ │ ├── out.input020.c │ │ ├── out.input021.c │ │ ├── out.input022.c │ │ ├── out.input023.c │ │ ├── out.input024.c │ │ ├── out.input025.c │ │ ├── out.input026.c │ │ ├── 
out.input027.c │ │ ├── out.input028.c │ │ ├── out.input029.c │ │ ├── out.input030.c │ │ ├── out.input053.c │ │ ├── out.input054.c │ │ ├── out.input055.c │ │ ├── out.input058.c │ │ ├── out.input062.c │ │ ├── out.input063.c │ │ ├── out.input067.c │ │ ├── out.input070.c │ │ ├── out.input071.c │ │ ├── out.input074.c │ │ ├── out.input080.c │ │ ├── out.input081.c │ │ ├── out.input082.c │ │ ├── out.input083.c │ │ ├── out.input084.c │ │ ├── out.input088.c │ │ ├── out.input089.c │ │ ├── out.input090.c │ │ ├── out.input091.c │ │ ├── out.input099.c │ │ ├── out.input100.c │ │ ├── out.input101.c │ │ ├── out.input106.c │ │ ├── out.input107.c │ │ ├── out.input108.c │ │ ├── out.input109.c │ │ ├── out.input110.c │ │ ├── out.input111.c │ │ ├── out.input112.c │ │ ├── out.input113.c │ │ ├── out.input114.c │ │ ├── out.input115.c │ │ ├── out.input116.c │ │ ├── out.input117.c │ │ ├── out.input119.c │ │ ├── out.input120.c │ │ ├── out.input121.c │ │ ├── out.input122.c │ │ ├── out.input123.c │ │ ├── runtests │ │ └── runtestsn │ ├── tree.c │ └── types.c ├── 51_Arrays_pt2/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cg_arm.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── include/ │ │ ├── ctype.h │ │ ├── errno.h │ │ ├── fcntl.h │ │ ├── stddef.h │ │ ├── stdio.h │ │ ├── stdlib.h │ │ ├── string.h │ │ └── unistd.h │ ├── main.c │ ├── misc.c │ ├── opt.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── err.input031.c │ │ ├── err.input032.c │ │ ├── err.input033.c │ │ ├── err.input034.c │ │ ├── err.input035.c │ │ ├── err.input036.c │ │ ├── err.input037.c │ │ ├── err.input038.c │ │ ├── err.input039.c │ │ ├── err.input040.c │ │ ├── err.input041.c │ │ ├── err.input042.c │ │ ├── err.input043.c │ │ ├── err.input044.c │ │ ├── err.input045.c │ │ ├── err.input046.c │ │ ├── err.input047.c │ │ ├── err.input048.c │ │ ├── err.input049.c │ │ ├── err.input050.c │ │ ├── err.input051.c │ │ ├── err.input052.c │ │ ├── err.input056.c │ │ ├── err.input057.c │ │ 
├── err.input059.c │ │ ├── err.input060.c │ │ ├── err.input061.c │ │ ├── err.input064.c │ │ ├── err.input065.c │ │ ├── err.input066.c │ │ ├── err.input068.c │ │ ├── err.input069.c │ │ ├── err.input072.c │ │ ├── err.input073.c │ │ ├── err.input075.c │ │ ├── err.input076.c │ │ ├── err.input077.c │ │ ├── err.input078.c │ │ ├── err.input079.c │ │ ├── err.input085.c │ │ ├── err.input086.c │ │ ├── err.input087.c │ │ ├── err.input092.c │ │ ├── err.input093.c │ │ ├── err.input094.c │ │ ├── err.input095.c │ │ ├── err.input096.c │ │ ├── err.input097.c │ │ ├── err.input098.c │ │ ├── err.input102.c │ │ ├── err.input103.c │ │ ├── err.input104.c │ │ ├── err.input105.c │ │ ├── err.input118.c │ │ ├── err.input124.c │ │ ├── err.input126.c │ │ ├── input001.c │ │ ├── input002.c │ │ ├── input003.c │ │ ├── input004.c │ │ ├── input005.c │ │ ├── input006.c │ │ ├── input007.c │ │ ├── input008.c │ │ ├── input009.c │ │ ├── input010.c │ │ ├── input011.c │ │ ├── input012.c │ │ ├── input013.c │ │ ├── input014.c │ │ ├── input015.c │ │ ├── input016.c │ │ ├── input017.c │ │ ├── input018.c │ │ ├── input018a.c │ │ ├── input019.c │ │ ├── input020.c │ │ ├── input021.c │ │ ├── input022.c │ │ ├── input023.c │ │ ├── input024.c │ │ ├── input025.c │ │ ├── input026.c │ │ ├── input027.c │ │ ├── input028.c │ │ ├── input029.c │ │ ├── input030.c │ │ ├── input031.c │ │ ├── input032.c │ │ ├── input033.c │ │ ├── input034.c │ │ ├── input035.c │ │ ├── input036.c │ │ ├── input037.c │ │ ├── input038.c │ │ ├── input039.c │ │ ├── input040.c │ │ ├── input041.c │ │ ├── input042.c │ │ ├── input043.c │ │ ├── input044.c │ │ ├── input045.c │ │ ├── input046.c │ │ ├── input047.c │ │ ├── input048.c │ │ ├── input049.c │ │ ├── input050.c │ │ ├── input051.c │ │ ├── input052.c │ │ ├── input053.c │ │ ├── input054.c │ │ ├── input055.c │ │ ├── input056.c │ │ ├── input057.c │ │ ├── input058.c │ │ ├── input059.c │ │ ├── input060.c │ │ ├── input061.c │ │ ├── input062.c │ │ ├── input063.c │ │ ├── input064.c │ │ ├── input065.c │ │ ├── 
input066.c │ │ ├── input067.c │ │ ├── input068.c │ │ ├── input069.c │ │ ├── input070.c │ │ ├── input071.c │ │ ├── input072.c │ │ ├── input073.c │ │ ├── input074.c │ │ ├── input075.c │ │ ├── input076.c │ │ ├── input077.c │ │ ├── input078.c │ │ ├── input079.c │ │ ├── input080.c │ │ ├── input081.c │ │ ├── input082.c │ │ ├── input083.c │ │ ├── input084.c │ │ ├── input085.c │ │ ├── input086.c │ │ ├── input087.c │ │ ├── input088.c │ │ ├── input089.c │ │ ├── input090.c │ │ ├── input091.c │ │ ├── input092.c │ │ ├── input093.c │ │ ├── input094.c │ │ ├── input095.c │ │ ├── input096.c │ │ ├── input097.c │ │ ├── input098.c │ │ ├── input099.c │ │ ├── input100.c │ │ ├── input101.c │ │ ├── input102.c │ │ ├── input103.c │ │ ├── input104.c │ │ ├── input105.c │ │ ├── input106.c │ │ ├── input107.c │ │ ├── input108.c │ │ ├── input109.c │ │ ├── input110.c │ │ ├── input111.c │ │ ├── input112.c │ │ ├── input113.c │ │ ├── input114.c │ │ ├── input115.c │ │ ├── input116.c │ │ ├── input117.c │ │ ├── input118.c │ │ ├── input119.c │ │ ├── input120.c │ │ ├── input121.c │ │ ├── input122.c │ │ ├── input123.c │ │ ├── input124.c │ │ ├── input125.c │ │ ├── input126.c │ │ ├── input127.c │ │ ├── mktests │ │ ├── out.input001.c │ │ ├── out.input002.c │ │ ├── out.input003.c │ │ ├── out.input004.c │ │ ├── out.input005.c │ │ ├── out.input006.c │ │ ├── out.input007.c │ │ ├── out.input008.c │ │ ├── out.input009.c │ │ ├── out.input010.c │ │ ├── out.input011.c │ │ ├── out.input012.c │ │ ├── out.input013.c │ │ ├── out.input014.c │ │ ├── out.input015.c │ │ ├── out.input016.c │ │ ├── out.input017.c │ │ ├── out.input018.c │ │ ├── out.input018a.c │ │ ├── out.input019.c │ │ ├── out.input020.c │ │ ├── out.input021.c │ │ ├── out.input022.c │ │ ├── out.input023.c │ │ ├── out.input024.c │ │ ├── out.input025.c │ │ ├── out.input026.c │ │ ├── out.input027.c │ │ ├── out.input028.c │ │ ├── out.input029.c │ │ ├── out.input030.c │ │ ├── out.input053.c │ │ ├── out.input054.c │ │ ├── out.input055.c │ │ ├── out.input058.c │ │ ├── 
out.input062.c │ │ ├── out.input063.c │ │ ├── out.input067.c │ │ ├── out.input070.c │ │ ├── out.input071.c │ │ ├── out.input074.c │ │ ├── out.input080.c │ │ ├── out.input081.c │ │ ├── out.input082.c │ │ ├── out.input083.c │ │ ├── out.input084.c │ │ ├── out.input088.c │ │ ├── out.input089.c │ │ ├── out.input090.c │ │ ├── out.input091.c │ │ ├── out.input099.c │ │ ├── out.input100.c │ │ ├── out.input101.c │ │ ├── out.input106.c │ │ ├── out.input107.c │ │ ├── out.input108.c │ │ ├── out.input109.c │ │ ├── out.input110.c │ │ ├── out.input111.c │ │ ├── out.input112.c │ │ ├── out.input113.c │ │ ├── out.input114.c │ │ ├── out.input115.c │ │ ├── out.input116.c │ │ ├── out.input117.c │ │ ├── out.input119.c │ │ ├── out.input120.c │ │ ├── out.input121.c │ │ ├── out.input122.c │ │ ├── out.input123.c │ │ ├── out.input125.c │ │ ├── out.input127.c │ │ ├── runtests │ │ └── runtestsn │ ├── tree.c │ └── types.c ├── 52_Pointers_pt2/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cg_arm.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── include/ │ │ ├── ctype.h │ │ ├── errno.h │ │ ├── fcntl.h │ │ ├── stddef.h │ │ ├── stdio.h │ │ ├── stdlib.h │ │ ├── string.h │ │ └── unistd.h │ ├── main.c │ ├── misc.c │ ├── opt.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── err.input031.c │ │ ├── err.input032.c │ │ ├── err.input033.c │ │ ├── err.input034.c │ │ ├── err.input035.c │ │ ├── err.input036.c │ │ ├── err.input037.c │ │ ├── err.input038.c │ │ ├── err.input039.c │ │ ├── err.input040.c │ │ ├── err.input041.c │ │ ├── err.input042.c │ │ ├── err.input043.c │ │ ├── err.input044.c │ │ ├── err.input045.c │ │ ├── err.input046.c │ │ ├── err.input047.c │ │ ├── err.input048.c │ │ ├── err.input049.c │ │ ├── err.input050.c │ │ ├── err.input051.c │ │ ├── err.input052.c │ │ ├── err.input056.c │ │ ├── err.input057.c │ │ ├── err.input059.c │ │ ├── err.input060.c │ │ ├── err.input061.c │ │ ├── err.input064.c │ │ ├── err.input065.c │ │ ├── err.input066.c │ 
│ ├── err.input068.c │ │ ├── err.input069.c │ │ ├── err.input072.c │ │ ├── err.input073.c │ │ ├── err.input075.c │ │ ├── err.input076.c │ │ ├── err.input077.c │ │ ├── err.input078.c │ │ ├── err.input079.c │ │ ├── err.input085.c │ │ ├── err.input086.c │ │ ├── err.input087.c │ │ ├── err.input092.c │ │ ├── err.input093.c │ │ ├── err.input094.c │ │ ├── err.input095.c │ │ ├── err.input096.c │ │ ├── err.input097.c │ │ ├── err.input098.c │ │ ├── err.input102.c │ │ ├── err.input103.c │ │ ├── err.input104.c │ │ ├── err.input105.c │ │ ├── err.input118.c │ │ ├── err.input124.c │ │ ├── err.input126.c │ │ ├── err.input129.c │ │ ├── input001.c │ │ ├── input002.c │ │ ├── input003.c │ │ ├── input004.c │ │ ├── input005.c │ │ ├── input006.c │ │ ├── input007.c │ │ ├── input008.c │ │ ├── input009.c │ │ ├── input010.c │ │ ├── input011.c │ │ ├── input012.c │ │ ├── input013.c │ │ ├── input014.c │ │ ├── input015.c │ │ ├── input016.c │ │ ├── input017.c │ │ ├── input018.c │ │ ├── input018a.c │ │ ├── input019.c │ │ ├── input020.c │ │ ├── input021.c │ │ ├── input022.c │ │ ├── input023.c │ │ ├── input024.c │ │ ├── input025.c │ │ ├── input026.c │ │ ├── input027.c │ │ ├── input028.c │ │ ├── input029.c │ │ ├── input030.c │ │ ├── input031.c │ │ ├── input032.c │ │ ├── input033.c │ │ ├── input034.c │ │ ├── input035.c │ │ ├── input036.c │ │ ├── input037.c │ │ ├── input038.c │ │ ├── input039.c │ │ ├── input040.c │ │ ├── input041.c │ │ ├── input042.c │ │ ├── input043.c │ │ ├── input044.c │ │ ├── input045.c │ │ ├── input046.c │ │ ├── input047.c │ │ ├── input048.c │ │ ├── input049.c │ │ ├── input050.c │ │ ├── input051.c │ │ ├── input052.c │ │ ├── input053.c │ │ ├── input054.c │ │ ├── input055.c │ │ ├── input056.c │ │ ├── input057.c │ │ ├── input058.c │ │ ├── input059.c │ │ ├── input060.c │ │ ├── input061.c │ │ ├── input062.c │ │ ├── input063.c │ │ ├── input064.c │ │ ├── input065.c │ │ ├── input066.c │ │ ├── input067.c │ │ ├── input068.c │ │ ├── input069.c │ │ ├── input070.c │ │ ├── input071.c │ │ ├── 
input072.c │ │ ├── input073.c │ │ ├── input074.c │ │ ├── input075.c │ │ ├── input076.c │ │ ├── input077.c │ │ ├── input078.c │ │ ├── input079.c │ │ ├── input080.c │ │ ├── input081.c │ │ ├── input082.c │ │ ├── input083.c │ │ ├── input084.c │ │ ├── input085.c │ │ ├── input086.c │ │ ├── input087.c │ │ ├── input088.c │ │ ├── input089.c │ │ ├── input090.c │ │ ├── input091.c │ │ ├── input092.c │ │ ├── input093.c │ │ ├── input094.c │ │ ├── input095.c │ │ ├── input096.c │ │ ├── input097.c │ │ ├── input098.c │ │ ├── input099.c │ │ ├── input100.c │ │ ├── input101.c │ │ ├── input102.c │ │ ├── input103.c │ │ ├── input104.c │ │ ├── input105.c │ │ ├── input106.c │ │ ├── input107.c │ │ ├── input108.c │ │ ├── input109.c │ │ ├── input110.c │ │ ├── input111.c │ │ ├── input112.c │ │ ├── input113.c │ │ ├── input114.c │ │ ├── input115.c │ │ ├── input116.c │ │ ├── input117.c │ │ ├── input118.c │ │ ├── input119.c │ │ ├── input120.c │ │ ├── input121.c │ │ ├── input122.c │ │ ├── input123.c │ │ ├── input124.c │ │ ├── input125.c │ │ ├── input126.c │ │ ├── input127.c │ │ ├── input128.c │ │ ├── input129.c │ │ ├── mktests │ │ ├── out.input001.c │ │ ├── out.input002.c │ │ ├── out.input003.c │ │ ├── out.input004.c │ │ ├── out.input005.c │ │ ├── out.input006.c │ │ ├── out.input007.c │ │ ├── out.input008.c │ │ ├── out.input009.c │ │ ├── out.input010.c │ │ ├── out.input011.c │ │ ├── out.input012.c │ │ ├── out.input013.c │ │ ├── out.input014.c │ │ ├── out.input015.c │ │ ├── out.input016.c │ │ ├── out.input017.c │ │ ├── out.input018.c │ │ ├── out.input018a.c │ │ ├── out.input019.c │ │ ├── out.input020.c │ │ ├── out.input021.c │ │ ├── out.input022.c │ │ ├── out.input023.c │ │ ├── out.input024.c │ │ ├── out.input025.c │ │ ├── out.input026.c │ │ ├── out.input027.c │ │ ├── out.input028.c │ │ ├── out.input029.c │ │ ├── out.input030.c │ │ ├── out.input053.c │ │ ├── out.input054.c │ │ ├── out.input055.c │ │ ├── out.input058.c │ │ ├── out.input062.c │ │ ├── out.input063.c │ │ ├── out.input067.c │ │ ├── 
out.input070.c │ │ ├── out.input071.c │ │ ├── out.input074.c │ │ ├── out.input080.c │ │ ├── out.input081.c │ │ ├── out.input082.c │ │ ├── out.input083.c │ │ ├── out.input084.c │ │ ├── out.input088.c │ │ ├── out.input089.c │ │ ├── out.input090.c │ │ ├── out.input091.c │ │ ├── out.input099.c │ │ ├── out.input100.c │ │ ├── out.input101.c │ │ ├── out.input106.c │ │ ├── out.input107.c │ │ ├── out.input108.c │ │ ├── out.input109.c │ │ ├── out.input110.c │ │ ├── out.input111.c │ │ ├── out.input112.c │ │ ├── out.input113.c │ │ ├── out.input114.c │ │ ├── out.input115.c │ │ ├── out.input116.c │ │ ├── out.input117.c │ │ ├── out.input119.c │ │ ├── out.input120.c │ │ ├── out.input121.c │ │ ├── out.input122.c │ │ ├── out.input123.c │ │ ├── out.input125.c │ │ ├── out.input127.c │ │ ├── out.input128.c │ │ ├── runtests │ │ └── runtestsn │ ├── tree.c │ └── types.c ├── 53_Mop_up_pt2/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cg_arm.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── include/ │ │ ├── ctype.h │ │ ├── errno.h │ │ ├── fcntl.h │ │ ├── stddef.h │ │ ├── stdio.h │ │ ├── stdlib.h │ │ ├── string.h │ │ └── unistd.h │ ├── main.c │ ├── misc.c │ ├── opt.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── err.input031.c │ │ ├── err.input032.c │ │ ├── err.input033.c │ │ ├── err.input034.c │ │ ├── err.input035.c │ │ ├── err.input036.c │ │ ├── err.input037.c │ │ ├── err.input038.c │ │ ├── err.input039.c │ │ ├── err.input040.c │ │ ├── err.input041.c │ │ ├── err.input042.c │ │ ├── err.input043.c │ │ ├── err.input044.c │ │ ├── err.input045.c │ │ ├── err.input046.c │ │ ├── err.input047.c │ │ ├── err.input048.c │ │ ├── err.input049.c │ │ ├── err.input050.c │ │ ├── err.input051.c │ │ ├── err.input052.c │ │ ├── err.input056.c │ │ ├── err.input057.c │ │ ├── err.input059.c │ │ ├── err.input060.c │ │ ├── err.input061.c │ │ ├── err.input064.c │ │ ├── err.input065.c │ │ ├── err.input066.c │ │ ├── err.input068.c │ │ ├── err.input069.c │ │ 
├── err.input072.c │ │ ├── err.input073.c │ │ ├── err.input075.c │ │ ├── err.input076.c │ │ ├── err.input077.c │ │ ├── err.input078.c │ │ ├── err.input079.c │ │ ├── err.input085.c │ │ ├── err.input086.c │ │ ├── err.input087.c │ │ ├── err.input092.c │ │ ├── err.input093.c │ │ ├── err.input094.c │ │ ├── err.input095.c │ │ ├── err.input096.c │ │ ├── err.input097.c │ │ ├── err.input098.c │ │ ├── err.input102.c │ │ ├── err.input103.c │ │ ├── err.input104.c │ │ ├── err.input105.c │ │ ├── err.input118.c │ │ ├── err.input124.c │ │ ├── err.input126.c │ │ ├── err.input129.c │ │ ├── err.input136.c │ │ ├── input001.c │ │ ├── input002.c │ │ ├── input003.c │ │ ├── input004.c │ │ ├── input005.c │ │ ├── input006.c │ │ ├── input007.c │ │ ├── input008.c │ │ ├── input009.c │ │ ├── input010.c │ │ ├── input011.c │ │ ├── input012.c │ │ ├── input013.c │ │ ├── input014.c │ │ ├── input015.c │ │ ├── input016.c │ │ ├── input017.c │ │ ├── input018.c │ │ ├── input018a.c │ │ ├── input019.c │ │ ├── input020.c │ │ ├── input021.c │ │ ├── input022.c │ │ ├── input023.c │ │ ├── input024.c │ │ ├── input025.c │ │ ├── input026.c │ │ ├── input027.c │ │ ├── input028.c │ │ ├── input029.c │ │ ├── input030.c │ │ ├── input031.c │ │ ├── input032.c │ │ ├── input033.c │ │ ├── input034.c │ │ ├── input035.c │ │ ├── input036.c │ │ ├── input037.c │ │ ├── input038.c │ │ ├── input039.c │ │ ├── input040.c │ │ ├── input041.c │ │ ├── input042.c │ │ ├── input043.c │ │ ├── input044.c │ │ ├── input045.c │ │ ├── input046.c │ │ ├── input047.c │ │ ├── input048.c │ │ ├── input049.c │ │ ├── input050.c │ │ ├── input051.c │ │ ├── input052.c │ │ ├── input053.c │ │ ├── input054.c │ │ ├── input055.c │ │ ├── input056.c │ │ ├── input057.c │ │ ├── input058.c │ │ ├── input059.c │ │ ├── input060.c │ │ ├── input061.c │ │ ├── input062.c │ │ ├── input063.c │ │ ├── input064.c │ │ ├── input065.c │ │ ├── input066.c │ │ ├── input067.c │ │ ├── input068.c │ │ ├── input069.c │ │ ├── input070.c │ │ ├── input071.c │ │ ├── input072.c │ │ ├── 
input073.c │ │ ├── input074.c │ │ ├── input075.c │ │ ├── input076.c │ │ ├── input077.c │ │ ├── input078.c │ │ ├── input079.c │ │ ├── input080.c │ │ ├── input081.c │ │ ├── input082.c │ │ ├── input083.c │ │ ├── input084.c │ │ ├── input085.c │ │ ├── input086.c │ │ ├── input087.c │ │ ├── input088.c │ │ ├── input089.c │ │ ├── input090.c │ │ ├── input091.c │ │ ├── input092.c │ │ ├── input093.c │ │ ├── input094.c │ │ ├── input095.c │ │ ├── input096.c │ │ ├── input097.c │ │ ├── input098.c │ │ ├── input099.c │ │ ├── input100.c │ │ ├── input101.c │ │ ├── input102.c │ │ ├── input103.c │ │ ├── input104.c │ │ ├── input105.c │ │ ├── input106.c │ │ ├── input107.c │ │ ├── input108.c │ │ ├── input109.c │ │ ├── input110.c │ │ ├── input111.c │ │ ├── input112.c │ │ ├── input113.c │ │ ├── input114.c │ │ ├── input115.c │ │ ├── input116.c │ │ ├── input117.c │ │ ├── input118.c │ │ ├── input119.c │ │ ├── input120.c │ │ ├── input121.c │ │ ├── input122.c │ │ ├── input123.c │ │ ├── input124.c │ │ ├── input125.c │ │ ├── input126.c │ │ ├── input127.c │ │ ├── input128.c │ │ ├── input129.c │ │ ├── input130.c │ │ ├── input131.c │ │ ├── input132.c │ │ ├── input133.c │ │ ├── input134.c │ │ ├── input135.c │ │ ├── input136.c │ │ ├── mktests │ │ ├── out.input001.c │ │ ├── out.input002.c │ │ ├── out.input003.c │ │ ├── out.input004.c │ │ ├── out.input005.c │ │ ├── out.input006.c │ │ ├── out.input007.c │ │ ├── out.input008.c │ │ ├── out.input009.c │ │ ├── out.input010.c │ │ ├── out.input011.c │ │ ├── out.input012.c │ │ ├── out.input013.c │ │ ├── out.input014.c │ │ ├── out.input015.c │ │ ├── out.input016.c │ │ ├── out.input017.c │ │ ├── out.input018.c │ │ ├── out.input018a.c │ │ ├── out.input019.c │ │ ├── out.input020.c │ │ ├── out.input021.c │ │ ├── out.input022.c │ │ ├── out.input023.c │ │ ├── out.input024.c │ │ ├── out.input025.c │ │ ├── out.input026.c │ │ ├── out.input027.c │ │ ├── out.input028.c │ │ ├── out.input029.c │ │ ├── out.input030.c │ │ ├── out.input053.c │ │ ├── out.input054.c │ │ ├── 
out.input055.c │ │ ├── out.input058.c │ │ ├── out.input062.c │ │ ├── out.input063.c │ │ ├── out.input067.c │ │ ├── out.input070.c │ │ ├── out.input071.c │ │ ├── out.input074.c │ │ ├── out.input080.c │ │ ├── out.input081.c │ │ ├── out.input082.c │ │ ├── out.input083.c │ │ ├── out.input084.c │ │ ├── out.input088.c │ │ ├── out.input089.c │ │ ├── out.input090.c │ │ ├── out.input091.c │ │ ├── out.input099.c │ │ ├── out.input100.c │ │ ├── out.input101.c │ │ ├── out.input106.c │ │ ├── out.input107.c │ │ ├── out.input108.c │ │ ├── out.input109.c │ │ ├── out.input110.c │ │ ├── out.input111.c │ │ ├── out.input112.c │ │ ├── out.input113.c │ │ ├── out.input114.c │ │ ├── out.input115.c │ │ ├── out.input116.c │ │ ├── out.input117.c │ │ ├── out.input119.c │ │ ├── out.input120.c │ │ ├── out.input121.c │ │ ├── out.input122.c │ │ ├── out.input123.c │ │ ├── out.input125.c │ │ ├── out.input127.c │ │ ├── out.input128.c │ │ ├── out.input130.c │ │ ├── out.input131.c │ │ ├── out.input132.c │ │ ├── out.input133.c │ │ ├── out.input134.c │ │ ├── out.input135.c │ │ ├── runtests │ │ └── runtestsn │ ├── tree.c │ └── types.c ├── 54_Reg_Spills/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cg_arm.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── include/ │ │ ├── ctype.h │ │ ├── errno.h │ │ ├── fcntl.h │ │ ├── stddef.h │ │ ├── stdio.h │ │ ├── stdlib.h │ │ ├── string.h │ │ └── unistd.h │ ├── main.c │ ├── misc.c │ ├── opt.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── err.input031.c │ │ ├── err.input032.c │ │ ├── err.input033.c │ │ ├── err.input034.c │ │ ├── err.input035.c │ │ ├── err.input036.c │ │ ├── err.input037.c │ │ ├── err.input038.c │ │ ├── err.input039.c │ │ ├── err.input040.c │ │ ├── err.input041.c │ │ ├── err.input042.c │ │ ├── err.input043.c │ │ ├── err.input044.c │ │ ├── err.input045.c │ │ ├── err.input046.c │ │ ├── err.input047.c │ │ ├── err.input048.c │ │ ├── err.input049.c │ │ ├── err.input050.c │ │ ├── err.input051.c │ │ 
├── err.input052.c │ │ ├── err.input056.c │ │ ├── err.input057.c │ │ ├── err.input059.c │ │ ├── err.input060.c │ │ ├── err.input061.c │ │ ├── err.input064.c │ │ ├── err.input065.c │ │ ├── err.input066.c │ │ ├── err.input068.c │ │ ├── err.input069.c │ │ ├── err.input072.c │ │ ├── err.input073.c │ │ ├── err.input075.c │ │ ├── err.input076.c │ │ ├── err.input077.c │ │ ├── err.input078.c │ │ ├── err.input079.c │ │ ├── err.input085.c │ │ ├── err.input086.c │ │ ├── err.input087.c │ │ ├── err.input092.c │ │ ├── err.input093.c │ │ ├── err.input094.c │ │ ├── err.input095.c │ │ ├── err.input096.c │ │ ├── err.input097.c │ │ ├── err.input098.c │ │ ├── err.input102.c │ │ ├── err.input103.c │ │ ├── err.input104.c │ │ ├── err.input105.c │ │ ├── err.input118.c │ │ ├── err.input124.c │ │ ├── err.input126.c │ │ ├── err.input129.c │ │ ├── input001.c │ │ ├── input002.c │ │ ├── input003.c │ │ ├── input004.c │ │ ├── input005.c │ │ ├── input006.c │ │ ├── input007.c │ │ ├── input008.c │ │ ├── input009.c │ │ ├── input010.c │ │ ├── input011.c │ │ ├── input012.c │ │ ├── input013.c │ │ ├── input014.c │ │ ├── input015.c │ │ ├── input016.c │ │ ├── input017.c │ │ ├── input018.c │ │ ├── input018a.c │ │ ├── input019.c │ │ ├── input020.c │ │ ├── input021.c │ │ ├── input022.c │ │ ├── input023.c │ │ ├── input024.c │ │ ├── input025.c │ │ ├── input026.c │ │ ├── input027.c │ │ ├── input028.c │ │ ├── input029.c │ │ ├── input030.c │ │ ├── input031.c │ │ ├── input032.c │ │ ├── input033.c │ │ ├── input034.c │ │ ├── input035.c │ │ ├── input036.c │ │ ├── input037.c │ │ ├── input038.c │ │ ├── input039.c │ │ ├── input040.c │ │ ├── input041.c │ │ ├── input042.c │ │ ├── input043.c │ │ ├── input044.c │ │ ├── input045.c │ │ ├── input046.c │ │ ├── input047.c │ │ ├── input048.c │ │ ├── input049.c │ │ ├── input050.c │ │ ├── input051.c │ │ ├── input052.c │ │ ├── input053.c │ │ ├── input054.c │ │ ├── input055.c │ │ ├── input056.c │ │ ├── input057.c │ │ ├── input058.c │ │ ├── input059.c │ │ ├── input060.c │ │ ├── 
input061.c │ │ ├── input062.c │ │ ├── input063.c │ │ ├── input064.c │ │ ├── input065.c │ │ ├── input066.c │ │ ├── input067.c │ │ ├── input068.c │ │ ├── input069.c │ │ ├── input070.c │ │ ├── input071.c │ │ ├── input072.c │ │ ├── input073.c │ │ ├── input074.c │ │ ├── input075.c │ │ ├── input076.c │ │ ├── input077.c │ │ ├── input078.c │ │ ├── input079.c │ │ ├── input080.c │ │ ├── input081.c │ │ ├── input082.c │ │ ├── input083.c │ │ ├── input084.c │ │ ├── input085.c │ │ ├── input086.c │ │ ├── input087.c │ │ ├── input088.c │ │ ├── input089.c │ │ ├── input090.c │ │ ├── input091.c │ │ ├── input092.c │ │ ├── input093.c │ │ ├── input094.c │ │ ├── input095.c │ │ ├── input096.c │ │ ├── input097.c │ │ ├── input098.c │ │ ├── input099.c │ │ ├── input100.c │ │ ├── input101.c │ │ ├── input102.c │ │ ├── input103.c │ │ ├── input104.c │ │ ├── input105.c │ │ ├── input106.c │ │ ├── input107.c │ │ ├── input108.c │ │ ├── input109.c │ │ ├── input110.c │ │ ├── input111.c │ │ ├── input112.c │ │ ├── input113.c │ │ ├── input114.c │ │ ├── input115.c │ │ ├── input116.c │ │ ├── input117.c │ │ ├── input118.c │ │ ├── input119.c │ │ ├── input120.c │ │ ├── input121.c │ │ ├── input122.c │ │ ├── input123.c │ │ ├── input124.c │ │ ├── input125.c │ │ ├── input126.c │ │ ├── input127.c │ │ ├── input128.c │ │ ├── input129.c │ │ ├── input130.c │ │ ├── input131.c │ │ ├── input132.c │ │ ├── input133.c │ │ ├── input134.c │ │ ├── input135.c │ │ ├── input136.c │ │ ├── input137.c │ │ ├── input138.c │ │ ├── input139.c │ │ ├── mktests │ │ ├── out.input001.c │ │ ├── out.input002.c │ │ ├── out.input003.c │ │ ├── out.input004.c │ │ ├── out.input005.c │ │ ├── out.input006.c │ │ ├── out.input007.c │ │ ├── out.input008.c │ │ ├── out.input009.c │ │ ├── out.input010.c │ │ ├── out.input011.c │ │ ├── out.input012.c │ │ ├── out.input013.c │ │ ├── out.input014.c │ │ ├── out.input015.c │ │ ├── out.input016.c │ │ ├── out.input017.c │ │ ├── out.input018.c │ │ ├── out.input018a.c │ │ ├── out.input019.c │ │ ├── out.input020.c │ │ 
├── out.input021.c │ │ ├── out.input022.c │ │ ├── out.input023.c │ │ ├── out.input024.c │ │ ├── out.input025.c │ │ ├── out.input026.c │ │ ├── out.input027.c │ │ ├── out.input028.c │ │ ├── out.input029.c │ │ ├── out.input030.c │ │ ├── out.input053.c │ │ ├── out.input054.c │ │ ├── out.input055.c │ │ ├── out.input058.c │ │ ├── out.input062.c │ │ ├── out.input063.c │ │ ├── out.input067.c │ │ ├── out.input070.c │ │ ├── out.input071.c │ │ ├── out.input074.c │ │ ├── out.input080.c │ │ ├── out.input081.c │ │ ├── out.input082.c │ │ ├── out.input083.c │ │ ├── out.input084.c │ │ ├── out.input088.c │ │ ├── out.input089.c │ │ ├── out.input090.c │ │ ├── out.input091.c │ │ ├── out.input099.c │ │ ├── out.input100.c │ │ ├── out.input101.c │ │ ├── out.input106.c │ │ ├── out.input107.c │ │ ├── out.input108.c │ │ ├── out.input109.c │ │ ├── out.input110.c │ │ ├── out.input111.c │ │ ├── out.input112.c │ │ ├── out.input113.c │ │ ├── out.input114.c │ │ ├── out.input115.c │ │ ├── out.input116.c │ │ ├── out.input117.c │ │ ├── out.input119.c │ │ ├── out.input120.c │ │ ├── out.input121.c │ │ ├── out.input122.c │ │ ├── out.input123.c │ │ ├── out.input125.c │ │ ├── out.input127.c │ │ ├── out.input128.c │ │ ├── out.input130.c │ │ ├── out.input131.c │ │ ├── out.input132.c │ │ ├── out.input133.c │ │ ├── out.input134.c │ │ ├── out.input135.c │ │ ├── out.input136.c │ │ ├── out.input137.c │ │ ├── out.input138.c │ │ ├── out.input139.c │ │ ├── runtests │ │ └── runtestsn │ ├── tree.c │ ├── types.c │ └── zresults ├── 55_Lazy_Evaluation/ │ └── Readme.md ├── 56_Local_Arrays/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cg_arm.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── include/ │ │ ├── ctype.h │ │ ├── errno.h │ │ ├── fcntl.h │ │ ├── stddef.h │ │ ├── stdio.h │ │ ├── stdlib.h │ │ ├── string.h │ │ └── unistd.h │ ├── main.c │ ├── misc.c │ ├── opt.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── err.input031.c │ │ ├── err.input032.c │ │ ├── 
err.input033.c │ │ ├── err.input034.c │ │ ├── err.input035.c │ │ ├── err.input036.c │ │ ├── err.input037.c │ │ ├── err.input038.c │ │ ├── err.input039.c │ │ ├── err.input040.c │ │ ├── err.input041.c │ │ ├── err.input042.c │ │ ├── err.input043.c │ │ ├── err.input044.c │ │ ├── err.input045.c │ │ ├── err.input046.c │ │ ├── err.input047.c │ │ ├── err.input048.c │ │ ├── err.input049.c │ │ ├── err.input050.c │ │ ├── err.input051.c │ │ ├── err.input052.c │ │ ├── err.input056.c │ │ ├── err.input057.c │ │ ├── err.input059.c │ │ ├── err.input060.c │ │ ├── err.input061.c │ │ ├── err.input064.c │ │ ├── err.input065.c │ │ ├── err.input066.c │ │ ├── err.input068.c │ │ ├── err.input069.c │ │ ├── err.input072.c │ │ ├── err.input073.c │ │ ├── err.input075.c │ │ ├── err.input076.c │ │ ├── err.input077.c │ │ ├── err.input078.c │ │ ├── err.input079.c │ │ ├── err.input085.c │ │ ├── err.input086.c │ │ ├── err.input087.c │ │ ├── err.input092.c │ │ ├── err.input093.c │ │ ├── err.input094.c │ │ ├── err.input095.c │ │ ├── err.input096.c │ │ ├── err.input097.c │ │ ├── err.input098.c │ │ ├── err.input102.c │ │ ├── err.input103.c │ │ ├── err.input104.c │ │ ├── err.input105.c │ │ ├── err.input118.c │ │ ├── err.input124.c │ │ ├── err.input126.c │ │ ├── err.input129.c │ │ ├── err.input141.c │ │ ├── err.input142.c │ │ ├── input001.c │ │ ├── input002.c │ │ ├── input003.c │ │ ├── input004.c │ │ ├── input005.c │ │ ├── input006.c │ │ ├── input007.c │ │ ├── input008.c │ │ ├── input009.c │ │ ├── input010.c │ │ ├── input011.c │ │ ├── input012.c │ │ ├── input013.c │ │ ├── input014.c │ │ ├── input015.c │ │ ├── input016.c │ │ ├── input017.c │ │ ├── input018.c │ │ ├── input018a.c │ │ ├── input019.c │ │ ├── input020.c │ │ ├── input021.c │ │ ├── input022.c │ │ ├── input023.c │ │ ├── input024.c │ │ ├── input025.c │ │ ├── input026.c │ │ ├── input027.c │ │ ├── input028.c │ │ ├── input029.c │ │ ├── input030.c │ │ ├── input031.c │ │ ├── input032.c │ │ ├── input033.c │ │ ├── input035.c │ │ ├── input036.c │ │ ├── 
input037.c │ │ ├── input038.c │ │ ├── input039.c │ │ ├── input040.c │ │ ├── input041.c │ │ ├── input042.c │ │ ├── input043.c │ │ ├── input044.c │ │ ├── input045.c │ │ ├── input046.c │ │ ├── input047.c │ │ ├── input048.c │ │ ├── input049.c │ │ ├── input050.c │ │ ├── input051.c │ │ ├── input052.c │ │ ├── input053.c │ │ ├── input054.c │ │ ├── input055.c │ │ ├── input056.c │ │ ├── input057.c │ │ ├── input058.c │ │ ├── input059.c │ │ ├── input060.c │ │ ├── input061.c │ │ ├── input062.c │ │ ├── input063.c │ │ ├── input064.c │ │ ├── input065.c │ │ ├── input066.c │ │ ├── input067.c │ │ ├── input068.c │ │ ├── input069.c │ │ ├── input070.c │ │ ├── input071.c │ │ ├── input072.c │ │ ├── input073.c │ │ ├── input074.c │ │ ├── input075.c │ │ ├── input076.c │ │ ├── input077.c │ │ ├── input078.c │ │ ├── input079.c │ │ ├── input080.c │ │ ├── input081.c │ │ ├── input082.c │ │ ├── input083.c │ │ ├── input084.c │ │ ├── input085.c │ │ ├── input086.c │ │ ├── input087.c │ │ ├── input088.c │ │ ├── input089.c │ │ ├── input090.c │ │ ├── input091.c │ │ ├── input092.c │ │ ├── input093.c │ │ ├── input094.c │ │ ├── input095.c │ │ ├── input096.c │ │ ├── input098.c │ │ ├── input099.c │ │ ├── input100.c │ │ ├── input101.c │ │ ├── input102.c │ │ ├── input103.c │ │ ├── input104.c │ │ ├── input105.c │ │ ├── input106.c │ │ ├── input107.c │ │ ├── input108.c │ │ ├── input109.c │ │ ├── input110.c │ │ ├── input111.c │ │ ├── input112.c │ │ ├── input113.c │ │ ├── input114.c │ │ ├── input115.c │ │ ├── input116.c │ │ ├── input117.c │ │ ├── input118.c │ │ ├── input119.c │ │ ├── input120.c │ │ ├── input121.c │ │ ├── input122.c │ │ ├── input123.c │ │ ├── input124.c │ │ ├── input125.c │ │ ├── input126.c │ │ ├── input127.c │ │ ├── input128.c │ │ ├── input129.c │ │ ├── input130.c │ │ ├── input131.c │ │ ├── input132.c │ │ ├── input133.c │ │ ├── input134.c │ │ ├── input135.c │ │ ├── input136.c │ │ ├── input137.c │ │ ├── input138.c │ │ ├── input139.c │ │ ├── input140.c │ │ ├── input141.c │ │ ├── input142.c │ │ ├── 
mktests │ │ ├── out.input001.c │ │ ├── out.input002.c │ │ ├── out.input003.c │ │ ├── out.input004.c │ │ ├── out.input005.c │ │ ├── out.input006.c │ │ ├── out.input007.c │ │ ├── out.input008.c │ │ ├── out.input009.c │ │ ├── out.input010.c │ │ ├── out.input011.c │ │ ├── out.input012.c │ │ ├── out.input013.c │ │ ├── out.input014.c │ │ ├── out.input015.c │ │ ├── out.input016.c │ │ ├── out.input017.c │ │ ├── out.input018.c │ │ ├── out.input018a.c │ │ ├── out.input019.c │ │ ├── out.input020.c │ │ ├── out.input021.c │ │ ├── out.input022.c │ │ ├── out.input023.c │ │ ├── out.input024.c │ │ ├── out.input025.c │ │ ├── out.input026.c │ │ ├── out.input027.c │ │ ├── out.input028.c │ │ ├── out.input029.c │ │ ├── out.input030.c │ │ ├── out.input053.c │ │ ├── out.input054.c │ │ ├── out.input055.c │ │ ├── out.input058.c │ │ ├── out.input062.c │ │ ├── out.input063.c │ │ ├── out.input067.c │ │ ├── out.input070.c │ │ ├── out.input071.c │ │ ├── out.input074.c │ │ ├── out.input080.c │ │ ├── out.input081.c │ │ ├── out.input082.c │ │ ├── out.input083.c │ │ ├── out.input084.c │ │ ├── out.input088.c │ │ ├── out.input089.c │ │ ├── out.input090.c │ │ ├── out.input091.c │ │ ├── out.input099.c │ │ ├── out.input100.c │ │ ├── out.input101.c │ │ ├── out.input106.c │ │ ├── out.input107.c │ │ ├── out.input108.c │ │ ├── out.input109.c │ │ ├── out.input110.c │ │ ├── out.input111.c │ │ ├── out.input112.c │ │ ├── out.input113.c │ │ ├── out.input114.c │ │ ├── out.input115.c │ │ ├── out.input116.c │ │ ├── out.input117.c │ │ ├── out.input119.c │ │ ├── out.input120.c │ │ ├── out.input121.c │ │ ├── out.input122.c │ │ ├── out.input123.c │ │ ├── out.input125.c │ │ ├── out.input127.c │ │ ├── out.input128.c │ │ ├── out.input130.c │ │ ├── out.input131.c │ │ ├── out.input132.c │ │ ├── out.input133.c │ │ ├── out.input134.c │ │ ├── out.input135.c │ │ ├── out.input136.c │ │ ├── out.input137.c │ │ ├── out.input138.c │ │ ├── out.input139.c │ │ ├── out.input140.c │ │ ├── runtests │ │ └── runtestsn │ ├── tree.c │ └── 
types.c ├── 57_Mop_up_pt3/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cg_arm.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── include/ │ │ ├── ctype.h │ │ ├── errno.h │ │ ├── fcntl.h │ │ ├── stddef.h │ │ ├── stdio.h │ │ ├── stdlib.h │ │ ├── string.h │ │ └── unistd.h │ ├── main.c │ ├── misc.c │ ├── opt.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── err.input031.c │ │ ├── err.input032.c │ │ ├── err.input033.c │ │ ├── err.input034.c │ │ ├── err.input035.c │ │ ├── err.input036.c │ │ ├── err.input037.c │ │ ├── err.input038.c │ │ ├── err.input039.c │ │ ├── err.input040.c │ │ ├── err.input041.c │ │ ├── err.input042.c │ │ ├── err.input043.c │ │ ├── err.input044.c │ │ ├── err.input045.c │ │ ├── err.input046.c │ │ ├── err.input047.c │ │ ├── err.input048.c │ │ ├── err.input049.c │ │ ├── err.input050.c │ │ ├── err.input051.c │ │ ├── err.input052.c │ │ ├── err.input056.c │ │ ├── err.input057.c │ │ ├── err.input059.c │ │ ├── err.input060.c │ │ ├── err.input061.c │ │ ├── err.input064.c │ │ ├── err.input065.c │ │ ├── err.input066.c │ │ ├── err.input068.c │ │ ├── err.input069.c │ │ ├── err.input072.c │ │ ├── err.input073.c │ │ ├── err.input075.c │ │ ├── err.input076.c │ │ ├── err.input077.c │ │ ├── err.input078.c │ │ ├── err.input079.c │ │ ├── err.input085.c │ │ ├── err.input086.c │ │ ├── err.input087.c │ │ ├── err.input092.c │ │ ├── err.input093.c │ │ ├── err.input094.c │ │ ├── err.input095.c │ │ ├── err.input096.c │ │ ├── err.input097.c │ │ ├── err.input098.c │ │ ├── err.input102.c │ │ ├── err.input103.c │ │ ├── err.input104.c │ │ ├── err.input105.c │ │ ├── err.input118.c │ │ ├── err.input124.c │ │ ├── err.input126.c │ │ ├── err.input129.c │ │ ├── err.input141.c │ │ ├── err.input142.c │ │ ├── input001.c │ │ ├── input002.c │ │ ├── input003.c │ │ ├── input004.c │ │ ├── input005.c │ │ ├── input006.c │ │ ├── input007.c │ │ ├── input008.c │ │ ├── input009.c │ │ ├── input010.c │ │ ├── input011.c │ │ ├── input012.c │ │ 
├── input013.c │ │ ├── input014.c │ │ ├── input015.c │ │ ├── input016.c │ │ ├── input017.c │ │ ├── input018.c │ │ ├── input018a.c │ │ ├── input019.c │ │ ├── input020.c │ │ ├── input021.c │ │ ├── input022.c │ │ ├── input023.c │ │ ├── input024.c │ │ ├── input025.c │ │ ├── input026.c │ │ ├── input027.c │ │ ├── input028.c │ │ ├── input029.c │ │ ├── input030.c │ │ ├── input031.c │ │ ├── input032.c │ │ ├── input033.c │ │ ├── input035.c │ │ ├── input036.c │ │ ├── input037.c │ │ ├── input038.c │ │ ├── input039.c │ │ ├── input040.c │ │ ├── input041.c │ │ ├── input042.c │ │ ├── input043.c │ │ ├── input044.c │ │ ├── input045.c │ │ ├── input046.c │ │ ├── input047.c │ │ ├── input048.c │ │ ├── input049.c │ │ ├── input050.c │ │ ├── input051.c │ │ ├── input052.c │ │ ├── input053.c │ │ ├── input054.c │ │ ├── input055.c │ │ ├── input056.c │ │ ├── input057.c │ │ ├── input058.c │ │ ├── input059.c │ │ ├── input060.c │ │ ├── input061.c │ │ ├── input062.c │ │ ├── input063.c │ │ ├── input064.c │ │ ├── input065.c │ │ ├── input066.c │ │ ├── input067.c │ │ ├── input068.c │ │ ├── input069.c │ │ ├── input070.c │ │ ├── input071.c │ │ ├── input072.c │ │ ├── input073.c │ │ ├── input074.c │ │ ├── input075.c │ │ ├── input076.c │ │ ├── input077.c │ │ ├── input078.c │ │ ├── input079.c │ │ ├── input080.c │ │ ├── input081.c │ │ ├── input082.c │ │ ├── input083.c │ │ ├── input084.c │ │ ├── input085.c │ │ ├── input086.c │ │ ├── input087.c │ │ ├── input088.c │ │ ├── input089.c │ │ ├── input090.c │ │ ├── input091.c │ │ ├── input092.c │ │ ├── input093.c │ │ ├── input094.c │ │ ├── input095.c │ │ ├── input096.c │ │ ├── input098.c │ │ ├── input099.c │ │ ├── input100.c │ │ ├── input101.c │ │ ├── input102.c │ │ ├── input103.c │ │ ├── input104.c │ │ ├── input105.c │ │ ├── input106.c │ │ ├── input107.c │ │ ├── input108.c │ │ ├── input109.c │ │ ├── input110.c │ │ ├── input111.c │ │ ├── input112.c │ │ ├── input113.c │ │ ├── input114.c │ │ ├── input115.c │ │ ├── input116.c │ │ ├── input117.c │ │ ├── input118.c │ │ ├── 
input119.c │ │ ├── input120.c │ │ ├── input121.c │ │ ├── input122.c │ │ ├── input123.c │ │ ├── input124.c │ │ ├── input125.c │ │ ├── input126.c │ │ ├── input127.c │ │ ├── input128.c │ │ ├── input129.c │ │ ├── input130.c │ │ ├── input131.c │ │ ├── input132.c │ │ ├── input133.c │ │ ├── input134.c │ │ ├── input135.c │ │ ├── input136.c │ │ ├── input137.c │ │ ├── input138.c │ │ ├── input139.c │ │ ├── input140.c │ │ ├── input141.c │ │ ├── input142.c │ │ ├── input143.c │ │ ├── input144.c │ │ ├── input145.c │ │ ├── mktests │ │ ├── nasmext.inc │ │ ├── out.input001.c │ │ ├── out.input002.c │ │ ├── out.input003.c │ │ ├── out.input004.c │ │ ├── out.input005.c │ │ ├── out.input006.c │ │ ├── out.input007.c │ │ ├── out.input008.c │ │ ├── out.input009.c │ │ ├── out.input010.c │ │ ├── out.input011.c │ │ ├── out.input012.c │ │ ├── out.input013.c │ │ ├── out.input014.c │ │ ├── out.input015.c │ │ ├── out.input016.c │ │ ├── out.input017.c │ │ ├── out.input018.c │ │ ├── out.input018a.c │ │ ├── out.input019.c │ │ ├── out.input020.c │ │ ├── out.input021.c │ │ ├── out.input022.c │ │ ├── out.input023.c │ │ ├── out.input024.c │ │ ├── out.input025.c │ │ ├── out.input026.c │ │ ├── out.input027.c │ │ ├── out.input028.c │ │ ├── out.input029.c │ │ ├── out.input030.c │ │ ├── out.input053.c │ │ ├── out.input054.c │ │ ├── out.input055.c │ │ ├── out.input058.c │ │ ├── out.input062.c │ │ ├── out.input063.c │ │ ├── out.input067.c │ │ ├── out.input070.c │ │ ├── out.input071.c │ │ ├── out.input074.c │ │ ├── out.input080.c │ │ ├── out.input081.c │ │ ├── out.input082.c │ │ ├── out.input083.c │ │ ├── out.input084.c │ │ ├── out.input088.c │ │ ├── out.input089.c │ │ ├── out.input090.c │ │ ├── out.input091.c │ │ ├── out.input099.c │ │ ├── out.input100.c │ │ ├── out.input101.c │ │ ├── out.input106.c │ │ ├── out.input107.c │ │ ├── out.input108.c │ │ ├── out.input109.c │ │ ├── out.input110.c │ │ ├── out.input111.c │ │ ├── out.input112.c │ │ ├── out.input113.c │ │ ├── out.input114.c │ │ ├── out.input115.c │ │ ├── 
out.input116.c │ │ ├── out.input117.c │ │ ├── out.input119.c │ │ ├── out.input120.c │ │ ├── out.input121.c │ │ ├── out.input122.c │ │ ├── out.input123.c │ │ ├── out.input125.c │ │ ├── out.input127.c │ │ ├── out.input128.c │ │ ├── out.input130.c │ │ ├── out.input131.c │ │ ├── out.input132.c │ │ ├── out.input133.c │ │ ├── out.input134.c │ │ ├── out.input135.c │ │ ├── out.input136.c │ │ ├── out.input137.c │ │ ├── out.input138.c │ │ ├── out.input139.c │ │ ├── out.input140.c │ │ ├── out.input143.c │ │ ├── out.input144.c │ │ ├── out.input145.c │ │ ├── runtests │ │ └── runtestsn │ ├── tree.c │ └── types.c ├── 58_Ptr_Increments/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cg_arm.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── include/ │ │ ├── ctype.h │ │ ├── errno.h │ │ ├── fcntl.h │ │ ├── stddef.h │ │ ├── stdio.h │ │ ├── stdlib.h │ │ ├── string.h │ │ └── unistd.h │ ├── main.c │ ├── misc.c │ ├── opt.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── err.input031.c │ │ ├── err.input032.c │ │ ├── err.input033.c │ │ ├── err.input034.c │ │ ├── err.input035.c │ │ ├── err.input036.c │ │ ├── err.input037.c │ │ ├── err.input038.c │ │ ├── err.input039.c │ │ ├── err.input040.c │ │ ├── err.input041.c │ │ ├── err.input042.c │ │ ├── err.input043.c │ │ ├── err.input044.c │ │ ├── err.input045.c │ │ ├── err.input046.c │ │ ├── err.input047.c │ │ ├── err.input048.c │ │ ├── err.input049.c │ │ ├── err.input050.c │ │ ├── err.input051.c │ │ ├── err.input052.c │ │ ├── err.input056.c │ │ ├── err.input057.c │ │ ├── err.input059.c │ │ ├── err.input060.c │ │ ├── err.input061.c │ │ ├── err.input064.c │ │ ├── err.input065.c │ │ ├── err.input066.c │ │ ├── err.input068.c │ │ ├── err.input069.c │ │ ├── err.input072.c │ │ ├── err.input073.c │ │ ├── err.input075.c │ │ ├── err.input076.c │ │ ├── err.input077.c │ │ ├── err.input078.c │ │ ├── err.input079.c │ │ ├── err.input085.c │ │ ├── err.input086.c │ │ ├── err.input087.c │ │ ├── err.input092.c 
│ │ ├── err.input093.c │ │ ├── err.input094.c │ │ ├── err.input095.c │ │ ├── err.input096.c │ │ ├── err.input097.c │ │ ├── err.input098.c │ │ ├── err.input102.c │ │ ├── err.input103.c │ │ ├── err.input104.c │ │ ├── err.input105.c │ │ ├── err.input118.c │ │ ├── err.input124.c │ │ ├── err.input126.c │ │ ├── err.input129.c │ │ ├── err.input141.c │ │ ├── err.input142.c │ │ ├── input001.c │ │ ├── input002.c │ │ ├── input003.c │ │ ├── input004.c │ │ ├── input005.c │ │ ├── input006.c │ │ ├── input007.c │ │ ├── input008.c │ │ ├── input009.c │ │ ├── input010.c │ │ ├── input011.c │ │ ├── input012.c │ │ ├── input013.c │ │ ├── input014.c │ │ ├── input015.c │ │ ├── input016.c │ │ ├── input017.c │ │ ├── input018.c │ │ ├── input018a.c │ │ ├── input019.c │ │ ├── input020.c │ │ ├── input021.c │ │ ├── input022.c │ │ ├── input023.c │ │ ├── input024.c │ │ ├── input025.c │ │ ├── input026.c │ │ ├── input027.c │ │ ├── input028.c │ │ ├── input029.c │ │ ├── input030.c │ │ ├── input031.c │ │ ├── input032.c │ │ ├── input033.c │ │ ├── input035.c │ │ ├── input036.c │ │ ├── input037.c │ │ ├── input038.c │ │ ├── input039.c │ │ ├── input040.c │ │ ├── input041.c │ │ ├── input042.c │ │ ├── input043.c │ │ ├── input044.c │ │ ├── input045.c │ │ ├── input046.c │ │ ├── input047.c │ │ ├── input048.c │ │ ├── input049.c │ │ ├── input050.c │ │ ├── input051.c │ │ ├── input052.c │ │ ├── input053.c │ │ ├── input054.c │ │ ├── input055.c │ │ ├── input056.c │ │ ├── input057.c │ │ ├── input058.c │ │ ├── input059.c │ │ ├── input060.c │ │ ├── input061.c │ │ ├── input062.c │ │ ├── input063.c │ │ ├── input064.c │ │ ├── input065.c │ │ ├── input066.c │ │ ├── input067.c │ │ ├── input068.c │ │ ├── input069.c │ │ ├── input070.c │ │ ├── input071.c │ │ ├── input072.c │ │ ├── input073.c │ │ ├── input074.c │ │ ├── input075.c │ │ ├── input076.c │ │ ├── input077.c │ │ ├── input078.c │ │ ├── input079.c │ │ ├── input080.c │ │ ├── input081.c │ │ ├── input082.c │ │ ├── input083.c │ │ ├── input084.c │ │ ├── input085.c │ │ ├── 
input086.c │ │ ├── input087.c │ │ ├── input088.c │ │ ├── input089.c │ │ ├── input090.c │ │ ├── input091.c │ │ ├── input092.c │ │ ├── input093.c │ │ ├── input094.c │ │ ├── input095.c │ │ ├── input096.c │ │ ├── input098.c │ │ ├── input099.c │ │ ├── input100.c │ │ ├── input101.c │ │ ├── input102.c │ │ ├── input103.c │ │ ├── input104.c │ │ ├── input105.c │ │ ├── input106.c │ │ ├── input107.c │ │ ├── input108.c │ │ ├── input109.c │ │ ├── input110.c │ │ ├── input111.c │ │ ├── input112.c │ │ ├── input113.c │ │ ├── input114.c │ │ ├── input115.c │ │ ├── input116.c │ │ ├── input117.c │ │ ├── input118.c │ │ ├── input119.c │ │ ├── input120.c │ │ ├── input121.c │ │ ├── input122.c │ │ ├── input123.c │ │ ├── input124.c │ │ ├── input125.c │ │ ├── input126.c │ │ ├── input127.c │ │ ├── input128.c │ │ ├── input129.c │ │ ├── input130.c │ │ ├── input131.c │ │ ├── input132.c │ │ ├── input133.c │ │ ├── input134.c │ │ ├── input135.c │ │ ├── input136.c │ │ ├── input137.c │ │ ├── input138.c │ │ ├── input139.c │ │ ├── input140.c │ │ ├── input141.c │ │ ├── input142.c │ │ ├── input143.c │ │ ├── input144.c │ │ ├── input145.c │ │ ├── input146.c │ │ ├── input147.c │ │ ├── mktests │ │ ├── nasmext.inc │ │ ├── out.input001.c │ │ ├── out.input002.c │ │ ├── out.input003.c │ │ ├── out.input004.c │ │ ├── out.input005.c │ │ ├── out.input006.c │ │ ├── out.input007.c │ │ ├── out.input008.c │ │ ├── out.input009.c │ │ ├── out.input010.c │ │ ├── out.input011.c │ │ ├── out.input012.c │ │ ├── out.input013.c │ │ ├── out.input014.c │ │ ├── out.input015.c │ │ ├── out.input016.c │ │ ├── out.input017.c │ │ ├── out.input018.c │ │ ├── out.input018a.c │ │ ├── out.input019.c │ │ ├── out.input020.c │ │ ├── out.input021.c │ │ ├── out.input022.c │ │ ├── out.input023.c │ │ ├── out.input024.c │ │ ├── out.input025.c │ │ ├── out.input026.c │ │ ├── out.input027.c │ │ ├── out.input028.c │ │ ├── out.input029.c │ │ ├── out.input030.c │ │ ├── out.input053.c │ │ ├── out.input054.c │ │ ├── out.input055.c │ │ ├── out.input058.c │ │ 
├── out.input062.c │ │ ├── out.input063.c │ │ ├── out.input067.c │ │ ├── out.input070.c │ │ ├── out.input071.c │ │ ├── out.input074.c │ │ ├── out.input080.c │ │ ├── out.input081.c │ │ ├── out.input082.c │ │ ├── out.input083.c │ │ ├── out.input084.c │ │ ├── out.input088.c │ │ ├── out.input089.c │ │ ├── out.input090.c │ │ ├── out.input091.c │ │ ├── out.input099.c │ │ ├── out.input100.c │ │ ├── out.input101.c │ │ ├── out.input106.c │ │ ├── out.input107.c │ │ ├── out.input108.c │ │ ├── out.input109.c │ │ ├── out.input110.c │ │ ├── out.input111.c │ │ ├── out.input112.c │ │ ├── out.input113.c │ │ ├── out.input114.c │ │ ├── out.input115.c │ │ ├── out.input116.c │ │ ├── out.input117.c │ │ ├── out.input119.c │ │ ├── out.input120.c │ │ ├── out.input121.c │ │ ├── out.input122.c │ │ ├── out.input123.c │ │ ├── out.input125.c │ │ ├── out.input127.c │ │ ├── out.input128.c │ │ ├── out.input130.c │ │ ├── out.input131.c │ │ ├── out.input132.c │ │ ├── out.input133.c │ │ ├── out.input134.c │ │ ├── out.input135.c │ │ ├── out.input136.c │ │ ├── out.input137.c │ │ ├── out.input138.c │ │ ├── out.input139.c │ │ ├── out.input140.c │ │ ├── out.input143.c │ │ ├── out.input144.c │ │ ├── out.input145.c │ │ ├── out.input146.c │ │ ├── out.input147.c │ │ ├── runtests │ │ └── runtestsn │ ├── tree.c │ └── types.c ├── 59_WDIW_pt1/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cg_arm.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── include/ │ │ ├── ctype.h │ │ ├── errno.h │ │ ├── fcntl.h │ │ ├── stddef.h │ │ ├── stdio.h │ │ ├── stdlib.h │ │ ├── string.h │ │ └── unistd.h │ ├── main.c │ ├── misc.c │ ├── opt.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── err.input031.c │ │ ├── err.input032.c │ │ ├── err.input033.c │ │ ├── err.input034.c │ │ ├── err.input035.c │ │ ├── err.input036.c │ │ ├── err.input037.c │ │ ├── err.input038.c │ │ ├── err.input039.c │ │ ├── err.input040.c │ │ ├── err.input041.c │ │ ├── err.input042.c │ │ ├── err.input043.c │ 
│ ├── err.input044.c │ │ ├── err.input045.c │ │ ├── err.input046.c │ │ ├── err.input047.c │ │ ├── err.input048.c │ │ ├── err.input049.c │ │ ├── err.input050.c │ │ ├── err.input051.c │ │ ├── err.input052.c │ │ ├── err.input056.c │ │ ├── err.input057.c │ │ ├── err.input059.c │ │ ├── err.input060.c │ │ ├── err.input061.c │ │ ├── err.input064.c │ │ ├── err.input065.c │ │ ├── err.input066.c │ │ ├── err.input068.c │ │ ├── err.input069.c │ │ ├── err.input072.c │ │ ├── err.input073.c │ │ ├── err.input075.c │ │ ├── err.input076.c │ │ ├── err.input077.c │ │ ├── err.input078.c │ │ ├── err.input079.c │ │ ├── err.input085.c │ │ ├── err.input086.c │ │ ├── err.input087.c │ │ ├── err.input092.c │ │ ├── err.input093.c │ │ ├── err.input094.c │ │ ├── err.input095.c │ │ ├── err.input096.c │ │ ├── err.input097.c │ │ ├── err.input098.c │ │ ├── err.input102.c │ │ ├── err.input103.c │ │ ├── err.input104.c │ │ ├── err.input105.c │ │ ├── err.input118.c │ │ ├── err.input124.c │ │ ├── err.input126.c │ │ ├── err.input129.c │ │ ├── err.input141.c │ │ ├── err.input142.c │ │ ├── input001.c │ │ ├── input002.c │ │ ├── input003.c │ │ ├── input004.c │ │ ├── input005.c │ │ ├── input006.c │ │ ├── input007.c │ │ ├── input008.c │ │ ├── input009.c │ │ ├── input010.c │ │ ├── input011.c │ │ ├── input012.c │ │ ├── input013.c │ │ ├── input014.c │ │ ├── input015.c │ │ ├── input016.c │ │ ├── input017.c │ │ ├── input018.c │ │ ├── input018a.c │ │ ├── input019.c │ │ ├── input020.c │ │ ├── input021.c │ │ ├── input022.c │ │ ├── input023.c │ │ ├── input024.c │ │ ├── input025.c │ │ ├── input026.c │ │ ├── input027.c │ │ ├── input028.c │ │ ├── input029.c │ │ ├── input030.c │ │ ├── input031.c │ │ ├── input032.c │ │ ├── input033.c │ │ ├── input035.c │ │ ├── input036.c │ │ ├── input037.c │ │ ├── input038.c │ │ ├── input039.c │ │ ├── input040.c │ │ ├── input041.c │ │ ├── input042.c │ │ ├── input043.c │ │ ├── input044.c │ │ ├── input045.c │ │ ├── input046.c │ │ ├── input047.c │ │ ├── input048.c │ │ ├── input049.c │ │ ├── 
input050.c │ │ ├── input051.c │ │ ├── input052.c │ │ ├── input053.c │ │ ├── input054.c │ │ ├── input055.c │ │ ├── input056.c │ │ ├── input057.c │ │ ├── input058.c │ │ ├── input059.c │ │ ├── input060.c │ │ ├── input061.c │ │ ├── input062.c │ │ ├── input063.c │ │ ├── input064.c │ │ ├── input065.c │ │ ├── input066.c │ │ ├── input067.c │ │ ├── input068.c │ │ ├── input069.c │ │ ├── input070.c │ │ ├── input071.c │ │ ├── input072.c │ │ ├── input073.c │ │ ├── input074.c │ │ ├── input075.c │ │ ├── input076.c │ │ ├── input077.c │ │ ├── input078.c │ │ ├── input079.c │ │ ├── input080.c │ │ ├── input081.c │ │ ├── input082.c │ │ ├── input083.c │ │ ├── input084.c │ │ ├── input085.c │ │ ├── input086.c │ │ ├── input087.c │ │ ├── input088.c │ │ ├── input089.c │ │ ├── input090.c │ │ ├── input091.c │ │ ├── input092.c │ │ ├── input093.c │ │ ├── input094.c │ │ ├── input095.c │ │ ├── input096.c │ │ ├── input098.c │ │ ├── input099.c │ │ ├── input100.c │ │ ├── input101.c │ │ ├── input102.c │ │ ├── input103.c │ │ ├── input104.c │ │ ├── input105.c │ │ ├── input106.c │ │ ├── input107.c │ │ ├── input108.c │ │ ├── input109.c │ │ ├── input110.c │ │ ├── input111.c │ │ ├── input112.c │ │ ├── input113.c │ │ ├── input114.c │ │ ├── input115.c │ │ ├── input116.c │ │ ├── input117.c │ │ ├── input118.c │ │ ├── input119.c │ │ ├── input120.c │ │ ├── input121.c │ │ ├── input122.c │ │ ├── input123.c │ │ ├── input124.c │ │ ├── input125.c │ │ ├── input126.c │ │ ├── input127.c │ │ ├── input128.c │ │ ├── input129.c │ │ ├── input130.c │ │ ├── input131.c │ │ ├── input132.c │ │ ├── input133.c │ │ ├── input134.c │ │ ├── input135.c │ │ ├── input136.c │ │ ├── input137.c │ │ ├── input138.c │ │ ├── input139.c │ │ ├── input140.c │ │ ├── input141.c │ │ ├── input142.c │ │ ├── input143.c │ │ ├── input144.c │ │ ├── input145.c │ │ ├── input146.c │ │ ├── input147.c │ │ ├── input148.c │ │ ├── mktests │ │ ├── out.input001.c │ │ ├── out.input002.c │ │ ├── out.input003.c │ │ ├── out.input004.c │ │ ├── out.input005.c │ │ ├── 
out.input006.c │ │ ├── out.input007.c │ │ ├── out.input008.c │ │ ├── out.input009.c │ │ ├── out.input010.c │ │ ├── out.input011.c │ │ ├── out.input012.c │ │ ├── out.input013.c │ │ ├── out.input014.c │ │ ├── out.input015.c │ │ ├── out.input016.c │ │ ├── out.input017.c │ │ ├── out.input018.c │ │ ├── out.input018a.c │ │ ├── out.input019.c │ │ ├── out.input020.c │ │ ├── out.input021.c │ │ ├── out.input022.c │ │ ├── out.input023.c │ │ ├── out.input024.c │ │ ├── out.input025.c │ │ ├── out.input026.c │ │ ├── out.input027.c │ │ ├── out.input028.c │ │ ├── out.input029.c │ │ ├── out.input030.c │ │ ├── out.input053.c │ │ ├── out.input054.c │ │ ├── out.input055.c │ │ ├── out.input058.c │ │ ├── out.input062.c │ │ ├── out.input063.c │ │ ├── out.input067.c │ │ ├── out.input070.c │ │ ├── out.input071.c │ │ ├── out.input074.c │ │ ├── out.input080.c │ │ ├── out.input081.c │ │ ├── out.input082.c │ │ ├── out.input083.c │ │ ├── out.input084.c │ │ ├── out.input088.c │ │ ├── out.input089.c │ │ ├── out.input090.c │ │ ├── out.input091.c │ │ ├── out.input099.c │ │ ├── out.input100.c │ │ ├── out.input101.c │ │ ├── out.input106.c │ │ ├── out.input107.c │ │ ├── out.input108.c │ │ ├── out.input109.c │ │ ├── out.input110.c │ │ ├── out.input111.c │ │ ├── out.input112.c │ │ ├── out.input113.c │ │ ├── out.input114.c │ │ ├── out.input115.c │ │ ├── out.input116.c │ │ ├── out.input117.c │ │ ├── out.input119.c │ │ ├── out.input120.c │ │ ├── out.input121.c │ │ ├── out.input122.c │ │ ├── out.input123.c │ │ ├── out.input125.c │ │ ├── out.input127.c │ │ ├── out.input128.c │ │ ├── out.input130.c │ │ ├── out.input131.c │ │ ├── out.input132.c │ │ ├── out.input133.c │ │ ├── out.input134.c │ │ ├── out.input135.c │ │ ├── out.input136.c │ │ ├── out.input137.c │ │ ├── out.input138.c │ │ ├── out.input139.c │ │ ├── out.input140.c │ │ ├── out.input143.c │ │ ├── out.input144.c │ │ ├── out.input145.c │ │ ├── out.input146.c │ │ ├── out.input147.c │ │ ├── out.input148.c │ │ ├── runtests │ │ ├── runtests0 │ │ └── 
runtestsn │ ├── tree.c │ └── types.c ├── 60_TripleTest/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cg_arm.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── include/ │ │ ├── ctype.h │ │ ├── errno.h │ │ ├── fcntl.h │ │ ├── stddef.h │ │ ├── stdio.h │ │ ├── stdlib.h │ │ ├── string.h │ │ └── unistd.h │ ├── main.c │ ├── misc.c │ ├── opt.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── err.input031.c │ │ ├── err.input032.c │ │ ├── err.input033.c │ │ ├── err.input034.c │ │ ├── err.input035.c │ │ ├── err.input036.c │ │ ├── err.input037.c │ │ ├── err.input038.c │ │ ├── err.input039.c │ │ ├── err.input040.c │ │ ├── err.input041.c │ │ ├── err.input042.c │ │ ├── err.input043.c │ │ ├── err.input044.c │ │ ├── err.input045.c │ │ ├── err.input046.c │ │ ├── err.input047.c │ │ ├── err.input048.c │ │ ├── err.input049.c │ │ ├── err.input050.c │ │ ├── err.input051.c │ │ ├── err.input052.c │ │ ├── err.input056.c │ │ ├── err.input057.c │ │ ├── err.input059.c │ │ ├── err.input060.c │ │ ├── err.input061.c │ │ ├── err.input064.c │ │ ├── err.input065.c │ │ ├── err.input066.c │ │ ├── err.input068.c │ │ ├── err.input069.c │ │ ├── err.input072.c │ │ ├── err.input073.c │ │ ├── err.input075.c │ │ ├── err.input076.c │ │ ├── err.input077.c │ │ ├── err.input078.c │ │ ├── err.input079.c │ │ ├── err.input085.c │ │ ├── err.input086.c │ │ ├── err.input087.c │ │ ├── err.input092.c │ │ ├── err.input093.c │ │ ├── err.input094.c │ │ ├── err.input095.c │ │ ├── err.input096.c │ │ ├── err.input097.c │ │ ├── err.input098.c │ │ ├── err.input102.c │ │ ├── err.input103.c │ │ ├── err.input104.c │ │ ├── err.input105.c │ │ ├── err.input118.c │ │ ├── err.input124.c │ │ ├── err.input126.c │ │ ├── err.input129.c │ │ ├── err.input141.c │ │ ├── err.input142.c │ │ ├── input001.c │ │ ├── input002.c │ │ ├── input003.c │ │ ├── input004.c │ │ ├── input005.c │ │ ├── input006.c │ │ ├── input007.c │ │ ├── input008.c │ │ ├── input009.c │ │ ├── input010.c │ │ ├── 
input011.c │ │ ├── input012.c │ │ ├── input013.c │ │ ├── input014.c │ │ ├── input015.c │ │ ├── input016.c │ │ ├── input017.c │ │ ├── input018.c │ │ ├── input018a.c │ │ ├── input019.c │ │ ├── input020.c │ │ ├── input021.c │ │ ├── input022.c │ │ ├── input023.c │ │ ├── input024.c │ │ ├── input025.c │ │ ├── input026.c │ │ ├── input027.c │ │ ├── input028.c │ │ ├── input029.c │ │ ├── input030.c │ │ ├── input031.c │ │ ├── input032.c │ │ ├── input033.c │ │ ├── input035.c │ │ ├── input036.c │ │ ├── input037.c │ │ ├── input038.c │ │ ├── input039.c │ │ ├── input040.c │ │ ├── input041.c │ │ ├── input042.c │ │ ├── input043.c │ │ ├── input044.c │ │ ├── input045.c │ │ ├── input046.c │ │ ├── input047.c │ │ ├── input048.c │ │ ├── input049.c │ │ ├── input050.c │ │ ├── input051.c │ │ ├── input052.c │ │ ├── input053.c │ │ ├── input054.c │ │ ├── input055.c │ │ ├── input056.c │ │ ├── input057.c │ │ ├── input058.c │ │ ├── input059.c │ │ ├── input060.c │ │ ├── input061.c │ │ ├── input062.c │ │ ├── input063.c │ │ ├── input064.c │ │ ├── input065.c │ │ ├── input066.c │ │ ├── input067.c │ │ ├── input068.c │ │ ├── input069.c │ │ ├── input070.c │ │ ├── input071.c │ │ ├── input072.c │ │ ├── input073.c │ │ ├── input074.c │ │ ├── input075.c │ │ ├── input076.c │ │ ├── input077.c │ │ ├── input078.c │ │ ├── input079.c │ │ ├── input080.c │ │ ├── input081.c │ │ ├── input082.c │ │ ├── input083.c │ │ ├── input084.c │ │ ├── input085.c │ │ ├── input086.c │ │ ├── input087.c │ │ ├── input088.c │ │ ├── input089.c │ │ ├── input090.c │ │ ├── input091.c │ │ ├── input092.c │ │ ├── input093.c │ │ ├── input094.c │ │ ├── input095.c │ │ ├── input096.c │ │ ├── input098.c │ │ ├── input099.c │ │ ├── input100.c │ │ ├── input101.c │ │ ├── input102.c │ │ ├── input103.c │ │ ├── input104.c │ │ ├── input105.c │ │ ├── input106.c │ │ ├── input107.c │ │ ├── input108.c │ │ ├── input109.c │ │ ├── input110.c │ │ ├── input111.c │ │ ├── input112.c │ │ ├── input113.c │ │ ├── input114.c │ │ ├── input115.c │ │ ├── input116.c │ │ ├── 
input117.c │ │ ├── input118.c │ │ ├── input119.c │ │ ├── input120.c │ │ ├── input121.c │ │ ├── input122.c │ │ ├── input123.c │ │ ├── input124.c │ │ ├── input125.c │ │ ├── input126.c │ │ ├── input127.c │ │ ├── input128.c │ │ ├── input129.c │ │ ├── input130.c │ │ ├── input131.c │ │ ├── input132.c │ │ ├── input133.c │ │ ├── input134.c │ │ ├── input135.c │ │ ├── input136.c │ │ ├── input137.c │ │ ├── input138.c │ │ ├── input139.c │ │ ├── input140.c │ │ ├── input141.c │ │ ├── input142.c │ │ ├── input143.c │ │ ├── input144.c │ │ ├── input145.c │ │ ├── input146.c │ │ ├── input147.c │ │ ├── input148.c │ │ ├── input149.c │ │ ├── mktests │ │ ├── out.input001.c │ │ ├── out.input002.c │ │ ├── out.input003.c │ │ ├── out.input004.c │ │ ├── out.input005.c │ │ ├── out.input006.c │ │ ├── out.input007.c │ │ ├── out.input008.c │ │ ├── out.input009.c │ │ ├── out.input010.c │ │ ├── out.input011.c │ │ ├── out.input012.c │ │ ├── out.input013.c │ │ ├── out.input014.c │ │ ├── out.input015.c │ │ ├── out.input016.c │ │ ├── out.input017.c │ │ ├── out.input018.c │ │ ├── out.input018a.c │ │ ├── out.input019.c │ │ ├── out.input020.c │ │ ├── out.input021.c │ │ ├── out.input022.c │ │ ├── out.input023.c │ │ ├── out.input024.c │ │ ├── out.input025.c │ │ ├── out.input026.c │ │ ├── out.input027.c │ │ ├── out.input028.c │ │ ├── out.input029.c │ │ ├── out.input030.c │ │ ├── out.input053.c │ │ ├── out.input054.c │ │ ├── out.input055.c │ │ ├── out.input058.c │ │ ├── out.input062.c │ │ ├── out.input063.c │ │ ├── out.input067.c │ │ ├── out.input070.c │ │ ├── out.input071.c │ │ ├── out.input074.c │ │ ├── out.input080.c │ │ ├── out.input081.c │ │ ├── out.input082.c │ │ ├── out.input083.c │ │ ├── out.input084.c │ │ ├── out.input088.c │ │ ├── out.input089.c │ │ ├── out.input090.c │ │ ├── out.input091.c │ │ ├── out.input099.c │ │ ├── out.input100.c │ │ ├── out.input101.c │ │ ├── out.input106.c │ │ ├── out.input107.c │ │ ├── out.input108.c │ │ ├── out.input109.c │ │ ├── out.input110.c │ │ ├── out.input111.c │ │ 
├── out.input112.c │ │ ├── out.input113.c │ │ ├── out.input114.c │ │ ├── out.input115.c │ │ ├── out.input116.c │ │ ├── out.input117.c │ │ ├── out.input119.c │ │ ├── out.input120.c │ │ ├── out.input121.c │ │ ├── out.input122.c │ │ ├── out.input123.c │ │ ├── out.input125.c │ │ ├── out.input127.c │ │ ├── out.input128.c │ │ ├── out.input130.c │ │ ├── out.input131.c │ │ ├── out.input132.c │ │ ├── out.input133.c │ │ ├── out.input134.c │ │ ├── out.input135.c │ │ ├── out.input136.c │ │ ├── out.input137.c │ │ ├── out.input138.c │ │ ├── out.input139.c │ │ ├── out.input140.c │ │ ├── out.input143.c │ │ ├── out.input144.c │ │ ├── out.input145.c │ │ ├── out.input146.c │ │ ├── out.input147.c │ │ ├── out.input148.c │ │ ├── out.input149.c │ │ ├── runtests │ │ ├── runtests0 │ │ ├── runtests0n │ │ └── runtestsn │ ├── tree.c │ └── types.c ├── 61_What_Next/ │ └── Readme.md ├── 62_Cleanup/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── cg_arm.c │ ├── cgn.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── include/ │ │ ├── ctype.h │ │ ├── errno.h │ │ ├── fcntl.h │ │ ├── stddef.h │ │ ├── stdio.h │ │ ├── stdlib.h │ │ ├── string.h │ │ └── unistd.h │ ├── main.c │ ├── misc.c │ ├── opt.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── err.input031.c │ │ ├── err.input032.c │ │ ├── err.input033.c │ │ ├── err.input034.c │ │ ├── err.input035.c │ │ ├── err.input036.c │ │ ├── err.input037.c │ │ ├── err.input038.c │ │ ├── err.input039.c │ │ ├── err.input040.c │ │ ├── err.input041.c │ │ ├── err.input042.c │ │ ├── err.input043.c │ │ ├── err.input044.c │ │ ├── err.input045.c │ │ ├── err.input046.c │ │ ├── err.input047.c │ │ ├── err.input048.c │ │ ├── err.input049.c │ │ ├── err.input050.c │ │ ├── err.input051.c │ │ ├── err.input052.c │ │ ├── err.input056.c │ │ ├── err.input057.c │ │ ├── err.input059.c │ │ ├── err.input060.c │ │ ├── err.input061.c │ │ ├── err.input064.c │ │ ├── err.input065.c │ │ ├── err.input066.c │ │ ├── err.input068.c │ │ ├── err.input069.c 
│ │ ├── err.input072.c │ │ ├── err.input073.c │ │ ├── err.input075.c │ │ ├── err.input076.c │ │ ├── err.input077.c │ │ ├── err.input078.c │ │ ├── err.input079.c │ │ ├── err.input085.c │ │ ├── err.input086.c │ │ ├── err.input087.c │ │ ├── err.input092.c │ │ ├── err.input093.c │ │ ├── err.input094.c │ │ ├── err.input095.c │ │ ├── err.input096.c │ │ ├── err.input097.c │ │ ├── err.input098.c │ │ ├── err.input102.c │ │ ├── err.input103.c │ │ ├── err.input104.c │ │ ├── err.input105.c │ │ ├── err.input118.c │ │ ├── err.input124.c │ │ ├── err.input126.c │ │ ├── err.input129.c │ │ ├── err.input141.c │ │ ├── err.input142.c │ │ ├── input001.c │ │ ├── input002.c │ │ ├── input003.c │ │ ├── input004.c │ │ ├── input005.c │ │ ├── input006.c │ │ ├── input007.c │ │ ├── input008.c │ │ ├── input009.c │ │ ├── input010.c │ │ ├── input011.c │ │ ├── input012.c │ │ ├── input013.c │ │ ├── input014.c │ │ ├── input015.c │ │ ├── input016.c │ │ ├── input017.c │ │ ├── input018.c │ │ ├── input018a.c │ │ ├── input019.c │ │ ├── input020.c │ │ ├── input021.c │ │ ├── input022.c │ │ ├── input023.c │ │ ├── input024.c │ │ ├── input025.c │ │ ├── input026.c │ │ ├── input027.c │ │ ├── input028.c │ │ ├── input029.c │ │ ├── input030.c │ │ ├── input031.c │ │ ├── input032.c │ │ ├── input033.c │ │ ├── input035.c │ │ ├── input036.c │ │ ├── input037.c │ │ ├── input038.c │ │ ├── input039.c │ │ ├── input040.c │ │ ├── input041.c │ │ ├── input042.c │ │ ├── input043.c │ │ ├── input044.c │ │ ├── input045.c │ │ ├── input046.c │ │ ├── input047.c │ │ ├── input048.c │ │ ├── input049.c │ │ ├── input050.c │ │ ├── input051.c │ │ ├── input052.c │ │ ├── input053.c │ │ ├── input054.c │ │ ├── input055.c │ │ ├── input056.c │ │ ├── input057.c │ │ ├── input058.c │ │ ├── input059.c │ │ ├── input060.c │ │ ├── input061.c │ │ ├── input062.c │ │ ├── input063.c │ │ ├── input064.c │ │ ├── input065.c │ │ ├── input066.c │ │ ├── input067.c │ │ ├── input068.c │ │ ├── input069.c │ │ ├── input070.c │ │ ├── input071.c │ │ ├── input072.c │ │ ├── 
input073.c │ │ ├── input074.c │ │ ├── input075.c │ │ ├── input076.c │ │ ├── input077.c │ │ ├── input078.c │ │ ├── input079.c │ │ ├── input080.c │ │ ├── input081.c │ │ ├── input082.c │ │ ├── input083.c │ │ ├── input084.c │ │ ├── input085.c │ │ ├── input086.c │ │ ├── input087.c │ │ ├── input088.c │ │ ├── input089.c │ │ ├── input090.c │ │ ├── input091.c │ │ ├── input092.c │ │ ├── input093.c │ │ ├── input094.c │ │ ├── input095.c │ │ ├── input096.c │ │ ├── input098.c │ │ ├── input099.c │ │ ├── input100.c │ │ ├── input101.c │ │ ├── input102.c │ │ ├── input103.c │ │ ├── input104.c │ │ ├── input105.c │ │ ├── input106.c │ │ ├── input107.c │ │ ├── input108.c │ │ ├── input109.c │ │ ├── input110.c │ │ ├── input111.c │ │ ├── input112.c │ │ ├── input113.c │ │ ├── input114.c │ │ ├── input115.c │ │ ├── input116.c │ │ ├── input117.c │ │ ├── input118.c │ │ ├── input119.c │ │ ├── input120.c │ │ ├── input121.c │ │ ├── input122.c │ │ ├── input123.c │ │ ├── input124.c │ │ ├── input125.c │ │ ├── input126.c │ │ ├── input127.c │ │ ├── input128.c │ │ ├── input129.c │ │ ├── input130.c │ │ ├── input131.c │ │ ├── input132.c │ │ ├── input133.c │ │ ├── input134.c │ │ ├── input135.c │ │ ├── input136.c │ │ ├── input137.c │ │ ├── input138.c │ │ ├── input139.c │ │ ├── input140.c │ │ ├── input141.c │ │ ├── input142.c │ │ ├── input143.c │ │ ├── input144.c │ │ ├── input145.c │ │ ├── input146.c │ │ ├── input147.c │ │ ├── input148.c │ │ ├── input149.c │ │ ├── input150.c │ │ ├── mktests │ │ ├── out.input001.c │ │ ├── out.input002.c │ │ ├── out.input003.c │ │ ├── out.input004.c │ │ ├── out.input005.c │ │ ├── out.input006.c │ │ ├── out.input007.c │ │ ├── out.input008.c │ │ ├── out.input009.c │ │ ├── out.input010.c │ │ ├── out.input011.c │ │ ├── out.input012.c │ │ ├── out.input013.c │ │ ├── out.input014.c │ │ ├── out.input015.c │ │ ├── out.input016.c │ │ ├── out.input017.c │ │ ├── out.input018.c │ │ ├── out.input018a.c │ │ ├── out.input019.c │ │ ├── out.input020.c │ │ ├── out.input021.c │ │ ├── 
out.input022.c │ │ ├── out.input023.c │ │ ├── out.input024.c │ │ ├── out.input025.c │ │ ├── out.input026.c │ │ ├── out.input027.c │ │ ├── out.input028.c │ │ ├── out.input029.c │ │ ├── out.input030.c │ │ ├── out.input053.c │ │ ├── out.input054.c │ │ ├── out.input055.c │ │ ├── out.input058.c │ │ ├── out.input062.c │ │ ├── out.input063.c │ │ ├── out.input067.c │ │ ├── out.input070.c │ │ ├── out.input071.c │ │ ├── out.input074.c │ │ ├── out.input080.c │ │ ├── out.input081.c │ │ ├── out.input082.c │ │ ├── out.input083.c │ │ ├── out.input084.c │ │ ├── out.input088.c │ │ ├── out.input089.c │ │ ├── out.input090.c │ │ ├── out.input091.c │ │ ├── out.input099.c │ │ ├── out.input100.c │ │ ├── out.input101.c │ │ ├── out.input106.c │ │ ├── out.input107.c │ │ ├── out.input108.c │ │ ├── out.input109.c │ │ ├── out.input110.c │ │ ├── out.input111.c │ │ ├── out.input112.c │ │ ├── out.input113.c │ │ ├── out.input114.c │ │ ├── out.input115.c │ │ ├── out.input116.c │ │ ├── out.input117.c │ │ ├── out.input119.c │ │ ├── out.input120.c │ │ ├── out.input121.c │ │ ├── out.input122.c │ │ ├── out.input123.c │ │ ├── out.input125.c │ │ ├── out.input127.c │ │ ├── out.input128.c │ │ ├── out.input130.c │ │ ├── out.input131.c │ │ ├── out.input132.c │ │ ├── out.input133.c │ │ ├── out.input134.c │ │ ├── out.input135.c │ │ ├── out.input136.c │ │ ├── out.input137.c │ │ ├── out.input138.c │ │ ├── out.input139.c │ │ ├── out.input140.c │ │ ├── out.input143.c │ │ ├── out.input144.c │ │ ├── out.input145.c │ │ ├── out.input146.c │ │ ├── out.input147.c │ │ ├── out.input148.c │ │ ├── out.input149.c │ │ ├── out.input150.c │ │ ├── runtests │ │ ├── runtests0 │ │ ├── runtests0n │ │ └── runtestsn │ ├── tree.c │ └── types.c ├── 63_QBE/ │ ├── Makefile │ ├── Readme.md │ ├── cg.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── expr.c │ ├── gen.c │ ├── include/ │ │ ├── ctype.h │ │ ├── errno.h │ │ ├── fcntl.h │ │ ├── stddef.h │ │ ├── stdio.h │ │ ├── stdlib.h │ │ ├── string.h │ │ └── unistd.h │ ├── main.c │ ├── 
misc.c │ ├── opt.c │ ├── scan.c │ ├── stmt.c │ ├── sym.c │ ├── tests/ │ │ ├── err.input031.c │ │ ├── err.input032.c │ │ ├── err.input033.c │ │ ├── err.input034.c │ │ ├── err.input035.c │ │ ├── err.input036.c │ │ ├── err.input037.c │ │ ├── err.input038.c │ │ ├── err.input039.c │ │ ├── err.input040.c │ │ ├── err.input041.c │ │ ├── err.input042.c │ │ ├── err.input043.c │ │ ├── err.input044.c │ │ ├── err.input045.c │ │ ├── err.input046.c │ │ ├── err.input047.c │ │ ├── err.input048.c │ │ ├── err.input049.c │ │ ├── err.input050.c │ │ ├── err.input051.c │ │ ├── err.input052.c │ │ ├── err.input056.c │ │ ├── err.input057.c │ │ ├── err.input059.c │ │ ├── err.input060.c │ │ ├── err.input061.c │ │ ├── err.input064.c │ │ ├── err.input065.c │ │ ├── err.input066.c │ │ ├── err.input068.c │ │ ├── err.input069.c │ │ ├── err.input072.c │ │ ├── err.input073.c │ │ ├── err.input075.c │ │ ├── err.input076.c │ │ ├── err.input077.c │ │ ├── err.input078.c │ │ ├── err.input079.c │ │ ├── err.input085.c │ │ ├── err.input086.c │ │ ├── err.input087.c │ │ ├── err.input092.c │ │ ├── err.input093.c │ │ ├── err.input094.c │ │ ├── err.input095.c │ │ ├── err.input096.c │ │ ├── err.input097.c │ │ ├── err.input098.c │ │ ├── err.input102.c │ │ ├── err.input103.c │ │ ├── err.input104.c │ │ ├── err.input105.c │ │ ├── err.input118.c │ │ ├── err.input124.c │ │ ├── err.input126.c │ │ ├── err.input129.c │ │ ├── err.input141.c │ │ ├── err.input142.c │ │ ├── input001.c │ │ ├── input002.c │ │ ├── input003.c │ │ ├── input004.c │ │ ├── input005.c │ │ ├── input006.c │ │ ├── input007.c │ │ ├── input008.c │ │ ├── input009.c │ │ ├── input010.c │ │ ├── input011.c │ │ ├── input012.c │ │ ├── input013.c │ │ ├── input014.c │ │ ├── input015.c │ │ ├── input016.c │ │ ├── input017.c │ │ ├── input018.c │ │ ├── input018a.c │ │ ├── input019.c │ │ ├── input020.c │ │ ├── input021.c │ │ ├── input022.c │ │ ├── input023.c │ │ ├── input024.c │ │ ├── input025.c │ │ ├── input026.c │ │ ├── input027.c │ │ ├── input028.c │ │ ├── input029.c │ 
│ ├── input030.c │ │ ├── input031.c │ │ ├── input032.c │ │ ├── input033.c │ │ ├── input035.c │ │ ├── input036.c │ │ ├── input037.c │ │ ├── input038.c │ │ ├── input039.c │ │ ├── input040.c │ │ ├── input041.c │ │ ├── input042.c │ │ ├── input043.c │ │ ├── input044.c │ │ ├── input045.c │ │ ├── input046.c │ │ ├── input047.c │ │ ├── input048.c │ │ ├── input049.c │ │ ├── input050.c │ │ ├── input051.c │ │ ├── input052.c │ │ ├── input053.c │ │ ├── input054.c │ │ ├── input055.c │ │ ├── input056.c │ │ ├── input057.c │ │ ├── input058.c │ │ ├── input059.c │ │ ├── input060.c │ │ ├── input061.c │ │ ├── input062.c │ │ ├── input063.c │ │ ├── input064.c │ │ ├── input065.c │ │ ├── input066.c │ │ ├── input067.c │ │ ├── input068.c │ │ ├── input069.c │ │ ├── input070.c │ │ ├── input071.c │ │ ├── input072.c │ │ ├── input073.c │ │ ├── input074.c │ │ ├── input075.c │ │ ├── input076.c │ │ ├── input077.c │ │ ├── input078.c │ │ ├── input079.c │ │ ├── input080.c │ │ ├── input081.c │ │ ├── input082.c │ │ ├── input083.c │ │ ├── input084.c │ │ ├── input085.c │ │ ├── input086.c │ │ ├── input087.c │ │ ├── input088.c │ │ ├── input089.c │ │ ├── input090.c │ │ ├── input091.c │ │ ├── input092.c │ │ ├── input093.c │ │ ├── input094.c │ │ ├── input095.c │ │ ├── input096.c │ │ ├── input098.c │ │ ├── input099.c │ │ ├── input100.c │ │ ├── input101.c │ │ ├── input102.c │ │ ├── input103.c │ │ ├── input104.c │ │ ├── input105.c │ │ ├── input106.c │ │ ├── input107.c │ │ ├── input108.c │ │ ├── input109.c │ │ ├── input110.c │ │ ├── input111.c │ │ ├── input112.c │ │ ├── input113.c │ │ ├── input114.c │ │ ├── input115.c │ │ ├── input116.c │ │ ├── input117.c │ │ ├── input118.c │ │ ├── input119.c │ │ ├── input120.c │ │ ├── input121.c │ │ ├── input122.c │ │ ├── input123.c │ │ ├── input124.c │ │ ├── input125.c │ │ ├── input126.c │ │ ├── input127.c │ │ ├── input128.c │ │ ├── input129.c │ │ ├── input130.c │ │ ├── input131.c │ │ ├── input132.c │ │ ├── input133.c │ │ ├── input134.c │ │ ├── input135.c │ │ ├── input136.c │ │ 
├── input137.c │ │ ├── input138.c │ │ ├── input139.c │ │ ├── input140.c │ │ ├── input141.c │ │ ├── input142.c │ │ ├── input143.c │ │ ├── input144.c │ │ ├── input145.c │ │ ├── input146.c │ │ ├── input147.c │ │ ├── input148.c │ │ ├── input149.c │ │ ├── input150.c │ │ ├── input151.c │ │ ├── input152.c │ │ ├── input153.c │ │ ├── input154.c │ │ ├── mktests │ │ ├── out.input001.c │ │ ├── out.input002.c │ │ ├── out.input003.c │ │ ├── out.input004.c │ │ ├── out.input005.c │ │ ├── out.input006.c │ │ ├── out.input007.c │ │ ├── out.input008.c │ │ ├── out.input009.c │ │ ├── out.input010.c │ │ ├── out.input011.c │ │ ├── out.input012.c │ │ ├── out.input013.c │ │ ├── out.input014.c │ │ ├── out.input015.c │ │ ├── out.input016.c │ │ ├── out.input017.c │ │ ├── out.input018.c │ │ ├── out.input018a.c │ │ ├── out.input019.c │ │ ├── out.input020.c │ │ ├── out.input021.c │ │ ├── out.input022.c │ │ ├── out.input023.c │ │ ├── out.input024.c │ │ ├── out.input025.c │ │ ├── out.input026.c │ │ ├── out.input027.c │ │ ├── out.input028.c │ │ ├── out.input029.c │ │ ├── out.input030.c │ │ ├── out.input053.c │ │ ├── out.input054.c │ │ ├── out.input055.c │ │ ├── out.input058.c │ │ ├── out.input062.c │ │ ├── out.input063.c │ │ ├── out.input067.c │ │ ├── out.input070.c │ │ ├── out.input071.c │ │ ├── out.input074.c │ │ ├── out.input080.c │ │ ├── out.input081.c │ │ ├── out.input082.c │ │ ├── out.input083.c │ │ ├── out.input084.c │ │ ├── out.input088.c │ │ ├── out.input089.c │ │ ├── out.input090.c │ │ ├── out.input091.c │ │ ├── out.input099.c │ │ ├── out.input100.c │ │ ├── out.input101.c │ │ ├── out.input106.c │ │ ├── out.input107.c │ │ ├── out.input108.c │ │ ├── out.input109.c │ │ ├── out.input110.c │ │ ├── out.input111.c │ │ ├── out.input112.c │ │ ├── out.input113.c │ │ ├── out.input114.c │ │ ├── out.input115.c │ │ ├── out.input116.c │ │ ├── out.input117.c │ │ ├── out.input119.c │ │ ├── out.input120.c │ │ ├── out.input121.c │ │ ├── out.input122.c │ │ ├── out.input123.c │ │ ├── out.input125.c │ │ ├── 
out.input127.c │ │ ├── out.input128.c │ │ ├── out.input130.c │ │ ├── out.input131.c │ │ ├── out.input132.c │ │ ├── out.input133.c │ │ ├── out.input134.c │ │ ├── out.input135.c │ │ ├── out.input136.c │ │ ├── out.input137.c │ │ ├── out.input138.c │ │ ├── out.input139.c │ │ ├── out.input140.c │ │ ├── out.input143.c │ │ ├── out.input144.c │ │ ├── out.input145.c │ │ ├── out.input146.c │ │ ├── out.input147.c │ │ ├── out.input148.c │ │ ├── out.input149.c │ │ ├── out.input150.c │ │ ├── out.input151.c │ │ ├── out.input152.c │ │ ├── out.input153.c │ │ ├── out.input154.c │ │ ├── runtests │ │ └── runtests2 │ ├── tree.c │ └── types.c ├── 64_6809_Target/ │ ├── 6809triple_test │ ├── Makefile │ ├── Readme.md │ ├── cg.h │ ├── cg6809.c │ ├── cgen.c │ ├── cgqbe.c │ ├── cpeep.c │ ├── data.h │ ├── decl.c │ ├── decl.h │ ├── defs.h │ ├── desym.c │ ├── detok.c │ ├── detree.c │ ├── docs/ │ │ ├── NOTES.md │ │ ├── copt.1 │ │ └── long_regs.dia │ ├── expr.c │ ├── expr.h │ ├── gen.c │ ├── gen.h │ ├── include/ │ │ ├── 6809/ │ │ │ ├── ctype.h │ │ │ ├── errno.h │ │ │ ├── fcntl.h │ │ │ ├── stddef.h │ │ │ ├── stdint.h │ │ │ ├── stdio.h │ │ │ ├── stdlib.h │ │ │ ├── string.h │ │ │ ├── sys/ │ │ │ │ ├── stat.h │ │ │ │ ├── types.h │ │ │ │ └── wait.h │ │ │ └── unistd.h │ │ └── qbe/ │ │ ├── ctype.h │ │ ├── errno.h │ │ ├── fcntl.h │ │ ├── stddef.h │ │ ├── stdio.h │ │ ├── stdlib.h │ │ ├── string.h │ │ ├── sys/ │ │ │ └── wait.h │ │ └── unistd.h │ ├── lib/ │ │ └── 6809/ │ │ ├── Makefile │ │ ├── crt0.o │ │ ├── crt0.s │ │ ├── lib6809.a │ │ ├── libc.a │ │ └── rules.6809 │ ├── misc.c │ ├── misc.h │ ├── opt.c │ ├── opt.h │ ├── parse.c │ ├── parse.h │ ├── scan.c │ ├── stmt.c │ ├── stmt.h │ ├── sym.c │ ├── sym.h │ ├── targ6809.c │ ├── target.h │ ├── targqbe.c │ ├── tests/ │ │ ├── Makefile │ │ ├── err.input031.c │ │ ├── err.input032.c │ │ ├── err.input033.c │ │ ├── err.input034.c │ │ ├── err.input035.c │ │ ├── err.input036.c │ │ ├── err.input037.c │ │ ├── err.input038.c │ │ ├── err.input039.c │ │ ├── err.input041.c │ 
│ ├── err.input042.c │ │ ├── err.input043.c │ │ ├── err.input044.c │ │ ├── err.input045.c │ │ ├── err.input046.c │ │ ├── err.input047.c │ │ ├── err.input048.c │ │ ├── err.input049.c │ │ ├── err.input050.c │ │ ├── err.input051.c │ │ ├── err.input052.c │ │ ├── err.input056.c │ │ ├── err.input057.c │ │ ├── err.input059.c │ │ ├── err.input060.c │ │ ├── err.input061.c │ │ ├── err.input064.c │ │ ├── err.input065.c │ │ ├── err.input066.c │ │ ├── err.input068.c │ │ ├── err.input069.c │ │ ├── err.input072.c │ │ ├── err.input073.c │ │ ├── err.input075.c │ │ ├── err.input076.c │ │ ├── err.input077.c │ │ ├── err.input078.c │ │ ├── err.input079.c │ │ ├── err.input085.c │ │ ├── err.input086.c │ │ ├── err.input087.c │ │ ├── err.input092.c │ │ ├── err.input093.c │ │ ├── err.input094.c │ │ ├── err.input095.c │ │ ├── err.input096.c │ │ ├── err.input097.c │ │ ├── err.input098.c │ │ ├── err.input102.c │ │ ├── err.input103.c │ │ ├── err.input104.c │ │ ├── err.input105.c │ │ ├── err.input118.c │ │ ├── err.input124.c │ │ ├── err.input126.c │ │ ├── err.input129.c │ │ ├── err.input141.c │ │ ├── err.input142.c │ │ ├── input.rules.6809 │ │ ├── input001.c │ │ ├── input002.c │ │ ├── input003.c │ │ ├── input004.c │ │ ├── input005.c │ │ ├── input006.c │ │ ├── input007.c │ │ ├── input008.c │ │ ├── input009.c │ │ ├── input010.c │ │ ├── input011.c │ │ ├── input012.c │ │ ├── input013.c │ │ ├── input014.c │ │ ├── input015.c │ │ ├── input016.c │ │ ├── input017.c │ │ ├── input018.c │ │ ├── input018a.c │ │ ├── input019.c │ │ ├── input020.c │ │ ├── input021.c │ │ ├── input022.c │ │ ├── input023.c │ │ ├── input024.c │ │ ├── input025.c │ │ ├── input026.c │ │ ├── input027.c │ │ ├── input028.c │ │ ├── input029.c │ │ ├── input030.c │ │ ├── input031.c │ │ ├── input032.c │ │ ├── input033.c │ │ ├── input035.c │ │ ├── input036.c │ │ ├── input037.c │ │ ├── input038.c │ │ ├── input039.c │ │ ├── input041.c │ │ ├── input042.c │ │ ├── input043.c │ │ ├── input044.c │ │ ├── input045.c │ │ ├── input046.c │ │ ├── 
input047.c │ │ ├── input048.c │ │ ├── input049.c │ │ ├── input050.c │ │ ├── input051.c │ │ ├── input052.c │ │ ├── input053.c │ │ ├── input054.c │ │ ├── input055.c │ │ ├── input056.c │ │ ├── input057.c │ │ ├── input058.c │ │ ├── input059.c │ │ ├── input060.c │ │ ├── input061.c │ │ ├── input063.c │ │ ├── input064.c │ │ ├── input065.c │ │ ├── input066.c │ │ ├── input067.c │ │ ├── input068.c │ │ ├── input069.c │ │ ├── input070.c │ │ ├── input071.c │ │ ├── input072.c │ │ ├── input073.c │ │ ├── input074.c │ │ ├── input075.c │ │ ├── input076.c │ │ ├── input077.c │ │ ├── input078.c │ │ ├── input079.c │ │ ├── input080.c │ │ ├── input081.c │ │ ├── input082.c │ │ ├── input083.c │ │ ├── input084.c │ │ ├── input085.c │ │ ├── input086.c │ │ ├── input087.c │ │ ├── input088.c │ │ ├── input089.c │ │ ├── input090.c │ │ ├── input091.c │ │ ├── input092.c │ │ ├── input093.c │ │ ├── input094.c │ │ ├── input095.c │ │ ├── input096.c │ │ ├── input098.c │ │ ├── input099.c │ │ ├── input100.c │ │ ├── input101.c │ │ ├── input102.c │ │ ├── input103.c │ │ ├── input104.c │ │ ├── input105.c │ │ ├── input106.c │ │ ├── input107.c │ │ ├── input108.c │ │ ├── input109.c │ │ ├── input110.c │ │ ├── input111.c │ │ ├── input112.c │ │ ├── input113.c │ │ ├── input114.c │ │ ├── input116.c │ │ ├── input117.c │ │ ├── input118.c │ │ ├── input119.c │ │ ├── input120.c │ │ ├── input121.c │ │ ├── input122.c │ │ ├── input123.c │ │ ├── input124.c │ │ ├── input125.c │ │ ├── input126.c │ │ ├── input127.c │ │ ├── input128.c │ │ ├── input129.c │ │ ├── input130.c │ │ ├── input131.c │ │ ├── input132.c │ │ ├── input133.c │ │ ├── input134.c │ │ ├── input135.c │ │ ├── input136.c │ │ ├── input137.c │ │ ├── input138.c │ │ ├── input139.c │ │ ├── input140.c │ │ ├── input141.c │ │ ├── input142.c │ │ ├── input143.c │ │ ├── input145.c │ │ ├── input146.c │ │ ├── input147.c │ │ ├── input148.c │ │ ├── input149.c │ │ ├── input150.c │ │ ├── input151.c │ │ ├── input152.c │ │ ├── input153.c │ │ ├── input154.c │ │ ├── input155.c │ │ ├── 
input156.c │ │ ├── input157.c │ │ ├── input159.c │ │ ├── input160.c │ │ ├── input161.c │ │ ├── input162.c │ │ ├── input163.c │ │ ├── input164.c │ │ ├── input165.c │ │ ├── input166.c │ │ ├── mktests │ │ ├── onetest │ │ ├── out.input001.c │ │ ├── out.input002.c │ │ ├── out.input003.c │ │ ├── out.input004.c │ │ ├── out.input005.c │ │ ├── out.input006.c │ │ ├── out.input007.c │ │ ├── out.input008.c │ │ ├── out.input009.c │ │ ├── out.input010.c │ │ ├── out.input011.c │ │ ├── out.input012.c │ │ ├── out.input013.c │ │ ├── out.input014.c │ │ ├── out.input015.c │ │ ├── out.input016.c │ │ ├── out.input017.c │ │ ├── out.input018.c │ │ ├── out.input018a.c │ │ ├── out.input019.c │ │ ├── out.input020.c │ │ ├── out.input021.c │ │ ├── out.input022.c │ │ ├── out.input023.c │ │ ├── out.input024.c │ │ ├── out.input025.c │ │ ├── out.input026.c │ │ ├── out.input027.c │ │ ├── out.input028.c │ │ ├── out.input029.c │ │ ├── out.input030.c │ │ ├── out.input053.c │ │ ├── out.input054.c │ │ ├── out.input055.c │ │ ├── out.input058.c │ │ ├── out.input063.c │ │ ├── out.input067.c │ │ ├── out.input070.c │ │ ├── out.input071.c │ │ ├── out.input074.c │ │ ├── out.input080.c │ │ ├── out.input081.c │ │ ├── out.input082.c │ │ ├── out.input083.c │ │ ├── out.input084.c │ │ ├── out.input088.c │ │ ├── out.input089.c │ │ ├── out.input090.c │ │ ├── out.input091.c │ │ ├── out.input099.c │ │ ├── out.input100.c │ │ ├── out.input101.c │ │ ├── out.input106.c │ │ ├── out.input107.c │ │ ├── out.input108.c │ │ ├── out.input109.c │ │ ├── out.input110.c │ │ ├── out.input111.c │ │ ├── out.input112.c │ │ ├── out.input113.c │ │ ├── out.input114.c │ │ ├── out.input116.c │ │ ├── out.input117.c │ │ ├── out.input119.c │ │ ├── out.input120.c │ │ ├── out.input121.c │ │ ├── out.input122.c │ │ ├── out.input123.c │ │ ├── out.input125.c │ │ ├── out.input127.c │ │ ├── out.input128.c │ │ ├── out.input130.c │ │ ├── out.input131.c │ │ ├── out.input132.c │ │ ├── out.input133.c │ │ ├── out.input134.c │ │ ├── out.input135.c │ │ ├── 
out.input136.c │ │ ├── out.input137.c │ │ ├── out.input138.c │ │ ├── out.input139.c │ │ ├── out.input140.c │ │ ├── out.input143.c │ │ ├── out.input145.c │ │ ├── out.input146.c │ │ ├── out.input147.c │ │ ├── out.input148.c │ │ ├── out.input149.c │ │ ├── out.input150.c │ │ ├── out.input151.c │ │ ├── out.input152.c │ │ ├── out.input153.c │ │ ├── out.input154.c │ │ ├── out.input155.c │ │ ├── out.input156.c │ │ ├── out.input157.c │ │ ├── out.input159.c │ │ ├── out.input160.c │ │ ├── out.input161.c │ │ ├── out.input162.c │ │ ├── out.input163.c │ │ ├── out.input164.c │ │ ├── out.input165.c │ │ ├── out.input166.c │ │ └── runtests │ ├── tree.c │ ├── tree.h │ ├── tstring.c │ ├── types.c │ ├── types.h │ ├── wcc.c │ └── wcc.h ├── LICENSE └── Readme.md ================================================ FILE CONTENTS ================================================ ================================================ FILE: 00_Introduction/Readme.md ================================================ # Part 0: Introduction I've decided to go on a compiler writing journey. In the past I've written some [assemblers](https://github.com/DoctorWkt/pdp7-unix/blob/master/tools/as7), and I've written a [simple compiler](https://github.com/DoctorWkt/h-compiler) for a typeless language. But I've never written a compiler that can compile itself. So that's where I'm headed on this journey. As part of the process, I'm going to write up my work so that others can follow along. This will also help me to clarify my thoughts and ideas. Hopefully you, and I, will find this useful! ## Goals of the Journey Here are my goals, and non-goals, for the journey: + To write a self-compiling compiler. I think that if the compiler can compile itself, it gets to call itself a *real* compiler. + To target at least one real hardware platform. I've seen a few compilers that generate code for hypothetical machines. I want my compiler to work on real hardware. 
Also, if possible, I want to write the compiler so that it can support multiple backends for different hardware platforms. + Practical before research. There's a whole lot of research in the area of compilers. I want to start from absolute zero on this journey, so I'll tend to go for a practical approach and not a theory-heavy approach. That said, there will be times when I'll need to introduce (and implement) some theory-based stuff. + Follow the KISS principle: keep it simple, stupid! I'm definitely going to be using Ken Thompson's principle here: "When in doubt, use brute force." + Take a lot of small steps to reach the final goal. I'll break the journey up into a lot of simple steps instead of taking large leaps. This will make each new addition to the compiler a bite-sized and easily digestible thing. ## Target Language The choice of a target language is difficult. If I choose a high-level language like Python, Go etc., then I'll have to implement a whole pile of libraries and classes as they are built-in to the language. I could write a compiler for a language like Lisp, but these can be [done easily](ftp://publications.ai.mit.edu/ai-publications/pdf/AIM-039.pdf). Instead, I've fallen back on the old standby and I'm going to write a compiler for a subset of C, enough to allow the compiler to compile itself. C is just a step up from assembly language (for some subset of C, not [C18](https://en.wikipedia.org/wiki/C18_(C_standard_revision))), and this will help make the task of compiling the C code down to assembly somewhat easier. Oh, and I also like C. ## The Basics of a Compiler's Job The job of a compiler is to translate input in one language (usually a high-level language) into a different output language (usually a lower-level language than the input). The main steps are: ![](Figs/parsing_steps.png) + Do [lexical analysis](https://en.wikipedia.org/wiki/Lexical_analysis) to recognise the lexical elements. 
In several languages, `=` is different to `==`, so you can't just read a single `=`. We call these lexical elements *tokens*. + [Parse](https://en.wikipedia.org/wiki/Parsing) the input, i.e. recognise the syntax and structural elements of the input and ensure that they conform to the *grammar* of the language. For example, your language might have this decision-making structure: ``` if (x < 23) { print("x is smaller than 23\n"); } ``` > but in another language you might write: ``` if (x < 23): print("x is smaller than 23\n") ``` > This is also the place where the compiler can detect syntax errors, like if the semicolon was missing on the end of the first *print* statement. + Do [semantic analysis](https://en.wikipedia.org/wiki/Semantic_analysis_(compilers)) of the input, i.e. understand the meaning of the input. This is actually different from recognising the syntax and structure. For example, in English, a sentence might have the form `<subject> <verb> <adjective> <object>`. The following two sentences have the same structure, but completely different meaning: ``` David ate lovely bananas. Jennifer hates green tomatoes. ``` + [Translate](https://en.wikipedia.org/wiki/Code_generation_(compiler)) the meaning of the input into a different language. Here we convert the input, parts at a time, into a lower-level language. ## Resources There's a lot of compiler resources out on the Internet. Here are the ones I'll be looking at. ### Learning Resources If you want to start with some books, papers and tools on compilers, I'd highly recommend this list: + [Curated list of awesome resources on Compilers, Interpreters and Runtimes](https://github.com/aalhour/awesome-compilers) by Ahmad Alhour ### Existing Compilers While I'm going to build my own compiler, I plan on looking at other compilers for ideas and probably also borrow some of their code. 
Here are the ones I'm looking at: + [SubC](http://www.t3x.org/subc/) by Nils M Holm + [Swieros C Compiler](https://github.com/rswier/swieros/blob/master/root/bin/c.c) by Robert Swierczek + [fbcc](https://github.com/DoctorWkt/fbcc) by Fabrice Bellard + [tcc](https://bellard.org/tcc/), also by Fabrice Bellard and others + [catc](https://github.com/yui0/catc) by Yuichiro Nakada + [amacc](https://github.com/jserv/amacc) by Jim Huang + [Small C](https://en.wikipedia.org/wiki/Small-C) by Ron Cain, James E. Hendrix, derivatives by others In particular, I'll be using a lot of the ideas, and some of the code, from the SubC compiler. ## Setting Up the Development Environment Assuming that you want to come along on this journey, here's what you'll need. I'm going to use a Linux development environment, so download and set up your favourite Linux system: I'm using Lubuntu 18.04. I'm going to target two hardware platforms: Intel x86-64 and 32-bit ARM. I'll use a PC running Lubuntu 18.04 as the Intel target, and a Raspberry Pi running Raspbian as the ARM target. On the Intel platform, we are going to need an existing C compiler. So, install this package (I give the Ubuntu/Debian commands): ``` $ sudo apt-get install build-essential ``` If there are any more tools required for a vanilla Linux system, let me know. Finally, clone a copy of this Github repository. ## The Next Step In the next part of our compiler writing journey, we will start with the code to scan our input file and find the *tokens* that are the lexical elements of our language. 
[Next step](../01_Scanner/Readme.md) ================================================ FILE: 01_Scanner/Makefile ================================================ scanner: main.c scan.c cc -o scanner -g main.c scan.c clean: rm -f scanner *.o ================================================ FILE: 01_Scanner/Readme.md ================================================ # Part 1: Introduction to Lexical Scanning We start our compiler writing journey with a simple lexical scanner. As I mentioned in the previous part, the job of the scanner is to identify the lexical elements, or *tokens*, in the input language. We will start with a language that has only five lexical elements: + the four basic maths operators: `*`, `/`, `+` and `-` + decimal whole numbers which have 1 or more digits `0` .. `9` Each token that we scan is going to be stored in this structure (from `defs.h`): ```c // Token structure struct token { int token; int intvalue; }; ``` where the `token` field can be one of these values (from `defs.h`): ```c // Tokens enum { T_PLUS, T_MINUS, T_STAR, T_SLASH, T_INTLIT }; ``` When the token is a `T_INTLIT` (i.e. an integer literal), the `intvalue` field will hold the value of the integer that we scanned in. ## Functions in `scan.c` The `scan.c` file holds the functions of our lexical scanner. We are going to read in one character at a time from our input file. However, there will be times when we need to "put back" a character if we have read too far ahead in the input stream. We also want to track what line we are currently on so that we can print the line number in our debug messages. All of this is done by the `next()` function: ```c // Get the next character from the input file. 
static int next(void) { int c; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return c; } c = fgetc(Infile); // Read from input file if ('\n' == c) Line++; // Increment line count return c; } ``` The `Putback` and `Line` variables are defined in `data.h` along with our input file pointer: ```c extern_ int Line; extern_ int Putback; extern_ FILE *Infile; ``` All C files will include this where `extern_` is replaced with `extern`. But `main.c` will remove the `extern_`; hence, these variables will "belong" to `main.c`. Finally, how do we put a character back into the input stream? Thus: ```c // Put back an unwanted character static void putback(int c) { Putback = c; } ``` ## Ignoring Whitespace We need a function that reads and silently skips whitespace characters until it gets a non-whitespace character, and returns it. Thus: ```c // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. static int skip(void) { int c; c = next(); while (' ' == c || '\t' == c || '\n' == c || '\r' == c || '\f' == c) { c = next(); } return (c); } ``` ## Scanning Tokens: `scan()` So now we can read characters in while skipping whitespace; we can also put back a character if we read one character too far ahead. We can now write our first lexical scanner: ```c // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. int scan(struct token *t) { int c; // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: return (0); case '+': t->token = T_PLUS; break; case '-': t->token = T_MINUS; break; case '*': t->token = T_STAR; break; case '/': t->token = T_SLASH; break; default: // More here soon } // We found a token return (1); } ``` That's it for the simple one-character tokens: for each recognised character, turn it into a token. 
You may ask: why not just put the recognised character into the `struct token`? The answer is that later we will need to recognise multi-character tokens such as `==` and keywords like `if` and `while`. So it will make life easier to have an enumerated list of token values. ## Integer Literal Values In fact, we already have to face this situation as we also need to recognise integer literal values like `3827` and `87731`. Here is the missing `default` code from the `switch` statement: ```c default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } printf("Unrecognised character %c on line %d\n", c, Line); exit(1); ``` Once we hit a decimal digit character, we call the helper function `scanint()` with this first character. It will return the scanned integer value. To do this, it has to read each character in turn, check that it's a legitimate digit, and build up the final number. Here is the code: ```c // Scan and return an integer literal // value from the input file. static int scanint(int c) { int k, val = 0; // Convert each character into an int value while ((k = chrpos("0123456789", c)) >= 0) { val = val * 10 + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return val; } ``` We start with a zero `val` value. Each time we get a character in the set `0` to `9` we convert this to an `int` value with `chrpos()`. We make `val` 10 times bigger and then add this new digit to it. For example, if we have the characters `3`, `2`, `8`, we do: + `val= 0 * 10 + 3`, i.e. 3 + `val= 3 * 10 + 2`, i.e. 32 + `val= 32 * 10 + 8`, i.e. 328 Right at the end, did you notice the call to `putback(c)`? We found a character that's not a decimal digit at this point. We can't simply discard it, but luckily we can put it back in the input stream to be consumed later. You may also ask at this point: why not simply subtract the ASCII value of '0' from `c` to make it an integer? 
The answer is that, later on, we will be able to do `chrpos("0123456789abcdef")` to convert hexadecimal digits as well. Here's the code for `chrpos()`: ```c // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { char *p; p = strchr(s, c); return (p ? p - s : -1); } ``` And that's it for the lexical scanner code in `scan.c` for now. ## Putting the Scanner to Work The code in `main.c` puts the above scanner to work. The `main()` function opens up a file and then scans it for tokens: ```c void main(int argc, char *argv[]) { ... init(); ... Infile = fopen(argv[1], "r"); ... scanfile(); exit(0); } ``` And `scanfile()` loops while there is a new token and prints out the details of the token: ```c // List of printable tokens char *tokstr[] = { "+", "-", "*", "/", "intlit" }; // Loop scanning in all the tokens in the input file. // Print out details of each token found. static void scanfile() { struct token T; while (scan(&T)) { printf("Token %s", tokstr[T.token]); if (T.token == T_INTLIT) printf(", value %d", T.intvalue); printf("\n"); } } ``` ## Some Example Input Files I've provided some example input files so you can see what tokens the scanner finds in each file, and what input files the scanner rejects. ``` $ make cc -o scanner -g main.c scan.c $ cat input01 2 + 3 * 5 - 8 / 3 $ ./scanner input01 Token intlit, value 2 Token + Token intlit, value 3 Token * Token intlit, value 5 Token - Token intlit, value 8 Token / Token intlit, value 3 $ cat input04 23 + 18 - 45.6 * 2 / 18 $ ./scanner input04 Token intlit, value 23 Token + Token intlit, value 18 Token - Token intlit, value 45 Unrecognised character . on line 3 ``` ## Conclusion and What's Next We've started small and we have a simple lexical scanner that recognises the four main maths operators and also integer literal values. We saw that we needed to skip whitespace and put back characters if we read too far into the input. 
Single character tokens are easy to scan, but multi-character tokens are a bit harder. But at the end, the `scan()` function returns the next token from the input file in a `struct token` variable: ```c struct token { int token; int intvalue; }; ``` In the next part of our compiler writing journey, we will build a recursive descent parser to interpret the grammar of our input files, and calculate & print out the final value for each file. [Next step](../02_Parser/Readme.md) ================================================ FILE: 01_Scanner/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; extern_ int Putback; extern_ FILE *Infile; ================================================ FILE: 01_Scanner/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 int scan(struct token *t); ================================================ FILE: 01_Scanner/defs.h ================================================ #include <stdio.h> #include <stdlib.h> #include <string.h> #include <ctype.h> // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 // Tokens enum { T_PLUS, T_MINUS, T_STAR, T_SLASH, T_INTLIT }; // Token structure struct token { int token; int intvalue; }; ================================================ FILE: 01_Scanner/input01 ================================================ 2 + 3 * 5 - 8 / 3 ================================================ FILE: 01_Scanner/input02 ================================================ 13 -6+ 4* 5 + 08 / 3 ================================================ FILE: 01_Scanner/input03 ================================================ 12 34 + -56 * / - - 8 + * 2 ================================================ FILE: 
01_Scanner/input04 ================================================ 23 + 18 - 45.6 * 2 / 18 ================================================ FILE: 01_Scanner/input05 ================================================ 23 * 456abcdefg ================================================ FILE: 01_Scanner/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include <errno.h> // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Initialise global variables static void init() { Line = 1; Putback = '\n'; } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s infile\n", prog); exit(1); } // List of printable tokens char *tokstr[] = { "+", "-", "*", "/", "intlit" }; // Loop scanning in all the tokens in the input file. // Print out details of each token found. static void scanfile() { struct token T; while (scan(&T)) { printf("Token %s", tokstr[T.token]); if (T.token == T_INTLIT) printf(", value %d", T.intvalue); printf("\n"); } } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. 
void main(int argc, char *argv[]) { if (argc != 2) usage(argv[0]); init(); if ((Infile = fopen(argv[1], "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", argv[1], strerror(errno)); exit(1); } scanfile(); exit(0); } ================================================ FILE: 01_Scanner/scan.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { char *p; p = strchr(s, c); return (p ? p - s : -1); } // Get the next character from the input file. static int next(void) { int c; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return c; } c = fgetc(Infile); // Read from input file if ('\n' == c) Line++; // Increment line count return c; } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. static int skip(void) { int c; c = next(); while (' ' == c || '\t' == c || '\n' == c || '\r' == c || '\f' == c) { c = next(); } return (c); } // Scan and return an integer literal // value from the input file. static int scanint(int c) { int k, val = 0; // Convert each character into an int value while ((k = chrpos("0123456789", c)) >= 0) { val = val * 10 + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return val; } // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. 
int scan(struct token *t) { int c; // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: return (0); case '+': t->token = T_PLUS; break; case '-': t->token = T_MINUS; break; case '*': t->token = T_STAR; break; case '/': t->token = T_SLASH; break; default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } printf("Unrecognised character %c on line %d\n", c, Line); exit(1); } // We found a token return (1); } ================================================ FILE: 02_Parser/Makefile ================================================ parser: expr.c interp.c main.c scan.c tree.c cc -o parser -g expr.c interp.c main.c scan.c tree.c clean: rm -f parser *.o ================================================ FILE: 02_Parser/Readme.md ================================================ # Part 2: Introduction to Parsing In this part of our compiler writing journey, I'm going to introduce the basics of a parser. As I mentioned in the first part, the job of the parser is to recognise the syntax and structural elements of the input and ensure that they conform to the *grammar* of the language. We already have several language elements that we can scan in, i.e. our tokens: + the four basic maths operators: `*`, `/`, `+` and `-` + decimal whole numbers which have 1 or more digits `0` .. `9` Now let's define a grammar for the language that our parser will recognise. ## BNF: Backus-Naur Form You will come across the use of [BNF ](https://en.wikipedia.org/wiki/Backus%E2%80%93Naur_form) at some point if you get into dealing with computer languages. I will just introduce enough of the BNF syntax here to express the grammar we want to recognise. We want a grammar to express maths expressions with whole numbers. 
Here is the BNF description of the grammar: ``` expression: number | expression '*' expression | expression '/' expression | expression '+' expression | expression '-' expression ; number: T_INTLIT ; ``` The vertical bars separate options in the grammar, so the above says: + An expression could be just a number, or + An expression is two expressions separated by a '*' token, or + An expression is two expressions separated by a '/' token, or + An expression is two expressions separated by a '+' token, or + An expression is two expressions separated by a '-' token + A number is always a T_INTLIT token It should be pretty obvious that the BNF definition of the grammar is *recursive*: an expression is defined by referencing other expressions. But there is a way to *bottom-out* the recursion: when an expression turns out to be a number, this is always a T_INTLIT token and thus not recursive. In BNF, we say that "expression" and "number" are *non-terminal* symbols, as they are produced by rules in the grammar. However, T_INTLIT is a *terminal* symbol as it is not defined by any rule. Instead, it is an already-recognised token in the language. Similarly, the four maths operator tokens are terminal symbols. ## Recursive Descent Parsing Given that the grammar for our language is recursive, it makes sense for us to try and parse it recursively. What we need to do is to read in a token, then *look ahead* to the next token. Based on what the next token is, we can then decide what path we need to take to parse the input. This may require us to recursively call a function that has already been called. In our case, the first token in any expression will be a number and this may be followed by a maths operator. After that there may only be a single number, or there may be the start of a whole new expression. How can we parse this recursively? We can write pseudo-code that looks like this: ``` function expression() { Scan and check the first token is a number. 
Error if it's not Get the next token If we have reached the end of the input, return, i.e. base case Otherwise, call expression() } ``` Let's run this function on the input `2 + 3 - 5 T_EOF` where `T_EOF` is a token that reflects the end of the input. I will number each call to `expression()`. ``` expression0: Scan in the 2, it's a number Get next token, +, which isn't T_EOF Call expression() expression1: Scan in the 3, it's a number Get next token, -, which isn't T_EOF Call expression() expression2: Scan in the 5, it's a number Get next token, T_EOF, so return from expression2 return from expression1 return from expression0 ``` Yes, the function was able to recursively parse the input `2 + 3 - 5 T_EOF`. Of course, we haven't done anything with the input, but that isn't the job of the parser. The parser's job is to *recognise* the input, and warn of any syntax errors. Someone else is going to do the *semantic analysis* of the input, i.e. to understand and perform the meaning of this input. > Later on, you will see that this isn't actually true. It often makes sense to intertwine the syntax analysis and semantic analysis. ## Abstract Syntax Trees To do the semantic analysis, we need code that either interprets the recognised input, or translates it to another format, e.g. assembly code. In this part of the journey, we will build an interpreter for the input. But to get there, we are first going to convert the input into an [abstract syntax tree](https://en.wikipedia.org/wiki/Abstract_syntax_tree), also known as an AST. I highly recommend you read this short explanation of ASTs: + [Leveling Up One’s Parsing Game With ASTs](https://medium.com/basecs/leveling-up-ones-parsing-game-with-asts-d7a6fc2400ff) by Vaidehi Joshi It's well written and really helps to explain the purpose and structure of ASTs. Don't worry, I'll be here when you get back. 
The structure of each node in the AST that we will build is described in `defs.h`: ```c // AST node types enum { A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_INTLIT }; // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree struct ASTnode *left; // Left and right child trees struct ASTnode *right; int intvalue; // For A_INTLIT, the integer value }; ``` Some AST nodes, like those with `op` values `A_ADD` and `A_SUBTRACT` have two child ASTs that are pointed to by `left` and `right`. Later on, we will add or subtract the values of the sub-trees. Alternatively, an AST node with the `op` value A_INTLIT represents an integer value. It has no sub-tree children, just a value in the `intvalue` field. ## Building AST Nodes and Trees The code in `tree.c` has the functions to build ASTs. The most general function, `mkastnode()`, takes values for all four fields in an AST node. It allocates the node, populates the field values and returns a pointer to the node: ```c // Build and return a generic AST node struct ASTnode *mkastnode(int op, struct ASTnode *left, struct ASTnode *right, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) { fprintf(stderr, "Unable to malloc in mkastnode()\n"); exit(1); } // Copy in the field values and return it n->op = op; n->left = left; n->right = right; n->intvalue = intvalue; return (n); } ``` Given this, we can write more specific functions that make a leaf AST node (i.e. 
one with no children), and make an AST node with a single child: ```c // Make an AST leaf node struct ASTnode *mkastleaf(int op, int intvalue) { return (mkastnode(op, NULL, NULL, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, struct ASTnode *left, int intvalue) { return (mkastnode(op, left, NULL, intvalue)); } ``` ## Purpose of the AST We are going to use an AST to store each expression that we recognise so that, later on, we can traverse it recursively to calculate the final value of the expression. We do want to deal with the precedence of the maths operators. Here is an example. Consider the expression `2 * 3 + 4 * 5`. Now, multiplication has higher precedence than addition. Therefore, we want to *bind* the multiplication operands together and perform these operations before we do the addition. If we generated the AST tree to look like this: ``` + / \ / \ / \ * * / \ / \ 2 3 4 5 ``` then, when traversing the tree, we would perform `2*3` first, then `4*5`. Once we have these results, we can then pass them up to the root of the tree to perform the addition. ## A Naive Expression Parser Now, we could re-use the token values from our scanner as the AST node operation values, but I like to keep the concept of tokens and AST nodes separate. So, to start with, I'm going to have a function to map the token values into AST node operation values. This, along with the rest of the parser, is in `expr.c`: ```c // Convert a token into an AST operation. int arithop(int tok) { switch (tok) { case T_PLUS: return (A_ADD); case T_MINUS: return (A_SUBTRACT); case T_STAR: return (A_MULTIPLY); case T_SLASH: return (A_DIVIDE); default: fprintf(stderr, "unknown token in arithop() on line %d\n", Line); exit(1); } } ``` The default statement in the switch statement fires when we can't convert the given token into an AST node type. It's going to form part of the syntax checking in our parser. 
We need a function to check that the next token is an integer literal, and to build an AST node to hold the literal value. Here it is: ```c // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; // For an INTLIT token, make a leaf AST node for it // and scan in the next token. Otherwise, a syntax error // for any other token type. switch (Token.token) { case T_INTLIT: n = mkastleaf(A_INTLIT, Token.intvalue); scan(&Token); return (n); default: fprintf(stderr, "syntax error on line %d\n", Line); exit(1); } } ``` This assumes that there is a global variable `Token`, and that it already has the most recent token scanned in from the input. In `data.h`: ```c extern_ struct token Token; ``` and in `main()`: ```c scan(&Token); // Get the first token from the input n = binexpr(); // Parse the expression in the file ``` Now we can write the code for the parser: ```c // Return an AST tree whose root is a binary operator struct ASTnode *binexpr(void) { struct ASTnode *n, *left, *right; int nodetype; // Get the integer literal on the left. // Fetch the next token at the same time. left = primary(); // If no tokens left, return just the left node if (Token.token == T_EOF) return (left); // Convert the token into a node type nodetype = arithop(Token.token); // Get the next token in scan(&Token); // Recursively get the right-hand tree right = binexpr(); // Now build a tree with both sub-trees n = mkastnode(nodetype, left, right, 0); return (n); } ``` Notice that nowhere in this naive parser code is there anything to deal with different operator precedence. As it stands, the code treats all operators as having equal precedence. If you follow the code as it parses the expression `2 * 3 + 4 * 5`, you will see that it builds this AST: ``` * / \ 2 + / \ 3 * / \ 4 5 ``` This is definitely not correct. It will multiply `4*5` to get 20, then do `3+20` to get 23 instead of doing `2*3` to get 6. So why did I do this? 
I wanted to show you that writing a simple parser is easy, but getting it to also do the semantic analysis is harder. ## Interpreting the Tree Now that we have our (incorrect) AST tree, let's write some code to interpret it. Again, we are going to write recursive code to traverse the tree. Here's the pseudo-code: ``` interpretTree: First, interpret the left-hand sub-tree and get its value Then, interpret the right-hand sub-tree and get its value Perform the operation in the node at the root of our tree on the two sub-tree values, and return this value ``` Going back to the correct AST tree: ``` + / \ / \ / \ * * / \ / \ 2 3 4 5 ``` the call structure would look like: ``` interpretTree0(tree with +): Call interpretTree1(left tree with *): Call interpretTree2(tree with 2): No maths operation, just return 2 Call interpretTree3(tree with 3): No maths operation, just return 3 Perform 2 * 3, return 6 Call interpretTree1(right tree with *): Call interpretTree2(tree with 4): No maths operation, just return 4 Call interpretTree3(tree with 5): No maths operation, just return 5 Perform 4 * 5, return 20 Perform 6 + 20, return 26 ``` ## Code to Interpret the Tree This is in `interp.c` and follows the above pseudo-code: ```c // Given an AST, interpret the // operators in it and return // a final value. int interpretAST(struct ASTnode *n) { int leftval, rightval; // Get the left and right sub-tree values if (n->left) leftval = interpretAST(n->left); if (n->right) rightval = interpretAST(n->right); switch (n->op) { case A_ADD: return (leftval + rightval); case A_SUBTRACT: return (leftval - rightval); case A_MULTIPLY: return (leftval * rightval); case A_DIVIDE: return (leftval / rightval); case A_INTLIT: return (n->intvalue); default: fprintf(stderr, "Unknown AST operator %d\n", n->op); exit(1); } } ``` Again, the default statement in the switch statement fires when we can't interpret the AST node type. It's going to form part of the semantic checking in our parser. 
## Building the Parser There is some other code here and there, like the call to the interpreter in `main()`: ```c scan(&Token); // Get the first token from the input n = binexpr(); // Parse the expression in the file printf("%d\n", interpretAST(n)); // Calculate the final result exit(0); ``` You can now build the parser by doing: ``` $ make cc -o parser -g expr.c interp.c main.c scan.c tree.c ``` I've provided several input files for you to test the parser on, but of course you can create your own. Remember, the calculated results are incorrect, but the parser should detect input errors like consecutive numbers, consecutive operators, and a number missing at the end of the input. I've also added some debugging code to the interpreter so you can see which AST tree nodes get evaluated in which order: ``` $ cat input01 2 + 3 * 5 - 8 / 3 $ ./parser input01 int 2 int 3 int 5 int 8 int 3 8 / 3 5 - 2 3 * 3 2 + 9 11 $ cat input02 13 -6+ 4* 5 + 08 / 3 $ ./parser input02 int 13 int 6 int 4 int 5 int 8 int 3 8 / 3 5 + 2 4 * 7 6 + 28 13 - 34 -21 $ cat input03 12 34 + -56 * / - - 8 + * 2 $ ./parser input03 unknown token in arithop() on line 1 $ cat input04 23 + 18 - 45.6 * 2 / 18 $ ./parser input04 Unrecognised character . on line 3 $ cat input05 23 * 456abcdefg $ ./parser input05 Unrecognised character a on line 1 ``` ## Conclusion and What's Next A parser recognises the grammar of the language and checks that the input to the compiler conforms to this grammar. If it doesn't, the parser should print out an error message. As our expression grammar is recursive, we have chosen to write a recursive descent parser to recognise our expressions. Right now the parser works, as shown by the above output, but it fails to get the semantics of the input right. In other words, it doesn't calculate the correct value of the expressions. 
In the next part of our compiler writing journey, we will modify the parser so that it also does the semantic analysis of the expressions to get the right maths results. [Next step](../03_Precedence/Readme.md) ================================================ FILE: 02_Parser/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; extern_ int Putback; extern_ FILE *Infile; extern_ struct token Token; ================================================ FILE: 02_Parser/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 int scan(struct token *t); struct ASTnode *mkastnode(int op, struct ASTnode *left, struct ASTnode *right, int intvalue); struct ASTnode *mkastleaf(int op, int intvalue); struct ASTnode *mkastunary(int op, struct ASTnode *left, int intvalue); struct ASTnode *binexpr(void); int interpretAST(struct ASTnode *n); ================================================ FILE: 02_Parser/defs.h ================================================ #include <stdio.h> #include <stdlib.h> #include <string.h> #include <ctype.h> // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 // Token types enum { T_EOF, T_PLUS, T_MINUS, T_STAR, T_SLASH, T_INTLIT }; // Token structure struct token { int token; // Token type, from the enum list above int intvalue; // For T_INTLIT, the integer value }; // AST node types enum { A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_INTLIT }; // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree struct ASTnode *left; // Left and right child trees struct ASTnode *right; int intvalue; // For A_INTLIT, the integer value }; ================================================ FILE: 02_Parser/expr.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of 
expressions // Copyright (c) 2019 Warren Toomey, GPL3 // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; // For an INTLIT token, make a leaf AST node for it // and scan in the next token. Otherwise, a syntax error // for any other token type. switch (Token.token) { case T_INTLIT: n = mkastleaf(A_INTLIT, Token.intvalue); scan(&Token); return (n); default: fprintf(stderr, "syntax error on line %d\n", Line); exit(1); } } // Convert a token into an AST operation. int arithop(int tok) { switch (tok) { case T_PLUS: return (A_ADD); case T_MINUS: return (A_SUBTRACT); case T_STAR: return (A_MULTIPLY); case T_SLASH: return (A_DIVIDE); default: fprintf(stderr, "unknown token in arithop() on line %d\n", Line); exit(1); } } // Return an AST tree whose root is a binary operator struct ASTnode *binexpr(void) { struct ASTnode *n, *left, *right; int nodetype; // Get the integer literal on the left. // Fetch the next token at the same time. 
left = primary(); // If no tokens left, return just the left node if (Token.token == T_EOF) return (left); // Convert the token into a node type nodetype = arithop(Token.token); // Get the next token in scan(&Token); // Recursively get the right-hand tree right = binexpr(); // Now build a tree with both sub-trees n = mkastnode(nodetype, left, right, 0); return (n); } ================================================ FILE: 02_Parser/input01 ================================================ 2 + 3 * 5 - 8 / 3 ================================================ FILE: 02_Parser/input02 ================================================ 13 -6+ 4* 5 + 08 / 3 ================================================ FILE: 02_Parser/input03 ================================================ 12 34 + -56 * / - - 8 + * 2 ================================================ FILE: 02_Parser/input04 ================================================ 23 + 18 - 45.6 * 2 / 18 ================================================ FILE: 02_Parser/input05 ================================================ 23 * 456abcdefg ================================================ FILE: 02_Parser/interp.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree interpreter // Copyright (c) 2019 Warren Toomey, GPL3 // List of AST operators static char *ASTop[] = { "+", "-", "*", "/" }; // Given an AST, interpret the // operators in it and return // a final value. 
int interpretAST(struct ASTnode *n) { int leftval, rightval; // Get the left and right sub-tree values if (n->left) leftval = interpretAST(n->left); if (n->right) rightval = interpretAST(n->right); // Debug: Print what we are about to do if (n->op == A_INTLIT) printf("int %d\n", n->intvalue); else printf("%d %s %d\n", leftval, ASTop[n->op], rightval); switch (n->op) { case A_ADD: return (leftval + rightval); case A_SUBTRACT: return (leftval - rightval); case A_MULTIPLY: return (leftval * rightval); case A_DIVIDE: return (leftval / rightval); case A_INTLIT: return (n->intvalue); default: fprintf(stderr, "Unknown AST operator %d\n", n->op); exit(1); } } ================================================ FILE: 02_Parser/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include <errno.h> // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Initialise global variables static void init() { Line = 1; Putback = '\n'; } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s infile\n", prog); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file, parse the expression in it with binexpr() // and print the value returned by interpretAST(). 
void main(int argc, char *argv[]) { struct ASTnode *n; if (argc != 2) usage(argv[0]); init(); if ((Infile = fopen(argv[1], "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", argv[1], strerror(errno)); exit(1); } scan(&Token); // Get the first token from the input n = binexpr(); // Parse the expression in the file printf("%d\n", interpretAST(n)); // Calculate the final result exit(0); } ================================================ FILE: 02_Parser/scan.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { char *p; p = strchr(s, c); return (p ? p - s : -1); } // Get the next character from the input file. static int next(void) { int c; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return c; } c = fgetc(Infile); // Read from input file if ('\n' == c) Line++; // Increment line count return c; } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. static int skip(void) { int c; c = next(); while (' ' == c || '\t' == c || '\n' == c || '\r' == c || '\f' == c) { c = next(); } return (c); } // Scan and return an integer literal // value from the input file. static int scanint(int c) { int k, val = 0; // Convert each character into an int value while ((k = chrpos("0123456789", c)) >= 0) { val = val * 10 + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return val; } // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. 
int scan(struct token *t) { int c; // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': t->token = T_PLUS; break; case '-': t->token = T_MINUS; break; case '*': t->token = T_STAR; break; case '/': t->token = T_SLASH; break; default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } printf("Unrecognised character %c on line %d\n", c, Line); exit(1); } // We found a token return (1); } ================================================ FILE: 02_Parser/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, struct ASTnode *left, struct ASTnode *right, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) { fprintf(stderr, "Unable to malloc in mkastnode()\n"); exit(1); } // Copy in the field values and return it n->op = op; n->left = left; n->right = right; n->intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int intvalue) { return (mkastnode(op, NULL, NULL, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, struct ASTnode *left, int intvalue) { return (mkastnode(op, left, NULL, intvalue)); } ================================================ FILE: 03_Precedence/Makefile ================================================ parser: expr.c interp.c main.c scan.c tree.c cc -o parser -g expr.c interp.c main.c scan.c tree.c parser2: expr2.c interp.c main.c scan.c tree.c cc -o parser2 -g expr2.c interp.c main.c scan.c tree.c clean: rm -f parser parser2 *.o test: parser -(./parser input01; \ ./parser input02; \ ./parser input03; \ ./parser input04; \ ./parser input05) 
test2: parser2 -(./parser2 input01; \ ./parser2 input02; \ ./parser2 input03; \ ./parser2 input04; \ ./parser2 input05) ================================================ FILE: 03_Precedence/Readme.md ================================================ # Part 3: Operator Precedence We saw in the previous part of our compiler writing journey that a parser doesn't necessarily enforce the semantics of our language. It only enforces the syntax and structural rules of the grammar. We ended up with code that calculates the wrong value of expressions like `2 * 3 + 4 * 5`, because the code created an AST that looks like: ``` * / \ 2 + / \ 3 * / \ 4 5 ``` instead of: ``` + / \ / \ / \ * * / \ / \ 2 3 4 5 ``` To solve this, we have to add code to our parser to perform operator precedence. There are (at least) two ways of doing this: + Making the operator precedence explicit in the language's grammar + Influencing the existing parser with an operator precedence table ## Making the Operator Precedence Explicit Here is our grammar from the last part of the journey: ``` expression: number | expression '*' expression | expression '/' expression | expression '+' expression | expression '-' expression ; number: T_INTLIT ; ``` Note that there is no differentiation between any of the four maths operators. Let's tweak the grammar so that there is a difference: ``` expression: additive_expression ; additive_expression: multiplicative_expression | additive_expression '+' multiplicative_expression | additive_expression '-' multiplicative_expression ; multiplicative_expression: number | number '*' multiplicative_expression | number '/' multiplicative_expression ; number: T_INTLIT ; ``` We now have two types of expressions: *additive* expressions and *multiplicative* expressions. Note that the grammar now forces the numbers to be part of multiplicative expressions only. This forces the '*' and '/' operators to bind more tightly to the numbers on either side, thus having higher precedence. 
Any additive expression is actually either a multiplicative expression by itself, or an additive (i.e. multiplicative) expression followed by a '+' or '-' operator then another multiplicative expression. The additive expression is now at a much lower precedence than the multiplicative expression. ## Doing The Above in the Recursive Descent Parser How do we take the above version of our grammar and implement it into our recursive descent parser? I've done this in the file `expr2.c` and I'll cover the code below. The answer is to have a `multiplicative_expr()` function to deal with the '*' and '/' operators, and an `additive_expr()` function to deal with the lower precedence '+' and '-' operators. Both functions are going to read in something and an operator. Then, while there are following operators at the same precedence, each function will parse some more of the input and combine the left and right halves with the first operator. However, `additive_expr()` will have to defer to the higher-precedence `multiplicative_expr()` function. Here is how this is done. 
## `additive_expr()` ```c // Return an AST tree whose root is a '+' or '-' binary operator struct ASTnode *additive_expr(void) { struct ASTnode *left, *right; int tokentype; // Get the left sub-tree at a higher precedence than us left = multiplicative_expr(); // If no tokens left, return just the left node tokentype = Token.token; if (tokentype == T_EOF) return (left); // Loop working on token at our level of precedence while (1) { // Fetch in the next integer literal scan(&Token); // Get the right sub-tree at a higher precedence than us right = multiplicative_expr(); // Join the two sub-trees with our low-precedence operator left = mkastnode(arithop(tokentype), left, right, 0); // And get the next token at our precedence tokentype = Token.token; if (tokentype == T_EOF) break; } // Return whatever tree we have created return (left); } ``` Right at the beginning, we immediately call `multiplicative_expr()` in case the first operator is a high-precedence '*' or '/'. That function will only return when it encounters a low-precedence '+' or '-' operator. Thus, when we hit the `while` loop, we know we have a '+' or '-' operator. We loop until there are no tokens left in the input, i.e. when we hit the T_EOF token. Inside the loop, we call `multiplicative_expr()` again in case any future operators are higher precedence than us. Again, this will return when they are not. Once we have a left and right sub-tree, we can combine them with the operator we got the last time around the loop. This repeats, so that if we had the expression `2 + 4 + 6`, we would end up with the AST tree: ``` + / \ + 6 / \ 2 4 ``` But if `multiplicative_expr()` had its own higher precedence operators, we would be combining sub-trees with multiple nodes in them. ## multiplicative_expr() ```c // Return an AST tree whose root is a '*' or '/' binary operator struct ASTnode *multiplicative_expr(void) { struct ASTnode *left, *right; int tokentype; // Get the integer literal on the left. 
// Fetch the next token at the same time. left = primary(); // If no tokens left, return just the left node tokentype = Token.token; if (tokentype == T_EOF) return (left); // While the token is a '*' or '/' while ((tokentype == T_STAR) || (tokentype == T_SLASH)) { // Fetch in the next integer literal scan(&Token); right = primary(); // Join that with the left integer literal left = mkastnode(arithop(tokentype), left, right, 0); // Update the details of the current token. // If no tokens left, return just the left node tokentype = Token.token; if (tokentype == T_EOF) break; } // Return whatever tree we have created return (left); } ``` The code is similar to `additive_expr()` except that we get to call `primary()` to get real integer literals! We also only loop when we have operators at our high precedence level, i.e. '*' and '/' operators. As soon as we hit a low precedence operator, we simply return the sub-tree that we've built to this point. This goes back to `additive_expr()` to deal with the low precedence operator. ## Drawbacks of the Above The above way of constructing a recursive descent parser with explicit operator precedence can be inefficient because of all the function calls needed to reach the right level of precedence. There also have to be functions to deal with each level of operator precedence, so we end up with lots of lines of code. ## The Alternative: Pratt Parsing One way to cut down on the amount of code is to use a [Pratt parser](https://en.wikipedia.org/wiki/Pratt_parser) which has a table of precedence values associated with each token instead of having functions that replicate the explicit precedence in the grammar. At this point I highly recommend that you read [Pratt Parsers: Expression Parsing Made Easy](https://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/) by Bob Nystrom. Pratt parsers still make my head hurt, so read as much as you can and get comfortable with the basic concept. 
## `expr.c`: Pratt Parsing I've implemented Pratt parsing in `expr.c` which is a drop-in replacement for `expr2.c`. Let's start the tour. Firstly, we need some code to determine the precedence levels for each token: ```c // Operator precedence for each token static int OpPrec[] = { 0, 10, 10, 20, 20, 0 }; // EOF + - * / INTLIT // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec = OpPrec[tokentype]; if (prec == 0) { fprintf(stderr, "syntax error on line %d, token %d\n", Line, tokentype); exit(1); } return (prec); } ``` Higher numbers (e.g. 20) mean a higher precedence than lower numbers (e.g. 10). Now, you might ask: why have a function when you have a look-up table called `OpPrec[]`? The answer is: to spot syntax errors. Consider an input that looks like `234 101 + 12`. We can scan in the first two tokens. But if we simply used `OpPrec[]` to get the precedence of the second `101` token, we wouldn't notice that it isn't an operator. Thus, the `op_precedence()` function enforces the correct grammar syntax. Now, instead of having a function for each precedence level, we have a single expression function that uses the table of operator precedences: ```c // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; int tokentype; // Get the integer literal on the left. // Fetch the next token at the same time. left = primary(); // If no tokens left, return just the left node tokentype = Token.token; if (tokentype == T_EOF) return (left); // While the precedence of this token is // more than that of the previous token precedence while (op_precedence(tokentype) > ptp) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Join that sub-tree with ours. 
Convert the token // into an AST operation at the same time. left = mkastnode(arithop(tokentype), left, right, 0); // Update the details of the current token. // If no tokens left, return just the left node tokentype = Token.token; if (tokentype == T_EOF) return (left); } // Return the tree we have when the precedence // is the same or lower return (left); } ``` Firstly, note that this is still recursive like the previous parser functions. This time, we receive the precedence level of the token that was found before we got called. `main()` will call us with the lowest precedence, 0, but we will call ourselves with higher values. You should also spot that the code is quite similar to the `multiplicative_expr()` function: read in an integer literal, get the operator's token type, then loop building a tree. The difference is the loop condition and body: ```c multiplicative_expr(): while ((tokentype == T_STAR) || (tokentype == T_SLASH)) { scan(&Token); right = primary(); left = mkastnode(arithop(tokentype), left, right, 0); tokentype = Token.token; if (tokentype == T_EOF) return (left); } binexpr(): while (op_precedence(tokentype) > ptp) { scan(&Token); right = binexpr(OpPrec[tokentype]); left = mkastnode(arithop(tokentype), left, right, 0); tokentype = Token.token; if (tokentype == T_EOF) return (left); } ``` With the Pratt parser, when the next operator has a higher precedence than our current token, instead of just getting the next integer literal with `primary()`, we call ourselves with `binexpr(OpPrec[tokentype])` to raise the operator precedence. Once we hit a token at our precedence level or lower, we will simply: ```c return (left); ``` This will either be a sub-tree with lots of nodes and operators at a higher precedence than the operator that called us, or it might be a single integer literal for an operator at the same precedence as us. Now we have a single function to do expression parsing. 
It uses a small helper function to enforce the operator precedence, and thus implements the semantics of our language. ## Putting Both Parsers Into Action You can make two programs, one with each parser: ``` $ make parser # Pratt Parser cc -o parser -g expr.c interp.c main.c scan.c tree.c $ make parser2 # Precedence Climbing cc -o parser2 -g expr2.c interp.c main.c scan.c tree.c ``` You can also test both parsers with the same input files from the previous part of our journey: ``` $ make test (./parser input01; \ ./parser input02; \ ./parser input03; \ ./parser input04; \ ./parser input05) 15 # input01 result 29 # input02 result syntax error on line 1, token 5 # input03 result Unrecognised character . on line 3 # input04 result Unrecognised character a on line 1 # input05 result $ make test2 (./parser2 input01; \ ./parser2 input02; \ ./parser2 input03; \ ./parser2 input04; \ ./parser2 input05) 15 # input01 result 29 # input02 result syntax error on line 1, token 5 # input03 result Unrecognised character . on line 3 # input04 result Unrecognised character a on line 1 # input05 result ``` ## Conclusion and What's Next It's probably time to step back a bit and see where we've got to. We now have: + a scanner that recognises and returns the tokens in our language + a parser that recognises our grammar, reports syntax errors and builds an Abstract Syntax Tree + a precedence table for the parser that implements the semantics of our language + an interpreter that traverses the Abstract Syntax Tree depth-first and calculates the result of the expression in the input What we don't have yet is a compiler. But we are so close to making our first compiler! In the next part of our compiler writing journey, we will replace the interpreter. In its place, we will write a translator that generates x86-64 assembly code for each AST node that has a maths operator. We will also generate some assembly preamble and postamble to support the assembly code that the generator outputs. 
[Next step](../04_Assembly/Readme.md) ================================================ FILE: 03_Precedence/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; extern_ int Putback; extern_ FILE *Infile; extern_ struct token Token; ================================================ FILE: 03_Precedence/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 int scan(struct token *t); struct ASTnode *mkastnode(int op, struct ASTnode *left, struct ASTnode *right, int intvalue); struct ASTnode *mkastleaf(int op, int intvalue); struct ASTnode *mkastunary(int op, struct ASTnode *left, int intvalue); struct ASTnode *binexpr(int rbp); int interpretAST(struct ASTnode *n); ================================================ FILE: 03_Precedence/defs.h ================================================ #include #include #include #include // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 // Token types enum { T_EOF, T_PLUS, T_MINUS, T_STAR, T_SLASH, T_INTLIT }; // Token structure struct token { int token; // Token type, from the enum list above int intvalue; // For T_INTLIT, the integer value }; // AST node types enum { A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_INTLIT }; // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree struct ASTnode *left; // Left and right child trees struct ASTnode *right; int intvalue; // For A_INTLIT, the integer value }; ================================================ FILE: 03_Precedence/expr.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of expressions // Copyright (c) 2019 Warren Toomey, GPL3 // Parse a primary factor and return an // AST node representing it. 
static struct ASTnode *primary(void) { struct ASTnode *n; // For an INTLIT token, make a leaf AST node for it // and scan in the next token. Otherwise, a syntax error // for any other token type. switch (Token.token) { case T_INTLIT: n = mkastleaf(A_INTLIT, Token.intvalue); scan(&Token); return (n); default: fprintf(stderr, "syntax error on line %d, token %d\n", Line, Token.token); exit(1); } } // Convert a binary operator token into an AST operation. int arithop(int tokentype) { switch (tokentype) { case T_PLUS: return (A_ADD); case T_MINUS: return (A_SUBTRACT); case T_STAR: return (A_MULTIPLY); case T_SLASH: return (A_DIVIDE); default: fprintf(stderr, "syntax error on line %d, token %d\n", Line, tokentype); exit(1); } } // Operator precedence for each token static int OpPrec[] = { 0, 10, 10, 20, 20, 0 }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec = OpPrec[tokentype]; if (prec == 0) { fprintf(stderr, "syntax error on line %d, token %d\n", Line, tokentype); exit(1); } return (prec); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; int tokentype; // Get the integer literal on the left. // Fetch the next token at the same time. left = primary(); // If no tokens left, return just the left node tokentype = Token.token; if (tokentype == T_EOF) return (left); // While the precedence of this token is // more than that of the previous token precedence while (op_precedence(tokentype) > ptp) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. left = mkastnode(arithop(tokentype), left, right, 0); // Update the details of the current token. 
// If no tokens left, return just the left node tokentype = Token.token; if (tokentype == T_EOF) return (left); } // Return the tree we have when the precedence // is the same or lower return (left); } ================================================ FILE: 03_Precedence/expr2.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of expressions with full recursive descent // Copyright (c) 2019 Warren Toomey, GPL3 // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; // For an INTLIT token, make a leaf AST node for it // and scan in the next token. Otherwise, a syntax error // for any other token type. switch (Token.token) { case T_INTLIT: n = mkastleaf(A_INTLIT, Token.intvalue); scan(&Token); return (n); default: fprintf(stderr, "syntax error on line %d, token %d\n", Line, Token.token); exit(1); } } // Convert a binary operator token into an AST operation. static int arithop(int tok) { switch (tok) { case T_PLUS: return (A_ADD); case T_MINUS: return (A_SUBTRACT); case T_STAR: return (A_MULTIPLY); case T_SLASH: return (A_DIVIDE); default: fprintf(stderr, "syntax error on line %d, token %d\n", Line, tok); exit(1); } } struct ASTnode *additive_expr(void); // Return an AST tree whose root is a '*' or '/' binary operator struct ASTnode *multiplicative_expr(void) { struct ASTnode *left, *right; int tokentype; // Get the integer literal on the left. // Fetch the next token at the same time. left = primary(); // If no tokens left, return just the left node tokentype = Token.token; if (tokentype == T_EOF) return (left); // While the token is a '*' or '/' while ((tokentype == T_STAR) || (tokentype == T_SLASH)) { // Fetch in the next integer literal scan(&Token); right = primary(); // Join that with the left integer literal left = mkastnode(arithop(tokentype), left, right, 0); // Update the details of the current token. 
// If no tokens left, return just the left node tokentype = Token.token; if (tokentype == T_EOF) break; } // Return whatever tree we have created return (left); } // Return an AST tree whose root is a '+' or '-' binary operator struct ASTnode *additive_expr(void) { struct ASTnode *left, *right; int tokentype; // Get the left sub-tree at a higher precedence than us left = multiplicative_expr(); // If no tokens left, return just the left node tokentype = Token.token; if (tokentype == T_EOF) return (left); // Cache the '+' or '-' token type // Loop working on token at our level of precedence while (1) { // Fetch in the next integer literal scan(&Token); // Get the right sub-tree at a higher precedence than us right = multiplicative_expr(); // Join the two sub-trees with our low-precedence operator left = mkastnode(arithop(tokentype), left, right, 0); // And get the next token at our precedence tokentype = Token.token; if (tokentype == T_EOF) break; } // Return whatever tree we have created return (left); } struct ASTnode *binexpr(int n) { return (additive_expr()); } ================================================ FILE: 03_Precedence/input01 ================================================ 2 + 3 * 5 - 8 / 3 ================================================ FILE: 03_Precedence/input02 ================================================ 13 -6+ 4* 5 + 08 / 3 ================================================ FILE: 03_Precedence/input03 ================================================ 12 34 + -56 * / - - 8 + * 2 ================================================ FILE: 03_Precedence/input04 ================================================ 23 + 18 - 45.6 * 2 / 18 ================================================ FILE: 03_Precedence/input05 ================================================ 23 * 456abcdefg ================================================ FILE: 03_Precedence/interp.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" 
// AST tree interpreter // Copyright (c) 2019 Warren Toomey, GPL3 // List of AST operators static char *ASTop[] = { "+", "-", "*", "/" }; // Given an AST, interpret the // operators in it and return // a final value. int interpretAST(struct ASTnode *n) { int leftval, rightval; // Get the left and right sub-tree values if (n->left) leftval = interpretAST(n->left); if (n->right) rightval = interpretAST(n->right); // Debug: Print what we are about to do // if (n->op == A_INTLIT) // printf("int %d\n", n->intvalue); // else // printf("%d %s %d\n", leftval, ASTop[n->op], rightval); switch (n->op) { case A_ADD: return (leftval + rightval); case A_SUBTRACT: return (leftval - rightval); case A_MULTIPLY: return (leftval * rightval); case A_DIVIDE: return (leftval / rightval); case A_INTLIT: return (n->intvalue); default: fprintf(stderr, "Unknown AST operator %d\n", n->op); exit(1); } } ================================================ FILE: 03_Precedence/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Initialise global variables static void init() { Line = 1; Putback = '\n'; } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s infile\n", prog); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. 
void main(int argc, char *argv[]) { struct ASTnode *n; if (argc != 2) usage(argv[0]); init(); if ((Infile = fopen(argv[1], "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", argv[1], strerror(errno)); exit(1); } scan(&Token); // Get the first token from the input n = binexpr(0); // Parse the expression in the file printf("%d\n", interpretAST(n)); // Calculate the final result exit(0); } ================================================ FILE: 03_Precedence/scan.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { char *p; p = strchr(s, c); return (p ? p - s : -1); } // Get the next character from the input file. static int next(void) { int c; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return c; } c = fgetc(Infile); // Read from input file if ('\n' == c) Line++; // Increment line count return c; } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. static int skip(void) { int c; c = next(); while (' ' == c || '\t' == c || '\n' == c || '\r' == c || '\f' == c) { c = next(); } return (c); } // Scan and return an integer literal // value from the input file. static int scanint(int c) { int k, val = 0; // Convert each character into an int value while ((k = chrpos("0123456789", c)) >= 0) { val = val * 10 + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return val; } // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. 
int scan(struct token *t) { int c; // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': t->token = T_PLUS; break; case '-': t->token = T_MINUS; break; case '*': t->token = T_STAR; break; case '/': t->token = T_SLASH; break; default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } printf("Unrecognised character %c on line %d\n", c, Line); exit(1); } // We found a token return (1); } ================================================ FILE: 03_Precedence/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, struct ASTnode *left, struct ASTnode *right, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) { fprintf(stderr, "Unable to malloc in mkastnode()\n"); exit(1); } // Copy in the field values and return it n->op = op; n->left = left; n->right = right; n->intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int intvalue) { return (mkastnode(op, NULL, NULL, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, struct ASTnode *left, int intvalue) { return (mkastnode(op, left, NULL, intvalue)); } ================================================ FILE: 04_Assembly/Makefile ================================================ comp1: cg.c expr.c gen.c interp.c main.c scan.c tree.c cc -o comp1 -g cg.c expr.c gen.c interp.c main.c scan.c tree.c compn: cgn.c expr.c gen.c interp.c main.c scan.c tree.c cc -o compn -g cgn.c expr.c gen.c interp.c main.c scan.c tree.c clean: rm -f comp1 compn *.o *.s out test: comp1 ./comp1 input01 cc -o out out.s ./out ./comp1 input02 
cc -o out out.s ./out testn: compn ./compn input01 nasm -f elf64 out.s cc -no-pie -o out out.o ./out ./compn input02 nasm -f elf64 out.s cc -no-pie -o out out.o ./out ================================================ FILE: 04_Assembly/Readme.md ================================================ # Part 4: An Actual Compiler It's about time that I met my promise of actually writing a compiler. So in this part of the journey we are going to replace the interpreter in our program with code that generates x86-64 assembly code. ## Revising the Interpreter Before we do, it will be worthwhile to revisit the interpreter code in `interp.c`: ```c int interpretAST(struct ASTnode *n) { int leftval, rightval; if (n->left) leftval = interpretAST(n->left); if (n->right) rightval = interpretAST(n->right); switch (n->op) { case A_ADD: return (leftval + rightval); case A_SUBTRACT: return (leftval - rightval); case A_MULTIPLY: return (leftval * rightval); case A_DIVIDE: return (leftval / rightval); case A_INTLIT: return (n->intvalue); default: fprintf(stderr, "Unknown AST operator %d\n", n->op); exit(1); } } ``` The `interpretAST()` function walks the given AST tree depth-first. It evaluates any left sub-tree, then the right sub-tree. Finally, it uses the `op` value at the base of the current tree to operate on these children. If the `op` value is one of the four maths operators, then this maths operation is performed. If the `op` value indicates that the node is simply an integer literal, the literal value is returned. The function returns the final value for this tree. And, as it is recursive, it will calculate the final value for a whole tree one sub-sub-tree at a time. ## Changing to Assembly Code Generation We are going to write an assembly code generator which is generic. This is, in turn, going to call out to a set of CPU-specific code generation functions.
Here is the generic assembly code generator in `gen.c`: ```c // Given an AST, generate // assembly code recursively static int genAST(struct ASTnode *n) { int leftreg, rightreg; // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left); if (n->right) rightreg = genAST(n->right); switch (n->op) { case A_ADD: return (cgadd(leftreg,rightreg)); case A_SUBTRACT: return (cgsub(leftreg,rightreg)); case A_MULTIPLY: return (cgmul(leftreg,rightreg)); case A_DIVIDE: return (cgdiv(leftreg,rightreg)); case A_INTLIT: return (cgload(n->intvalue)); default: fprintf(stderr, "Unknown AST operator %d\n", n->op); exit(1); } } ``` Looks familiar, huh?! We are doing the same depth-first tree traversal. This time: + A_INTLIT: load a register with the literal value + Other operators: perform a maths function on the two registers that hold the left-child's and right-child's value Instead of passing values, the code in `genAST()` passes around register identifiers. For example `cgload()` loads a value into a register and returns the identity of the register with the loaded value. `genAST()` itself returns the identity of the register that holds the final value of the tree at this point. That's why the code at the top is getting register identities: ```c if (n->left) leftreg = genAST(n->left); if (n->right) rightreg = genAST(n->right); ``` ## Calling `genAST()` `genAST()` is only going to calculate the value of the expression given to it. We need to print out this final calculation. We're also going to need to wrap the assembly code we generate with some leading code (the *preamble*) and some trailing code (the *postamble*). This is done with the other function in `gen.c`: ```c void generatecode(struct ASTnode *n) { int reg; cgpreamble(); reg= genAST(n); cgprintint(reg); // Print the register with the result as an int cgpostamble(); } ``` ## The x86-64 Code Generator That's the generic code generator out of the road.
Now we need to look at the generation of some real assembly code. For now, I'm targeting the x86-64 CPU as this is still one of the most common Linux platforms. So, open up `cg.c` and let's get browsing. ### Allocating Registers Any CPU has a limited number of registers. We will have to allocate a register to hold the integer literal values, plus any calculation that we perform on them. However, once we've used a value, we can often discard the value and hence free up the register holding it. Then we can re-use that register for another value. There are three functions that deal with register allocation: + `freeall_registers()`: Set all registers as available + `alloc_register()`: Allocate a free register + `free_register()`: Free an allocated register I'm not going to go through the code as it's straightforward but with some error checking. Right now, if I run out of registers then the program will crash. Later on, I'll deal with the situation when we have run out of free registers. The code works on generic registers: r0, r1, r2 and r3. There is a table of strings with the actual register names: ```c static char *reglist[4]= { "%r8", "%r9", "%r10", "%r11" }; ``` This makes these functions fairly independent of the CPU architecture. ### Loading a Register This is done in `cgload()`: a register is allocated, then a `movq` instruction loads a literal value into the allocated register. ```c // Load an integer literal value into a register. // Return the number of the register int cgload(int value) { // Get a new register int r= alloc_register(); // Print out the code to initialise it fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]); return(r); } ``` ### Adding Two Registers `cgadd()` takes two register numbers and generates the code to add them together.
The result is saved in one of the two registers, and the other one is then freed for future use: ```c // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return(r2); } ``` Note that addition is *commutative*, so I could have added `r2` to `r1` instead of `r1` to `r2`. The identity of the register with the final value is returned. ### Multiplying Two Registers This is very similar to addition, and again the operation is *commutative*, so any register can be returned: ```c // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return(r2); } ``` ### Subtracting Two Registers Subtraction is *not* commutative: we have to get the order correct. The second register is subtracted from the first, so we return the first and free the second: ```c // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return(r1); } ``` ### Dividing Two Registers Division is also not commutative, so the previous notes apply. On the x86-64, it's even more complicated. We need to load `%rax` with the *dividend* from `r1`. This needs to be sign-extended to sixteen bytes (into `%rdx:%rax`) with `cqo`. Then, `idivq` will divide `%rdx:%rax` by the divisor in `r2`, leaving the *quotient* in `%rax`, so we need to copy it out to either `r1` or `r2`. Then we can free the other register.
```c // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); free_register(r2); return(r1); } ``` ### Printing A Register There isn't an x86-64 instruction to print a register out as a decimal number. To solve this problem, the assembly preamble contains a function called `printint()` that takes a register argument and calls `printf()` to print this out in decimal. I'm not going to give the code in `cgpreamble()`, but it also contains the beginning code for `main()`, so that we can assemble our output file to get a complete program. The code for `cgpostamble()`, also not given here, simply calls `exit(0)` to end the program. Here, however, is `cgprintint()`: ```c void cgprintint(int r) { fprintf(Outfile, "\tmovq\t%s, %%rdi\n", reglist[r]); fprintf(Outfile, "\tcall\tprintint\n"); free_register(r); } ``` Linux x86-64 expects the first argument to a function to be in the `%rdi` register, so we move our register into `%rdi` before we `call printint`. ## Doing Our First Compile That's about it for the x86-64 code generator. There is some extra code in `main()` to open `out.s` as our output file. I've also left the interpreter in the program so we can confirm that our assembly calculates the same answer for the input expression as the interpreter. Let's make the compiler and run it on `input01`: ```make $ make cc -o comp1 -g cg.c expr.c gen.c interp.c main.c scan.c tree.c $ make test ./comp1 input01 15 cc -o out out.s ./out 15 ``` Yes! The first 15 is the interpreter's output. The second 15 is the assembly's output. ## Examining the Assembly Output So, exactly what was the assembly output?
Well, here is the input file: ``` 2 + 3 * 5 - 8 / 3 ``` and here is `out.s` for this input with comments: ``` .text # Preamble code .LC0: .string "%d\n" # "%d\n" for printf() printint: pushq %rbp movq %rsp, %rbp # Set the frame pointer subq $16, %rsp movl %edi, -4(%rbp) movl -4(%rbp), %eax # Get the printint() argument movl %eax, %esi leaq .LC0(%rip), %rdi # Get the pointer to "%d\n" movl $0, %eax call printf@PLT # Call printf() nop leave # and return ret .globl main .type main, @function main: pushq %rbp movq %rsp, %rbp # Set the frame pointer # End of preamble code movq $2, %r8 # %r8 = 2 movq $3, %r9 # %r9 = 3 movq $5, %r10 # %r10 = 5 imulq %r9, %r10 # %r10 = 3 * 5 = 15 addq %r8, %r10 # %r10 = 2 + 15 = 17 # %r8 and %r9 are now free again movq $8, %r8 # %r8 = 8 movq $3, %r9 # %r9 = 3 movq %r8,%rax cqo # Load dividend %rax with 8 idivq %r9 # Divide by 3 movq %rax,%r8 # Store quotient in %r8, i.e. 2 subq %r8, %r10 # %r10 = 17 - 2 = 15 movq %r10, %rdi # Copy 15 into %rdi in preparation call printint # to call printint() movl $0, %eax # Postamble: call exit(0) popq %rbp ret ``` Excellent! We now have a legitimate compiler: a program that takes an input in one language and generates a translation of that input in another language. We still have to then assemble the output down to machine code and link it with the support libraries, but this is something that we can perform manually for now. Later on, we will write some code to do this automatically. ## Conclusion and What's Next Changing from the interpreter to a generic code generator was trivial, but then we had to write some code to generate real assembly output. To do this, we had to think about how to allocate registers: for now, we have a naive solution. We also had to deal with some x86-64 oddities like the `idivq` instruction. Something I haven't touched on yet is: why bother with generating the AST for an expression? 
Surely, we could have called `cgadd()` when we hit a '+' token in our Pratt parser, ditto for the other operators. I'm going to leave you to think about this, but I will come back to it in a step or two. In the next part of our compiler writing journey, we will add some statements to our language, so that it starts to resemble a proper computer language. [Next step](../05_Statements/Readme.md) ================================================ FILE: 04_Assembly/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers // and their names static int freereg[4]; static char *reglist[4]= { "%r8", "%r9", "%r10", "%r11" }; // Set all registers as available void freeall_registers(void) { freereg[0]= freereg[1]= freereg[2]= freereg[3]= 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i=0; i<4; i++) { if (freereg[i]) { freereg[i]= 0; return(i); } } fprintf(stderr, "Out of registers!\n"); exit(1); } // Return a register to the list of available registers. // Check to see if it's not already there. 
// reg is used as an index into freereg[] without a range check.
// Freeing a register that is already marked free is treated as
// a fatal error: a message is printed and we exit with status 1.
static void free_register(int reg)
{
  if (freereg[reg] != 0) {
    fprintf(stderr, "Error trying to free register %d\n", reg);
    exit(1);
  }
  freereg[reg]= 1;
}

// Print out the assembly preamble: mark all registers free, then
// write the printint() helper, which calls printf() with the
// "%d\n" format, followed by the opening instructions of main()
void cgpreamble()
{
  freeall_registers();
  fputs(
	"\t.text\n"
	".LC0:\n"
	"\t.string\t\"%d\\n\"\n"
	"printint:\n"
	"\tpushq\t%rbp\n"
	"\tmovq\t%rsp, %rbp\n"
	"\tsubq\t$16, %rsp\n"
	"\tmovl\t%edi, -4(%rbp)\n"
	"\tmovl\t-4(%rbp), %eax\n"
	"\tmovl\t%eax, %esi\n"
	"\tleaq .LC0(%rip), %rdi\n"
	"\tmovl $0, %eax\n"
	"\tcall printf@PLT\n"
	"\tnop\n"
	"\tleave\n"
	"\tret\n"
	"\n"
	"\t.globl\tmain\n"
	"\t.type\tmain, @function\n"
	"main:\n"
	"\tpushq\t%rbp\n"
	"\tmovq %rsp, %rbp\n",
  Outfile);
}

// Print out the assembly postamble: load zero into %eax
// as main()'s return value, restore %rbp and return
void cgpostamble()
{
  fputs(
	"\tmovl $0, %eax\n"
	"\tpopq %rbp\n"
	"\tret\n",
  Outfile);
}

// Load an integer literal value into a register.
// Return the number of the register
int cgload(int value) {

  // Get a new register
  int r= alloc_register();

  // Print out the code to initialise it
  fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]);
  return(r);
}

// Add two registers together and return
// the number of the register with the result.
// r1 is added into r2, then r1 is freed.
int cgadd(int r1, int r2) {
  fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r1], reglist[r2]);
  free_register(r1);
  return(r2);
}

// Subtract the second register from the first and
// return the number of the register with the result.
// The result is left in r1, then r2 is freed.
int cgsub(int r1, int r2) {
  fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]);
  free_register(r2);
  return(r1);
}

// Multiply two registers together and return
// the number of the register with the result.
// The result is left in r2, then r1 is freed.
int cgmul(int r1, int r2) {
  fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]);
  free_register(r1);
  return(r2);
}

// Divide the first register by the second and
// return the number of the register with the result.
// The dividend goes through %rax: it is copied in, extended
// with cqo, divided by r2, and the quotient is copied back to r1.
int cgdiv(int r1, int r2) {
  fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]);
  fprintf(Outfile, "\tcqo\n");
  fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]);
  fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]);
  free_register(r2);
  return(r1);
}

// Call printint() with the given register.
// The value is copied to %rdi as the argument, and
// the register is freed afterwards.
void cgprintint(int r) {
  fprintf(Outfile, "\tmovq\t%s, %%rdi\n", reglist[r]);
  fprintf(Outfile, "\tcall\tprintint\n");
  free_register(r);
}

================================================
FILE: 04_Assembly/cgn.c
================================================
#include "defs.h"
#include "data.h"
#include "decl.h"

// Code generator for x86-64
// Copyright (c) 2019 Warren Toomey, GPL3
// NOTE(review): the output of this variant is assembled with
// nasm by the Makefile's compn/testn targets -- confirm

// List of available registers
// and their names
static int freereg[4];
static char *reglist[4]= { "r8", "r9", "r10", "r11" };

// Set all registers as available
void freeall_registers(void)
{
  freereg[0]= freereg[1]= freereg[2]= freereg[3]= 1;
}

// Allocate a free register. Return the number of
// the register. Die if no available registers.
// The lowest-numbered free register is chosen.
static int alloc_register(void)
{
  for (int i=0; i<4; i++) {
    if (freereg[i]) {
      freereg[i]= 0;
      return(i);
    }
  }
  fprintf(stderr, "Out of registers!\n");
  exit(1);
}

// Return a register to the list of available registers.
// Check to see if it's not already there.
// reg is used as an index into freereg[] without a range check.
static void free_register(int reg)
{
  if (freereg[reg] != 0) {
    fprintf(stderr, "Error trying to free register %d\n", reg);
    exit(1);
  }
  freereg[reg]= 1;
}

// Print out the assembly preamble: mark all registers free, then
// write the printint() helper, which calls printf() with the
// "%d" plus newline format, followed by the opening
// instructions of main()
void cgpreamble()
{
  freeall_registers();
  fputs(
	"\tglobal\tmain\n"
	"\textern\tprintf\n"
	"\tsection\t.text\n"
	"LC0:\tdb\t\"%d\",10,0\n"
	"printint:\n"
	"\tpush\trbp\n"
	"\tmov\trbp, rsp\n"
	"\tsub\trsp, 16\n"
	"\tmov\t[rbp-4], edi\n"
	"\tmov\teax, [rbp-4]\n"
	"\tmov\tesi, eax\n"
	"\tlea rdi, [rel LC0]\n"
	"\tmov eax, 0\n"
	"\tcall printf\n"
	"\tnop\n"
	"\tleave\n"
	"\tret\n"
	"\n"
	"main:\n"
	"\tpush\trbp\n"
	"\tmov rbp, rsp\n",
  Outfile);
}

// Print out the assembly postamble: load zero into eax
// as main()'s return value, restore rbp and return
void cgpostamble()
{
  fputs(
	"\tmov eax, 0\n"
	"\tpop rbp\n"
	"\tret\n",
  Outfile);
}

// Load an integer literal value into a register.
// Return the number of the register int cgload(int value) { // Get a new register int r= alloc_register(); // Print out the code to initialise it fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return(r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return(r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return(r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return(r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return(r1); } // Call printint() with the given register void cgprintint(int r) { fprintf(Outfile, "\tmov\trdi, %s\n", reglist[r]); fprintf(Outfile, "\tcall\tprintint\n"); free_register(r); } ================================================ FILE: 04_Assembly/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; extern_ int Putback; extern_ FILE *Infile; extern_ FILE *Outfile; extern_ struct token Token; ================================================ FILE: 04_Assembly/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 int scan(struct token *t); struct ASTnode *mkastnode(int 
op, struct ASTnode *left, struct ASTnode *right, int intvalue); struct ASTnode *mkastleaf(int op, int intvalue); struct ASTnode *mkastunary(int op, struct ASTnode *left, int intvalue); struct ASTnode *binexpr(int rbp); int interpretAST(struct ASTnode *n); void generatecode(struct ASTnode *n); void freeall_registers(void); void cgpreamble(); void cgpostamble(); int cgload(int value); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdiv(int r1, int r2); void cgprintint(int r); ================================================ FILE: 04_Assembly/defs.h ================================================ #include #include #include #include // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 // Token types enum { T_EOF, T_PLUS, T_MINUS, T_STAR, T_SLASH, T_INTLIT }; // Token structure struct token { int token; // Token type, from the enum list above int intvalue; // For T_INTLIT, the integer value }; // AST node types enum { A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_INTLIT }; // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree struct ASTnode *left; // Left and right child trees struct ASTnode *right; int intvalue; // For A_INTLIT, the integer value }; ================================================ FILE: 04_Assembly/expr.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of expressions // Copyright (c) 2019 Warren Toomey, GPL3 // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; // For an INTLIT token, make a leaf AST node for it // and scan in the next token. Otherwise, a syntax error // for any other token type. 
switch (Token.token) { case T_INTLIT: n = mkastleaf(A_INTLIT, Token.intvalue); scan(&Token); return (n); default: fprintf(stderr, "syntax error on line %d, token %d\n", Line, Token.token); exit(1); } } // Convert a binary operator token into an AST operation. int arithop(int tokentype) { switch (tokentype) { case T_PLUS: return (A_ADD); case T_MINUS: return (A_SUBTRACT); case T_STAR: return (A_MULTIPLY); case T_SLASH: return (A_DIVIDE); default: fprintf(stderr, "syntax error on line %d, token %d\n", Line, tokentype); exit(1); } } // Operator precedence for each token static int OpPrec[] = { 0, 10, 10, 20, 20, 0 }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec = OpPrec[tokentype]; if (prec == 0) { fprintf(stderr, "syntax error on line %d, token %d\n", Line, tokentype); exit(1); } return (prec); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; int tokentype; // Get the integer literal on the left. // Fetch the next token at the same time. left = primary(); // If no tokens left, return just the left node tokentype = Token.token; if (tokentype == T_EOF) return (left); // While the precedence of this token is // more than that of the previous token precedence while (op_precedence(tokentype) > ptp) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. left = mkastnode(arithop(tokentype), left, right, 0); // Update the details of the current token. 
// If no tokens left, return just the left node tokentype = Token.token; if (tokentype == T_EOF) return (left); } // Return the tree we have when the precedence // is the same or lower return (left); } ================================================ FILE: 04_Assembly/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Given an AST, generate // assembly code recursively static int genAST(struct ASTnode *n) { int leftreg, rightreg; // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left); if (n->right) rightreg = genAST(n->right); switch (n->op) { case A_ADD: return (cgadd(leftreg,rightreg)); case A_SUBTRACT: return (cgsub(leftreg,rightreg)); case A_MULTIPLY: return (cgmul(leftreg,rightreg)); case A_DIVIDE: return (cgdiv(leftreg,rightreg)); case A_INTLIT: return (cgload(n->intvalue)); default: fprintf(stderr, "Unknown AST operator %d\n", n->op); exit(1); } } void generatecode(struct ASTnode *n) { int reg; cgpreamble(); reg= genAST(n); cgprintint(reg); cgpostamble(); } ================================================ FILE: 04_Assembly/input01 ================================================ 2 + 3 * 5 - 8 / 3 ================================================ FILE: 04_Assembly/input02 ================================================ 13 -6+ 4* 5 + 08 / 3 ================================================ FILE: 04_Assembly/interp.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree interpreter // Copyright (c) 2019 Warren Toomey, GPL3 // List of AST operators static char *ASTop[] = { "+", "-", "*", "/" }; // Given an AST, interpret the // operators in it and return // a final value. 
int interpretAST(struct ASTnode *n) { int leftval, rightval; // Get the left and right sub-tree values if (n->left) leftval = interpretAST(n->left); if (n->right) rightval = interpretAST(n->right); // Debug: Print what we are about to do // if (n->op == A_INTLIT) // printf("int %d\n", n->intvalue); // else // printf("%d %s %d\n", leftval, ASTop[n->op], rightval); switch (n->op) { case A_ADD: return (leftval + rightval); case A_SUBTRACT: return (leftval - rightval); case A_MULTIPLY: return (leftval * rightval); case A_DIVIDE: return (leftval / rightval); case A_INTLIT: return (n->intvalue); default: fprintf(stderr, "Unknown AST operator %d\n", n->op); exit(1); } } ================================================ FILE: 04_Assembly/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Initialise global variables static void init() { Line = 1; Putback = '\n'; } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s infile\n", prog); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. 
void main(int argc, char *argv[]) { struct ASTnode *n; if (argc != 2) usage(argv[0]); init(); // Open up the input file if ((Infile = fopen(argv[1], "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", argv[1], strerror(errno)); exit(1); } // Create the output file if ((Outfile = fopen("out.s", "w")) == NULL) { fprintf(stderr, "Unable to create out.s: %s\n", strerror(errno)); exit(1); } scan(&Token); // Get the first token from the input n = binexpr(0); // Parse the expression in the file printf("%d\n", interpretAST(n)); // Calculate the final result generatecode(n); fclose(Outfile); exit(0); } ================================================ FILE: 04_Assembly/scan.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { char *p; p = strchr(s, c); return (p ? p - s : -1); } // Get the next character from the input file. static int next(void) { int c; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return c; } c = fgetc(Infile); // Read from input file if ('\n' == c) Line++; // Increment line count return c; } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. static int skip(void) { int c; c = next(); while (' ' == c || '\t' == c || '\n' == c || '\r' == c || '\f' == c) { c = next(); } return (c); } // Scan and return an integer literal // value from the input file. static int scanint(int c) { int k, val = 0; // Convert each character into an int value while ((k = chrpos("0123456789", c)) >= 0) { val = val * 10 + k; c = next(); } // We hit a non-integer character, put it back. 
putback(c); return val; } // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. int scan(struct token *t) { int c; // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': t->token = T_PLUS; break; case '-': t->token = T_MINUS; break; case '*': t->token = T_STAR; break; case '/': t->token = T_SLASH; break; default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } printf("Unrecognised character %c on line %d\n", c, Line); exit(1); } // We found a token return (1); } ================================================ FILE: 04_Assembly/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, struct ASTnode *left, struct ASTnode *right, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) { fprintf(stderr, "Unable to malloc in mkastnode()\n"); exit(1); } // Copy in the field values and return it n->op = op; n->left = left; n->right = right; n->intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int intvalue) { return (mkastnode(op, NULL, NULL, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, struct ASTnode *left, int intvalue) { return (mkastnode(op, left, NULL, intvalue)); } ================================================ FILE: 05_Statements/Makefile ================================================ comp1: cg.c expr.c gen.c main.c misc.c scan.c stmt.c tree.c cc -o comp1 -g cg.c expr.c gen.c main.c misc.c scan.c stmt.c tree.c compn: cgn.c expr.c gen.c main.c misc.c scan.c stmt.c tree.c cc -o compn -g cgn.c 
expr.c gen.c main.c misc.c scan.c stmt.c tree.c clean: rm -f comp1 compn *.o *.s out test: comp1 input01 ./comp1 input01 cc -o out out.s ./out testn: compn input01 ./compn input01 nasm -f elf64 out.s cc -no-pie -o out out.o ./out ================================================ FILE: 05_Statements/Readme.md ================================================ # Part 5: Statements It's time to add some "proper" statements to the grammar of our language. I want to be able to write lines of code like this: ``` print 2 + 3 * 5; print 18 - 6/3 + 4*2; ``` Of course, as we are ignoring whitespace, there's no necessity that all the tokens for one statement are on the same line. Each statement starts with the keyword `print` and is terminated with a semicolon. So these are going to become new tokens in our language. ## BNF Description of the Grammar We've already seen the BNF notation for expressions. Now let's define the BNF syntax for the above types of statements: ``` statements: statement | statement statements ; statement: 'print' expression ';' ; ``` An input file consists of several statements. They are either one statement, or a statement followed by more statements. Each statement starts with the keyword `print`, then one expression, then a semicolon. ## Changes to the Lexical Scanner Before we can get to the code that parses the above syntax, we need to add a few more bits and pieces to the existing code. Let's start with the lexical scanner. Adding a token for semicolons will be easy. Now, the `print` keyword. Later on, we'll have many keywords in the language, plus identifiers for our variables, so we'll need to add some code which helps us to deal with them. In `scan.c`, I've added this code which I've borrowed from the SubC compiler. It reads in alphanumeric characters into a buffer until it hits a non-alphanumeric character. ```c // Scan an identifier from the input file and // store it in buf[]. 
Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { printf("identifier too long on line %d\n", Line); exit(1); } else if (i < lim - 1) { buf[i++] = c; } c = next(); } // We hit a non-valid character, put it back. // NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } ``` We also need a function to recognise keywords in the language. One way would be to have a list of keywords, and to walk the list and `strcmp()` each one against the buffer from `scanident()`. The code from SubC has an optimisation: match against the first letter before doing the `strcmp()`. This speeds up the comparison against dozens of keywords. Right now we don't need this optimisation but I've put it in for later: ```c // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. 
static int keyword(char *s) { switch (*s) { case 'p': if (!strcmp(s, "print")) return (T_PRINT); break; } return (0); } ``` Now, at the bottom of the switch statement in `scan()`, we add this code to recognise semicolons and keywords: ```c case ';': t->token = T_SEMI; break; default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if (tokentype = keyword(Text)) { t->token = tokentype; break; } // Not a recognised keyword, so an error for now printf("Unrecognised symbol %s on line %d\n", Text, Line); exit(1); } // The character isn't part of any recognised token, error printf("Unrecognised character %c on line %d\n", c, Line); exit(1); ``` I've also added a global `Text` buffer to store the keywords and identifiers: ```c #define TEXTLEN 512 // Length of symbols in input extern_ char Text[TEXTLEN + 1]; // Last identifier scanned ``` ## Changes to the Expression Parser Up to now our input files have contained just a single expression; therefore, in our Pratt parser code in `binexpr()` (in `expr.c`), we had this code to exit the parser: ```c // If no tokens left, return just the left node tokentype = Token.token; if (tokentype == T_EOF) return (left); ``` With our new grammar, each expression is terminated by a semicolon. Thus, we need to change the code in the expression parser to spot the `T_SEMI` tokens and exit the expression parsing: ```c // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; int tokentype; // Get the integer literal on the left. // Fetch the next token at the same time. 
left = primary(); // If we hit a semicolon, return just the left node tokentype = Token.token; if (tokentype == T_SEMI) return (left); while (op_precedence(tokentype) > ptp) { ... // Update the details of the current token. // If we hit a semicolon, return just the left node tokentype = Token.token; if (tokentype == T_SEMI) return (left); } } ``` ## Changes to the Code Generator I want to keep the generic code generator in `gen.c` separate from the CPU-specific code in `cg.c`. That also means that the rest of the compiler should only ever call the functions in `gen.c`, and only `gen.c` should call the code in `cg.c`. To this end, I've defined some new "front-end" functions in `gen.c`: ```c void genpreamble() { cgpreamble(); } void genpostamble() { cgpostamble(); } void genfreeregs() { freeall_registers(); } void genprintint(int reg) { cgprintint(reg); } ``` ## Adding the Parser for Statements We have a new file `stmt.c`. This will hold the parsing code for all the main statements in our language. Right now, we need to parse the BNF grammar for statements which I gave up above. This is done with this single function. I've converted the recursive definition into a loop: ```c // Parse one or more statements void statements(void) { struct ASTnode *tree; int reg; while (1) { // Match a 'print' as the first token match(T_PRINT, "print"); // Parse the following expression and // generate the assembly code tree = binexpr(0); reg = genAST(tree); genprintint(reg); genfreeregs(); // Match the following semicolon // and stop if we are at EOF semi(); if (Token.token == T_EOF) return; } } ``` In each loop, the code finds a T_PRINT token. It then calls `binexpr()` to parse the expression. Finally, it finds the T_SEMI token. If a T_EOF token follows, we break out of the loop. After each expression tree, the code in `gen.c` is called to convert the tree into assembly code and to call the assembly `printint()` function to print out the final value. 
## Some Helper Functions There are a couple of new helper functions in the above code, which I've put into a new file, `misc.c`: ```c // Ensure that the current token is t, // and fetch the next token. Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { printf("%s expected on line %d\n", what, Line); exit(1); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } ``` These form part of the syntax checking in the parser. Later on, I'll add more short functions to call `match()` to make our syntax checking easier. ## Changes to `main()` `main()` used to call `binexpr()` directly to parse the single expression in the old input files. Now it does this: ```c scan(&Token); // Get the first token from the input genpreamble(); // Output the preamble statements(); // Parse the statements in the input genpostamble(); // Output the postamble fclose(Outfile); // Close the output file and exit exit(0); ``` ## Trying It Out That's about it for the new and changed code. Let's give the new code a whirl. Here is the new input file, `input01`: ``` print 12 * 3; print 18 - 2 * 4; print 1 + 2 + 9 - 5/2 + 3*5; ``` Yes, I've decided to check that we can have tokens spread out across multiple lines. To compile and run the input file, do a `make test`: ```make $ make test cc -o comp1 -g cg.c expr.c gen.c main.c misc.c scan.c stmt.c tree.c ./comp1 input01 cc -o out out.s ./out 36 10 25 ``` And it works! ## Conclusion and What's Next We've added our first "real" statement grammar to our language. I've defined it in BNF notation, but it was easier to implement it with a loop and not recursively. Don't worry, we'll go back to doing recursive parsing soon. Along the way we had to modify the scanner, add support for keywords and identifiers, and to more cleanly separate the generic code generator and the CPU-specific generator. 
In the next part of our compiler writing journey, we will add variables to the language. This will require a significant amount of work. [Next step](../06_Variables/Readme.md) ================================================ FILE: 05_Statements/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers // and their names static int freereg[4]; static char *reglist[4] = { "%r8", "%r9", "%r10", "%r11" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fprintf(stderr, "Out of registers!\n"); exit(1); } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) { fprintf(stderr, "Error trying to free register %d\n", reg); exit(1); } freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n" ".LC0:\n" "\t.string\t\"%d\\n\"\n" "printint:\n" "\tpushq\t%rbp\n" "\tmovq\t%rsp, %rbp\n" "\tsubq\t$16, %rsp\n" "\tmovl\t%edi, -4(%rbp)\n" "\tmovl\t-4(%rbp), %eax\n" "\tmovl\t%eax, %esi\n" "\tleaq .LC0(%rip), %rdi\n" "\tmovl $0, %eax\n" "\tcall printf@PLT\n" "\tnop\n" "\tleave\n" "\tret\n" "\n" "\t.globl\tmain\n" "\t.type\tmain, @function\n" "main:\n" "\tpushq\t%rbp\n" "\tmovq %rsp, %rbp\n", Outfile); } // Print out the assembly postamble void cgpostamble() { fputs("\tmovl $0, %eax\n" "\tpopq %rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. 
// Return the number of the register int cgload(int value) { // Get a new register int r = alloc_register(); // Print out the code to initialise it fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); free_register(r2); return (r1); } // Call printint() with the given register void cgprintint(int r) { fprintf(Outfile, "\tmovq\t%s, %%rdi\n", reglist[r]); fprintf(Outfile, "\tcall\tprintint\n"); free_register(r); } ================================================ FILE: 05_Statements/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers // and their names static int freereg[4]; static char *reglist[4] = { "r8", "r9", "r10", "r11" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. 
Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fprintf(stderr, "Out of registers!\n"); exit(1); } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) { fprintf(stderr, "Error trying to free register %d\n", reg); exit(1); } freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\tglobal\tmain\n" "\textern\tprintf\n" "\tsection\t.text\n" "LC0:\tdb\t\"%d\",10,0\n" "printint:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n" "\tsub\trsp, 16\n" "\tmov\t[rbp-4], edi\n" "\tmov\teax, [rbp-4]\n" "\tmov\tesi, eax\n" "\tlea rdi, [rel LC0]\n" "\tmov eax, 0\n" "\tcall printf\n" "\tnop\n" "\tleave\n" "\tret\n" "\n" "main:\n" "\tpush\trbp\n" "\tmov rbp, rsp\n", Outfile); } // Print out the assembly postamble void cgpostamble() { fputs("\tmov eax, 0\n" "\tpop rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. 
// Return the number of the register int cgload(int value) { // Get a new register int r = alloc_register(); // Print out the code to initialise it fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } // Call printint() with the given register void cgprintint(int r) { fprintf(Outfile, "\tmov\trdi, %s\n", reglist[r]); fprintf(Outfile, "\tcall\tprintint\n"); free_register(r); } ================================================ FILE: 05_Statements/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Putback; // Character put back by scanner extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ struct token Token; // Last token scanned extern_ char Text[TEXTLEN + 1]; // Last identifier scanned ================================================ FILE: 05_Statements/decl.h 
================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, struct ASTnode *left, struct ASTnode *right, int intvalue); struct ASTnode *mkastleaf(int op, int intvalue); struct ASTnode *mkastunary(int op, struct ASTnode *left, int intvalue); // gen.c int genAST(struct ASTnode *n); void genpreamble(); void genpostamble(); void genfreeregs(); void genprintint(int reg); // cg.c void freeall_registers(void); void cgpreamble(); void cgpostamble(); int cgload(int value); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdiv(int r1, int r2); void cgprintint(int r); // expr.c struct ASTnode *binexpr(int ptp); // stmt.c void statements(void); // misc.c void match(int t, char *what); void semi(void); ================================================ FILE: 05_Statements/defs.h ================================================ #include #include #include #include // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 #define TEXTLEN 512 // Length of symbols in input // Token types enum { T_EOF, T_PLUS, T_MINUS, T_STAR, T_SLASH, T_INTLIT, T_SEMI, T_PRINT }; // Token structure struct token { int token; // Token type, from the enum list above int intvalue; // For T_INTLIT, the integer value }; // AST node types enum { A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_INTLIT }; // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree struct ASTnode *left; // Left and right child trees struct ASTnode *right; int intvalue; // For A_INTLIT, the integer value }; ================================================ FILE: 05_Statements/expr.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of expressions // Copyright (c) 2019 Warren Toomey, GPL3 // Parse a primary factor and 
return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; // For an INTLIT token, make a leaf AST node for it // and scan in the next token. Otherwise, a syntax error // for any other token type. switch (Token.token) { case T_INTLIT: n = mkastleaf(A_INTLIT, Token.intvalue); scan(&Token); return (n); default: fprintf(stderr, "syntax error on line %d, token %d\n", Line, Token.token); exit(1); } } // Convert a binary operator token into an AST operation. static int arithop(int tokentype) { switch (tokentype) { case T_PLUS: return (A_ADD); case T_MINUS: return (A_SUBTRACT); case T_STAR: return (A_MULTIPLY); case T_SLASH: return (A_DIVIDE); default: fprintf(stderr, "syntax error on line %d, token %d\n", Line, tokentype); exit(1); } } // Operator precedence for each token static int OpPrec[] = { 0, 10, 10, 20, 20, 0 }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec = OpPrec[tokentype]; if (prec == 0) { fprintf(stderr, "syntax error on line %d, token %d\n", Line, tokentype); exit(1); } return (prec); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; int tokentype; // Get the integer literal on the left. // Fetch the next token at the same time. left = primary(); // If we hit a semicolon, return just the left node tokentype = Token.token; if (tokentype == T_SEMI) return (left); // While the precedence of this token is // more than that of the previous token precedence while (op_precedence(tokentype) > ptp) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. 
left = mkastnode(arithop(tokentype), left, right, 0); // Update the details of the current token. // If we hit a semicolon, return just the left node tokentype = Token.token; if (tokentype == T_SEMI) return (left); } // Return the tree we have when the precedence // is the same or lower return (left); } ================================================ FILE: 05_Statements/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Given an AST, generate // assembly code recursively int genAST(struct ASTnode *n) { int leftreg, rightreg; // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left); if (n->right) rightreg = genAST(n->right); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdiv(leftreg, rightreg)); case A_INTLIT: return (cgload(n->intvalue)); default: fprintf(stderr, "Unknown AST operator %d\n", n->op); exit(1); } } void genpreamble() { cgpreamble(); } void genpostamble() { cgpostamble(); } void genfreeregs() { freeall_registers(); } void genprintint(int reg) { cgprintint(reg); } ================================================ FILE: 05_Statements/input01 ================================================ print 12 * 3; print 18 - 2 * 4; print 1 + 2 + 9 - 5/2 + 3*5; ================================================ FILE: 05_Statements/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Initialise global variables static void init() { Line = 1; Putback = '\n'; } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s infile\n", prog); exit(1); } // 
Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. void main(int argc, char *argv[]) { if (argc != 2) usage(argv[0]); init(); // Open up the input file if ((Infile = fopen(argv[1], "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", argv[1], strerror(errno)); exit(1); } // Create the output file if ((Outfile = fopen("out.s", "w")) == NULL) { fprintf(stderr, "Unable to create out.s: %s\n", strerror(errno)); exit(1); } scan(&Token); // Get the first token from the input genpreamble(); // Output the preamble statements(); // Parse the statements in the input genpostamble(); // Output the postamble fclose(Outfile); // Close the output file and exit exit(0); } ================================================ FILE: 05_Statements/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current token is t, // and fetch the next token. Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { printf("%s expected on line %d\n", what, Line); exit(1); } } // Match a semicon and fetch the next token void semi(void) { match(T_SEMI, ";"); } ================================================ FILE: 05_Statements/scan.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { char *p; p = strchr(s, c); return (p ? p - s : -1); } // Get the next character from the input file. 
static int next(void) { int c; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return (c); } c = fgetc(Infile); // Read from input file if ('\n' == c) Line++; // Increment line count return (c); } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. static int skip(void) { int c; c = next(); while (' ' == c || '\t' == c || '\n' == c || '\r' == c || '\f' == c) { c = next(); } return (c); } // Scan and return an integer literal // value from the input file. static int scanint(int c) { int k, val = 0; // Convert each character into an int value while ((k = chrpos("0123456789", c)) >= 0) { val = val * 10 + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan an identifier from the input file and // store it in buf[]. Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { printf("identifier too long on line %d\n", Line); exit(1); } else if (i < lim - 1) { buf[i++] = c; } c = next(); } // We hit a non-valid character, put it back. // NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. static int keyword(char *s) { switch (*s) { case 'p': if (!strcmp(s, "print")) return (T_PRINT); break; } return (0); } // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. 
int scan(struct token *t) { int c, tokentype; // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': t->token = T_PLUS; break; case '-': t->token = T_MINUS; break; case '*': t->token = T_STAR; break; case '/': t->token = T_SLASH; break; case ';': t->token = T_SEMI; break; default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if (tokentype = keyword(Text)) { t->token = tokentype; break; } // Not a recognised keyword, so an error for now printf("Unrecognised symbol %s on line %d\n", Text, Line); exit(1); } // The character isn't part of any recognised token, error printf("Unrecognised character %c on line %d\n", c, Line); exit(1); } // We found a token return (1); } ================================================ FILE: 05_Statements/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // statements: statement // | statement statements // ; // // statement: 'print' expression ';' // ; // Parse one or more statements void statements(void) { struct ASTnode *tree; int reg; while (1) { // Match a 'print' as the first token match(T_PRINT, "print"); // Parse the following expression and // generate the assembly code tree = binexpr(0); reg = genAST(tree); genprintint(reg); genfreeregs(); // Match the following semicolon // and stop if we are at EOF semi(); if (Token.token == T_EOF) return; } } ================================================ FILE: 05_Statements/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, 
GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, struct ASTnode *left, struct ASTnode *right, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) { fprintf(stderr, "Unable to malloc in mkastnode()\n"); exit(1); } // Copy in the field values and return it n->op = op; n->left = left; n->right = right; n->intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int intvalue) { return (mkastnode(op, NULL, NULL, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, struct ASTnode *left, int intvalue) { return (mkastnode(op, left, NULL, intvalue)); } ================================================ FILE: 06_Variables/Makefile ================================================ SRCS= cg.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c sym.c tree.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c sym.c tree.c comp1: $(SRCS) cc -o comp1 -g $(SRCS) compn: $(SRCN) cc -o compn -g $(SRCN) clean: rm -f comp1 compn *.o *.s out test: comp1 input01 input02 ./comp1 input01 cc -o out out.s ./out ./comp1 input02 cc -o out out.s ./out testn: compn input01 input02 ./compn input01 nasm -f elf64 out.s cc -no-pie -o out out.o ./out ./compn input02 nasm -f elf64 out.s cc -no-pie -o out out.o ./out ================================================ FILE: 06_Variables/Readme.md ================================================ # Part 6: Variables I've just finished adding global variables to the compiler and, as I suspected, it was a lot of work. Also, pretty much every file in the compiler got modified in the process. So this part of the journey is going to be long. ## What Do We Want from Variables? 
We want to be able to: + Declare variables + Use variables to get stored values + Assign to variables Here is `input02` which will be our test program: ``` int fred; int jim; fred= 5; jim= 12; print fred + jim; ``` The most obvious change is that the grammar now has variable declarations, assignment statements and variable names in expressions. However, before we get to that, let's look at how we implement variables. ## The Symbol Table Every compiler is going to need a [symbol table](https://en.wikipedia.org/wiki/Symbol_table). Later on, we will hold more than just global variables. But for now, here is the structure of an entry in the table (from `defs.h`): ```c // Symbol table structure struct symtable { char *name; // Name of a symbol }; ``` We have an array of symbols in `data.h`: ```c #define NSYMBOLS 1024 // Number of symbol table entries extern_ struct symtable Gsym[NSYMBOLS]; // Global symbol table static int Globs = 0; // Position of next free global symbol slot ``` `Globs` is actually in `sym.c`, the file that manages the symbol table. In here we have these management functions: + `int findglob(char *s)`: Determine if the symbol s is in the global symbol table. Return its slot position or -1 if not found. + `static int newglob(void)`: Get the position of a new global symbol slot, or die if we've run out of positions. + `int addglob(char *name)`: Add a global symbol to the symbol table. Return the slot number in the symbol table. The code is fairly straightforward, so I won't bother to give the code here in the discussion. With these functions, we can find symbols and add new symbols to the symbol table. 
## Scanning and New Tokens If you look at the example input file, we need a few new tokens: + 'int', known as T_INT + '=', known as T_EQUALS + identifier names, known as T_IDENT The scanning of '=' is easy to add to `scan()`: ```c case '=': t->token = T_EQUALS; break; ``` We can add the 'int' keyword to `keyword()`: ```c case 'i': if (!strcmp(s, "int")) return (T_INT); break; ``` For identifiers, we are already using `scanident()` to store words into the `Text` variable. Instead of dying if a word is not a keyword, we can return a T_IDENT token: ```c if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if (tokentype = keyword(Text)) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } ``` ## The New Grammar We're about ready to look at the changes to the grammar of our input language. As before, I'll define it with BNF notation: ``` statements: statement | statement statements ; statement: 'print' expression ';' | 'int' identifier ';' | identifier '=' expression ';' ; identifier: T_IDENT ; ``` An identifier is returned as a T_IDENT token, and we already have the code to parse print statements. But, as we now have three types of statements, it makes sense to write a function to deal with each one. Our top-level `statements()` function in `stmt.c` now looks like: ```c // Parse one or more statements void statements(void) { while (1) { switch (Token.token) { case T_PRINT: print_statement(); break; case T_INT: var_declaration(); break; case T_IDENT: assignment_statement(); break; case T_EOF: return; default: fatald("Syntax error, token", Token.token); } } } ``` I've moved the old print statement code into `print_statement()` and you can browse that yourself. ## Variable Declarations Let's look at variable declarations. 
This is in a new file, `decl.c`, as we are going to have lots of other types of declarations in the future. ```c // Parse the declaration of a variable void var_declaration(void) { // Ensure we have an 'int' token followed by an identifier // and a semicolon. Text now has the identifier's name. // Add it as a known identifier match(T_INT, "int"); ident(); addglob(Text); genglobsym(Text); semi(); } ``` The `ident()` and `semi()` functions are wrappers around `match()`: ```c void semi(void) { match(T_SEMI, ";"); } void ident(void) { match(T_IDENT, "identifier"); } ``` Back to `var_declaration()`, once we have scanned the identifier into the `Text` buffer, we can add this to the global symbol table with `addglob(Text)`. The code in there allows a variable to be declared multiple times (for now). ## Assignment Statements Here's the code for `assignment_statement()` in `stmt.c`: ```c void assignment_statement(void) { struct ASTnode *left, *right, *tree; int id; // Ensure we have an identifier ident(); // Check it's been defined then make a leaf node for it if ((id = findglob(Text)) == -1) { fatals("Undeclared variable", Text); } right = mkastleaf(A_LVIDENT, id); // Ensure we have an equals sign match(T_EQUALS, "="); // Parse the following expression left = binexpr(0); // Make an assignment AST tree tree = mkastnode(A_ASSIGN, left, right, 0); // Generate the assembly code for the assignment genAST(tree, -1); genfreeregs(); // Match the following semicolon semi(); } ``` We have a couple of new AST node types. A_ASSIGN takes the expression in the left-hand child and assigns it to the right-hand child. And the right-hand child will be an A_LVIDENT node. Why did I call this node *A_LVIDENT*? Because it represents an *lvalue* identifier. So what's an [lvalue](https://en.wikipedia.org/wiki/Value_(computer_science)#lrvalue)? An lvalue is a value that is tied to a specific location. Here, it's the address in memory which holds a variable's value. 
When we do: ``` area = width * height; ``` we *assign* the result of the right-hand side (i.e. the *rvalue*) to the variable in the left-hand side (i.e. the *lvalue*). The *rvalue* isn't tied to a specific location. Here, the expression result is probably in some arbitrary register. Also note that, although the assignment statement has the syntax ``` identifier '=' expression ';' ``` we will make the expression the left sub-tree of the A_ASSIGN node and save the A_LVIDENT details in the right sub-tree. Why? Because we need to evaluate the expression *before* we save it into the variable. ## Changes to the AST Structure We now need to store either an integer literal value in A_INTLIT AST nodes, or the details of the symbol for A_IDENT AST nodes. I've added a *union* to the AST structure to do this (in `defs.h`): ```c // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree struct ASTnode *left; // Left and right child trees struct ASTnode *right; union { int intvalue; // For A_INTLIT, the integer value int id; // For A_IDENT, the symbol slot number } v; }; ``` ## Generating the Assignment Code Let's now look at the changes to `genAST()` in `gen.c` ```c int genAST(struct ASTnode *n, int reg) { int leftreg, rightreg; // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, -1); if (n->right) rightreg = genAST(n->right, leftreg); switch (n->op) { ... case A_INTLIT: return (cgloadint(n->v.intvalue)); case A_IDENT: return (cgloadglob(Gsym[n->v.id].name)); case A_LVIDENT: return (cgstorglob(reg, Gsym[n->v.id].name)); case A_ASSIGN: // The work has already been done, return the result return (rightreg); default: fatald("Unknown AST operator", n->op); } ``` Note that we evaluate the left-hand AST child first, and we get back a register number that holds the left-hand sub-tree's value. We now pass this register number to the right-hand sub-tree. 
We need to do this for A_LVIDENT nodes, so that the `cgstorglob()` function in `cg.c` knows which register holds the rvalue result of the assignment expression. So, consider this AST tree: ``` A_ASSIGN / \ A_INTLIT A_LVIDENT (3) (5) ``` We call `leftreg = genAST(n->left, -1);` to evaluate the A_INTLIT operation. This will `return (cgloadint(n->v.intvalue));`, i.e. load a register with the value 3 and return the register id. Then, we call `rightreg = genAST(n->right, leftreg);` to evaluate the A_LVIDENT operation. This will `return (cgstorglob(reg, Gsym[n->v.id].name));`, i.e. store the register into the variable whose name is in `Gsym[5]`. Then we switch to the A_ASSIGN case. Well, all our work has already been done. The rvalue is still in a register, so let's leave it there and return it. Later, we'll be able to do expressions like: ``` a= b= c = 0; ``` where an assignment is not just a statement but also an expression. ## Generating x86-64 Code You would have noticed that I changed the name of the old `cgload()` function to `cgloadint()`. This is more specific. We now have a function to load the value out of a global variable (in `cg.c`): ```c int cgloadglob(char *identifier) { // Get a new register int r = alloc_register(); // Print out the code to initialise it fprintf(Outfile, "\tmovq\t%s(\%%rip), %s\n", identifier, reglist[r]); return (r); } ``` Similarly, we need a function to save a register into a variable: ```c // Store a register's value into a variable int cgstorglob(int r, char *identifier) { fprintf(Outfile, "\tmovq\t%s, %s(\%%rip)\n", reglist[r], identifier); return (r); } ``` We also need a function to create a new global integer variable: ```c // Generate a global symbol void cgglobsym(char *sym) { fprintf(Outfile, "\t.comm\t%s,8,8\n", sym); } ``` Of course, we can't let the parser access this code directly. 
Instead, there is a function in the generic code generator in `gen.c` that acts as the interface: ```c void genglobsym(char *s) { cgglobsym(s); } ``` ## Variables in Expressions So now we can assign to variables. But how do we get a variable's value into an expression? Well, we already have a `primary()` function to get an integer literal. Let's modify it to also load a variable's value: ```c // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; int id; switch (Token.token) { case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. n = mkastleaf(A_INTLIT, Token.intvalue); break; case T_IDENT: // Check that this identifier exists id = findglob(Text); if (id == -1) fatals("Unknown variable", Text); // Make a leaf AST node for it n = mkastleaf(A_IDENT, id); break; default: fatald("Syntax error, token", Token.token); } // Scan in the next token and return the leaf node scan(&Token); return (n); } ``` Note the syntax checking in the T_IDENT case to ensure the variable has been declared before we try to use it. Also note that the AST leaf node that *retrieves* a variable's value is an A_IDENT node. The leaf that saves into a variable is an A_LVIDENT node. This is the difference between *rvalues* and *lvalues*. ## Trying It Out I think that's about it for variable declarations, so let's try it out with the `input02` file: ``` int fred; int jim; fred= 5; jim= 12; print fred + jim; ``` We can `make test` to do this: ``` $ make test cc -o comp1 -g cg.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c sym.c tree.c ... ./comp1 input02 cc -o out out.s ./out 17 ``` As you can see, we calculated `fred + jim` which is 5 + 12 or 17. Here are the new assembly lines in `out.s`: ``` .comm fred,8,8 # Declare fred .comm jim,8,8 # Declare jim ... 
movq $5, %r8 movq %r8, fred(%rip) # fred = 5 movq $12, %r8 movq %r8, jim(%rip) # jim = 12 movq fred(%rip), %r8 movq jim(%rip), %r9 addq %r8, %r9 # fred + jim ``` ## Other Changes I've probably made a few other changes. The only main one that I can remember is to create some helper functions in `misc.c` to make it easier to report fatal errors: ```c // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d\n", s, Line); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d\n", s1, s2, Line); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d\n", s, d, Line); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d\n", s, c, Line); exit(1); } ``` ## Conclusion and What's Next So that was a lot of work. We had to write the beginnings of symbol table management. We had to deal with two new statement types. We had to add some new tokens and some new AST node types. Finally, we had to add some code to generate the correct x86-64 assembly output. Try writing a few example input files and see if the compiler works as it should, especially if it detects syntax errors and semantic errors (variable use without a declaration). In the next part of our compiler writing journey, we will add the six comparison operators to our language. That will allow us to start on the control structures in the part after that. [Next step](../07_Comparisons/Readme.md) ================================================ FILE: 06_Variables/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names static int freereg[4]; static char *reglist[4] = { "%r8", "%r9", "%r10", "%r11" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. 
Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n" ".LC0:\n" "\t.string\t\"%d\\n\"\n" "printint:\n" "\tpushq\t%rbp\n" "\tmovq\t%rsp, %rbp\n" "\tsubq\t$16, %rsp\n" "\tmovl\t%edi, -4(%rbp)\n" "\tmovl\t-4(%rbp), %eax\n" "\tmovl\t%eax, %esi\n" "\tleaq .LC0(%rip), %rdi\n" "\tmovl $0, %eax\n" "\tcall printf@PLT\n" "\tnop\n" "\tleave\n" "\tret\n" "\n" "\t.globl\tmain\n" "\t.type\tmain, @function\n" "main:\n" "\tpushq\t%rbp\n" "\tmovq %rsp, %rbp\n", Outfile); } // Print out the assembly postamble void cgpostamble() { fputs("\tmovl $0, %eax\n" "\tpopq %rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register int cgloadint(int value) { // Get a new register int r = alloc_register(); // Print out the code to initialise it fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]); return (r); } // Load a value from a variable into a register. 
// Return the number of the register int cgloadglob(char *identifier) { // Get a new register int r = alloc_register(); // Print out the code to initialise it fprintf(Outfile, "\tmovq\t%s(\%%rip), %s\n", identifier, reglist[r]); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); free_register(r2); return (r1); } // Call printint() with the given register void cgprintint(int r) { fprintf(Outfile, "\tmovq\t%s, %%rdi\n", reglist[r]); fprintf(Outfile, "\tcall\tprintint\n"); free_register(r); } // Store a register's value into a variable int cgstorglob(int r, char *identifier) { fprintf(Outfile, "\tmovq\t%s, %s(\%%rip)\n", reglist[r], identifier); return (r); } // Generate a global symbol void cgglobsym(char *sym) { fprintf(Outfile, "\t.comm\t%s,8,8\n", sym); } ================================================ FILE: 06_Variables/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names 
static int freereg[4]; static char *reglist[4] = { "r8", "r9", "r10", "r11" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\tglobal\tmain\n" "\textern\tprintf\n" "\tsection\t.text\n" "LC0:\tdb\t\"%d\",10,0\n" "printint:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n" "\tsub\trsp, 16\n" "\tmov\t[rbp-4], edi\n" "\tmov\teax, [rbp-4]\n" "\tmov\tesi, eax\n" "\tlea rdi, [rel LC0]\n" "\tmov eax, 0\n" "\tcall printf\n" "\tnop\n" "\tleave\n" "\tret\n" "\n" "main:\n" "\tpush\trbp\n" "\tmov rbp, rsp\n", Outfile); } // Print out the assembly postamble void cgpostamble() { fputs("\tmov eax, 0\n" "\tpop rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register int cgloadint(int value) { // Get a new register int r = alloc_register(); // Print out the code to initialise it fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Load a value from a variable into a register. 
// Return the number of the register int cgloadglob(char *identifier) { // Get a new register int r = alloc_register(); // Print out the code to initialise it fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], identifier); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } // Call printint() with the given register void cgprintint(int r) { fprintf(Outfile, "\tmov\trdi, %s\n", reglist[r]); fprintf(Outfile, "\tcall\tprintint\n"); free_register(r); } // Store a register's value into a variable int cgstorglob(int r, char *identifier) { fprintf(Outfile, "\tmov\t[%s], %s\n", identifier, reglist[r]); return (r); } // Generate a global symbol void cgglobsym(char *sym) { fprintf(Outfile, "\tcommon\t%s 8:8\n", sym); } ================================================ FILE: 06_Variables/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Putback; // Character put back by 
scanner extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ struct token Token; // Last token scanned extern_ char Text[TEXTLEN + 1]; // Last identifier scanned extern_ struct symtable Gsym[NSYMBOLS]; // Global symbol table ================================================ FILE: 06_Variables/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 // Parse the declaration of a variable void var_declaration(void) { // Ensure we have an 'int' token followed by an identifier // and a semicolon. Text now has the identifier's name. // Add it as a known identifier match(T_INT, "int"); ident(); addglob(Text); genglobsym(Text); semi(); } ================================================ FILE: 06_Variables/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, struct ASTnode *left, struct ASTnode *right, int intvalue); struct ASTnode *mkastleaf(int op, int intvalue); struct ASTnode *mkastunary(int op, struct ASTnode *left, int intvalue); // gen.c int genAST(struct ASTnode *n, int reg); void genpreamble(); void genpostamble(); void genfreeregs(); void genprintint(int reg); void genglobsym(char *s); // cg.c void freeall_registers(void); void cgpreamble(); void cgpostamble(); int cgloadint(int value); int cgloadglob(char *identifier); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdiv(int r1, int r2); void cgprintint(int r); int cgstorglob(int r, char *identifier); void cgglobsym(char *sym); // expr.c struct ASTnode *binexpr(int ptp); // stmt.c void statements(void); // misc.c void match(int t, char *what); void semi(void); void ident(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); 
void fatalc(char *s, int c); // sym.c int findglob(char *s); int addglob(char *name); // decl.c void var_declaration(void); ================================================ FILE: 06_Variables/defs.h ================================================ #include <stdlib.h> #include <stdio.h> #include <string.h> #include <ctype.h> // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 #define TEXTLEN 512 // Length of symbols in input #define NSYMBOLS 1024 // Number of symbol table entries // Token types enum { T_EOF, T_PLUS, T_MINUS, T_STAR, T_SLASH, T_INTLIT, T_SEMI, T_EQUALS, T_IDENT, // Keywords T_PRINT, T_INT }; // Token structure struct token { int token; // Token type, from the enum list above int intvalue; // For T_INTLIT, the integer value }; // AST node types enum { A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_INTLIT, A_IDENT, A_LVIDENT, A_ASSIGN }; // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree struct ASTnode *left; // Left and right child trees struct ASTnode *right; union { int intvalue; // For A_INTLIT, the integer value int id; // For A_IDENT, the symbol slot number } v; }; // Symbol table structure struct symtable { char *name; // Name of a symbol }; ================================================ FILE: 06_Variables/expr.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of expressions // Copyright (c) 2019 Warren Toomey, GPL3 // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; int id; switch (Token.token) { case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. 
n = mkastleaf(A_INTLIT, Token.intvalue); break; case T_IDENT: // Check that this identifier exists id = findglob(Text); if (id == -1) fatals("Unknown variable", Text); // Make a leaf AST node for it n = mkastleaf(A_IDENT, id); break; default: fatald("Syntax error, token", Token.token); } // Scan in the next token and return the leaf node scan(&Token); return (n); } // Convert a binary operator token into an AST operation. static int arithop(int tokentype) { switch (tokentype) { case T_PLUS: return (A_ADD); case T_MINUS: return (A_SUBTRACT); case T_STAR: return (A_MULTIPLY); case T_SLASH: return (A_DIVIDE); default: fatald("Syntax error, token", tokentype); } } // Operator precedence for each token static int OpPrec[] = { 0, 10, 10, 20, 20, 0 }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec = OpPrec[tokentype]; if (prec == 0) fatald("Syntax error, token", tokentype); return (prec); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; int tokentype; // Get the primary tree on the left. // Fetch the next token at the same time. left = primary(); // If we hit a semicolon, return just the left node tokentype = Token.token; if (tokentype == T_SEMI) return (left); // While the precedence of this token is // more than that of the previous token precedence while (op_precedence(tokentype) > ptp) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. left = mkastnode(arithop(tokentype), left, right, 0); // Update the details of the current token. 
// If we hit a semicolon, return just the left node tokentype = Token.token; if (tokentype == T_SEMI) return (left); } // Return the tree we have when the precedence // is the same or lower return (left); } ================================================ FILE: 06_Variables/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Given an AST, generate assembly code recursively. // Return the register id with the tree's final value int genAST(struct ASTnode *n, int reg) { int leftreg, rightreg; // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, -1); if (n->right) rightreg = genAST(n->right, leftreg); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdiv(leftreg, rightreg)); case A_INTLIT: return (cgloadint(n->v.intvalue)); case A_IDENT: return (cgloadglob(Gsym[n->v.id].name)); case A_LVIDENT: return (cgstorglob(reg, Gsym[n->v.id].name)); case A_ASSIGN: // The work has already been done, return the result return (rightreg); default: fatald("Unknown AST operator", n->op); } } void genpreamble() { cgpreamble(); } void genpostamble() { cgpostamble(); } void genfreeregs() { freeall_registers(); } void genprintint(int reg) { cgprintint(reg); } void genglobsym(char *s) { cgglobsym(s); } ================================================ FILE: 06_Variables/input01 ================================================ print 12 * 3; print 18 - 2 * 4; print 1 + 2 + 9 - 5/2 + 3*5; ================================================ FILE: 06_Variables/input02 ================================================ int fred; int jim; fred= 5; jim= 12; print fred + jim; ================================================ FILE: 06_Variables/input03 ================================================ int x; x= 1; 
print x; x= x + 1; print x; x= x + 1; print x; x= x + 1; print x; x= x + 1; print x; ================================================ FILE: 06_Variables/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Initialise global variables static void init() { Line = 1; Putback = '\n'; } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s infile\n", prog); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. void main(int argc, char *argv[]) { if (argc != 2) usage(argv[0]); init(); // Open up the input file if ((Infile = fopen(argv[1], "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", argv[1], strerror(errno)); exit(1); } // Create the output file if ((Outfile = fopen("out.s", "w")) == NULL) { fprintf(stderr, "Unable to create out.s: %s\n", strerror(errno)); exit(1); } scan(&Token); // Get the first token from the input genpreamble(); // Output the preamble statements(); // Parse the statements in the input genpostamble(); // Output the postamble fclose(Outfile); // Close the output file and exit exit(0); } ================================================ FILE: 06_Variables/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current token is t, // and fetch the next token. 
Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d\n", s, Line); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d\n", s1, s2, Line); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d\n", s, d, Line); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d\n", s, c, Line); exit(1); } ================================================ FILE: 06_Variables/scan.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { char *p; p = strchr(s, c); return (p ? p - s : -1); } // Get the next character from the input file. static int next(void) { int c; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return (c); } c = fgetc(Infile); // Read from input file if ('\n' == c) Line++; // Increment line count return (c); } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. static int skip(void) { int c; c = next(); while (' ' == c || '\t' == c || '\n' == c || '\r' == c || '\f' == c) { c = next(); } return (c); } // Scan and return an integer literal // value from the input file. 
static int scanint(int c) { int k, val = 0; // Convert each character into an int value while ((k = chrpos("0123456789", c)) >= 0) { val = val * 10 + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan an identifier from the input file and // store it in buf[]. Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { fatal("Identifier too long"); } else if (i < lim - 1) { buf[i++] = c; } c = next(); } // We hit a non-valid character, put it back. // NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. static int keyword(char *s) { switch (*s) { case 'i': if (!strcmp(s, "int")) return (T_INT); break; case 'p': if (!strcmp(s, "print")) return (T_PRINT); break; } return (0); } // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. 
int scan(struct token *t) { int c, tokentype; // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': t->token = T_PLUS; break; case '-': t->token = T_MINUS; break; case '*': t->token = T_STAR; break; case '/': t->token = T_SLASH; break; case ';': t->token = T_SEMI; break; case '=': t->token = T_EQUALS; break; default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if (tokentype = keyword(Text)) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token return (1); } ================================================ FILE: 06_Variables/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // statements: statement // | statement statements // ; // // statement: 'print' expression ';' // | 'int' identifier ';' // | identifier '=' expression ';' // ; // // identifier: T_IDENT // ; void print_statement(void) { struct ASTnode *tree; int reg; // Match a 'print' as the first token match(T_PRINT, "print"); // Parse the following expression and // generate the assembly code tree = binexpr(0); reg = genAST(tree, -1); genprintint(reg); genfreeregs(); // Match the following semicolon semi(); } void assignment_statement(void) { struct ASTnode *left, *right, *tree; int id; // Ensure we have an identifier ident(); // Check it's been defined then make a leaf node for it if ((id = findglob(Text)) == -1) { fatals("Undeclared variable", Text); } right = 
mkastleaf(A_LVIDENT, id); // Ensure we have an equals sign match(T_EQUALS, "="); // Parse the following expression left = binexpr(0); // Make an assignment AST tree tree = mkastnode(A_ASSIGN, left, right, 0); // Generate the assembly code for the assignment genAST(tree, -1); genfreeregs(); // Match the following semicolon semi(); } // Parse one or more statements void statements(void) { while (1) { switch (Token.token) { case T_PRINT: print_statement(); break; case T_INT: var_declaration(); break; case T_IDENT: assignment_statement(); break; case T_EOF: return; default: fatald("Syntax error, token", Token.token); } } } ================================================ FILE: 06_Variables/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 static int Globs = 0; // Position of next free global symbol slot // Determine if the symbol s is in the global symbol table. // Return its slot position or -1 if not found. int findglob(char *s) { int i; for (i = 0; i < Globs; i++) { if (*s == *Gsym[i].name && !strcmp(s, Gsym[i].name)) return (i); } return (-1); } // Get the position of a new global symbol slot, or die // if we've run out of positions. static int newglob(void) { int p; if ((p = Globs++) >= NSYMBOLS) fatal("Too many global symbols"); return (p); } // Add a global symbol to the symbol table. 
// Return the slot number in the symbol table int addglob(char *name) { int y; // If this is already in the symbol table, return the existing slot if ((y = findglob(name)) != -1) return (y); // Otherwise get a new slot, fill it in and // return the slot number y = newglob(); Gsym[y].name = strdup(name); return (y); } ================================================ FILE: 06_Variables/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, struct ASTnode *left, struct ASTnode *right, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->left = left; n->right = right; n->v.intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int intvalue) { return (mkastnode(op, NULL, NULL, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, struct ASTnode *left, int intvalue) { return (mkastnode(op, left, NULL, intvalue)); } ================================================ FILE: 07_Comparisons/Makefile ================================================ SRCS= cg.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c sym.c tree.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c sym.c tree.c comp1: $(SRCS) cc -o comp1 -g $(SRCS) compn: $(SRCN) cc -o compn -g $(SRCN) clean: rm -f comp1 compn *.o *.s out test: comp1 input04 ./comp1 input04 cc -o out out.s ./out testn: compn input04 ./compn input04 nasm -f elf64 out.s cc -no-pie -o out out.o ./out ================================================ FILE: 07_Comparisons/Readme.md ================================================ # Part 7: Comparison Operators I was going to add IF statements next, but 
then I realised that I'd better add some comparison operators first. This turned out to be quite easy, because they are binary operators like the existing ones. So let's quickly see what the changes are to add the six comparison operators: `==`, `!=`, `<`, `>`, `<=` and `>=`. ## Adding New Tokens We will have six new tokens, so let's add them to `defs.h`: ```c // Token types enum { T_EOF, T_PLUS, T_MINUS, T_STAR, T_SLASH, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, T_INTLIT, T_SEMI, T_ASSIGN, T_IDENT, // Keywords T_PRINT, T_INT }; ``` I've rearranged the tokens so that the ones with precedence come, in low-to-high precedence order, before the tokens that don't have any precedence. ## Scanning The Tokens Now we have to scan them in. Note that we have to distinguish between `=` and `==`, `<` and `<=`, `>` and `>=`. So we will need to read in an extra character from the input and put it back if we don't need it. Here's the new code in `scan()` from `scan.c`: ```c case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { fatalc("Unrecognised character", c); } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else { putback(c); t->token = T_LT; } break; case '>': if ((c = next()) == '=') { t->token = T_GE; } else { putback(c); t->token = T_GT; } break; ``` I also changed the name of the `=` token to T_ASSIGN to ensure I didn't get confused between it and the new T_EQ token. ## New Expression Code We can now scan in the six new tokens. So now we have to parse them when they appear in expressions, and also enforce their operator precedence. By now you would have worked out that: + I'm building what will become a self-compiling compiler + in the C language + using the SubC compiler as a reference. The implication is that I'm writing a compiler for enough of a subset of C (just as SubC) so that it will compile itself. 
Therefore, I should use the normal [C operator precedence order](https://en.cppreference.com/w/c/language/operator_precedence). This means that the comparison operators have lower precedence than multiply and divide. I also realised that the switch statement I was using to map tokens to AST node types was only going to get bigger. So I decided to rearrange the AST node types so that there is a 1:1 mapping between them for all the binary operators (in `defs.h`): ```c // AST node types. The first few line up // with the related tokens enum { A_ADD=1, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_INTLIT, A_IDENT, A_LVIDENT, A_ASSIGN }; ``` Now in `expr.c`, I can simplify the token to AST node conversion and also add in the new tokens' precedence: ```c // Convert a binary operator token into an AST operation. // We rely on a 1:1 mapping from token to AST operation static int arithop(int tokentype) { if (tokentype > T_EOF && tokentype < T_INTLIT) return(tokentype); fatald("Syntax error, token", tokentype); } // Operator precedence for each token. Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 10, // T_EOF, T_PLUS, T_MINUS 20, 20, // T_STAR, T_SLASH 30, 30, // T_EQ, T_NE 40, 40, 40, 40 // T_LT, T_GT, T_LE, T_GE }; ``` That's it for the parsing and operator precedence! ## Code Generation As the six new operators are binary operators, it's easy to modify the generic code generator in `gen.c` to deal with them: ```c case A_EQ: return (cgequal(leftreg, rightreg)); case A_NE: return (cgnotequal(leftreg, rightreg)); case A_LT: return (cglessthan(leftreg, rightreg)); case A_GT: return (cggreaterthan(leftreg, rightreg)); case A_LE: return (cglessequal(leftreg, rightreg)); case A_GE: return (cggreaterequal(leftreg, rightreg)); ``` ## x86-64 Code Generation Now it gets a bit tricky. In C, the comparison operators return a value. If they evaluate true, their result is 1. If they evaluate false, their result is 0. 
We need to write x86-64 assembly code to reflect this. Luckily there are some x86-64 instructions to do this. Unfortunately, there are some issues to deal with along the way. Consider this x86-64 instruction:

```
    cmpq %r8,%r9
```

The above `cmpq` instruction performs %r9 - %r8 and sets several status flags including the negative and zero flags. Thus, we can look at the flag combinations to see the result of the comparisons:

| Comparison | Operation | Flags If True |
|------------|-----------|---------------|
| %r8 == %r9 | %r9 - %r8 | Zero |
| %r8 != %r9 | %r9 - %r8 | Not Zero |
| %r8 > %r9 | %r9 - %r8 | Not Zero, Negative |
| %r8 < %r9 | %r9 - %r8 | Not Zero, Not Negative |
| %r8 >= %r9 | %r9 - %r8 | Zero or Negative |
| %r8 <= %r9 | %r9 - %r8 | Zero or Not Negative |

There are six x86-64 instructions which set a register to 1 or 0 based on the two flag values: `sete`, `setne`, `setg`, `setl`, `setge` and `setle` in the order of the above table rows.

The problem is, these instructions only set the lowest byte of a register. If the register already has bits set outside of the lowest byte, they will stay set. So we might set a variable to 1, but if it already has the value 1000 (decimal, 0x3E8), then only its lowest byte is replaced and it will now be 769 (0x301), which is not what we want.

The solution is to `andq` the register after the `setX` instruction to get rid of the unwanted bits. In `cg.c` there is a general comparison function to do this:

```c
// Compare two registers.
static int cgcompare(int r1, int r2, char *how) {
  fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]);
  fprintf(Outfile, "\t%s\t%s\n", how, breglist[r2]);
  fprintf(Outfile, "\tandq\t$255,%s\n", reglist[r2]);
  free_register(r1);
  return (r2);
}
```

where `how` is one of the `setX` instructions. Note that we perform

```
   cmpq reglist[r2], reglist[r1]
```

because this is actually `reglist[r1] - reglist[r2]` which is what we really want.
## x86-64 Registers We need to take a short diversion here to discuss the registers in the x86-64 architecture. x86-64 has several 64-bit general purpose registers, but we can also use different register names to access and work on subsections of these registers. ![](https://i.stack.imgur.com/N0KnG.png) The above image from *stack.imgur.com* shows that, for the 64-bit *r8* register, we can access the low 32 bits of this register by using the "*r8d*" register. Similarly, the "*r8w*" register is the low 16 bits and the "*r8b*" register is the low 8 bits of the *r8* register. In the `cgcompare()` function, the code uses the `reglist[]` array to compare the two 64-bit registers, but then sets a flag in the 8-bit version of the second register by using the names in the `breglist[]` array. The x86-64 architecture only allows the `setX` instructions to operate on the 8-bit register names, thus the need for the `breglist[]` array. ## Creating Several Compare Instructions Now that we have this general function, we can write the six actual comparison functions: ```c int cgequal(int r1, int r2) { return(cgcompare(r1, r2, "sete")); } int cgnotequal(int r1, int r2) { return(cgcompare(r1, r2, "setne")); } int cglessthan(int r1, int r2) { return(cgcompare(r1, r2, "setl")); } int cggreaterthan(int r1, int r2) { return(cgcompare(r1, r2, "setg")); } int cglessequal(int r1, int r2) { return(cgcompare(r1, r2, "setle")); } int cggreaterequal(int r1, int r2) { return(cgcompare(r1, r2, "setge")); } ``` As with the other binary operator functions, one register is freed and the other register returns with the result. # Putting It Into Action Have a look at the `input04` input file: ```c int x; x= 7 < 9; print x; x= 7 <= 9; print x; x= 7 != 9; print x; x= 7 == 7; print x; x= 7 >= 7; print x; x= 7 <= 7; print x; x= 9 > 7; print x; x= 9 >= 7; print x; x= 9 != 7; print x; ``` All of these comparisons are true, so we should print nine 1s out. Do a `make test` to confirm this. 
Let's look at the assembly code output by the first comparison: ``` movq $7, %r8 movq $9, %r9 cmpq %r9, %r8 # Perform %r8 - %r9, i.e. 7 - 9 setl %r9b # Set %r9b to 1 if 7 is less than 9 andq $255,%r9 # Remove all other bits in %r9 movq %r9, x(%rip) # Save the result in x movq x(%rip), %r8 movq %r8, %rdi call printint # Print x out ``` Yes there is some inefficient assembly code above. We haven't even started to worry about optimised code yet. To quote Donald Knuth: > **Premature optimization is the root of all evil (or at least most of it) in programming.** ## Conclusion and What's Next That was a nice and easy addition to the compiler. The next part of the journey will be much more complicated. In the next part of our compiler writing journey, we will add IF statements to the compiler and make use of the comparison operators that we just added. [Next step](../08_If_Statements/Readme.md) ================================================ FILE: 07_Comparisons/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names // We need a list of byte registers, too static int freereg[4]; static char *reglist[4] = { "%r8", "%r9", "%r10", "%r11" }; static char *breglist[4] = { "%r8b", "%r9b", "%r10b", "%r11b" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n" ".LC0:\n" "\t.string\t\"%d\\n\"\n" "printint:\n" "\tpushq\t%rbp\n" "\tmovq\t%rsp, %rbp\n" "\tsubq\t$16, %rsp\n" "\tmovl\t%edi, -4(%rbp)\n" "\tmovl\t-4(%rbp), %eax\n" "\tmovl\t%eax, %esi\n" "\tleaq .LC0(%rip), %rdi\n" "\tmovl $0, %eax\n" "\tcall printf@PLT\n" "\tnop\n" "\tleave\n" "\tret\n" "\n" "\t.globl\tmain\n" "\t.type\tmain, @function\n" "main:\n" "\tpushq\t%rbp\n" "\tmovq %rsp, %rbp\n", Outfile); } // Print out the assembly postamble void cgpostamble() { fputs("\tmovl $0, %eax\n" "\tpopq %rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register int cgloadint(int value) { // Get a new register int r = alloc_register(); // Print out the code to initialise it fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]); return (r); } // Load a value from a variable into a register. 
// Return the number of the register int cgloadglob(char *identifier) { // Get a new register int r = alloc_register(); // Print out the code to initialise it fprintf(Outfile, "\tmovq\t%s(\%%rip), %s\n", identifier, reglist[r]); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); free_register(r2); return (r1); } // Call printint() with the given register void cgprintint(int r) { fprintf(Outfile, "\tmovq\t%s, %%rdi\n", reglist[r]); fprintf(Outfile, "\tcall\tprintint\n"); free_register(r); } // Store a register's value into a variable int cgstorglob(int r, char *identifier) { fprintf(Outfile, "\tmovq\t%s, %s(\%%rip)\n", reglist[r], identifier); return (r); } // Generate a global symbol void cgglobsym(char *sym) { fprintf(Outfile, "\t.comm\t%s,8,8\n", sym); } // Compare two registers. 
// Compare two registers and leave 1 or 0 in r2.
// how names the setX instruction that turns the flags from
// the comparison into a byte value; the andq then clears
// everything in r2 above that low byte. r1 is freed.
static int cgcompare(int r1, int r2, char *how) {
  char *lhs = reglist[r1];
  char *rhs = reglist[r2];

  fprintf(Outfile,
	  "\tcmpq\t%s, %s\n"
	  "\t%s\t%s\n"
	  "\tandq\t$255,%s\n",
	  rhs, lhs, how, breglist[r2], rhs);
  free_register(r1);
  return (r2);
}

// The six comparison operators: each one picks
// the setX instruction that cgcompare() should emit

int cgequal(int r1, int r2) {
  return (cgcompare(r1, r2, "sete"));
}

int cgnotequal(int r1, int r2) {
  return (cgcompare(r1, r2, "setne"));
}

int cglessthan(int r1, int r2) {
  return (cgcompare(r1, r2, "setl"));
}

int cggreaterthan(int r1, int r2) {
  return (cgcompare(r1, r2, "setg"));
}

int cglessequal(int r1, int r2) {
  return (cgcompare(r1, r2, "setle"));
}

int cggreaterequal(int r1, int r2) {
  return (cgcompare(r1, r2, "setge"));
}
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\tglobal\tmain\n" "\textern\tprintf\n" "\tsection\t.text\n" "LC0:\tdb\t\"%d\",10,0\n" "printint:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n" "\tsub\trsp, 16\n" "\tmov\t[rbp-4], edi\n" "\tmov\teax, [rbp-4]\n" "\tmov\tesi, eax\n" "\tlea rdi, [rel LC0]\n" "\tmov eax, 0\n" "\tcall printf\n" "\tnop\n" "\tleave\n" "\tret\n" "\n" "main:\n" "\tpush\trbp\n" "\tmov rbp, rsp\n", Outfile); } // Print out the assembly postamble void cgpostamble() { fputs("\tmov eax, 0\n" "\tpop rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register int cgloadint(int value) { // Get a new register int r = alloc_register(); // Print out the code to initialise it fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Load a value from a variable into a register. 
// Return the number of the register int cgloadglob(char *identifier) { // Get a new register int r = alloc_register(); // Print out the code to initialise it fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], identifier); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } // Call printint() with the given register void cgprintint(int r) { fprintf(Outfile, "\tmov\trdi, %s\n", reglist[r]); fprintf(Outfile, "\tcall\tprintint\n"); free_register(r); } // Store a register's value into a variable int cgstorglob(int r, char *identifier) { fprintf(Outfile, "\tmov\t[%s], %s\n", identifier, reglist[r]); return (r); } // Generate a global symbol void cgglobsym(char *sym) { fprintf(Outfile, "\tcommon\t%s 8:8\n", sym); } // Compare two registers. 
// Compare two registers and leave 1 or 0 in r2.
// how names the setX instruction that turns the flags from
// the comparison into a byte value; the and then clears
// everything in r2 above that low byte. r1 is freed.
static int cgcompare(int r1, int r2, char *how) {
  char *lhs = reglist[r1];
  char *rhs = reglist[r2];

  fprintf(Outfile,
	  "\tcmp\t%s, %s\n"
	  "\t%s\t%s\n"
	  "\tand\t%s, 255\n",
	  lhs, rhs, how, breglist[r2], rhs);
  free_register(r1);
  return (r2);
}

// The six comparison operators: each one picks
// the setX instruction that cgcompare() should emit

int cgequal(int r1, int r2) {
  return (cgcompare(r1, r2, "sete"));
}

int cgnotequal(int r1, int r2) {
  return (cgcompare(r1, r2, "setne"));
}

int cglessthan(int r1, int r2) {
  return (cgcompare(r1, r2, "setl"));
}

int cggreaterthan(int r1, int r2) {
  return (cgcompare(r1, r2, "setg"));
}

int cglessequal(int r1, int r2) {
  return (cgcompare(r1, r2, "setle"));
}

int cggreaterequal(int r1, int r2) {
  return (cgcompare(r1, r2, "setge"));
}
// Add it as a known identifier match(T_INT, "int"); ident(); addglob(Text); genglobsym(Text); semi(); } ================================================ FILE: 07_Comparisons/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, struct ASTnode *left, struct ASTnode *right, int intvalue); struct ASTnode *mkastleaf(int op, int intvalue); struct ASTnode *mkastunary(int op, struct ASTnode *left, int intvalue); // gen.c int genAST(struct ASTnode *n, int reg); void genpreamble(); void genpostamble(); void genfreeregs(); void genprintint(int reg); void genglobsym(char *s); // cg.c void freeall_registers(void); void cgpreamble(); void cgpostamble(); int cgloadint(int value); int cgloadglob(char *identifier); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdiv(int r1, int r2); void cgprintint(int r); int cgstorglob(int r, char *identifier); void cgglobsym(char *sym); int cgequal(int r1, int r2); int cgnotequal(int r1, int r2); int cglessthan(int r1, int r2); int cggreaterthan(int r1, int r2); int cglessequal(int r1, int r2); int cggreaterequal(int r1, int r2); // expr.c struct ASTnode *binexpr(int ptp); // stmt.c void statements(void); // misc.c void match(int t, char *what); void semi(void); void ident(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c int findglob(char *s); int addglob(char *name); // decl.c void var_declaration(void); ================================================ FILE: 07_Comparisons/defs.h ================================================ #include #include #include #include // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 #define TEXTLEN 512 // Length of symbols in input #define NSYMBOLS 1024 // Number of symbol table entries // Token types 
enum { T_EOF, T_PLUS, T_MINUS, T_STAR, T_SLASH, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, T_INTLIT, T_SEMI, T_ASSIGN, T_IDENT, // Keywords T_PRINT, T_INT }; // Token structure struct token { int token; // Token type, from the enum list above int intvalue; // For T_INTLIT, the integer value }; // AST node types. The first few line up // with the related tokens enum { A_ADD=1, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_INTLIT, A_IDENT, A_LVIDENT, A_ASSIGN }; // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree struct ASTnode *left; // Left and right child trees struct ASTnode *right; union { int intvalue; // For A_INTLIT, the integer value int id; // For A_IDENT, the symbol slot number } v; }; // Symbol table structure struct symtable { char *name; // Name of a symbol }; ================================================ FILE: 07_Comparisons/expr.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of expressions // Copyright (c) 2019 Warren Toomey, GPL3 // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; int id; switch (Token.token) { case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. n = mkastleaf(A_INTLIT, Token.intvalue); break; case T_IDENT: // Check that this identifier exists id = findglob(Text); if (id == -1) fatals("Unknown variable", Text); // Make a leaf AST node for it n = mkastleaf(A_IDENT, id); break; default: fatald("Syntax error, token", Token.token); } // Scan in the next token and return the leaf node scan(&Token); return (n); } // Convert a binary operator token into an AST operation. // We rely on a 1:1 mapping from token to AST operation static int arithop(int tokentype) { if (tokentype > T_EOF && tokentype < T_INTLIT) return(tokentype); fatald("Syntax error, token", tokentype); } // Operator precedence for each token. 
Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 10, // T_EOF, T_PLUS, T_MINUS 20, 20, // T_STAR, T_SLASH 30, 30, // T_EQ, T_NE 40, 40, 40, 40 // T_LT, T_GT, T_LE, T_GE }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec = OpPrec[tokentype]; if (prec == 0) fatald("Syntax error, token", tokentype); return (prec); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; int tokentype; // Get the primary tree on the left. // Fetch the next token at the same time. left = primary(); // If we hit a semicolon, return just the left node tokentype = Token.token; if (tokentype == T_SEMI) return (left); // While the precedence of this token is // more than that of the previous token precedence while (op_precedence(tokentype) > ptp) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. left = mkastnode(arithop(tokentype), left, right, 0); // Update the details of the current token. // If we hit a semicolon, return just the left node tokentype = Token.token; if (tokentype == T_SEMI) return (left); } // Return the tree we have when the precedence // is the same or lower return (left); } ================================================ FILE: 07_Comparisons/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Given an AST, generate assembly code recursively. 
// Return the register id with the tree's final value int genAST(struct ASTnode *n, int reg) { int leftreg, rightreg; // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, -1); if (n->right) rightreg = genAST(n->right, leftreg); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdiv(leftreg, rightreg)); case A_EQ: return (cgequal(leftreg, rightreg)); case A_NE: return (cgnotequal(leftreg, rightreg)); case A_LT: return (cglessthan(leftreg, rightreg)); case A_GT: return (cggreaterthan(leftreg, rightreg)); case A_LE: return (cglessequal(leftreg, rightreg)); case A_GE: return (cggreaterequal(leftreg, rightreg)); case A_INTLIT: return (cgloadint(n->v.intvalue)); case A_IDENT: return (cgloadglob(Gsym[n->v.id].name)); case A_LVIDENT: return (cgstorglob(reg, Gsym[n->v.id].name)); case A_ASSIGN: // The work has already been done, return the result return (rightreg); default: fatald("Unknown AST operator", n->op); } } void genpreamble() { cgpreamble(); } void genpostamble() { cgpostamble(); } void genfreeregs() { freeall_registers(); } void genprintint(int reg) { cgprintint(reg); } void genglobsym(char *s) { cgglobsym(s); } ================================================ FILE: 07_Comparisons/input01 ================================================ print 12 * 3; print 18 - 2 * 4; print 1 + 2 + 9 - 5/2 + 3*5; ================================================ FILE: 07_Comparisons/input02 ================================================ int fred; int jim; fred= 5; jim= 12; print fred + jim; ================================================ FILE: 07_Comparisons/input03 ================================================ int x; x= 1; print x; x= x + 1; print x; x= x + 1; print x; x= x + 1; print x; x= x + 1; print x; ================================================ FILE: 07_Comparisons/input04 
================================================ int x; x= 7 < 9; print x; x= 7 <= 9; print x; x= 7 != 9; print x; x= 7 == 7; print x; x= 7 >= 7; print x; x= 7 <= 7; print x; x= 9 > 7; print x; x= 9 >= 7; print x; x= 9 != 7; print x; ================================================ FILE: 07_Comparisons/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Initialise global variables static void init() { Line = 1; Putback = '\n'; } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s infile\n", prog); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. void main(int argc, char *argv[]) { if (argc != 2) usage(argv[0]); init(); // Open up the input file if ((Infile = fopen(argv[1], "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", argv[1], strerror(errno)); exit(1); } // Create the output file if ((Outfile = fopen("out.s", "w")) == NULL) { fprintf(stderr, "Unable to create out.s: %s\n", strerror(errno)); exit(1); } scan(&Token); // Get the first token from the input genpreamble(); // Output the preamble statements(); // Parse the statements in the input genpostamble(); // Output the postamble fclose(Outfile); // Close the output file and exit exit(0); } ================================================ FILE: 07_Comparisons/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current token is t, // and fetch the next token. 
// Return the position of character c
// in string s, or -1 if c not found.
// The string's own terminating NUL never counts as a match.
static int chrpos(char *s, int c) {
  char *p;

  // strchr() treats the terminator as part of the string, so
  // searching for 0 would "find" it at index strlen(s)
  if (c == '\0')
    return (-1);

  p = strchr(s, c);
  return (p ? (int) (p - s) : -1);
}
// Scan an identifier from the input file and
// store it in buf[]. Return the identifier's length.
// c is the identifier's first character and lim is the
// size limit for buf[]; an identifier that reaches the
// limit is a fatal error.
static int scanident(int c, char *buf, int lim) {
  int len = 0;
  const int max = lim - 1;

  for (;;) {
    // Stop at the first character that is not
    // a letter, a digit or an underscore
    if (!isalpha(c) && !isdigit(c) && c != '_')
      break;

    // Error if we hit the identifier length limit,
    // else append to buf[]
    if (len == max) {
      fatal("Identifier too long");
    } else if (len < max) {
      buf[len++] = c;
    }
    c = next();
  }

  // The character that ended the identifier belongs to
  // the next token: put it back, then terminate buf[]
  putback(c);
  buf[len] = '\0';
  return (len);
}
int scan(struct token *t) {
  int c, tokentype;

  // Skip whitespace
  c = skip();

  // Determine the token based on
  // the input character
  switch (c) {
    case EOF:
      // End of input: this is the only path that returns 0
      t->token = T_EOF;
      return (0);
    case '+':
      t->token = T_PLUS;
      break;
    case '-':
      t->token = T_MINUS;
      break;
    case '*':
      t->token = T_STAR;
      break;
    case '/':
      t->token = T_SLASH;
      break;
    case ';':
      t->token = T_SEMI;
      break;
    case '=':
      // Look one character ahead to tell '==' from '='.
      // If it is not part of this token, put it back.
      if ((c = next()) == '=') {
	t->token = T_EQ;
      } else {
	putback(c);
	t->token = T_ASSIGN;
      }
      break;
    case '!':
      // Only '!=' is recognised; a lone '!' is an error
      if ((c = next()) == '=') {
	t->token = T_NE;
      } else {
	fatalc("Unrecognised character", c);
      }
      break;
    case '<':
      // '<=' or '<'
      if ((c = next()) == '=') {
	t->token = T_LE;
      } else {
	putback(c);
	t->token = T_LT;
      }
      break;
    case '>':
      // '>=' or '>'
      if ((c = next()) == '=') {
	t->token = T_GE;
      } else {
	putback(c);
	t->token = T_GT;
      }
      break;
    default:

      // If it's a digit, scan the
      // literal integer value in
      if (isdigit(c)) {
	t->intvalue = scanint(c);
	t->token = T_INTLIT;
	break;
      } else if (isalpha(c) || '_' == c) {
	// Read in a keyword or identifier
	scanident(c, Text, TEXTLEN);

	// If it's a recognised keyword, return that token
	// (keyword() returns 0 when the word is not a keyword)
	if (tokentype = keyword(Text)) {
	  t->token = tokentype;
	  break;
	}
	// Not a recognised keyword, so it must be an identifier
	t->token = T_IDENT;
	break;
      }
      // The character isn't part of any recognised token, error
      fatalc("Unrecognised character", c);
  }

  // We found a token
  return (1);
}

================================================
FILE: 07_Comparisons/stmt.c
================================================
#include "defs.h"
#include "data.h"
#include "decl.h"

// Parsing of statements
// Copyright (c) 2019 Warren Toomey, GPL3

// statements: statement
//      | statement statements
//      ;
//
// statement: 'print' expression ';'
//      |     'int'   identifier ';'
//      |     identifier '=' expression ';'
//      ;
//
// identifier: T_IDENT
//      ;

// Parse a print statement and generate the
// assembly code that prints the expression's value
void print_statement(void) {
  struct ASTnode *tree;
  int reg;

  // Match a 'print' as the first token
  match(T_PRINT, "print");

  // Parse the following expression and
  // generate the assembly code
  tree = binexpr(0);
  reg = genAST(tree, -1);
  genprintint(reg);
  genfreeregs();

  // Match the following semicolon
  semi();
}

// Parse an assignment statement and generate
// the assembly code that performs the assignment
void assignment_statement(void) {
  struct ASTnode *left, *right, *tree;
  int id;

  // Ensure we have an identifier
  ident();

  // Check it's been defined then make a leaf node for it
  if ((id = findglob(Text)) == -1) {
    fatals("Undeclared variable", Text);
  }
  // Note: the lvalue leaf is stored as the right child
  right = mkastleaf(A_LVIDENT, id);

  // Ensure we have an equals sign
  match(T_ASSIGN, "=");

  // Parse the following expression
  // (it becomes the left child of the assignment node)
  left = binexpr(0);

  // Make an assignment AST tree
  tree = mkastnode(A_ASSIGN, left, right, 0);

  // Generate the assembly code for the assignment
  genAST(tree, -1);
  genfreeregs();

  // Match the following semicolon
  semi();
}


// Parse one or more statements
// Dispatch on the current token until T_EOF is seen;
// any other unexpected token is a fatal syntax error
void statements(void) {

  while (1) {
    switch (Token.token) {
    case T_PRINT:
      print_statement();
      break;
    case T_INT:
      var_declaration();
      break;
    case T_IDENT:
      assignment_statement();
      break;
    case T_EOF:
      return;
    default:
      fatald("Syntax error, token", Token.token);
    }
  }
}

================================================
FILE: 07_Comparisons/sym.c
================================================
#include "defs.h"
#include "data.h"
#include "decl.h"

// Symbol table functions
// Copyright (c) 2019 Warren Toomey, GPL3

static int Globs = 0;		// Position of next free global symbol slot

// Determine if the symbol s is in the global symbol table.
// Return its slot position or -1 if not found.
int findglob(char *s) {
  int i;

  for (i = 0; i < Globs; i++) {
    // Compare the first characters before calling strcmp()
    if (*s == *Gsym[i].name && !strcmp(s, Gsym[i].name))
      return (i);
  }
  return (-1);
}

// Get the position of a new global symbol slot, or die
// if we've run out of positions.
static int newglob(void) {
  int p;

  // Globs is post-incremented: p is the slot being handed out
  if ((p = Globs++) >= NSYMBOLS)
    fatal("Too many global symbols");
  return (p);
}

// Add a global symbol to the symbol table.
// Return the slot number in the symbol table int addglob(char *name) { int y; // If this is already in the symbol table, return the existing slot if ((y = findglob(name)) != -1) return (y); // Otherwise get a new slot, fill it in and // return the slot number y = newglob(); Gsym[y].name = strdup(name); return (y); } ================================================ FILE: 07_Comparisons/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, struct ASTnode *left, struct ASTnode *right, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->left = left; n->right = right; n->v.intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int intvalue) { return (mkastnode(op, NULL, NULL, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, struct ASTnode *left, int intvalue) { return (mkastnode(op, left, NULL, intvalue)); } ================================================ FILE: 08_If_Statements/Makefile ================================================ SRCS= cg.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c sym.c tree.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c sym.c tree.c comp1: $(SRCS) cc -o comp1 -g $(SRCS) compn: $(SRCN) cc -o compn -g $(SRCN) clean: rm -f comp1 compn *.o *.s out test: comp1 input05 ./comp1 input05 cc -o out out.s ./out testn: compn input05 ./compn input05 nasm -f elf64 out.s cc -no-pie -o out out.o ./out ================================================ FILE: 08_If_Statements/Readme.md ================================================ # Part 8: If Statements Now that we can compare values, it's time to 
add IF statements to our language. So, firstly, let's look at the general syntax of IF statements and how they get converted into assembly language. ## The IF Syntax The IF statement syntax is: ``` if (condition is true) perform this first block of code else perform this other block of code ``` Now, how is this normally converted into assembly language? It turns out that we do the opposite comparison and jump/branch if the opposite comparison is true: ``` perform the opposite comparison jump to L1 if true perform the first block of code jump to L2 L1: perform the other block of code L2: ``` where `L1` and `L2` are assembly language labels. ## Generating The Assembly in Our Compiler Right now, we output code to set a register based on a comparison, e.g. ``` int x; x= 7 < 9; From input04 ``` becomes ``` movq $7, %r8 movq $9, %r9 cmpq %r9, %r8 setl %r9b Set if less than andq $255,%r9 ``` But for an IF statement, we need to jump on the opposite comparison: ``` if (7 < 9) ``` should become: ``` movq $7, %r8 movq $9, %r9 cmpq %r9, %r8 jge L1 Jump if greater than or equal to .... L1: ``` So, I've implemented IF statements in this part of our journey. As this is a working project, I did have to undo a few things and refactor them as part of the journey. I'll try to cover the changes as well as the additions along the way. ## New Tokens and the Dangling Else We are going to need a bunch of new tokens in our language. I also (for now) want to avoid the [dangling else problem](https://en.wikipedia.org/wiki/Dangling_else). To that end, I've changed the grammar so that all groups of statements are wrapped in '{' ... '}' curly brackets; I called such a grouping a "compound statement". We also need '(' ... ')' parentheses to hold the IF expression, plus keywords 'if' and 'else'.
Thus, the new tokens are (in `defs.h`): ```c T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, // Keywords ..., T_IF, T_ELSE ``` ## Scanning the Tokens The single-character tokens should be obvious and I won't give the code to scan them. The keywords should also be pretty obvious, but I'll give the scanning code from `keyword()` in `scan.c`: ```c switch (*s) { case 'e': if (!strcmp(s, "else")) return (T_ELSE); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'p': if (!strcmp(s, "print")) return (T_PRINT); break; } ``` ## The New BNF Grammar Our grammar is starting to get big, so I've rewritten it somewhat: ``` compound_statement: '{' '}' // empty, i.e. no statement | '{' statement '}' | '{' statement statements '}' ; statement: print_statement | declaration | assignment_statement | if_statement ; print_statement: 'print' expression ';' ; declaration: 'int' identifier ';' ; assignment_statement: identifier '=' expression ';' ; if_statement: if_head | if_head 'else' compound_statement ; if_head: 'if' '(' true_false_expression ')' compound_statement ; identifier: T_IDENT ; ``` I've left out the definition of `true_false_expression`, but at some point when we've added a few more operators I'll add it in. Note the grammar for the IF statement: it's either an `if_head` (with no 'else' clause), or an `if_head` followed by a 'else' and a `compound_statement`. I've separated out all the different statement types to have their own non-terminal name. Also, the previous `statements` non-terminal is now the `compound_statement` non-terminal, and this requires '{' ... '}' around the statements. This means that the `compound_statement` in the head is surrounded by '{' ... '}' and so is any `compound_statement` after the 'else' keyword. 
So if we have nested IF statements, they have to look like: ``` if (condition1 is true) { if (condition2 is true) { statements; } else { statements; } } else { statements; } ``` and there is no ambiguity about which 'if' each 'else' belongs to. This solves the dangling else problem. Later on, I'll make the '{' ... '}' optional. ## Parsing Compound Statements The old `void statements()` function is now `compound_statement()` and looks like this: ```c // Parse a compound statement // and return its AST struct ASTnode *compound_statement(void) { struct ASTnode *left = NULL; struct ASTnode *tree; // Require a left curly bracket lbrace(); while (1) { switch (Token.token) { case T_PRINT: tree = print_statement(); break; case T_INT: var_declaration(); tree = NULL; // No AST generated here break; case T_IDENT: tree = assignment_statement(); break; case T_IF: tree = if_statement(); break; case T_RBRACE: // When we hit a right curly bracket, // skip past it and return the AST rbrace(); return (left); default: fatald("Syntax error, token", Token.token); } // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, left, NULL, tree, 0); } } } ``` Firstly, note that the code forces the parser to match the '{' at the start of the compound statement with `lbrace()`, and we can only exit when we've matched the ending '}' with `rbrace()`. Secondly, note that `print_statement()`, `assignment_statement()` and `if_statement()` all return an AST tree, as does `compound_statement()`. In our old code, `print_statement()` itself called `genAST()` to evaluate the expression, followed by a call to `genprintint()`. Similarly, `assignment_statement()` also called `genAST()` to do the assignment. Well, this means that we have AST trees over here, and others over there.
It makes some sense to generate just a single AST tree, and call `genAST()` once to generate the assembly code for it. This isn't mandatory. For example, SubC only generates ASTs for expressions. For the structural parts of the language, like statements, SubC makes specific calls to the code generator as I was doing in the previous versions of the compiler. I've decided to, for now, generate a single AST tree for the whole input with the parser. Once the input has been parsed, the assembly output can be generated from the one AST tree. Later on, I'll probably generate an AST tree for each function. Later. ## Parsing the IF Grammar Because we are a recursive descent parser, parsing the IF statement is not too bad: ```c // Parse an IF statement including // any optional ELSE clause // and return its AST struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Ensure // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) fatal("Bad comparison operator"); rparen(); // Get the AST for the compound statement trueAST = compound_statement(); // If we have an 'else', skip it // and get the AST for the compound statement if (Token.token == T_ELSE) { scan(&Token); falseAST = compound_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, condAST, trueAST, falseAST, 0)); } ``` Right now, I don't want to deal with input like `if (x-2)`, so I've limited the binary expression from `binexpr()` to have a root which is one of the six comparison operators A_EQ, A_NE, A_LT, A_GT, A_LE or A_GE. ## The Third Child I nearly smuggled something past you without properly explaining it. In the last line of `if_statement()` I build an AST node with: ```c mkastnode(A_IF, condAST, trueAST, falseAST, 0); ``` That's *three* AST sub-trees! What's going on here? 
As you can see, the IF statement will have three children: + the sub-tree that evaluates the condition + the compound statement immediately following + the optional compound statement after the 'else' keyword So we now need an AST node structure with three children (in `defs.h`): ```c // AST node types. enum { ... A_GLUE, A_IF }; // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; union { int intvalue; // For A_INTLIT, the integer value int id; // For A_IDENT, the symbol slot number } v; }; ``` Thus, an A_IF tree looks like this: ``` IF / | \ / | \ / | \ / | \ / | \ / | \ condition statements statements ``` ## Glue AST Nodes There is also a new A_GLUE AST node type. What is this used for? We now build a single AST tree with lots of statements, so we need a way to glue them together. Review the end of the `compound_statement()` loop code: ```c if (left != NULL) left = mkastnode(A_GLUE, left, NULL, tree, 0); ``` Each time we get a new sub-tree, we glue it on to the existing tree. So, for this sequence of statements: ``` stmt1; stmt2; stmt3; stmt4; ``` we end up with: ``` A_GLUE / \ A_GLUE stmt4 / \ A_GLUE stmt3 / \ stmt1 stmt2 ``` And, as we traverse the tree depth-first left to right, this still generates the assembly code in the correct order. ## The Generic Code Generator Now that our AST nodes have multiple children, our generic code generator is going to become a bit more complicated. Also, for the comparison operators, we need to know if we are doing the compare as part of an IF statement (jump on the opposite comparison) or a normal expression (set register to 1 or 0 on the normal comparison). 
To this end, I've modified `genAST()` so that we can pass in the parent AST node's operation: ```c // Given an AST, the register (if any) that holds // the previous rvalue, and the AST op of the parent, // generate assembly code recursively. // Return the register id with the tree's final value int genAST(struct ASTnode *n, int reg, int parentASTop) { ... } ``` ### Dealing with Specific AST Nodes The code in `genAST()` now has to deal with specific AST nodes: ```c // We now have specific AST node handling at the top switch (n->op) { case A_IF: return (genIFAST(n)); case A_GLUE: // Do each child statement, and free the // registers after each child genAST(n->left, NOREG, n->op); genfreeregs(); genAST(n->right, NOREG, n->op); genfreeregs(); return (NOREG); } ``` If we don't return, we carry on to do the normal binary operator AST nodes, with one exception: the comparison nodes: ```c case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: // If the parent AST node is an A_IF, generate a compare // followed by a jump. Otherwise, compare registers and // set one to 1 or 0 based on the comparison. if (parentASTop == A_IF) return (cgcompare_and_jump(n->op, leftreg, rightreg, reg)); else return (cgcompare_and_set(n->op, leftreg, rightreg)); ``` I'll cover the new functions `cgcompare_and_jump()` and `cgcompare_and_set()` below. ### Generating the IF Assembly Code We deal with the A_IF AST node with a specific function, along with a function to generate new label numbers: ```c // Generate and return a new label number static int label(void) { static int id = 1; return (id++); } // Generate the code for an IF statement // and an optional ELSE clause static int genIFAST(struct ASTnode *n) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. // When there is no ELSE clause, Lfalse _is_ // the ending label!
Lfalse = label(); if (n->right) Lend = label(); // Generate the condition code followed // by a zero jump to the false label. // We cheat by sending the Lfalse label as a register. genAST(n->left, Lfalse, n->op); genfreeregs(); // Generate the true compound statement genAST(n->mid, NOREG, n->op); genfreeregs(); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOREG, n->op); genfreeregs(); cglabel(Lend); } return (NOREG); } ``` Effectively, the code is doing: ```c genAST(n->left, Lfalse, n->op); // Condition and jump to Lfalse genAST(n->mid, NOREG, n->op); // Statements after 'if' cgjump(Lend); // Jump to Lend cglabel(Lfalse); // Lfalse: label genAST(n->right, NOREG, n->op); // Statements after 'else' cglabel(Lend); // Lend: label ``` ## The x86-64 Code Generation Functions So we now have a few new x86-64 code generation functions. Some of these replace the six `cgXXX()` comparison functions we created in the last part of the journey. For the normal comparison functions, we now pass in the AST operation to choose the relevant x86-64 `set` instruction: ```c // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } ``` I've also found an x86-64 instruction `movzbq` that moves the lowest byte from one register and extends it to fit into a 64-bit register. I'm using that now instead of the `and $255` in the old code. We need functions to generate a label and to jump to it: ```c // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } ``` Finally, we need a function to do a comparison and to jump based on the opposite comparison. So, using the AST comparison node type, we do the opposite comparison: ```c // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } ``` ## Testing the IF Statements Do a `make test` which compiles the `input05` file: ```c { int i; int j; i=6; j=12; if (i < j) { print i; } else { print j; } } ``` Here's the resulting assembly output: ``` movq $6, %r8 movq %r8, i(%rip) # i=6; movq $12, %r8 movq %r8, j(%rip) # j=12; movq i(%rip), %r8 movq j(%rip), %r9 cmpq %r9, %r8 # Compare %r8-%r9, i.e.
i-j jge L1 # Jump to L1 if i >= j movq i(%rip), %r8 movq %r8, %rdi # print i; call printint jmp L2 # Skip the else code L1: movq j(%rip), %r8 movq %r8, %rdi # print j; call printint L2: ``` And, of course, `make test` shows: ``` cc -o comp1 -g cg.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c sym.c tree.c ./comp1 input05 cc -o out out.s ./out 6 # As 6 is less than 12 ``` ## Conclusion and What's Next We've added our first control structure to our language with the IF statement. I had to rewrite a few existing things along the way and, given I don't have a complete architectural plan in my head, I'll likely have to rewrite more things in the future. The wrinkle for this part of the journey was that we had to perform the opposite comparison for the IF decision than what we would do for the normal comparison operators. My solution was to inform each AST node of the node type of their parent; the comparison nodes can now see if the parent is an A_IF node or not. I know that Nils Holm chose a different approach when he was implementing SubC, so you should look at his code just to see this different solution to the same problem. In the next part of our compiler writing journey, we will add another control structure: the WHILE loop. [Next step](../09_While_Loops/Readme.md) ================================================ FILE: 08_If_Statements/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names // We need a list of byte registers, too static int freereg[4]; static char *reglist[4] = { "%r8", "%r9", "%r10", "%r11" }; static char *breglist[4] = { "%r8b", "%r9b", "%r10b", "%r11b" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. 
static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n" ".LC0:\n" "\t.string\t\"%d\\n\"\n" "printint:\n" "\tpushq\t%rbp\n" "\tmovq\t%rsp, %rbp\n" "\tsubq\t$16, %rsp\n" "\tmovl\t%edi, -4(%rbp)\n" "\tmovl\t-4(%rbp), %eax\n" "\tmovl\t%eax, %esi\n" "\tleaq .LC0(%rip), %rdi\n" "\tmovl $0, %eax\n" "\tcall printf@PLT\n" "\tnop\n" "\tleave\n" "\tret\n" "\n" "\t.globl\tmain\n" "\t.type\tmain, @function\n" "main:\n" "\tpushq\t%rbp\n" "\tmovq %rsp, %rbp\n", Outfile); } // Print out the assembly postamble void cgpostamble() { fputs("\tmovl $0, %eax\n" "\tpopq %rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register int cgloadint(int value) { // Get a new register int r = alloc_register(); // Print out the code to initialise it fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]); return (r); } // Load a value from a variable into a register. 
// Return the number of the register int cgloadglob(char *identifier) { // Get a new register int r = alloc_register(); // Print out the code to initialise it fprintf(Outfile, "\tmovq\t%s(\%%rip), %s\n", identifier, reglist[r]); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); free_register(r2); return (r1); } // Call printint() with the given register void cgprintint(int r) { fprintf(Outfile, "\tmovq\t%s, %%rdi\n", reglist[r]); fprintf(Outfile, "\tcall\tprintint\n"); free_register(r); } // Store a register's value into a variable int cgstorglob(int r, char *identifier) { fprintf(Outfile, "\tmovq\t%s, %s(\%%rip)\n", reglist[r], identifier); return (r); } // Generate a global symbol void cgglobsym(char *sym) { fprintf(Outfile, "\t.comm\t%s,8,8\n", sym); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } ================================================ FILE: 08_If_Statements/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names static int freereg[4]; static char *reglist[4] = { "r8", "r9", "r10", "r11" }; static char *breglist[4] = { "r8b", "r9b", "r10b", "r11b" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); } // Return a register to the list of available registers. 
// Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\tglobal\tmain\n" "\textern\tprintf\n" "\tsection\t.text\n" "LC0:\tdb\t\"%d\",10,0\n" "printint:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n" "\tsub\trsp, 16\n" "\tmov\t[rbp-4], edi\n" "\tmov\teax, [rbp-4]\n" "\tmov\tesi, eax\n" "\tlea rdi, [rel LC0]\n" "\tmov eax, 0\n" "\tcall printf\n" "\tnop\n" "\tleave\n" "\tret\n" "\n" "main:\n" "\tpush\trbp\n" "\tmov rbp, rsp\n", Outfile); } // Print out the assembly postamble void cgpostamble() { fputs("\tmov eax, 0\n" "\tpop rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register int cgloadint(int value) { // Get a new register int r = alloc_register(); // Print out the code to initialise it fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Load a value from a variable into a register. 
// Return the number of the register int cgloadglob(char *identifier) { // Get a new register int r = alloc_register(); // Print out the code to initialise it fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], identifier); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } // Call printint() with the given register void cgprintint(int r) { fprintf(Outfile, "\tmov\trdi, %s\n", reglist[r]); fprintf(Outfile, "\tcall\tprintint\n"); free_register(r); } // Store a register's value into a variable int cgstorglob(int r, char *identifier) { fprintf(Outfile, "\tmov\t[%s], %s\n", identifier, reglist[r]); return (r); } // Generate a global symbol void cgglobsym(char *sym) { fprintf(Outfile, "\tcommon\t%s 8:8\n", sym); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzb\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } ================================================ FILE: 08_If_Statements/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Putback; // Character put back by scanner extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ struct token Token; // Last token scanned extern_ char Text[TEXTLEN + 1]; // Last identifier scanned extern_ struct symtable Gsym[NSYMBOLS]; // Global symbol table ================================================ FILE: 08_If_Statements/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 // Parse the declaration of a variable void 
var_declaration(void) { // Ensure we have an 'int' token followed by an identifier // and a semicolon. Text now has the identifier's name. // Add it as a known identifier match(T_INT, "int"); ident(); addglob(Text); genglobsym(Text); semi(); } ================================================ FILE: 08_If_Statements/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, int intvalue); struct ASTnode *mkastleaf(int op, int intvalue); struct ASTnode *mkastunary(int op, struct ASTnode *left, int intvalue); // gen.c int genAST(struct ASTnode *n, int reg, int parentASTop); void genpreamble(); void genpostamble(); void genfreeregs(); void genprintint(int reg); void genglobsym(char *s); // cg.c void freeall_registers(void); void cgpreamble(); void cgpostamble(); int cgloadint(int value); int cgloadglob(char *identifier); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdiv(int r1, int r2); void cgprintint(int r); int cgstorglob(int r, char *identifier); void cgglobsym(char *sym); int cgcompare_and_set(int ASTop, int r1, int r2); int cgcompare_and_jump(int ASTop, int r1, int r2, int label); void cglabel(int l); void cgjump(int l); // expr.c struct ASTnode *binexpr(int ptp); // stmt.c struct ASTnode *compound_statement(void); // misc.c void match(int t, char *what); void semi(void); void lbrace(void); void rbrace(void); void lparen(void); void rparen(void); void ident(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c int findglob(char *s); int addglob(char *name); // decl.c void var_declaration(void); ================================================ FILE: 08_If_Statements/defs.h ================================================ 
#include #include #include #include // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 #define TEXTLEN 512 // Length of symbols in input #define NSYMBOLS 1024 // Number of symbol table entries // Token types enum { T_EOF, T_PLUS, T_MINUS, T_STAR, T_SLASH, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, T_INTLIT, T_SEMI, T_ASSIGN, T_IDENT, T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, // Keywords T_PRINT, T_INT, T_IF, T_ELSE }; // Token structure struct token { int token; // Token type, from the enum list above int intvalue; // For T_INTLIT, the integer value }; // AST node types. The first few line up // with the related tokens enum { A_ADD = 1, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_INTLIT, A_IDENT, A_LVIDENT, A_ASSIGN, A_PRINT, A_GLUE, A_IF }; // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; union { int intvalue; // For A_INTLIT, the integer value int id; // For A_IDENT, the symbol slot number } v; }; #define NOREG -1 // Use NOREG when the AST generation // functions have no register to return // Symbol table structure struct symtable { char *name; // Name of a symbol }; ================================================ FILE: 08_If_Statements/expr.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of expressions // Copyright (c) 2019 Warren Toomey, GPL3 // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; int id; switch (Token.token) { case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. 
n = mkastleaf(A_INTLIT, Token.intvalue); break; case T_IDENT: // Check that this identifier exists id = findglob(Text); if (id == -1) fatals("Unknown variable", Text); // Make a leaf AST node for it n = mkastleaf(A_IDENT, id); break; default: fatald("Syntax error, token", Token.token); } // Scan in the next token and return the leaf node scan(&Token); return (n); } // Convert a binary operator token into an AST operation. // We rely on a 1:1 mapping from token to AST operation static int arithop(int tokentype) { if (tokentype > T_EOF && tokentype < T_INTLIT) return (tokentype); fatald("Syntax error, token", tokentype); } // Operator precedence for each token. Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 10, // T_EOF, T_PLUS, T_MINUS 20, 20, // T_STAR, T_SLASH 30, 30, // T_EQ, T_NE 40, 40, 40, 40 // T_LT, T_GT, T_LE, T_GE }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec = OpPrec[tokentype]; if (prec == 0) fatald("Syntax error, token", tokentype); return (prec); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; int tokentype; // Get the primary tree on the left. // Fetch the next token at the same time. left = primary(); // If we hit a semicolon or ')', return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN) return (left); // While the precedence of this token is // more than that of the previous token precedence while (op_precedence(tokentype) > ptp) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. 
left = mkastnode(arithop(tokentype), left, NULL, right, 0); // Update the details of the current token. // If we hit a semicolon or ')', return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN) return (left); } // Return the tree we have when the precedence // is the same or lower return (left); } ================================================ FILE: 08_If_Statements/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number static int label(void) { static int id = 1; return (id++); } // Generate the code for an IF statement // and an optional ELSE clause static int genIFAST(struct ASTnode *n) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. // When there is no ELSE clause, Lfalse _is_ // the ending label! Lfalse = label(); if (n->right) Lend = label(); // Generate the condition code followed // by a zero jump to the false label. // We cheat by sending the Lfalse label as a register. genAST(n->left, Lfalse, n->op); genfreeregs(); // Generate the true compound statement genAST(n->mid, NOREG, n->op); genfreeregs(); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOREG, n->op); genfreeregs(); cglabel(Lend); } return (NOREG); } // Given an AST, the register (if any) that holds // the previous rvalue, and the AST op of the parent, // generate assembly code recursively. 
// Return the register id with the tree's final value int genAST(struct ASTnode *n, int reg, int parentASTop) { int leftreg, rightreg; // We now have specific AST node handling at the top switch (n->op) { case A_IF: return (genIFAST(n)); case A_GLUE: // Do each child statement, and free the // registers after each child genAST(n->left, NOREG, n->op); genfreeregs(); genAST(n->right, NOREG, n->op); genfreeregs(); return (NOREG); } // General AST node handling below // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, NOREG, n->op); if (n->right) rightreg = genAST(n->right, leftreg, n->op); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdiv(leftreg, rightreg)); case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: // If the parent AST node is an A_IF, generate a compare // followed by a jump. Otherwise, compare registers and // set one to 1 or 0 based on the comparison. 
if (parentASTop == A_IF) return (cgcompare_and_jump(n->op, leftreg, rightreg, reg)); else return (cgcompare_and_set(n->op, leftreg, rightreg)); case A_INTLIT: return (cgloadint(n->v.intvalue)); case A_IDENT: return (cgloadglob(Gsym[n->v.id].name)); case A_LVIDENT: return (cgstorglob(reg, Gsym[n->v.id].name)); case A_ASSIGN: // The work has already been done, return the result return (rightreg); case A_PRINT: // Print the left-child's value // and return no register genprintint(leftreg); genfreeregs(); return (NOREG); default: fatald("Unknown AST operator", n->op); } } void genpreamble() { cgpreamble(); } void genpostamble() { cgpostamble(); } void genfreeregs() { freeall_registers(); } void genprintint(int reg) { cgprintint(reg); } void genglobsym(char *s) { cgglobsym(s); } ================================================ FILE: 08_If_Statements/input01 ================================================ { print 12 * 3; print 18 - 2 * 4; print 1 + 2 + 9 - 5/2 + 3*5; } ================================================ FILE: 08_If_Statements/input02 ================================================ { int fred; int jim; fred= 5; jim= 12; print fred + jim; } ================================================ FILE: 08_If_Statements/input03 ================================================ { int x; x= 1; print x; x= x + 1; print x; x= x + 1; print x; x= x + 1; print x; x= x + 1; print x; } ================================================ FILE: 08_If_Statements/input04 ================================================ { int x; x= 7 < 9; print x; x= 7 <= 9; print x; x= 7 != 9; print x; x= 7 == 7; print x; x= 7 >= 7; print x; x= 7 <= 7; print x; x= 9 > 7; print x; x= 9 >= 7; print x; x= 9 != 7; print x; } ================================================ FILE: 08_If_Statements/input05 ================================================ { int i; int j; i=6; j=12; if (i < j) { print i; } else { print j; } } ================================================ FILE: 08_If_Statements/main.c 
================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Initialise global variables static void init() { Line = 1; Putback = '\n'; } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s infile\n", prog); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. void main(int argc, char *argv[]) { struct ASTnode *tree; if (argc != 2) usage(argv[0]); init(); // Open up the input file if ((Infile = fopen(argv[1], "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", argv[1], strerror(errno)); exit(1); } // Create the output file if ((Outfile = fopen("out.s", "w")) == NULL) { fprintf(stderr, "Unable to create out.s: %s\n", strerror(errno)); exit(1); } scan(&Token); // Get the first token from the input genpreamble(); // Output the preamble tree = compound_statement(); // Parse the compound statement in the input genAST(tree, NOREG, 0); // Generate the assembly code for it genpostamble(); // Output the postamble fclose(Outfile); // Close the output file and exit exit(0); } ================================================ FILE: 08_If_Statements/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current token is t, // and fetch the next token. 
Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d\n", s, Line); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d\n", s1, s2, Line); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d\n", s, d, Line); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d\n", s, c, Line); exit(1); } ================================================ FILE: 08_If_Statements/scan.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { char *p; p = strchr(s, c); return (p ? p - s : -1); } // Get the next character from the input file. static int next(void) { int c; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return (c); } c = fgetc(Infile); // Read from input file if ('\n' == c) Line++; // Increment line count return (c); } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. 
static int skip(void) { int c; c = next(); while (' ' == c || '\t' == c || '\n' == c || '\r' == c || '\f' == c) { c = next(); } return (c); } // Scan and return an integer literal // value from the input file. static int scanint(int c) { int k, val = 0; // Convert each character into an int value while ((k = chrpos("0123456789", c)) >= 0) { val = val * 10 + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan an identifier from the input file and // store it in buf[]. Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { fatal("Identifier too long"); } else if (i < lim - 1) { buf[i++] = c; } c = next(); } // We hit a non-valid character, put it back. // NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. static int keyword(char *s) { switch (*s) { case 'e': if (!strcmp(s, "else")) return (T_ELSE); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'p': if (!strcmp(s, "print")) return (T_PRINT); break; } return (0); } // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. 
int scan(struct token *t) { int c, tokentype; // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': t->token = T_PLUS; break; case '-': t->token = T_MINUS; break; case '*': t->token = T_STAR; break; case '/': t->token = T_SLASH; break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { fatalc("Unrecognised character", c); } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else { putback(c); t->token = T_LT; } break; case '>': if ((c = next()) == '=') { t->token = T_GE; } else { putback(c); t->token = T_GT; } break; default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if (tokentype = keyword(Text)) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token return (1); } ================================================ FILE: 08_If_Statements/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // compound_statement: // empty, i.e. 
no statement // | statement // | statement statements // ; // // statement: print_statement // | declaration // | assignment_statement // | if_statement // ; // // print_statement: 'print' expression ';' ; // // declaration: 'int' identifier ';' ; // // assignment_statement: identifier '=' expression ';' ; // // if_statement: if_head // | if_head 'else' compound_statement // ; // // if_head: 'if' '(' true_false_expression ')' compound_statement ; // // identifier: T_IDENT ; static struct ASTnode *print_statement(void) { struct ASTnode *tree; int reg; // Match a 'print' as the first token match(T_PRINT, "print"); // Parse the following expression tree = binexpr(0); // Make an print AST tree tree = mkastunary(A_PRINT, tree, 0); // Match the following semicolon // and return the AST semi(); return (tree); } static struct ASTnode *assignment_statement(void) { struct ASTnode *left, *right, *tree; int id; // Ensure we have an identifier ident(); // Check it's been defined then make a leaf node for it if ((id = findglob(Text)) == -1) { fatals("Undeclared variable", Text); } right = mkastleaf(A_LVIDENT, id); // Ensure we have an equals sign match(T_ASSIGN, "="); // Parse the following expression left = binexpr(0); // Make an assignment AST tree tree = mkastnode(A_ASSIGN, left, NULL, right, 0); // Match the following semicolon // and return the AST semi(); return (tree); } // Parse an IF statement including // any optional ELSE clause // and return its AST struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Ensure // the tree's operation is a comparison. 
condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) fatal("Bad comparison operator"); rparen(); // Get the AST for the compound statement trueAST = compound_statement(); // If we have an 'else', skip it // and get the AST for the compound statement if (Token.token == T_ELSE) { scan(&Token); falseAST = compound_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, condAST, trueAST, falseAST, 0)); } // Parse a compound statement // and return its AST struct ASTnode *compound_statement(void) { struct ASTnode *left = NULL; struct ASTnode *tree; // Require a left curly bracket lbrace(); while (1) { switch (Token.token) { case T_PRINT: tree = print_statement(); break; case T_INT: var_declaration(); tree = NULL; // No AST generated here break; case T_IDENT: tree = assignment_statement(); break; case T_IF: tree = if_statement(); break; case T_RBRACE: // When we hit a right curly bracket, // skip past it and return the AST rbrace(); return (left); default: fatald("Syntax error, token", Token.token); } // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, left, NULL, tree, 0); } } } ================================================ FILE: 08_If_Statements/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 static int Globs = 0; // Position of next free global symbol slot // Determine if the symbol s is in the global symbol table. // Return its slot position or -1 if not found. int findglob(char *s) { int i; for (i = 0; i < Globs; i++) { if (*s == *Gsym[i].name && !strcmp(s, Gsym[i].name)) return (i); } return (-1); } // Get the position of a new global symbol slot, or die // if we've run out of positions. 
static int newglob(void) { int p; if ((p = Globs++) >= NSYMBOLS) fatal("Too many global symbols"); return (p); } // Add a global symbol to the symbol table. // Return the slot number in the symbol table int addglob(char *name) { int y; // If this is already in the symbol table, return the existing slot if ((y = findglob(name)) != -1) return (y); // Otherwise get a new slot, fill it in and // return the slot number y = newglob(); Gsym[y].name = strdup(name); return (y); } ================================================ FILE: 08_If_Statements/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->left = left; n->mid = mid; n->right = right; n->v.intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int intvalue) { return (mkastnode(op, NULL, NULL, NULL, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, struct ASTnode *left, int intvalue) { return (mkastnode(op, left, NULL, NULL, intvalue)); } ================================================ FILE: 09_While_Loops/Makefile ================================================ SRCS= cg.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c sym.c tree.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c sym.c tree.c comp1: $(SRCS) cc -o comp1 -g $(SRCS) compn: $(SRCN) cc -o compn -g $(SRCN) clean: rm -f comp1 compn *.o *.s out test: comp1 tests/runtests (cd tests; chmod +x runtests; ./runtests) test6: comp1 tests/input06 ./comp1 tests/input06 cc -o out out.s ./out 
testn: compn tests/runtestsn (cd tests; chmod +x runtestsn; ./runtestsn) test6n: compn tests/input06 ./compn tests/input06 nasm -f elf64 out.s cc -no-pie -o out out.o ./out ================================================ FILE: 09_While_Loops/Readme.md ================================================ # Part 9: While Loops In this part of the journey we are going to add WHILE loops to our language. In some sense, a WHILE loop is very much like an IF statement without an 'else' clause, except that we always jump back to the top of the loop. So, this: ``` while (condition is true) { statements; } ``` should get translated to: ``` Lstart: evaluate condition jump to Lend if condition false statements jump to Lstart Lend: ``` This means that we can borrow the scanning, parsing and code generation structures that we used with IF statements and make some small changes to also deal with WHILE statements. Let's see how we make this happen. ## New Tokens We need a new token, T_WHILE, for the new 'while' keyword. The changes to `defs.h` and `scan.c` are obvious so I'll omit them here. ## Parsing the While Syntax The BNF grammar for the WHILE loop is: ``` // while_statement: 'while' '(' true_false_expression ')' compound_statement ; ``` and we need a function in `stmt.c` to parse this. Here it is; note the simplicity of this compared to the parsing of IF statements: ```c // Parse a WHILE statement // and return its AST struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Ensure // the tree's operation is a comparison. 
  condAST = binexpr(0);
  if (condAST->op < A_EQ || condAST->op > A_GE)
    fatal("Bad comparison operator");
  rparen();

  // Get the AST for the compound statement
  bodyAST = compound_statement();

  // Build and return the AST for this statement
  return (mkastnode(A_WHILE, condAST, NULL, bodyAST, 0));
}
```

We need a new AST node type, A_WHILE, which has been added to `defs.h`.
This node has a left child sub-tree to evaluate the condition, and a
right child sub-tree for the compound statement which is the body of
the WHILE loop.

## Generic Code Generation

We need to create a start and end label, evaluate the condition and
insert appropriate jumps to exit the loop and to return to the top of
the loop. Again, this is much simpler than the code to generate
IF statements. In `gen.c`:

```c
// Generate the code for a WHILE statement
static int genWHILE(struct ASTnode *n) {
  int Lstart, Lend;

  // Generate the start and end labels
  // and output the start label
  Lstart = label();
  Lend = label();
  cglabel(Lstart);

  // Generate the condition code followed
  // by a jump to the end label.
  // We cheat by sending the Lend label as a register.
  genAST(n->left, Lend, n->op);
  genfreeregs();

  // Generate the compound statement for the body
  genAST(n->right, NOREG, n->op);
  genfreeregs();

  // Finally output the jump back to the condition,
  // and the end label
  cgjump(Lstart);
  cglabel(Lend);
  return (NOREG);
}
```

One thing I had to do was recognise that the parent AST node of the
comparison operators could now be A_WHILE, so in `genAST()` the code
for the comparison operators looks like:

```c
    case A_EQ:
    case A_NE:
    case A_LT:
    case A_GT:
    case A_LE:
    case A_GE:
      // If the parent AST node is an A_IF or A_WHILE, generate
      // a compare followed by a jump. Otherwise, compare registers
      // and set one to 1 or 0 based on the comparison.
      if (parentASTop == A_IF || parentASTop == A_WHILE)
        return (cgcompare_and_jump(n->op, leftreg, rightreg, reg));
      else
        return (cgcompare_and_set(n->op, leftreg, rightreg));
```

And that, altogether, is all we need to implement WHILE loops!

## Testing the New Language Additions

I've moved all of the input files into a `tests/` directory. If you
now do `make test`, it will go into this directory, compile each
input and compare the output against known-good output:

```
cc -o comp1 -g cg.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c sym.c tree.c
(cd tests; chmod +x runtests; ./runtests)
input01: OK
input02: OK
input03: OK
input04: OK
input05: OK
input06: OK
```

You can also do a `make test6`. This compiles the `tests/input06` file:

```c
{
  int i;
  i=1;
  while (i <= 10) {
    print i;
    i= i + 1;
  }
}
```

This will print out the numbers from 1 to 10:

```
cc -o comp1 -g cg.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c sym.c tree.c
./comp1 tests/input06
cc -o out out.s
./out
1
2
3
4
5
6
7
8
9
10
```

And here is the assembly output from the compilation:

```
	.comm	i,8,8
	movq	$1, %r8
	movq	%r8, i(%rip)		# i= 1
L1:
	movq	i(%rip), %r8
	movq	$10, %r9
	cmpq	%r9, %r8		# Is i <= 10?
	jg	L2			# Greater than, jump to L2
	movq	i(%rip), %r8
	movq	%r8, %rdi		# Print out i
	call	printint
	movq	i(%rip), %r8
	movq	$1, %r9
	addq	%r8, %r9		# Add 1 to i
	movq	%r9, i(%rip)
	jmp	L1			# and loop back
L2:
```

## Conclusion and What's Next

The WHILE loop was easy to add, once we had already done the IF
statement as they share a lot of similarities.

I think we also now have a
[Turing-complete](https://en.wikipedia.org/wiki/Turing_completeness)
language:

 + an infinite amount of storage, i.e. an infinite number of variables
 + the ability to make decisions based on stored values, i.e. IF statements
 + the ability to change directions, i.e. WHILE loops

So we can stop now, our job is done! No, of course not. We are still
working towards getting the compiler to compile itself.
In the next part of our compiler writing journey, we will add FOR loops to the language. [Next step](../10_For_Loops/Readme.md) ================================================ FILE: 09_While_Loops/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names // We need a list of byte registers, too static int freereg[4]; static char *reglist[4] = { "%r8", "%r9", "%r10", "%r11" }; static char *breglist[4] = { "%r8b", "%r9b", "%r10b", "%r11b" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n" ".LC0:\n" "\t.string\t\"%d\\n\"\n" "printint:\n" "\tpushq\t%rbp\n" "\tmovq\t%rsp, %rbp\n" "\tsubq\t$16, %rsp\n" "\tmovl\t%edi, -4(%rbp)\n" "\tmovl\t-4(%rbp), %eax\n" "\tmovl\t%eax, %esi\n" "\tleaq .LC0(%rip), %rdi\n" "\tmovl $0, %eax\n" "\tcall printf@PLT\n" "\tnop\n" "\tleave\n" "\tret\n" "\n" "\t.globl\tmain\n" "\t.type\tmain, @function\n" "main:\n" "\tpushq\t%rbp\n" "\tmovq %rsp, %rbp\n", Outfile); } // Print out the assembly postamble void cgpostamble() { fputs("\tmovl $0, %eax\n" "\tpopq %rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. 
// Return the number of the register int cgloadint(int value) { // Get a new register int r = alloc_register(); // Print out the code to initialise it fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]); return (r); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(char *identifier) { // Get a new register int r = alloc_register(); // Print out the code to initialise it fprintf(Outfile, "\tmovq\t%s(\%%rip), %s\n", identifier, reglist[r]); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); free_register(r2); return (r1); } // Call printint() with the given register void cgprintint(int r) { fprintf(Outfile, "\tmovq\t%s, %%rdi\n", reglist[r]); fprintf(Outfile, "\tcall\tprintint\n"); free_register(r); } // Store a register's value into a variable int cgstorglob(int r, char *identifier) { fprintf(Outfile, "\tmovq\t%s, %s(\%%rip)\n", reglist[r], identifier); return (r); } // Generate a global symbol void cgglobsym(char *sym) { fprintf(Outfile, "\t.comm\t%s,8,8\n", sym); } // List of comparison 
instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } ================================================ FILE: 09_While_Loops/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names static int freereg[4]; static char *reglist[4] = { "r8", "r9", "r10", "r11" }; static char *breglist[4] = { "r8b", "r9b", "r10b", "r11b" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. 
static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\tglobal\tmain\n" "\textern\tprintf\n" "\tsection\t.text\n" "LC0:\tdb\t\"%d\",10,0\n" "printint:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n" "\tsub\trsp, 16\n" "\tmov\t[rbp-4], edi\n" "\tmov\teax, [rbp-4]\n" "\tmov\tesi, eax\n" "\tlea rdi, [rel LC0]\n" "\tmov eax, 0\n" "\tcall printf\n" "\tnop\n" "\tleave\n" "\tret\n" "\n" "main:\n" "\tpush\trbp\n" "\tmov rbp, rsp\n", Outfile); } // Print out the assembly postamble void cgpostamble() { fputs("\tmov eax, 0\n" "\tpop rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register int cgloadint(int value) { // Get a new register int r = alloc_register(); // Print out the code to initialise it fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Load a value from a variable into a register. 
// Return the number of the register int cgloadglob(char *identifier) { // Get a new register int r = alloc_register(); // Print out the code to initialise it fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], identifier); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } // Call printint() with the given register void cgprintint(int r) { fprintf(Outfile, "\tmov\trdi, %s\n", reglist[r]); fprintf(Outfile, "\tcall\tprintint\n"); free_register(r); } // Store a register's value into a variable int cgstorglob(int r, char *identifier) { fprintf(Outfile, "\tmov\t[%s], %s\n", identifier, reglist[r]); return (r); } // Generate a global symbol void cgglobsym(char *sym) { fprintf(Outfile, "\tcommon\t%s 8:8\n", sym); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } ================================================ FILE: 09_While_Loops/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Putback; // Character put back by scanner extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ struct token Token; // Last token scanned extern_ char Text[TEXTLEN + 1]; // Last identifier scanned extern_ struct symtable Gsym[NSYMBOLS]; // Global symbol table ================================================ FILE: 09_While_Loops/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 // declaration: 'int' identifier ';' ; // // Parse the 
// declaration of a variable.
// Consumes 'int' identifier ';', records the identifier in the
// global symbol table and emits its storage via genglobsym().
void var_declaration(void) {

  // Ensure we have an 'int' token followed by an identifier
  // and a semicolon. Text now has the identifier's name.
  // Add it as a known identifier
  match(T_INT, "int");
  ident();
  addglob(Text);
  genglobsym(Text);
  semi();
}


================================================
FILE: 09_While_Loops/decl.h
================================================

// Function prototypes for all compiler files
// Copyright (c) 2019 Warren Toomey, GPL3

// scan.c
int scan(struct token *t);

// tree.c
struct ASTnode *mkastnode(int op, struct ASTnode *left,
			  struct ASTnode *mid,
			  struct ASTnode *right, int intvalue);
struct ASTnode *mkastleaf(int op, int intvalue);
struct ASTnode *mkastunary(int op, struct ASTnode *left, int intvalue);

// gen.c
int genAST(struct ASTnode *n, int reg, int parentASTop);
void genpreamble();
void genpostamble();
void genfreeregs();
void genprintint(int reg);
void genglobsym(char *s);

// cg.c
// (cgn.c in this directory defines the same functions)
void freeall_registers(void);
void cgpreamble();
void cgpostamble();
int cgloadint(int value);
int cgloadglob(char *identifier);
int cgadd(int r1, int r2);
int cgsub(int r1, int r2);
int cgmul(int r1, int r2);
int cgdiv(int r1, int r2);
void cgprintint(int r);
int cgstorglob(int r, char *identifier);
void cgglobsym(char *sym);
int cgcompare_and_set(int ASTop, int r1, int r2);
int cgcompare_and_jump(int ASTop, int r1, int r2, int label);
void cglabel(int l);
void cgjump(int l);

// expr.c
struct ASTnode *binexpr(int ptp);

// stmt.c
struct ASTnode *compound_statement(void);

// misc.c
void match(int t, char *what);
void semi(void);
void lbrace(void);
void rbrace(void);
void lparen(void);
void rparen(void);
void ident(void);
void fatal(char *s);
void fatals(char *s1, char *s2);
void fatald(char *s, int d);
void fatalc(char *s, int c);

// sym.c
int findglob(char *s);
int addglob(char *name);

// decl.c
void var_declaration(void);


================================================
FILE: 09_While_Loops/defs.h
================================================ #include #include #include #include // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 #define TEXTLEN 512 // Length of symbols in input #define NSYMBOLS 1024 // Number of symbol table entries // Token types enum { T_EOF, T_PLUS, T_MINUS, T_STAR, T_SLASH, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, T_INTLIT, T_SEMI, T_ASSIGN, T_IDENT, T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, // Keywords T_PRINT, T_INT, T_IF, T_ELSE, T_WHILE }; // Token structure struct token { int token; // Token type, from the enum list above int intvalue; // For T_INTLIT, the integer value }; // AST node types. The first few line up // with the related tokens enum { A_ADD = 1, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_INTLIT, A_IDENT, A_LVIDENT, A_ASSIGN, A_PRINT, A_GLUE, A_IF, A_WHILE }; // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; union { int intvalue; // For A_INTLIT, the integer value int id; // For A_IDENT, the symbol slot number } v; }; #define NOREG -1 // Use NOREG when the AST generation // functions have no register to return // Symbol table structure struct symtable { char *name; // Name of a symbol }; ================================================ FILE: 09_While_Loops/expr.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of expressions // Copyright (c) 2019 Warren Toomey, GPL3 // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; int id; switch (Token.token) { case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. 
n = mkastleaf(A_INTLIT, Token.intvalue); break; case T_IDENT: // Check that this identifier exists id = findglob(Text); if (id == -1) fatals("Unknown variable", Text); // Make a leaf AST node for it n = mkastleaf(A_IDENT, id); break; default: fatald("Syntax error, token", Token.token); } // Scan in the next token and return the leaf node scan(&Token); return (n); } // Convert a binary operator token into an AST operation. // We rely on a 1:1 mapping from token to AST operation static int arithop(int tokentype) { if (tokentype > T_EOF && tokentype < T_INTLIT) return (tokentype); fatald("Syntax error, token", tokentype); } // Operator precedence for each token. Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 10, // T_EOF, T_PLUS, T_MINUS 20, 20, // T_STAR, T_SLASH 30, 30, // T_EQ, T_NE 40, 40, 40, 40 // T_LT, T_GT, T_LE, T_GE }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec = OpPrec[tokentype]; if (prec == 0) fatald("Syntax error, token", tokentype); return (prec); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; int tokentype; // Get the primary tree on the left. // Fetch the next token at the same time. left = primary(); // If we hit a semicolon or ')', return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN) return (left); // While the precedence of this token is // more than that of the previous token precedence while (op_precedence(tokentype) > ptp) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. 
left = mkastnode(arithop(tokentype), left, NULL, right, 0); // Update the details of the current token. // If we hit a semicolon or ')', return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN) return (left); } // Return the tree we have when the precedence // is the same or lower return (left); } ================================================ FILE: 09_While_Loops/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number static int label(void) { static int id = 1; return (id++); } // Generate the code for an IF statement // and an optional ELSE clause static int genIF(struct ASTnode *n) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. // When there is no ELSE clause, Lfalse _is_ // the ending label! Lfalse = label(); if (n->right) Lend = label(); // Generate the condition code followed // by a jump to the false label. // We cheat by sending the Lfalse label as a register. genAST(n->left, Lfalse, n->op); genfreeregs(); // Generate the true compound statement genAST(n->mid, NOREG, n->op); genfreeregs(); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOREG, n->op); genfreeregs(); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement // and an optional ELSE clause static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = label(); Lend = label(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. 
// We cheat by sending the Lfalse label as a register. genAST(n->left, Lend, n->op); genfreeregs(); // Generate the compound statement for the body genAST(n->right, NOREG, n->op); genfreeregs(); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Given an AST, the register (if any) that holds // the previous rvalue, and the AST op of the parent, // generate assembly code recursively. // Return the register id with the tree's final value int genAST(struct ASTnode *n, int reg, int parentASTop) { int leftreg, rightreg; // We now have specific AST node handling at the top switch (n->op) { case A_IF: return (genIF(n)); case A_WHILE: return (genWHILE(n)); case A_GLUE: // Do each child statement, and free the // registers after each child genAST(n->left, NOREG, n->op); genfreeregs(); genAST(n->right, NOREG, n->op); genfreeregs(); return (NOREG); } // General AST node handling below // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, NOREG, n->op); if (n->right) rightreg = genAST(n->right, leftreg, n->op); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdiv(leftreg, rightreg)); case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, compare registers // and set one to 1 or 0 based on the comparison. 
if (parentASTop == A_IF || parentASTop == A_WHILE) return (cgcompare_and_jump(n->op, leftreg, rightreg, reg)); else return (cgcompare_and_set(n->op, leftreg, rightreg)); case A_INTLIT: return (cgloadint(n->v.intvalue)); case A_IDENT: return (cgloadglob(Gsym[n->v.id].name)); case A_LVIDENT: return (cgstorglob(reg, Gsym[n->v.id].name)); case A_ASSIGN: // The work has already been done, return the result return (rightreg); case A_PRINT: // Print the left-child's value // and return no register genprintint(leftreg); genfreeregs(); return (NOREG); default: fatald("Unknown AST operator", n->op); } } void genpreamble() { cgpreamble(); } void genpostamble() { cgpostamble(); } void genfreeregs() { freeall_registers(); } void genprintint(int reg) { cgprintint(reg); } void genglobsym(char *s) { cgglobsym(s); } ================================================ FILE: 09_While_Loops/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Initialise global variables static void init() { Line = 1; Putback = '\n'; } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s infile\n", prog); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. 
void main(int argc, char *argv[]) { struct ASTnode *tree; if (argc != 2) usage(argv[0]); init(); // Open up the input file if ((Infile = fopen(argv[1], "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", argv[1], strerror(errno)); exit(1); } // Create the output file if ((Outfile = fopen("out.s", "w")) == NULL) { fprintf(stderr, "Unable to create out.s: %s\n", strerror(errno)); exit(1); } scan(&Token); // Get the first token from the input genpreamble(); // Output the preamble tree = compound_statement(); // Parse the compound statement in the input genAST(tree, NOREG, 0); // Generate the assembly code for it genpostamble(); // Output the postamble fclose(Outfile); // Close the output file and exit exit(0); } ================================================ FILE: 09_While_Loops/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current token is t, // and fetch the next token. 
Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d\n", s, Line); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d\n", s1, s2, Line); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d\n", s, d, Line); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d\n", s, c, Line); exit(1); } ================================================ FILE: 09_While_Loops/scan.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { char *p; p = strchr(s, c); return (p ? p - s : -1); } // Get the next character from the input file. static int next(void) { int c; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return (c); } c = fgetc(Infile); // Read from input file if ('\n' == c) Line++; // Increment line count return (c); } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. 
static int skip(void) { int c; c = next(); while (' ' == c || '\t' == c || '\n' == c || '\r' == c || '\f' == c) { c = next(); } return (c); } // Scan and return an integer literal // value from the input file. static int scanint(int c) { int k, val = 0; // Convert each character into an int value while ((k = chrpos("0123456789", c)) >= 0) { val = val * 10 + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan an identifier from the input file and // store it in buf[]. Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { fatal("Identifier too long"); } else if (i < lim - 1) { buf[i++] = c; } c = next(); } // We hit a non-valid character, put it back. // NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. static int keyword(char *s) { switch (*s) { case 'e': if (!strcmp(s, "else")) return (T_ELSE); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'p': if (!strcmp(s, "print")) return (T_PRINT); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; } return (0); } // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. 
int scan(struct token *t) { int c, tokentype; // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': t->token = T_PLUS; break; case '-': t->token = T_MINUS; break; case '*': t->token = T_STAR; break; case '/': t->token = T_SLASH; break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { fatalc("Unrecognised character", c); } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else { putback(c); t->token = T_LT; } break; case '>': if ((c = next()) == '=') { t->token = T_GE; } else { putback(c); t->token = T_GT; } break; default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if (tokentype = keyword(Text)) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token return (1); } ================================================ FILE: 09_While_Loops/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // compound_statement: // empty, i.e. 
no statement // | statement // | statement statements // ; // // statement: print_statement // | declaration // | assignment_statement // | if_statement // | while_statement // ; // print_statement: 'print' expression ';' ; // static struct ASTnode *print_statement(void) { struct ASTnode *tree; int reg; // Match a 'print' as the first token match(T_PRINT, "print"); // Parse the following expression tree = binexpr(0); // Make an print AST tree tree = mkastunary(A_PRINT, tree, 0); // Match the following semicolon // and return the AST semi(); return (tree); } // assignment_statement: identifier '=' expression ';' ; // static struct ASTnode *assignment_statement(void) { struct ASTnode *left, *right, *tree; int id; // Ensure we have an identifier ident(); // Check it's been defined then make a leaf node for it if ((id = findglob(Text)) == -1) { fatals("Undeclared variable", Text); } right = mkastleaf(A_LVIDENT, id); // Ensure we have an equals sign match(T_ASSIGN, "="); // Parse the following expression left = binexpr(0); // Make an assignment AST tree tree = mkastnode(A_ASSIGN, left, NULL, right, 0); // Match the following semicolon // and return the AST semi(); return (tree); } // if_statement: if_head // | if_head 'else' compound_statement // ; // // if_head: 'if' '(' true_false_expression ')' compound_statement ; // // Parse an IF statement including // any optional ELSE clause // and return its AST struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Ensure // the tree's operation is a comparison. 
condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) fatal("Bad comparison operator"); rparen(); // Get the AST for the compound statement trueAST = compound_statement(); // If we have an 'else', skip it // and get the AST for the compound statement if (Token.token == T_ELSE) { scan(&Token); falseAST = compound_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, condAST, trueAST, falseAST, 0)); } // while_statement: 'while' '(' true_false_expression ')' compound_statement ; // // Parse a WHILE statement // and return its AST struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Ensure // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) fatal("Bad comparison operator"); rparen(); // Get the AST for the compound statement bodyAST = compound_statement(); // Build and return the AST for this statement return (mkastnode(A_WHILE, condAST, NULL, bodyAST, 0)); } // Parse a compound statement // and return its AST struct ASTnode *compound_statement(void) { struct ASTnode *left = NULL; struct ASTnode *tree; // Require a left curly bracket lbrace(); while (1) { switch (Token.token) { case T_PRINT: tree = print_statement(); break; case T_INT: var_declaration(); tree = NULL; // No AST generated here break; case T_IDENT: tree = assignment_statement(); break; case T_IF: tree = if_statement(); break; case T_WHILE: tree = while_statement(); break; case T_RBRACE: // When we hit a right curly bracket, // skip past it and return the AST rbrace(); return (left); default: fatald("Syntax error, token", Token.token); } // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, left, NULL, tree, 0); } } } 
================================================ FILE: 09_While_Loops/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 static int Globs = 0; // Position of next free global symbol slot // Determine if the symbol s is in the global symbol table. // Return its slot position or -1 if not found. int findglob(char *s) { int i; for (i = 0; i < Globs; i++) { if (*s == *Gsym[i].name && !strcmp(s, Gsym[i].name)) return (i); } return (-1); } // Get the position of a new global symbol slot, or die // if we've run out of positions. static int newglob(void) { int p; if ((p = Globs++) >= NSYMBOLS) fatal("Too many global symbols"); return (p); } // Add a global symbol to the symbol table. // Return the slot number in the symbol table int addglob(char *name) { int y; // If this is already in the symbol table, return the existing slot if ((y = findglob(name)) != -1) return (y); // Otherwise get a new slot, fill it in and // return the slot number y = newglob(); Gsym[y].name = strdup(name); return (y); } ================================================ FILE: 09_While_Loops/tests/input01 ================================================ { print 12 * 3; print 18 - 2 * 4; print 1 + 2 + 9 - 5/2 + 3*5; } ================================================ FILE: 09_While_Loops/tests/input02 ================================================ { int fred; int jim; fred= 5; jim= 12; print fred + jim; } ================================================ FILE: 09_While_Loops/tests/input03 ================================================ { int x; x= 1; print x; x= x + 1; print x; x= x + 1; print x; x= x + 1; print x; x= x + 1; print x; } ================================================ FILE: 09_While_Loops/tests/input04 ================================================ { int x; x= 7 < 9; print x; x= 7 <= 9; print x; x= 7 != 9; print x; x= 7 == 7; print x; x= 7 >= 7; print x; x= 
7 <= 7; print x;
  x= 9 > 7; print x;
  x= 9 >= 7; print x;
  x= 9 != 7; print x;
}

================================================
FILE: 09_While_Loops/tests/input05
================================================
{
  int i; int j;
  i=6; j=12;
  if (i < j) {
    print i;
  } else {
    print j;
  }
}

================================================
FILE: 09_While_Loops/tests/input06
================================================
{ int i;
  i=1;
  while (i <= 10) {
    print i;
    i= i + 1;
  }
}

================================================
FILE: 09_While_Loops/tests/mktests
================================================
#!/bin/sh
# Make the output files for each test
#
# For every input* file that has no out.<name> file yet: run ../comp1 on
# it, build the resulting out.s with cc, run the binary and keep what it
# prints as out.<name>. Existing out.* files are never regenerated.

# Nothing can be generated until the compiler itself has been built
if [ ! -f ../comp1 ]
then echo "Need to build ../comp1 first!"; exit 1
fi

for i in input*
do if [ ! -f "out.$i" ]
   then
     # The compiler's assembly output is picked up from out.s
     ../comp1 $i
     cc -o out out.s
     ./out > out.$i
     # Remove the intermediate files, keep only out.$i
     rm -f out out.s
   fi
done

================================================
FILE: 09_While_Loops/tests/mktestsn
================================================
#!/bin/sh
# Make the output files for each test
#
# Same job as mktests, but driven by the ../compn build of the compiler.

# Nothing can be generated until the compiler itself has been built
if [ ! -f ../compn ]
then echo "Need to build ../compn first!"; exit 1
fi

for i in input*
do if [ !
-f "out.$i" ] then ../compn $i nasm -f elf64 out.s cc -no-pie -o out out.o ./out > out.$i rm -f out out.s fi done ================================================ FILE: 09_While_Loops/tests/out.input01 ================================================ 36 10 25 ================================================ FILE: 09_While_Loops/tests/out.input02 ================================================ 17 ================================================ FILE: 09_While_Loops/tests/out.input03 ================================================ 1 2 3 4 5 ================================================ FILE: 09_While_Loops/tests/out.input04 ================================================ 1 1 1 1 1 1 1 1 1 ================================================ FILE: 09_While_Loops/tests/out.input05 ================================================ 6 ================================================ FILE: 09_While_Loops/tests/out.input06 ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 09_While_Loops/tests/runtests ================================================ #!/bin/sh # Run each test and compare # against known good output if [ ! -f ../comp1 ] then echo "Need to build ../comp1 first!"; exit 1 fi for i in input* do if [ ! -f "out.$i" ] then echo "Can't run test on $i, no output file!" else echo -n $i ../comp1 $i cc -o out out.s ./out > trial.$i cmp -s "out.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo else echo ": OK" fi rm -f out out.s "trial.$i" fi done ================================================ FILE: 09_While_Loops/tests/runtestsn ================================================ #!/bin/sh # Run each test and compare # against known good output if [ ! -f ../compn ] then echo "Need to build ../compn first!"; exit 1 fi for i in input* do if [ ! -f "out.$i" ] then echo "Can't run test on $i, no output file!" 
else echo -n $i ../compn $i nasm -f elf64 out.s cc -no-pie -o out out.o ./out > trial.$i cmp -s "out.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo else echo ": OK" fi rm -f out out.o out.s "trial.$i" fi done ================================================ FILE: 09_While_Loops/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->left = left; n->mid = mid; n->right = right; n->v.intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int intvalue) { return (mkastnode(op, NULL, NULL, NULL, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, struct ASTnode *left, int intvalue) { return (mkastnode(op, left, NULL, NULL, intvalue)); } ================================================ FILE: 10_For_Loops/Makefile ================================================ SRCS= cg.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c sym.c tree.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c sym.c tree.c comp1: $(SRCS) cc -o comp1 -g $(SRCS) compn: $(SRCN) cc -o compn -g $(SRCN) clean: rm -f comp1 compn *.o *.s out test: comp1 tests/runtests (cd tests; chmod +x runtests; ./runtests) test7: comp1 tests/input07 ./comp1 tests/input07 cc -o out out.s ./out testn: compn tests/runtestsn (cd tests; chmod +x runtestsn; ./runtestsn) test7n: compn tests/input07 ./compn tests/input07 nasm -f elf64 out.s cc -no-pie -o out out.o ./out ================================================ FILE: 
10_For_Loops/Readme.md ================================================ # Part 10: FOR Loops In this part of our compiler writing journey I'm going to add FOR loops. There is a wrinkle to work out in terms of implementation which I want to explain before I get to the discussion on how it got solved. ## The FOR Loop Syntax I assume that you are familiar with the syntax of FOR loops. One example is ```c for (i=0; i < MAX; i++) printf("%d\n", i); ``` I'm going to use this BNF syntax for our language: ``` for_statement: 'for' '(' preop_statement ';' true_false_expression ';' postop_statement ')' compound_statement ; preop_statement: statement ; (for now) postop_statement: statement ; (for now) ``` The `preop_statement` is run before the loop starts. Later on, we will have to limit exactly what sort of actions can be performed here (e.g. no IF statements). Then the `true_false_expression` is evaluated. If true the loop executes the `compound_statement`. Once this is done, the `postop_statement` is performed and the code loops back to redo the `true_false_expression`. ## The Wrinkle The wrinkle is that the `postop_statement` is parsed before the `compound_statement`, but we have to generate the code for the `postop_statement` *after* the code for the `compound_statement`. There are several ways to solve this problem. When I wrote a previous compiler, I chose to put the `compound_statement` assembly code in a temporary buffer, and "play back" the buffer once I'd generated the code for the `postop_statement`. In the SubC compiler, Nils makes clever use of labels and jumps to labels to "thread" the code's execution to enforce the correct sequence. But we build an AST tree here. Let's use it to get the generated assembly code in the correct sequence. ## What Sort of AST Tree? You might have noticed that a FOR loop has four structural components: 1. The `preop_statement` 2. The `true_false_expression` 3. The `postop_statement` 4. 
The `compound_statement` I don't really want to change the AST node structure yet again to have four children. But we can visualise a FOR loop as an augmented WHILE loop: ``` preop_statement; while ( true_false_expression ) { compound_statement; postop_statement; } ``` Can we build an AST tree with our existing node types to reflect this structure? Yes: ``` A_GLUE / \ preop A_WHILE / \ decision A_GLUE / \ compound postop ``` Manually traverse this tree top-down left-to-right and convince yourself that we will generate the assembly code in the right order. We had to glue the `compound_statement` and the `postop_statement` together so that, when the WHILE loop exits, it will skip over both the `compound_statement` and the `postop_statement`. This also means that we need a new T_FOR token but we won't need a new AST node type. So the only compiler change will be scanning and parsing. ## Tokens and Scanning There is a new keyword 'for' and an associated token, T_FOR. No big changes here. ## Parsing Statements We do need to make a structural change to the parser. For the FOR grammar, I only want a single statement as the `preop_statement` and the `postop_statement`. Right now, we have a `compound_statement()` function that simply loops until it hits a right curly bracket '}'. We need to separate this out so `compound_statement()` calls `single_statement()` to get one statement. But there's another wrinkle. Take the existing parsing of assignment statements in `assignment_statement()`. The parser must find a semicolon at the end of the statement. That's good for compound statements but it won't work for FOR loops. I would have to write something like: ```c for (i=1 ; i < 10 ; i= i + 1; ) ``` because each assignment statement *must* end with a semicolon. What we need is for the single statement parser *not* to scan in the semicolon, but to leave that up to the compound statement parser. And we scan in semicolons for some statements (e.g. 
between assignment statements) and not for other statements (e.g. not between successive IF statements). With all of that explained, let's now look at the new single and compound statement parsing code: ```c // Parse a single statement // and return its AST static struct ASTnode *single_statement(void) { switch (Token.token) { case T_PRINT: return (print_statement()); case T_INT: var_declaration(); return (NULL); // No AST generated here case T_IDENT: return (assignment_statement()); case T_IF: return (if_statement()); case T_WHILE: return (while_statement()); case T_FOR: return (for_statement()); default: fatald("Syntax error, token", Token.token); } } // Parse a compound statement // and return its AST struct ASTnode *compound_statement(void) { struct ASTnode *left = NULL; struct ASTnode *tree; // Require a left curly bracket lbrace(); while (1) { // Parse a single statement tree = single_statement(); // Some statements must be followed by a semicolon if (tree != NULL && (tree->op == A_PRINT || tree->op == A_ASSIGN)) semi(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, left, NULL, tree, 0); } // When we hit a right curly bracket, // skip past it and return the AST if (Token.token == T_RBRACE) { rbrace(); return (left); } } } ``` I've also removed the calls to `semi()` in `print_statement()` and `assignment_statement()`. ## Parsing FOR Loops Given the BNF syntax for FOR loops above, this is straightforward. And given the shape of the AST tree we want, the code to build this tree is also straightforward. 
Here's the code: ```c // Parse a FOR statement // and return its AST static struct ASTnode *for_statement(void) { struct ASTnode *condAST, *bodyAST; struct ASTnode *preopAST, *postopAST; struct ASTnode *tree; // Ensure we have 'for' '(' match(T_FOR, "for"); lparen(); // Get the pre_op statement and the ';' preopAST= single_statement(); semi(); // Get the condition and the ';' condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) fatal("Bad comparison operator"); semi(); // Get the post_op statement and the ')' postopAST= single_statement(); rparen(); // Get the compound statement which is the body bodyAST = compound_statement(); // For now, all four sub-trees have to be non-NULL. // Later on, we'll change the semantics for when some are missing // Glue the compound statement and the postop tree tree= mkastnode(A_GLUE, bodyAST, NULL, postopAST, 0); // Make a WHILE loop with the condition and this new body tree= mkastnode(A_WHILE, condAST, NULL, tree, 0); // And glue the preop tree to the A_WHILE tree return(mkastnode(A_GLUE, preopAST, NULL, tree, 0)); } ``` ## Generating the Assembly Code Well, all we have done is synthesized a tree which has a WHILE loop in it with some sub-trees glued together, so there are no changes to the generation side of the compiler. ## Trying It Out The `tests/input07` file has this program in it: ```c { int i; for (i= 1; i <= 10; i= i + 1) { print i; } } ``` When we do `make test7`, we get this output: ``` cc -o comp1 -g cg.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c sym.c tree.c ./comp1 tests/input07 cc -o out out.s ./out 1 2 3 4 5 6 7 8 9 10 ``` and here is the relevant assembly output: ``` .comm i,8,8 movq $1, %r8 movq %r8, i(%rip) # i = 1 L1: movq i(%rip), %r8 movq $10, %r9 cmpq %r9, %r8 # Is i <= 10?
jg L2 # i >= 10, jump to L2 movq i(%rip), %r8 movq %r8, %rdi call printint # print i movq i(%rip), %r8 movq $1, %r9 addq %r8, %r9 # i = i + 1 movq %r9, i(%rip) jmp L1 # Jump to top of loop L2: ``` ## Conclusion and What's Next We now have a reasonable number of control structures in our language: IF statements, WHILE loops and FOR loops. The question is, what to tackle next? There are so many things we could look at: + types + local versus global things + functions + arrays and pointers + structures and unions + auto, static and friends I've decided to look at functions. So, in the next part of our compiler writing journey, we will begin the first of several stages to add functions to our language. [Next step](../11_Functions_pt1/Readme.md) ================================================ FILE: 10_For_Loops/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names // We need a list of byte registers, too static int freereg[4]; static char *reglist[4] = { "%r8", "%r9", "%r10", "%r11" }; static char *breglist[4] = { "%r8b", "%r9b", "%r10b", "%r11b" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n" ".LC0:\n" "\t.string\t\"%d\\n\"\n" "printint:\n" "\tpushq\t%rbp\n" "\tmovq\t%rsp, %rbp\n" "\tsubq\t$16, %rsp\n" "\tmovl\t%edi, -4(%rbp)\n" "\tmovl\t-4(%rbp), %eax\n" "\tmovl\t%eax, %esi\n" "\tleaq .LC0(%rip), %rdi\n" "\tmovl $0, %eax\n" "\tcall printf@PLT\n" "\tnop\n" "\tleave\n" "\tret\n" "\n" "\t.globl\tmain\n" "\t.type\tmain, @function\n" "main:\n" "\tpushq\t%rbp\n" "\tmovq %rsp, %rbp\n", Outfile); } // Print out the assembly postamble void cgpostamble() { fputs("\tmovl $0, %eax\n" "\tpopq %rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register int cgloadint(int value) { // Get a new register int r = alloc_register(); // Print out the code to initialise it fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]); return (r); } // Load a value from a variable into a register. 
// Return the number of the register int cgloadglob(char *identifier) { // Get a new register int r = alloc_register(); // Print out the code to initialise it fprintf(Outfile, "\tmovq\t%s(\%%rip), %s\n", identifier, reglist[r]); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); free_register(r2); return (r1); } // Call printint() with the given register void cgprintint(int r) { fprintf(Outfile, "\tmovq\t%s, %%rdi\n", reglist[r]); fprintf(Outfile, "\tcall\tprintint\n"); free_register(r); } // Store a register's value into a variable int cgstorglob(int r, char *identifier) { fprintf(Outfile, "\tmovq\t%s, %s(\%%rip)\n", reglist[r], identifier); return (r); } // Generate a global symbol void cgglobsym(char *sym) { fprintf(Outfile, "\t.comm\t%s,8,8\n", sym); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } ================================================ FILE: 10_For_Loops/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names static int freereg[4]; static char *reglist[4] = { "r8", "r9", "r10", "r11" }; static char *breglist[4] = { "r8b", "r9b", "r10b", "r11b" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); } // Return a register to the list of available registers. 
// Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\tglobal\tmain\n" "\textern\tprintf\n" "\tsection\t.text\n" "LC0:\tdb\t\"%d\",10,0\n" "printint:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n" "\tsub\trsp, 16\n" "\tmov\t[rbp-4], edi\n" "\tmov\teax, [rbp-4]\n" "\tmov\tesi, eax\n" "\tlea rdi, [rel LC0]\n" "\tmov eax, 0\n" "\tcall printf\n" "\tnop\n" "\tleave\n" "\tret\n" "\n" "main:\n" "\tpush\trbp\n" "\tmov rbp, rsp\n", Outfile); } // Print out the assembly postamble void cgpostamble() { fputs("\tmov eax, 0\n" "\tpop rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register int cgloadint(int value) { // Get a new register int r = alloc_register(); // Print out the code to initialise it fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Load a value from a variable into a register. 
// Return the number of the register int cgloadglob(char *identifier) { // Get a new register int r = alloc_register(); // Print out the code to initialise it fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], identifier); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } // Call printint() with the given register void cgprintint(int r) { fprintf(Outfile, "\tmov\trdi, %s\n", reglist[r]); fprintf(Outfile, "\tcall\tprintint\n"); free_register(r); } // Store a register's value into a variable int cgstorglob(int r, char *identifier) { fprintf(Outfile, "\tmov\t[%s], %s\n", identifier, reglist[r]); return (r); } // Generate a global symbol void cgglobsym(char *sym) { fprintf(Outfile, "\tcommon\t%s 8:8\n", sym); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } ================================================ FILE: 10_For_Loops/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Putback; // Character put back by scanner extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ struct token Token; // Last token scanned extern_ char Text[TEXTLEN + 1]; // Last identifier scanned extern_ struct symtable Gsym[NSYMBOLS]; // Global symbol table ================================================ FILE: 10_For_Loops/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 // declaration: 'int' identifier ';' ; // // Parse the 
// declaration of a variable
void var_declaration(void) {

  // Ensure we have an 'int' token followed by an identifier
  // and a semicolon. Text now has the identifier's name.
  // Add it as a known identifier
  match(T_INT, "int");
  ident();
  // Record the name in the global symbol table, then emit the
  // storage for it in the generated assembly
  addglob(Text);
  genglobsym(Text);
  // The declaration consumes its own terminating semicolon
  semi();
}

================================================
FILE: 10_For_Loops/decl.h
================================================
// Function prototypes for all compiler files
// Copyright (c) 2019 Warren Toomey, GPL3

// scan.c
int scan(struct token *t);

// tree.c
struct ASTnode *mkastnode(int op, struct ASTnode *left,
			  struct ASTnode *mid,
			  struct ASTnode *right, int intvalue);
struct ASTnode *mkastleaf(int op, int intvalue);
struct ASTnode *mkastunary(int op, struct ASTnode *left, int intvalue);

// gen.c
int genAST(struct ASTnode *n, int reg, int parentASTop);
void genpreamble();
void genpostamble();
void genfreeregs();
void genprintint(int reg);
void genglobsym(char *s);

// cg.c
void freeall_registers(void);
void cgpreamble();
void cgpostamble();
int cgloadint(int value);
int cgloadglob(char *identifier);
int cgadd(int r1, int r2);
int cgsub(int r1, int r2);
int cgmul(int r1, int r2);
int cgdiv(int r1, int r2);
void cgprintint(int r);
int cgstorglob(int r, char *identifier);
void cgglobsym(char *sym);
int cgcompare_and_set(int ASTop, int r1, int r2);
int cgcompare_and_jump(int ASTop, int r1, int r2, int label);
void cglabel(int l);
void cgjump(int l);

// expr.c
struct ASTnode *binexpr(int ptp);

// stmt.c
struct ASTnode *compound_statement(void);

// misc.c
void match(int t, char *what);
void semi(void);
void lbrace(void);
void rbrace(void);
void lparen(void);
void rparen(void);
void ident(void);
void fatal(char *s);
void fatals(char *s1, char *s2);
void fatald(char *s, int d);
void fatalc(char *s, int c);

// sym.c
int findglob(char *s);
int addglob(char *name);

// decl.c
void var_declaration(void);

================================================
FILE: 10_For_Loops/defs.h
================================================ #include #include #include #include // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 #define TEXTLEN 512 // Length of symbols in input #define NSYMBOLS 1024 // Number of symbol table entries // Token types enum { T_EOF, T_PLUS, T_MINUS, T_STAR, T_SLASH, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, T_INTLIT, T_SEMI, T_ASSIGN, T_IDENT, T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, // Keywords T_PRINT, T_INT, T_IF, T_ELSE, T_WHILE, T_FOR }; // Token structure struct token { int token; // Token type, from the enum list above int intvalue; // For T_INTLIT, the integer value }; // AST node types. The first few line up // with the related tokens enum { A_ADD = 1, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_INTLIT, A_IDENT, A_LVIDENT, A_ASSIGN, A_PRINT, A_GLUE, A_IF, A_WHILE }; // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; union { int intvalue; // For A_INTLIT, the integer value int id; // For A_IDENT, the symbol slot number } v; }; #define NOREG -1 // Use NOREG when the AST generation // functions have no register to return // Symbol table structure struct symtable { char *name; // Name of a symbol }; ================================================ FILE: 10_For_Loops/expr.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of expressions // Copyright (c) 2019 Warren Toomey, GPL3 // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; int id; switch (Token.token) { case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. 
n = mkastleaf(A_INTLIT, Token.intvalue); break; case T_IDENT: // Check that this identifier exists id = findglob(Text); if (id == -1) fatals("Unknown variable", Text); // Make a leaf AST node for it n = mkastleaf(A_IDENT, id); break; default: fatald("Syntax error, token", Token.token); } // Scan in the next token and return the leaf node scan(&Token); return (n); } // Convert a binary operator token into an AST operation. // We rely on a 1:1 mapping from token to AST operation static int arithop(int tokentype) { if (tokentype > T_EOF && tokentype < T_INTLIT) return (tokentype); fatald("Syntax error, token", tokentype); } // Operator precedence for each token. Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 10, // T_EOF, T_PLUS, T_MINUS 20, 20, // T_STAR, T_SLASH 30, 30, // T_EQ, T_NE 40, 40, 40, 40 // T_LT, T_GT, T_LE, T_GE }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec = OpPrec[tokentype]; if (prec == 0) fatald("Syntax error, token", tokentype); return (prec); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; int tokentype; // Get the primary tree on the left. // Fetch the next token at the same time. left = primary(); // If we hit a semicolon or ')', return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN) return (left); // While the precedence of this token is // more than that of the previous token precedence while (op_precedence(tokentype) > ptp) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. 
left = mkastnode(arithop(tokentype), left, NULL, right, 0);

    // Update the details of the current token.
    // If we hit a semicolon or ')', return just the left node
    tokentype = Token.token;
    if (tokentype == T_SEMI || tokentype == T_RPAREN)
      return (left);
  }

  // Return the tree we have when the precedence
  // is the same or lower
  return (left);
}

================================================
FILE: 10_For_Loops/gen.c
================================================
#include "defs.h"
#include "data.h"
#include "decl.h"

// Generic code generator
// Copyright (c) 2019 Warren Toomey, GPL3

// Generate and return a new label number.
// Numbers start at 1 and increase by one on every call.
static int label(void) {
  static int id = 1;
  return (id++);
}

// Generate the code for an IF statement
// and an optional ELSE clause.
// n->left is the condition, n->mid the true branch and
// n->right the optional false branch. Always returns NOREG.
static int genIF(struct ASTnode *n) {
  int Lfalse, Lend;

  // Generate two labels: one for the
  // false compound statement, and one
  // for the end of the overall IF statement.
  // When there is no ELSE clause, Lfalse _is_
  // the ending label!
  Lfalse = label();
  // Lend is only assigned, and only used below, when an ELSE exists
  if (n->right)
    Lend = label();

  // Generate the condition code followed
  // by a jump to the false label.
  // We cheat by sending the Lfalse label as a register.
  genAST(n->left, Lfalse, n->op);
  genfreeregs();

  // Generate the true compound statement
  genAST(n->mid, NOREG, n->op);
  genfreeregs();

  // If there is an optional ELSE clause,
  // generate the jump to skip to the end
  if (n->right)
    cgjump(Lend);

  // Now the false label
  cglabel(Lfalse);

  // Optional ELSE clause: generate the
  // false compound statement and the
  // end label
  if (n->right) {
    genAST(n->right, NOREG, n->op);
    genfreeregs();
    cglabel(Lend);
  }

  return (NOREG);
}

// Generate the code for a WHILE statement.
// n->left is the condition and n->right the loop body.
static int genWHILE(struct ASTnode *n) {
  int Lstart, Lend;

  // Generate the start and end labels
  // and output the start label
  Lstart = label();
  Lend = label();
  cglabel(Lstart);

  // Generate the condition code followed
  // by a jump to the end label.
// We cheat by sending the Lfalse label as a register. genAST(n->left, Lend, n->op); genfreeregs(); // Generate the compound statement for the body genAST(n->right, NOREG, n->op); genfreeregs(); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Given an AST, the register (if any) that holds // the previous rvalue, and the AST op of the parent, // generate assembly code recursively. // Return the register id with the tree's final value int genAST(struct ASTnode *n, int reg, int parentASTop) { int leftreg, rightreg; // We now have specific AST node handling at the top switch (n->op) { case A_IF: return (genIF(n)); case A_WHILE: return (genWHILE(n)); case A_GLUE: // Do each child statement, and free the // registers after each child genAST(n->left, NOREG, n->op); genfreeregs(); genAST(n->right, NOREG, n->op); genfreeregs(); return (NOREG); } // General AST node handling below // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, NOREG, n->op); if (n->right) rightreg = genAST(n->right, leftreg, n->op); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdiv(leftreg, rightreg)); case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, compare registers // and set one to 1 or 0 based on the comparison. 
if (parentASTop == A_IF || parentASTop == A_WHILE) return (cgcompare_and_jump(n->op, leftreg, rightreg, reg)); else return (cgcompare_and_set(n->op, leftreg, rightreg)); case A_INTLIT: return (cgloadint(n->v.intvalue)); case A_IDENT: return (cgloadglob(Gsym[n->v.id].name)); case A_LVIDENT: return (cgstorglob(reg, Gsym[n->v.id].name)); case A_ASSIGN: // The work has already been done, return the result return (rightreg); case A_PRINT: // Print the left-child's value // and return no register genprintint(leftreg); genfreeregs(); return (NOREG); default: fatald("Unknown AST operator", n->op); } } void genpreamble() { cgpreamble(); } void genpostamble() { cgpostamble(); } void genfreeregs() { freeall_registers(); } void genprintint(int reg) { cgprintint(reg); } void genglobsym(char *s) { cgglobsym(s); } ================================================ FILE: 10_For_Loops/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Initialise global variables static void init() { Line = 1; Putback = '\n'; } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s infile\n", prog); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. 
void main(int argc, char *argv[]) { struct ASTnode *tree; if (argc != 2) usage(argv[0]); init(); // Open up the input file if ((Infile = fopen(argv[1], "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", argv[1], strerror(errno)); exit(1); } // Create the output file if ((Outfile = fopen("out.s", "w")) == NULL) { fprintf(stderr, "Unable to create out.s: %s\n", strerror(errno)); exit(1); } scan(&Token); // Get the first token from the input genpreamble(); // Output the preamble tree = compound_statement(); // Parse the compound statement in the input genAST(tree, NOREG, 0); // Generate the assembly code for it genpostamble(); // Output the postamble fclose(Outfile); // Close the output file and exit exit(0); } ================================================ FILE: 10_For_Loops/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current token is t, // and fetch the next token. 
Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d\n", s, Line); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d\n", s1, s2, Line); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d\n", s, d, Line); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d\n", s, c, Line); exit(1); } ================================================ FILE: 10_For_Loops/scan.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { char *p; p = strchr(s, c); return (p ? p - s : -1); } // Get the next character from the input file. static int next(void) { int c; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return (c); } c = fgetc(Infile); // Read from input file if ('\n' == c) Line++; // Increment line count return (c); } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. 
static int skip(void) { int c; c = next(); while (' ' == c || '\t' == c || '\n' == c || '\r' == c || '\f' == c) { c = next(); } return (c); } // Scan and return an integer literal // value from the input file. static int scanint(int c) { int k, val = 0; // Convert each character into an int value while ((k = chrpos("0123456789", c)) >= 0) { val = val * 10 + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan an identifier from the input file and // store it in buf[]. Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { fatal("Identifier too long"); } else if (i < lim - 1) { buf[i++] = c; } c = next(); } // We hit a non-valid character, put it back. // NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. static int keyword(char *s) { switch (*s) { case 'e': if (!strcmp(s, "else")) return (T_ELSE); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'p': if (!strcmp(s, "print")) return (T_PRINT); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; } return (0); } // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. 
int scan(struct token *t) { int c, tokentype; // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': t->token = T_PLUS; break; case '-': t->token = T_MINUS; break; case '*': t->token = T_STAR; break; case '/': t->token = T_SLASH; break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { fatalc("Unrecognised character", c); } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else { putback(c); t->token = T_LT; } break; case '>': if ((c = next()) == '=') { t->token = T_GE; } else { putback(c); t->token = T_GT; } break; default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if (tokentype = keyword(Text)) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token return (1); } ================================================ FILE: 10_For_Loops/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // Prototypes static struct ASTnode *single_statement(void); // compound_statement: // empty, i.e. 
no statement // | statement // | statement statements // ; // // statement: print_statement // | declaration // | assignment_statement // | if_statement // | while_statement // ; // print_statement: 'print' expression ';' ; // static struct ASTnode *print_statement(void) { struct ASTnode *tree; int reg; // Match a 'print' as the first token match(T_PRINT, "print"); // Parse the following expression tree = binexpr(0); // Make an print AST tree tree = mkastunary(A_PRINT, tree, 0); // Return the AST return (tree); } // assignment_statement: identifier '=' expression ';' ; // static struct ASTnode *assignment_statement(void) { struct ASTnode *left, *right, *tree; int id; // Ensure we have an identifier ident(); // Check it's been defined then make a leaf node for it if ((id = findglob(Text)) == -1) { fatals("Undeclared variable", Text); } right = mkastleaf(A_LVIDENT, id); // Ensure we have an equals sign match(T_ASSIGN, "="); // Parse the following expression left = binexpr(0); // Make an assignment AST tree tree = mkastnode(A_ASSIGN, left, NULL, right, 0); // Return the AST return (tree); } // if_statement: if_head // | if_head 'else' compound_statement // ; // // if_head: 'if' '(' true_false_expression ')' compound_statement ; // // Parse an IF statement including // any optional ELSE clause // and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Ensure // the tree's operation is a comparison. 
condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) fatal("Bad comparison operator"); rparen(); // Get the AST for the compound statement trueAST = compound_statement(); // If we have an 'else', skip it // and get the AST for the compound statement if (Token.token == T_ELSE) { scan(&Token); falseAST = compound_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, condAST, trueAST, falseAST, 0)); } // while_statement: 'while' '(' true_false_expression ')' compound_statement ; // // Parse a WHILE statement // and return its AST static struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Ensure // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) fatal("Bad comparison operator"); rparen(); // Get the AST for the compound statement bodyAST = compound_statement(); // Build and return the AST for this statement return (mkastnode(A_WHILE, condAST, NULL, bodyAST, 0)); } // for_statement: 'for' '(' preop_statement ';' // true_false_expression ';' // postop_statement ')' compound_statement ; // // preop_statement: statement (for now) // postop_statement: statement (for now) // // Parse a FOR statement // and return its AST static struct ASTnode *for_statement(void) { struct ASTnode *condAST, *bodyAST; struct ASTnode *preopAST, *postopAST; struct ASTnode *tree; // Ensure we have 'for' '(' match(T_FOR, "for"); lparen(); // Get the pre_op statement and the ';' preopAST = single_statement(); semi(); // Get the condition and the ';' condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) fatal("Bad comparison operator"); semi(); // Get the post_op statement and the ')' postopAST = single_statement(); rparen(); // Get the compound statement which is the body bodyAST = compound_statement(); // For now, all four sub-trees 
have to be non-NULL. // Later on, we'll change the semantics for when some are missing // Glue the compound statement and the postop tree tree = mkastnode(A_GLUE, bodyAST, NULL, postopAST, 0); // Make a WHILE loop with the condition and this new body tree = mkastnode(A_WHILE, condAST, NULL, tree, 0); // And glue the preop tree to the A_WHILE tree return (mkastnode(A_GLUE, preopAST, NULL, tree, 0)); } // Parse a single statement // and return its AST static struct ASTnode *single_statement(void) { switch (Token.token) { case T_PRINT: return (print_statement()); case T_INT: var_declaration(); return (NULL); // No AST generated here case T_IDENT: return (assignment_statement()); case T_IF: return (if_statement()); case T_WHILE: return (while_statement()); case T_FOR: return (for_statement()); default: fatald("Syntax error, token", Token.token); } } // Parse a compound statement // and return its AST struct ASTnode *compound_statement(void) { struct ASTnode *left = NULL; struct ASTnode *tree; // Require a left curly bracket lbrace(); while (1) { // Parse a single statement tree = single_statement(); // Some statements must be followed by a semicolon if (tree != NULL && (tree->op == A_PRINT || tree->op == A_ASSIGN)) semi(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, left, NULL, tree, 0); } // When we hit a right curly bracket, // skip past it and return the AST if (Token.token == T_RBRACE) { rbrace(); return (left); } } } ================================================ FILE: 10_For_Loops/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 static int Globs = 0; // Position of next free global symbol slot // Determine if the symbol s is in the global symbol table. 
// Return its slot position or -1 if not found. int findglob(char *s) { int i; for (i = 0; i < Globs; i++) { if (*s == *Gsym[i].name && !strcmp(s, Gsym[i].name)) return (i); } return (-1); } // Get the position of a new global symbol slot, or die // if we've run out of positions. static int newglob(void) { int p; if ((p = Globs++) >= NSYMBOLS) fatal("Too many global symbols"); return (p); } // Add a global symbol to the symbol table. // Return the slot number in the symbol table int addglob(char *name) { int y; // If this is already in the symbol table, return the existing slot if ((y = findglob(name)) != -1) return (y); // Otherwise get a new slot, fill it in and // return the slot number y = newglob(); Gsym[y].name = strdup(name); return (y); } ================================================ FILE: 10_For_Loops/tests/input01 ================================================ { print 12 * 3; print 18 - 2 * 4; print 1 + 2 + 9 - 5/2 + 3*5; } ================================================ FILE: 10_For_Loops/tests/input02 ================================================ { int fred; int jim; fred= 5; jim= 12; print fred + jim; } ================================================ FILE: 10_For_Loops/tests/input03 ================================================ { int x; x= 1; print x; x= x + 1; print x; x= x + 1; print x; x= x + 1; print x; x= x + 1; print x; } ================================================ FILE: 10_For_Loops/tests/input04 ================================================ { int x; x= 7 < 9; print x; x= 7 <= 9; print x; x= 7 != 9; print x; x= 7 == 7; print x; x= 7 >= 7; print x; x= 7 <= 7; print x; x= 9 > 7; print x; x= 9 >= 7; print x; x= 9 != 7; print x; } ================================================ FILE: 10_For_Loops/tests/input05 ================================================ { int i; int j; i=6; j=12; if (i < j) { print i; } else { print j; } } ================================================ FILE: 10_For_Loops/tests/input06 
================================================
{ int i;
  i=1;
  while (i <= 10) {
    print i;
    i= i + 1;
  }
}

================================================
FILE: 10_For_Loops/tests/input07
================================================
{ int i;
  for (i= 1; i <= 10; i= i + 1) {
    print i;
  }
}

================================================
FILE: 10_For_Loops/tests/mktests
================================================
#!/bin/sh
# Make the output files for each test
# (uses the comp1 compiler and the system assembler via cc)

# The compiler must have been built in the parent directory
if [ ! -f ../comp1 ]
then echo "Need to build ../comp1 first!"; exit 1
fi

# Only inputs that have no out.<input> file yet are processed
for i in input*
do if [ ! -f "out.$i" ]
   then
     # Compile to out.s, link, run, and record what was printed
     ../comp1 $i
     cc -o out out.s
     ./out > out.$i
     rm -f out out.s
   fi
done

================================================
FILE: 10_For_Loops/tests/mktestsn
================================================
#!/bin/sh
# Make the output files for each test
# (uses the compn compiler and assembles with nasm)

# The compiler must have been built in the parent directory
if [ ! -f ../compn ]
then echo "Need to build ../compn first!"; exit 1
fi

# Only inputs that have no out.<input> file yet are processed
for i in input*
do if [ ! -f "out.$i" ]
   then
     # Compile to out.s, assemble, link, run, and record what was printed
     ../compn $i
     nasm -f elf64 out.s
     cc -no-pie -o out out.o
     ./out > out.$i
     rm -f out out.s
   fi
done

================================================
FILE: 10_For_Loops/tests/out.input01
================================================
36
10
25

================================================
FILE: 10_For_Loops/tests/out.input02
================================================
17

================================================
FILE: 10_For_Loops/tests/out.input03
================================================
1
2
3
4
5

================================================
FILE: 10_For_Loops/tests/out.input04
================================================
1
1
1
1
1
1
1
1
1

================================================
FILE: 10_For_Loops/tests/out.input05
================================================
6

================================================
FILE: 10_For_Loops/tests/out.input06
================================================
1
2
3
4
5
6
7
8
9
10

================================================
FILE: 10_For_Loops/tests/out.input07
================================================
1
2
3
4
5
6
7
8
9
10

================================================
FILE: 10_For_Loops/tests/runtests
================================================
#!/bin/sh
# Run each test and compare
# against known good output
# (uses the comp1 compiler and the system assembler via cc)

# The compiler must have been built in the parent directory
if [ ! -f ../comp1 ]
then echo "Need to build ../comp1 first!"; exit 1
fi

for i in input*
do if [ ! -f "out.$i" ]
   then echo "Can't run test on $i, no output file!"
   else
     # Print the test name without a newline; the verdict follows it
     echo -n $i
     # Compile, link and run, capturing what the program prints
     ../comp1 $i
     cc -o out out.s
     ./out > trial.$i
     # cmp exits with status 1 when the two files differ
     cmp -s "out.$i" "trial.$i"
     if [ "$?" -eq "1" ]
     then echo ": failed"
       diff -c "out.$i" "trial.$i"
       echo
     else echo ": OK"
     fi
     # Remove everything this test created
     rm -f out out.s "trial.$i"
   fi
done

================================================
FILE: 10_For_Loops/tests/runtestsn
================================================
#!/bin/sh
# Run each test and compare
# against known good output
# (uses the compn compiler and assembles with nasm)

# The compiler must have been built in the parent directory
if [ ! -f ../compn ]
then echo "Need to build ../compn first!"; exit 1
fi

for i in input*
do if [ ! -f "out.$i" ]
   then echo "Can't run test on $i, no output file!"
   else
     # Print the test name without a newline; the verdict follows it
     echo -n $i
     # Compile, assemble, link and run, capturing what the program prints
     ../compn $i
     nasm -f elf64 out.s
     cc -no-pie -o out out.o
     ./out > trial.$i
     # cmp exits with status 1 when the two files differ
     cmp -s "out.$i" "trial.$i"
     if [ "$?" -eq "1" ]
     then echo ": failed"
       diff -c "out.$i" "trial.$i"
       echo
     else echo ": OK"
     fi
     # Remove everything this test created
     rm -f out out.o out.s "trial.$i"
   fi
done

================================================
FILE: 10_For_Loops/tree.c
================================================
#include "defs.h"
#include "data.h"
#include "decl.h"

// AST tree functions
// Copyright (c) 2019 Warren Toomey, GPL3

// Build and return a generic AST node
//
// op:       the node's AST operation
// left, mid, right: the node's children, any of which may be NULL
// intvalue: stored in the node's v.intvalue field
//
// Exits through fatal() if the node cannot be allocated,
// so the returned pointer is never NULL.
struct ASTnode *mkastnode(int op, struct ASTnode *left,
			  struct ASTnode *mid,
			  struct ASTnode *right, int intvalue) {
  struct ASTnode *n;

  // Malloc a new ASTnode
  n = (struct ASTnode *) malloc(sizeof(struct ASTnode));
  if (n == NULL)
    fatal("Unable to malloc in mkastnode()");

  // Copy in the field values and return it
  n->op = op;
  n->left = left;
  n->mid = mid;
  n->right = right;
  n->v.intvalue = intvalue;
  return (n);
}


// Make an AST leaf node
// (a node with no children)
struct ASTnode *mkastleaf(int op, int intvalue) {
  return (mkastnode(op, NULL, NULL, NULL, intvalue));
}

// Make a unary AST node: only one child
// (stored as the left child)
struct ASTnode *mkastunary(int op, struct ASTnode *left, int intvalue) {
  return (mkastnode(op, left, NULL, NULL, intvalue));
}

================================================
FILE: 11_Functions_pt1/Makefile
================================================
# Sources for the two compilers: they differ only in the
# code generator, cg.c for comp1 and cgn.c for compn
SRCS= cg.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c sym.c tree.c
SRCN= cgn.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c sym.c tree.c

comp1: $(SRCS)
	cc -o comp1 -g $(SRCS)

compn: $(SRCN)
	cc -o compn -g $(SRCN)

clean:
	rm -f comp1 compn *.o *.s out

# Run the whole test suite with comp1
test: comp1 tests/runtests
	(cd tests; chmod +x runtests; ./runtests)

# Compile, link and run tests/input08 with comp1
test8: comp1 tests/input08
	./comp1 tests/input08
	cc -o out out.s
	./out

# Run the whole test suite with compn
testn: compn tests/runtestsn
	(cd tests; chmod +x runtestsn; ./runtestsn)

# Compile, assemble, link and run tests/input08 with compn
test8n: compn tests/input08
	./compn tests/input08
	nasm -f elf64 out.s
	cc -no-pie -o out out.o
	./out

================================================
FILE: 11_Functions_pt1/Readme.md
================================================
# Part 11: Functions, part 1

I want to start work
on implementing functions into our language, but I know this is going to involve a heck of a lot of steps. Some things that we will have to deal with along the way are: + Types of data: `char`, `int`, `long` etc. + The return type of each function + The number of arguments to each function + Variables local to a function versus global variables That is way too much to get done in this part of our journey. So what I'm going to do here is to get to the point where we can *declare* different functions. Only the `main()` function in our resulting executable will run, but we will have the ability to generate code for multiple functions. Hopefully soon, the language that our compiler recognises will be enough of a subset of C that our input will be recognisable by a "real" C compiler. But just not yet. ## The Simplistic Function Syntax This is definitely going to be a placeholder, so that we can parse something that looks like a function. Once this is done, we can add those other important things: types, return types, arguments etc. So, for now, I will add a function grammar that looks like this in BNF: ``` function_declaration: 'void' identifier '(' ')' compound_statement ; ``` All functions will be declared `void` and have no arguments. We also won't introduce the ability to call a function, so only the `main()` function will execute. We need a new keyword `void` and a new token T_VOID, which are both easy to add. ## Parsing the Simplistic Function Syntax The new function syntax is so simple that we can write a nice, small function to parse it (in `decl.c`): ```c // Parse the declaration of a simplistic function struct ASTnode *function_declaration(void) { struct ASTnode *tree; int nameslot; // Find the 'void', the identifier, and the '(' ')'.
// For now, do nothing with them match(T_VOID, "void"); ident(); nameslot= addglob(Text); lparen(); rparen(); // Get the AST tree for the compound statement tree= compound_statement(); // Return an A_FUNCTION node which has the function's nameslot // and the compound statement sub-tree return(mkastunary(A_FUNCTION, tree, nameslot)); } ``` This is going to do the syntax checking and AST building, but there is little to no semantic error checking here. What if a function gets redeclared? Well, we won't notice that yet. ## Modifications to `main()` With the above function, we can now rewrite some of the code in `main()` to parse multiple functions one after the other: ```c scan(&Token); // Get the first token from the input genpreamble(); // Output the preamble while (1) { // Parse a function and tree = function_declaration(); genAST(tree, NOREG, 0); // generate the assembly code for it if (Token.token == T_EOF) // Stop when we have reached EOF break; } ``` Notice that I've removed the `genpostamble()` function call. That's because its output was technically the postamble to the generated assembly for `main()`. We now need some code generation functions to generate the beginning of a function and the end of a function. ## Generic Code Generation for Functions Now that we have an A_FUNCTION AST node, we had better add some code in the generic code generator, `gen.c` to deal with it. Looking above, this is a *unary* AST node with a single child: ```c // Return an A_FUNCTION node which has the function's nameslot // and the compound statement sub-tree return(mkastunary(A_FUNCTION, tree, nameslot)); ``` The child has the sub-tree which holds the compound statement that is the body of the function. We need to generate the start of the function *before* we generate the code for the compound statement. 
So here's the code in `genAST()` to do this: ```c case A_FUNCTION: // Generate the function's preamble before the code cgfuncpreamble(Gsym[n->v.id].name); genAST(n->left, NOREG, n->op); cgfuncpostamble(); return (NOREG); ``` ## x86-64 Code Generation Now we are at the point where we have to generate the code to set the stack and frame pointer for each function, and also to undo this at the end of the function and return to the function's caller. We already have this code in `cgpreamble()` and `cgpostamble()`, but `cgpreamble()` also has the assembly code for the `printint()` function. Therefore, it's a matter of separating out these snippets of assembly code into new functions in `cg.c`: ```c // Print out the assembly preamble void cgpreamble() { freeall_registers(); // Only prints out the code for printint() } // Print out a function preamble void cgfuncpreamble(char *name) { fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, @function\n" "%s:\n" "\tpushq\t%%rbp\n" "\tmovq\t%%rsp, %%rbp\n", name, name, name); } // Print out a function postamble void cgfuncpostamble() { fputs("\tmovl $0, %eax\n" "\tpopq %rbp\n" "\tret\n", Outfile); } ``` ## Testing The Function Generation Functionality We have a new test program, `tests/input08` which is starting to look like a C program (apart from the `print` statement): ```c void main() { int i; for (i= 1; i <= 10; i= i + 1) { print i; } } ``` To test this, `make test8` which does: ``` cc -o comp1 -g cg.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c sym.c tree.c ./comp1 tests/input08 cc -o out out.s ./out 1 2 3 4 5 6 7 8 9 10 ``` I'm not going to look at the assembly output as it's identical to the code generated for the FOR loop test in the last part. However, I've added `void main()` into all the previous test input files, as the language requires a function declaration before the compound statement code. The test program `tests/input09` has two functions declared in it. 
The compiler happily generates working assembly code for each function, but at present we can't run the code for the second function. ## Conclusion and What's Next We've made a good start at adding functions to our language. For now, it's a pretty simplistic function declaration only. In the next part of our compiler writing journey, we will begin the process to add types to our compiler. [Next step](../12_Types_pt1/Readme.md) ================================================ FILE: 11_Functions_pt1/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names // We need a list of byte registers, too static int freereg[4]; static char *reglist[4] = { "%r8", "%r9", "%r10", "%r11" }; static char *breglist[4] = { "%r8b", "%r9b", "%r10b", "%r11b" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n" ".LC0:\n" "\t.string\t\"%d\\n\"\n" "printint:\n" "\tpushq\t%rbp\n" "\tmovq\t%rsp, %rbp\n" "\tsubq\t$16, %rsp\n" "\tmovl\t%edi, -4(%rbp)\n" "\tmovl\t-4(%rbp), %eax\n" "\tmovl\t%eax, %esi\n" "\tleaq .LC0(%rip), %rdi\n" "\tmovl $0, %eax\n" "\tcall printf@PLT\n" "\tnop\n" "\tleave\n" "\tret\n" "\n", Outfile); } // Print out a function preamble void cgfuncpreamble(char *name) { fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, @function\n" "%s:\n" "\tpushq\t%%rbp\n" "\tmovq\t%%rsp, %%rbp\n", name, name, name); } // Print out a function postamble void cgfuncpostamble() { fputs("\tmovl $0, %eax\n" "\tpopq %rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register int cgloadint(int value) { // Get a new register int r = alloc_register(); // Print out the code to initialise it fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]); return (r); } // Load a value from a variable into a register. 
// Return the number of the register int cgloadglob(char *identifier) { // Get a new register int r = alloc_register(); // Print out the code to initialise it fprintf(Outfile, "\tmovq\t%s(\%%rip), %s\n", identifier, reglist[r]); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); free_register(r2); return (r1); } // Call printint() with the given register void cgprintint(int r) { fprintf(Outfile, "\tmovq\t%s, %%rdi\n", reglist[r]); fprintf(Outfile, "\tcall\tprintint\n"); free_register(r); } // Store a register's value into a variable int cgstorglob(int r, char *identifier) { fprintf(Outfile, "\tmovq\t%s, %s(\%%rip)\n", reglist[r], identifier); return (r); } // Generate a global symbol void cgglobsym(char *sym) { fprintf(Outfile, "\t.comm\t%s,8,8\n", sym); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } ================================================ FILE: 11_Functions_pt1/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names static int freereg[4]; static char *reglist[4] = { "r8", "r9", "r10", "r11" }; static char *breglist[4] = { "r8b", "r9b", "r10b", "r11b" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); } // Return a register to the list of available registers. 
// Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\tglobal\tmain\n" "\textern\tprintf\n" "\tsection\t.text\n" "LC0:\tdb\t\"%d\",10,0\n" "printint:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n" "\tsub\trsp, 16\n" "\tmov\t[rbp-4], edi\n" "\tmov\teax, [rbp-4]\n" "\tmov\tesi, eax\n" "\tlea rdi, [rel LC0]\n" "\tmov eax, 0\n" "\tcall printf\n" "\tnop\n" "\tleave\n" "\tret\n" "\n", Outfile); } // Print out a function preamble void cgfuncpreamble(char *name) { fprintf(Outfile, "\tsection\t.text\n" "\tglobal\t%s\n" "%s:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n", name, name); } // Print out a function postamble void cgfuncpostamble() { fputs("\tmov eax, 0\n" "\tpop rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register int cgloadint(int value) { // Get a new register int r = alloc_register(); // Print out the code to initialise it fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Load a value from a variable into a register. 
// Return the number of the register int cgloadglob(char *identifier) { // Get a new register int r = alloc_register(); // Print out the code to initialise it fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], identifier); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } // Call printint() with the given register void cgprintint(int r) { fprintf(Outfile, "\tmov\trdi, %s\n", reglist[r]); fprintf(Outfile, "\tcall\tprintint\n"); free_register(r); } // Store a register's value into a variable int cgstorglob(int r, char *identifier) { fprintf(Outfile, "\tmov\t[%s], %s\n", identifier, reglist[r]); return (r); } // Generate a global symbol void cgglobsym(char *sym) { fprintf(Outfile, "\tcommon\t%s 8:8\n", sym); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } ================================================ FILE: 11_Functions_pt1/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Putback; // Character put back by scanner extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ struct token Token; // Last token scanned extern_ char Text[TEXTLEN + 1]; // Last identifier scanned extern_ struct symtable Gsym[NSYMBOLS]; // Global symbol table ================================================ FILE: 11_Functions_pt1/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 // variable_declaration: 'int' identifier ';' ; // // 
Parse the declaration of a variable void var_declaration(void) { // Ensure we have an 'int' token followed by an identifier // and a semicolon. Text now has the identifier's name. // Add it as a known identifier match(T_INT, "int"); ident(); addglob(Text); genglobsym(Text); semi(); } // For now we have a very simplistic function definition grammar // // function_declaration: 'void' identifier '(' ')' compound_statement ; // // Parse the declaration of a simplistic function struct ASTnode *function_declaration(void) { struct ASTnode *tree; int nameslot; // Find the 'void', the identifier, and the '(' ')'. // For now, do nothing with them match(T_VOID, "void"); ident(); nameslot= addglob(Text); lparen(); rparen(); // Get the AST tree for the compound statement tree= compound_statement(); // Return an A_FUNCTION node which has the function's nameslot // and the compound statement sub-tree return(mkastunary(A_FUNCTION, tree, nameslot)); } ================================================ FILE: 11_Functions_pt1/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, int intvalue); struct ASTnode *mkastleaf(int op, int intvalue); struct ASTnode *mkastunary(int op, struct ASTnode *left, int intvalue); // gen.c int genAST(struct ASTnode *n, int reg, int parentASTop); void genpreamble(); void genpostamble(); void genfreeregs(); void genprintint(int reg); void genglobsym(char *s); // cg.c void freeall_registers(void); void cgpreamble(); void cgfuncpreamble(char *name); void cgfuncpostamble(); int cgloadint(int value); int cgloadglob(char *identifier); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdiv(int r1, int r2); void cgprintint(int r); int cgstorglob(int r, char *identifier); void cgglobsym(char 
*sym); int cgcompare_and_set(int ASTop, int r1, int r2); int cgcompare_and_jump(int ASTop, int r1, int r2, int label); void cglabel(int l); void cgjump(int l); // expr.c struct ASTnode *binexpr(int ptp); // stmt.c struct ASTnode *compound_statement(void); // misc.c void match(int t, char *what); void semi(void); void lbrace(void); void rbrace(void); void lparen(void); void rparen(void); void ident(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c int findglob(char *s); int addglob(char *name); // decl.c void var_declaration(void); struct ASTnode *function_declaration(void); ================================================ FILE: 11_Functions_pt1/defs.h ================================================ #include #include #include #include // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 #define TEXTLEN 512 // Length of symbols in input #define NSYMBOLS 1024 // Number of symbol table entries // Token types enum { T_EOF, T_PLUS, T_MINUS, T_STAR, T_SLASH, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, T_INTLIT, T_SEMI, T_ASSIGN, T_IDENT, T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, // Keywords T_PRINT, T_INT, T_IF, T_ELSE, T_WHILE, T_FOR, T_VOID }; // Token structure struct token { int token; // Token type, from the enum list above int intvalue; // For T_INTLIT, the integer value }; // AST node types. 
The first few line up // with the related tokens enum { A_ADD = 1, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_INTLIT, A_IDENT, A_LVIDENT, A_ASSIGN, A_PRINT, A_GLUE, A_IF, A_WHILE, A_FUNCTION }; // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; union { int intvalue; // For A_INTLIT, the integer value int id; // For A_IDENT, the symbol slot number } v; // For A_FUNCTION, the symbol slot number }; #define NOREG -1 // Use NOREG when the AST generation // functions have no register to return // Symbol table structure struct symtable { char *name; // Name of a symbol }; ================================================ FILE: 11_Functions_pt1/expr.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of expressions // Copyright (c) 2019 Warren Toomey, GPL3 // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; int id; switch (Token.token) { case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. n = mkastleaf(A_INTLIT, Token.intvalue); break; case T_IDENT: // Check that this identifier exists id = findglob(Text); if (id == -1) fatals("Unknown variable", Text); // Make a leaf AST node for it n = mkastleaf(A_IDENT, id); break; default: fatald("Syntax error, token", Token.token); } // Scan in the next token and return the leaf node scan(&Token); return (n); } // Convert a binary operator token into an AST operation. // We rely on a 1:1 mapping from token to AST operation static int arithop(int tokentype) { if (tokentype > T_EOF && tokentype < T_INTLIT) return (tokentype); fatald("Syntax error, token", tokentype); } // Operator precedence for each token. 
Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 10, // T_EOF, T_PLUS, T_MINUS 20, 20, // T_STAR, T_SLASH 30, 30, // T_EQ, T_NE 40, 40, 40, 40 // T_LT, T_GT, T_LE, T_GE }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec = OpPrec[tokentype]; if (prec == 0) fatald("Syntax error, token", tokentype); return (prec); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; int tokentype; // Get the primary tree on the left. // Fetch the next token at the same time. left = primary(); // If we hit a semicolon or ')', return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN) return (left); // While the precedence of this token is // more than that of the previous token precedence while (op_precedence(tokentype) > ptp) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. left = mkastnode(arithop(tokentype), left, NULL, right, 0); // Update the details of the current token. 
// If we hit a semicolon or ')', return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN) return (left); } // Return the tree we have when the precedence // is the same or lower return (left); } ================================================ FILE: 11_Functions_pt1/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number static int label(void) { static int id = 1; return (id++); } // Generate the code for an IF statement // and an optional ELSE clause static int genIF(struct ASTnode *n) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. // When there is no ELSE clause, Lfalse _is_ // the ending label! Lfalse = label(); if (n->right) Lend = label(); // Generate the condition code followed // by a jump to the false label. // We cheat by sending the Lfalse label as a register. genAST(n->left, Lfalse, n->op); genfreeregs(); // Generate the true compound statement genAST(n->mid, NOREG, n->op); genfreeregs(); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOREG, n->op); genfreeregs(); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement // and an optional ELSE clause static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = label(); Lend = label(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. // We cheat by sending the Lfalse label as a register. 
genAST(n->left, Lend, n->op); genfreeregs(); // Generate the compound statement for the body genAST(n->right, NOREG, n->op); genfreeregs(); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Given an AST, the register (if any) that holds // the previous rvalue, and the AST op of the parent, // generate assembly code recursively. // Return the register id with the tree's final value int genAST(struct ASTnode *n, int reg, int parentASTop) { int leftreg, rightreg; // We now have specific AST node handling at the top switch (n->op) { case A_IF: return (genIF(n)); case A_WHILE: return (genWHILE(n)); case A_GLUE: // Do each child statement, and free the // registers after each child genAST(n->left, NOREG, n->op); genfreeregs(); genAST(n->right, NOREG, n->op); genfreeregs(); return (NOREG); case A_FUNCTION: // Generate the function's preamble before the code cgfuncpreamble(Gsym[n->v.id].name); genAST(n->left, NOREG, n->op); cgfuncpostamble(); return (NOREG); } // General AST node handling below // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, NOREG, n->op); if (n->right) rightreg = genAST(n->right, leftreg, n->op); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdiv(leftreg, rightreg)); case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, compare registers // and set one to 1 or 0 based on the comparison. 
if (parentASTop == A_IF || parentASTop == A_WHILE) return (cgcompare_and_jump(n->op, leftreg, rightreg, reg)); else return (cgcompare_and_set(n->op, leftreg, rightreg)); case A_INTLIT: return (cgloadint(n->v.intvalue)); case A_IDENT: return (cgloadglob(Gsym[n->v.id].name)); case A_LVIDENT: return (cgstorglob(reg, Gsym[n->v.id].name)); case A_ASSIGN: // The work has already been done, return the result return (rightreg); case A_PRINT: // Print the left-child's value // and return no register genprintint(leftreg); genfreeregs(); return (NOREG); default: fatald("Unknown AST operator", n->op); } } void genpreamble() { cgpreamble(); } void genfreeregs() { freeall_registers(); } void genprintint(int reg) { cgprintint(reg); } void genglobsym(char *s) { cgglobsym(s); } ================================================ FILE: 11_Functions_pt1/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Initialise global variables static void init() { Line = 1; Putback = '\n'; } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s infile\n", prog); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. 
void main(int argc, char *argv[]) { struct ASTnode *tree; if (argc != 2) usage(argv[0]); init(); // Open up the input file if ((Infile = fopen(argv[1], "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", argv[1], strerror(errno)); exit(1); } // Create the output file if ((Outfile = fopen("out.s", "w")) == NULL) { fprintf(stderr, "Unable to create out.s: %s\n", strerror(errno)); exit(1); } scan(&Token); // Get the first token from the input genpreamble(); // Output the preamble while (1) { // Parse a function and tree = function_declaration(); genAST(tree, NOREG, 0); // generate the assembly code for it if (Token.token == T_EOF) // Stop when we have reached EOF break; } fclose(Outfile); // Close the output file and exit exit(0); } ================================================ FILE: 11_Functions_pt1/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current token is t, // and fetch the next token. 
Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d\n", s, Line); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d\n", s1, s2, Line); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d\n", s, d, Line); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d\n", s, c, Line); exit(1); } ================================================ FILE: 11_Functions_pt1/scan.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { char *p; p = strchr(s, c); return (p ? p - s : -1); } // Get the next character from the input file. static int next(void) { int c; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return (c); } c = fgetc(Infile); // Read from input file if ('\n' == c) Line++; // Increment line count return (c); } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. 
static int skip(void) { int c; c = next(); while (' ' == c || '\t' == c || '\n' == c || '\r' == c || '\f' == c) { c = next(); } return (c); } // Scan and return an integer literal // value from the input file. static int scanint(int c) { int k, val = 0; // Convert each character into an int value while ((k = chrpos("0123456789", c)) >= 0) { val = val * 10 + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan an identifier from the input file and // store it in buf[]. Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { fatal("Identifier too long"); } else if (i < lim - 1) { buf[i++] = c; } c = next(); } // We hit a non-valid character, put it back. // NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. static int keyword(char *s) { switch (*s) { case 'e': if (!strcmp(s, "else")) return (T_ELSE); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'p': if (!strcmp(s, "print")) return (T_PRINT); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; case 'v': if (!strcmp(s, "void")) return (T_VOID); break; } return (0); } // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. 
int scan(struct token *t) { int c, tokentype; // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': t->token = T_PLUS; break; case '-': t->token = T_MINUS; break; case '*': t->token = T_STAR; break; case '/': t->token = T_SLASH; break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { fatalc("Unrecognised character", c); } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else { putback(c); t->token = T_LT; } break; case '>': if ((c = next()) == '=') { t->token = T_GE; } else { putback(c); t->token = T_GT; } break; default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if (tokentype = keyword(Text)) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token return (1); } ================================================ FILE: 11_Functions_pt1/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // Prototypes static struct ASTnode *single_statement(void); // compound_statement: // empty, i.e. 
no statement // | statement // | statement statements // ; // // statement: print_statement // | declaration // | assignment_statement // | if_statement // | while_statement // ; // print_statement: 'print' expression ';' ; // static struct ASTnode *print_statement(void) { struct ASTnode *tree; int reg; // Match a 'print' as the first token match(T_PRINT, "print"); // Parse the following expression tree = binexpr(0); // Make an print AST tree tree = mkastunary(A_PRINT, tree, 0); // Return the AST return (tree); } // assignment_statement: identifier '=' expression ';' ; // static struct ASTnode *assignment_statement(void) { struct ASTnode *left, *right, *tree; int id; // Ensure we have an identifier ident(); // Check it's been defined then make a leaf node for it if ((id = findglob(Text)) == -1) { fatals("Undeclared variable", Text); } right = mkastleaf(A_LVIDENT, id); // Ensure we have an equals sign match(T_ASSIGN, "="); // Parse the following expression left = binexpr(0); // Make an assignment AST tree tree = mkastnode(A_ASSIGN, left, NULL, right, 0); // Return the AST return (tree); } // if_statement: if_head // | if_head 'else' compound_statement // ; // // if_head: 'if' '(' true_false_expression ')' compound_statement ; // // Parse an IF statement including // any optional ELSE clause // and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Ensure // the tree's operation is a comparison. 
condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) fatal("Bad comparison operator"); rparen(); // Get the AST for the compound statement trueAST = compound_statement(); // If we have an 'else', skip it // and get the AST for the compound statement if (Token.token == T_ELSE) { scan(&Token); falseAST = compound_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, condAST, trueAST, falseAST, 0)); } // while_statement: 'while' '(' true_false_expression ')' compound_statement ; // // Parse a WHILE statement // and return its AST static struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Ensure // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) fatal("Bad comparison operator"); rparen(); // Get the AST for the compound statement bodyAST = compound_statement(); // Build and return the AST for this statement return (mkastnode(A_WHILE, condAST, NULL, bodyAST, 0)); } // for_statement: 'for' '(' preop_statement ';' // true_false_expression ';' // postop_statement ')' compound_statement ; // // preop_statement: statement (for now) // postop_statement: statement (for now) // // Parse a FOR statement // and return its AST static struct ASTnode *for_statement(void) { struct ASTnode *condAST, *bodyAST; struct ASTnode *preopAST, *postopAST; struct ASTnode *tree; // Ensure we have 'for' '(' match(T_FOR, "for"); lparen(); // Get the pre_op statement and the ';' preopAST = single_statement(); semi(); // Get the condition and the ';' condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) fatal("Bad comparison operator"); semi(); // Get the post_op statement and the ')' postopAST = single_statement(); rparen(); // Get the compound statement which is the body bodyAST = compound_statement(); // For now, all four sub-trees 
have to be non-NULL. // Later on, we'll change the semantics for when some are missing // Glue the compound statement and the postop tree tree = mkastnode(A_GLUE, bodyAST, NULL, postopAST, 0); // Make a WHILE loop with the condition and this new body tree = mkastnode(A_WHILE, condAST, NULL, tree, 0); // And glue the preop tree to the A_WHILE tree return (mkastnode(A_GLUE, preopAST, NULL, tree, 0)); } // Parse a single statement // and return its AST static struct ASTnode *single_statement(void) { switch (Token.token) { case T_PRINT: return (print_statement()); case T_INT: var_declaration(); return (NULL); // No AST generated here case T_IDENT: return (assignment_statement()); case T_IF: return (if_statement()); case T_WHILE: return (while_statement()); case T_FOR: return (for_statement()); default: fatald("Syntax error, token", Token.token); } } // Parse a compound statement // and return its AST struct ASTnode *compound_statement(void) { struct ASTnode *left = NULL; struct ASTnode *tree; // Require a left curly bracket lbrace(); while (1) { // Parse a single statement tree = single_statement(); // Some statements must be followed by a semicolon if (tree != NULL && (tree->op == A_PRINT || tree->op == A_ASSIGN)) semi(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, left, NULL, tree, 0); } // When we hit a right curly bracket, // skip past it and return the AST if (Token.token == T_RBRACE) { rbrace(); return (left); } } } ================================================ FILE: 11_Functions_pt1/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 static int Globs = 0; // Position of next free global symbol slot // Determine if the symbol s is in the global symbol table. 
// Return its slot position or -1 if not found. int findglob(char *s) { int i; for (i = 0; i < Globs; i++) { if (*s == *Gsym[i].name && !strcmp(s, Gsym[i].name)) return (i); } return (-1); } // Get the position of a new global symbol slot, or die // if we've run out of positions. static int newglob(void) { int p; if ((p = Globs++) >= NSYMBOLS) fatal("Too many global symbols"); return (p); } // Add a global symbol to the symbol table. // Return the slot number in the symbol table int addglob(char *name) { int y; // If this is already in the symbol table, return the existing slot if ((y = findglob(name)) != -1) return (y); // Otherwise get a new slot, fill it in and // return the slot number y = newglob(); Gsym[y].name = strdup(name); return (y); } ================================================ FILE: 11_Functions_pt1/tests/input01 ================================================ void main() { print 12 * 3; print 18 - 2 * 4; print 1 + 2 + 9 - 5/2 + 3*5; } ================================================ FILE: 11_Functions_pt1/tests/input02 ================================================ void main() { int fred; int jim; fred= 5; jim= 12; print fred + jim; } ================================================ FILE: 11_Functions_pt1/tests/input03 ================================================ void main() { int x; x= 1; print x; x= x + 1; print x; x= x + 1; print x; x= x + 1; print x; x= x + 1; print x; } ================================================ FILE: 11_Functions_pt1/tests/input04 ================================================ void main() { int x; x= 7 < 9; print x; x= 7 <= 9; print x; x= 7 != 9; print x; x= 7 == 7; print x; x= 7 >= 7; print x; x= 7 <= 7; print x; x= 9 > 7; print x; x= 9 >= 7; print x; x= 9 != 7; print x; } ================================================ FILE: 11_Functions_pt1/tests/input05 ================================================ void main() { int i; int j; i=6; j=12; if (i < j) { print i; } else { print j; } } 
================================================ FILE: 11_Functions_pt1/tests/input06 ================================================ void main() { int i; i=1; while (i <= 10) { print i; i= i + 1; } } ================================================ FILE: 11_Functions_pt1/tests/input07 ================================================ void main() { int i; for (i= 1; i <= 10; i= i + 1) { print i; } } ================================================ FILE: 11_Functions_pt1/tests/input08 ================================================ void main() { int i; for (i= 1; i <= 10; i= i + 1) { print i; } } ================================================ FILE: 11_Functions_pt1/tests/input09 ================================================ void main() { int i; for (i= 1; i <= 10; i= i + 1) { print i; } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { print 2 * b - a; } } ================================================ FILE: 11_Functions_pt1/tests/mktests ================================================ #!/bin/sh # Make the output files for each test if [ ! -f ../comp1 ] then echo "Need to build ../comp1 first!"; exit 1 fi for i in input* do if [ ! -f "out.$i" ] then ../comp1 $i cc -o out out.s ./out > out.$i rm -f out out.s fi done ================================================ FILE: 11_Functions_pt1/tests/mktestsn ================================================ #!/bin/sh # Make the output files for each test if [ ! -f ../compn ] then echo "Need to build ../compn first!"; exit 1 fi for i in input* do if [ ! 
-f "out.$i" ] then ../compn $i nasm -f elf64 out.s cc -no-pie -o out out.o ./out > out.$i rm -f out out.s fi done ================================================ FILE: 11_Functions_pt1/tests/out.input01 ================================================ 36 10 25 ================================================ FILE: 11_Functions_pt1/tests/out.input02 ================================================ 17 ================================================ FILE: 11_Functions_pt1/tests/out.input03 ================================================ 1 2 3 4 5 ================================================ FILE: 11_Functions_pt1/tests/out.input04 ================================================ 1 1 1 1 1 1 1 1 1 ================================================ FILE: 11_Functions_pt1/tests/out.input05 ================================================ 6 ================================================ FILE: 11_Functions_pt1/tests/out.input06 ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 11_Functions_pt1/tests/out.input07 ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 11_Functions_pt1/tests/out.input08 ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 11_Functions_pt1/tests/out.input09 ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 11_Functions_pt1/tests/runtests ================================================ #!/bin/sh # Run each test and compare # against known good output if [ ! -f ../comp1 ] then echo "Need to build ../comp1 first!"; exit 1 fi for i in input* do if [ ! -f "out.$i" ] then echo "Can't run test on $i, no output file!" else echo -n $i ../comp1 $i cc -o out out.s ./out > trial.$i cmp -s "out.$i" "trial.$i" if [ "$?" 
-eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo else echo ": OK" fi rm -f out out.s "trial.$i" fi done ================================================ FILE: 11_Functions_pt1/tests/runtestsn ================================================ #!/bin/sh # Run each test and compare # against known good output if [ ! -f ../compn ] then echo "Need to build ../compn first!"; exit 1 fi for i in input* do if [ ! -f "out.$i" ] then echo "Can't run test on $i, no output file!" else echo -n $i ../compn $i nasm -f elf64 out.s cc -no-pie -o out out.o ./out > trial.$i cmp -s "out.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo else echo ": OK" fi rm -f out out.o out.s "trial.$i" fi done ================================================ FILE: 11_Functions_pt1/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->left = left; n->mid = mid; n->right = right; n->v.intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int intvalue) { return (mkastnode(op, NULL, NULL, NULL, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, struct ASTnode *left, int intvalue) { return (mkastnode(op, left, NULL, NULL, intvalue)); } ================================================ FILE: 12_Types_pt1/Makefile ================================================ SRCS= cg.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c sym.c tree.c types.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c 
sym.c tree.c types.c comp1: $(SRCS) cc -o comp1 -g $(SRCS) compn: $(SRCN) cc -o compn -g $(SRCN) clean: rm -f comp1 compn *.o *.s out test: comp1 tests/runtests (cd tests; chmod +x runtests; ./runtests) test10: comp1 tests/input10 ./comp1 tests/input10 cc -o out out.s ./out testn: compn tests/runtestsn (cd tests; chmod +x runtestsn; ./runtestsn) test10n: compn tests/input10 ./compn tests/input10 nasm -f elf64 out.s cc -no-pie -o out out.o ./out ================================================ FILE: 12_Types_pt1/Readme.md ================================================ # Part 12: Types, part 1 I've just begun the process to add types to our compiler. Now, I should warn you that this is new to me, as in my [previous compiler](https://github.com/DoctorWkt/h-compiler) I only had `int`s. I've resisted the urge to look at the SubC source code for ideas. Thus, I'm striking out on my own and it's likely that I will have to redo some of the code as I deal with the greater issues involving types. ## What Types for Now? I'll start with `char` and `int` for our global variables. We've already added the `void` keyword for functions. In the next step I will add function return values. So, for now, `void` exists but I'm not fully dealing with it. Obviously, `char` has a much more limited range of values than `int`. Like SubC, I'm going to use the range 0 .. 255 for `char`s and a range of signed values for `int`s. This means that we can widen `char` values to become `int`s, but we must warn the developer if they try to narrow `int` values down to a `char` range. ## New Keywords and Tokens There is only the new 'char' keyword and the T_CHAR token. Nothing exciting here. ## Expression Types From now on, every expression has a type. This includes: + integer literals, e.g. 56 is an `int` + maths expressions, e.g. 45 - 12 is an `int` + variables, e.g.
if we declared `x` as a `char`, then its *rvalue* is a `char` We are going to have to track the type of each expression as we evaluate it, to ensure we can widen it as required or refuse to narrow it if necessary. In the SubC compiler, Nils created a single *lvalue* structure. A pointer to this single structure was passed around in the recursive parser to track the type of any expression at a point in its parsing. I've taken a different tack. I've modified our Abstract Syntax Tree node to have a `type` field which holds the type of the tree at that point. In `defs.h`, here are the types I've created so far: ```c // Primitive types enum { P_NONE, P_VOID, P_CHAR, P_INT }; ``` I've called them *primitive* types, as Nils did in SubC, because I can't think of a better name for them. Data types, perhaps? The P_NONE value indicates that the AST node *doesn't* represent an expression and has no type. An example is the A_GLUE node type which glues statements together: once the left-hand statement is generated, there is no type to speak of. If you look in `tree.c`, you will see that the functions to build AST nodes have been modified to also assign to the `type` field in the new AST node structure (in `defs.h`): ```c struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates ... }; ``` ## Variable Declarations and Their Types We now have at least two ways to declare global variables: ```c int x; char y; ``` We'll need to parse this, yes. But first, how do we record the type for each variable? We need to modify the `symtable` structure.
I've also added the details of the "structural type" of the symbol which I'll use in the future (in `defs.h`): ```c // Structural types enum { S_VARIABLE, S_FUNCTION }; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol int stype; // Structural type for the symbol }; ``` There's new code in `addglob()` in `sym.c` to initialise these new fields: ```c int addglob(char *name, int type, int stype) { ... Gsym[y].type = type; Gsym[y].stype = stype; return (y); } ``` ## Parsing Variable Declarations It's time to separate out the parsing of the type from the parsing of the variable itself. So, in `decl.c` we now have: ```c // Parse the current token and // return a primitive type enum value int parse_type(int t) { if (t == T_CHAR) return (P_CHAR); if (t == T_INT) return (P_INT); if (t == T_VOID) return (P_VOID); fatald("Illegal type, token", t); } // Parse the declaration of a variable void var_declaration(void) { int id, type; // Get the type of the variable, then the identifier type = parse_type(Token.token); scan(&Token); ident(); id = addglob(Text, type, S_VARIABLE); genglobsym(id); semi(); } ``` ## Dealing with Expression Types All of the above is the easy part done! We now have: + a set of three types: `char`, `int` and `void`, + parsing of variable declarations to find their type, + capture of each variable's type in the symbol table, and + storage of the type of an expression in each AST node Now we need to actually fill in the type in the AST nodes that we build. Then we have to decide when to widen types and/or reject type clashes. Let's get on with the job! ## Parsing Primary Terminals We'll start with the parsing of integer literal values and variable identifiers. One wrinkle is that we want to be able to do: ```c char j; j= 2; ``` But if we mark the `2` as a P_INT, then we won't be able to narrow the value when we try to store it in the P_CHAR `j` variable.
For now, I've added some semantic code to keep small integer literal values as P_CHARs: ```c // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; int id; switch (Token.token) { case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. // Make it a P_CHAR if it's within the P_CHAR range if ((Token.intvalue) >= 0 && (Token.intvalue < 256)) n = mkastleaf(A_INTLIT, P_CHAR, Token.intvalue); else n = mkastleaf(A_INTLIT, P_INT, Token.intvalue); break; case T_IDENT: // Check that this identifier exists id = findglob(Text); if (id == -1) fatals("Unknown variable", Text); // Make a leaf AST node for it n = mkastleaf(A_IDENT, Gsym[id].type, id); break; default: fatald("Syntax error, token", Token.token); } // Scan in the next token and return the leaf node scan(&Token); return (n); } ``` Also note that, for identifiers, we can easily get their type details from the global symbol table. ## Building Binary Expressions: Comparing Types As we build maths expressions with our binary maths operators, we will have a type from the left-hand child and a type from the right-hand child. Here is where we are going to have to either widen, do nothing, or reject the expression if the two types are incompatible. For now, I have a new file `types.c` with a function that compares the types on either side. Here's the code: ```c // Given two primitive types, return true if they are compatible, // false otherwise. Also return either zero or an A_WIDEN // operation if one has to be widened to match the other. // If onlyright is true, only widen left to right. 
int type_compatible(int *left, int *right, int onlyright) { // Voids not compatible with anything if ((*left == P_VOID) || (*right == P_VOID)) return (0); // Same types, they are compatible if (*left == *right) { *left = *right = 0; return (1); } // Widen P_CHARs to P_INTs as required if ((*left == P_CHAR) && (*right == P_INT)) { *left = A_WIDEN; *right = 0; return (1); } if ((*left == P_INT) && (*right == P_CHAR)) { if (onlyright) return (0); *left = 0; *right = A_WIDEN; return (1); } // Anything remaining is compatible *left = *right = 0; return (1); } ``` There's a fair bit going on here. Firstly, if both types are the same we can simply return True. Anything with a P_VOID cannot be mixed with another type. If one side is a P_CHAR and the other is a P_INT, we can widen the result to a P_INT. The way I do this is to modify the type information that comes in and I replace it either with zero (do nothing), or a new AST node type A_WIDEN. This means: widen the more narrow child's value to be as wide as the wider child's value. We'll see this in operation soon. There is one extra argument `onlyright`. I use this when we get to A_ASSIGN AST nodes where we are assigning the left-child's expression to the variable *lvalue* on the right. If this is set, don't let a P_INT expression be transferred to a P_CHAR variable. Finally, for now, let any other type pairs through. I think I can guarantee that this will need to be changed once we bring in arrays and pointers. I also hope I can find a way to make the code simpler and more elegant. But it will do for now. ## Using `type_compatible()` in Expressions I've used `type_compatible()` in three different places in this version of the compiler. We'll start with merging expressions with binary operators. I've modified the code in `binexpr()` in `expr.c` to do this: ```c // Ensure the two types are compatible.
lefttype = left->type; righttype = right->type; if (!type_compatible(&lefttype, &righttype, 0)) fatal("Incompatible types"); // Widen either side if required. type vars are A_WIDEN now if (lefttype) left = mkastunary(lefttype, right->type, left, 0); if (righttype) right = mkastunary(righttype, left->type, right, 0); // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. left = mkastnode(arithop(tokentype), left->type, left, NULL, right, 0); ``` We reject incompatible types. But, if `type_compatible()` returned non-zero `lefttype` or `righttype` values, these are actually the A_WIDEN value. We can use this to build a unary AST node with the narrow child as the child. When we get to the code generator, it will now know that this child's value has to be widened. Now, where else do we need to widen expression values? ## Using `type_compatible()` to Print Expressions When we use the `print` keyword, we need to have an `int` expression for it to print. So we need to change `print_statement()` in `stmt.c`: ```c static struct ASTnode *print_statement(void) { struct ASTnode *tree; int lefttype, righttype; int reg; ... // Parse the following expression tree = binexpr(0); // Ensure the two types are compatible. lefttype = P_INT; righttype = tree->type; if (!type_compatible(&lefttype, &righttype, 0)) fatal("Incompatible types"); // Widen the tree if required. if (righttype) tree = mkastunary(righttype, P_INT, tree, 0); ``` ## Using `type_compatible()` to Assign to a Variable This is the last place where we need to check types. When we assign to a variable, we need to ensure that we can widen the right-hand side expression. We've got to reject any attempt to store a wide type into a narrow variable. Here is the new code in `assignment_statement()` in `stmt.c`: ```c static struct ASTnode *assignment_statement(void) { struct ASTnode *left, *right, *tree; int lefttype, righttype; int id; ... 
// Make an lvalue node for the variable right = mkastleaf(A_LVIDENT, Gsym[id].type, id); // Parse the following expression left = binexpr(0); // Ensure the two types are compatible. lefttype = left->type; righttype = right->type; if (!type_compatible(&lefttype, &righttype, 1)) // Note the 1 fatal("Incompatible types"); // Widen the left if required. if (lefttype) left = mkastunary(lefttype, right->type, left, 0); ``` Note the 1 at the end of this call to `type_compatible()`. This enforces the semantics that we cannot save a wide value to a narrow variable. Given all of the above, we now can parse a few types and enforce some sensible language semantics: widen values where possible, prevent type narrowing and prevent unsuitable type clashes. Now we move to the code generation side of things. ## The Changes to x86-64 Code Generation Our assembly output is register-based, and the registers are essentially fixed in size. What we can influence is: + the size of the memory locations to store variables, and + how much of a register is used to hold data, e.g. one byte for characters, eight bytes for a 64-bit integer. I'll start with the x86-64 specific code in `cg.c`, and then I'll show how this is used in the generic code generator in `gen.c`. Let's start with generating the storage for variables. ```c // Generate a global symbol void cgglobsym(int id) { // Choose P_INT or P_CHAR if (Gsym[id].type == P_INT) fprintf(Outfile, "\t.comm\t%s,8,8\n", Gsym[id].name); else fprintf(Outfile, "\t.comm\t%s,1,1\n", Gsym[id].name); } ``` We extract the type from the variable slot in the symbol table and choose to allocate 1 or 8 bytes for it depending on this type. Now we need to load the value into a register: ```c // Load a value from a variable into a register.
// Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Print out the code to initialise it: P_CHAR or P_INT if (Gsym[id].type == P_INT) fprintf(Outfile, "\tmovq\t%s(\%%rip), %s\n", Gsym[id].name, reglist[r]); else fprintf(Outfile, "\tmovzbq\t%s(\%%rip), %s\n", Gsym[id].name, reglist[r]); return (r); } ``` The `movq` instruction moves eight bytes into the 8-byte register. The `movzbq` instruction zeroes the 8-byte register and then moves a single byte into it. This also implicitly widens the one byte value to eight bytes. Our storage function is similar: ```c // Store a register's value into a variable int cgstorglob(int r, int id) { // Choose P_INT or P_CHAR if (Gsym[id].type == P_INT) fprintf(Outfile, "\tmovq\t%s, %s(\%%rip)\n", reglist[r], Gsym[id].name); else fprintf(Outfile, "\tmovb\t%s, %s(\%%rip)\n", breglist[r], Gsym[id].name); return (r); } ``` This time we have to use the "byte" name of the register and the `movb` instruction to move a single byte. Luckily, the `cgloadglob()` function has already done the widening of P_CHAR variables. So this is the code for our new `cgwiden()` function: ```c // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } ``` ## The Changes to The Generic Code Generation With the above in place, there are only a few changes to the generic code generator in `gen.c`: + The calls to `cgloadglob()` and `cgstorglob()` now take the symbol's slot number and not the symbol's name. + Similarly, `genglobsym()` now receives the symbol's slot number and passes it on to `cgglobsym()` The only major change is the code to deal with the new A_WIDEN AST node type.
We don't need this node (as `cgwiden()` does nothing), but it's here for other hardware platforms: ```c case A_WIDEN: // Widen the child's type to the parent's type return (cgwiden(leftreg, n->left->type, n->type)); ``` ## Testing the New Type Changes Here is my test input file, `tests/input10`: ```c void main() { int i; char j; j= 20; print j; i= 10; print i; for (i= 1; i <= 5; i= i + 1) { print i; } for (j= 253; j != 2; j= j + 1) { print j; } } ``` I check that we can assign to and print from `char` and `int` types. I also verify that, for `char` variables, we will overflow in the value sequence: 253, 254, 255, 0, 1, 2 etc. ``` $ make test cc -o comp1 -g cg.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c sym.c tree.c types.c ./comp1 tests/input10 cc -o out out.s ./out 20 10 1 2 3 4 5 253 254 255 0 1 ``` Let's look at some of the assembly that was generated: ``` .comm i,8,8 # Eight byte i storage .comm j,1,1 # One byte j storage ... movq $20, %r8 movb %r8b, j(%rip) # j= 20 movzbq j(%rip), %r8 movq %r8, %rdi # print j call printint movq $253, %r8 movb %r8b, j(%rip) # j= 253 L3: movzbq j(%rip), %r8 movq $2, %r9 cmpq %r9, %r8 # while j != 2 je L4 movzbq j(%rip), %r8 movq %r8, %rdi # print j call printint movzbq j(%rip), %r8 movq $1, %r9 # j= j + 1 addq %r8, %r9 movb %r9b, j(%rip) jmp L3 ``` Still not the most elegant assembly code, but it does work. Also, `$ make test` confirms that all the previous code examples still work. ## Conclusion and What's Next In the next part of our compiler writing journey, we will add function calls with one argument, and returning a value from a function. 
[Next step](../13_Functions_pt2/Readme.md) ================================================ FILE: 12_Types_pt1/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names // We need a list of byte registers, too static int freereg[4]; static char *reglist[4] = { "%r8", "%r9", "%r10", "%r11" }; static char *breglist[4] = { "%r8b", "%r9b", "%r10b", "%r11b" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n" ".LC0:\n" "\t.string\t\"%d\\n\"\n" "printint:\n" "\tpushq\t%rbp\n" "\tmovq\t%rsp, %rbp\n" "\tsubq\t$16, %rsp\n" "\tmovl\t%edi, -4(%rbp)\n" "\tmovl\t-4(%rbp), %eax\n" "\tmovl\t%eax, %esi\n" "\tleaq .LC0(%rip), %rdi\n" "\tmovl $0, %eax\n" "\tcall printf@PLT\n" "\tnop\n" "\tleave\n" "\tret\n" "\n", Outfile); } // Print out a function preamble void cgfuncpreamble(char *name) { fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, @function\n" "%s:\n" "\tpushq\t%%rbp\n" "\tmovq\t%%rsp, %%rbp\n", name, name, name); } // Print out a function postamble void cgfuncpostamble() { fputs("\tmovl $0, %eax\n" "\tpopq %rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. 
int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]); return (r); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Print out the code to initialise it: P_CHAR or P_INT if (Gsym[id].type == P_INT) fprintf(Outfile, "\tmovq\t%s(\%%rip), %s\n", Gsym[id].name, reglist[r]); else fprintf(Outfile, "\tmovzbq\t%s(\%%rip), %s\n", Gsym[id].name, reglist[r]); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); free_register(r2); return (r1); } // Call printint() with the given register void cgprintint(int r) { fprintf(Outfile, "\tmovq\t%s, %%rdi\n", reglist[r]); fprintf(Outfile, "\tcall\tprintint\n"); free_register(r); } // Store a register's value into a variable int cgstorglob(int r, int id) { // Choose P_INT or P_CHAR if (Gsym[id].type == P_INT) fprintf(Outfile, "\tmovq\t%s, %s(\%%rip)\n", reglist[r], Gsym[id].name); else fprintf(Outfile, "\tmovb\t%s, 
%s(\%%rip)\n", breglist[r], Gsym[id].name); return (r); } // Generate a global symbol void cgglobsym(int id) { // Choose P_INT or P_CHAR if (Gsym[id].type == P_INT) fprintf(Outfile, "\t.comm\t%s,8,8\n", Gsym[id].name); else fprintf(Outfile, "\t.comm\t%s,1,1\n", Gsym[id].name); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } ================================================ FILE: 12_Types_pt1/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names static int freereg[4]; static char *reglist[4] = { "r8", "r9", "r10", "r11" }; static char *breglist[4] = { "r8b", "r9b", "r10b", "r11b" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\tglobal\tmain\n" "\textern\tprintf\n" "\tsection\t.text\n" "LC0:\tdb\t\"%d\",10,0\n" "printint:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n" "\tsub\trsp, 16\n" "\tmov\t[rbp-4], edi\n" "\tmov\teax, [rbp-4]\n" "\tmov\tesi, eax\n" "\tlea rdi, [rel LC0]\n" "\tmov eax, 0\n" "\tcall printf\n" "\tnop\n" "\tleave\n" "\tret\n" "\n", Outfile); } // Print out a function preamble void cgfuncpreamble(char *name) { fprintf(Outfile, "\tsection\t.text\n" "\tglobal\t%s\n" "%s:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n", name, name); } // Print out a function postamble void cgfuncpostamble() { fputs("\tmov eax, 0\n" "\tpop rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Load a value from a variable into a register. 
// Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Print out the code to initialise it: P_CHAR or P_INT if (Gsym[id].type == P_INT) fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], Gsym[id].name); else fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], Gsym[id].name); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } // Call printint() with the given register void cgprintint(int r) { fprintf(Outfile, "\tmov\trdi, %s\n", reglist[r]); fprintf(Outfile, "\tcall\tprintint\n"); free_register(r); } // Store a register's value into a variable int cgstorglob(int r, int id) { // Choose P_INT or P_CHAR if (Gsym[id].type == P_INT) fprintf(Outfile, "\tmov\t[%s], %s\n", Gsym[id].name, reglist[r]); else fprintf(Outfile, "\tmov\t[%s], %s\n", Gsym[id].name, breglist[r]); return (r); } // Generate a global symbol void cgglobsym(int id) { // Choose P_INT or P_CHAR if (Gsym[id].type == P_INT) fprintf(Outfile, "\tcommon\t%s 8:8\n", Gsym[id].name); else fprintf(Outfile, 
"\tcommon\t%s 1:1\n", Gsym[id].name); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } ================================================ FILE: 12_Types_pt1/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Putback; // Character put back by scanner extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ struct token Token; // Last token scanned extern_ char Text[TEXTLEN + 1]; // Last identifier scanned extern_ struct symtable Gsym[NSYMBOLS]; // Global symbol table ================================================ FILE: 12_Types_pt1/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 // Parse the current token and // return a primitive type enum value int parse_type(int t) { if (t == T_CHAR) return (P_CHAR); if (t == T_INT) return (P_INT); if (t == T_VOID) return (P_VOID); fatald("Illegal type, token", t); } // variable_declaration: 'int' identifier ';' ; // // Parse the declaration of a variable void var_declaration(void) { int id, type; // Get the type of the variable, then the identifier type = parse_type(Token.token); scan(&Token); ident(); // Text now has the identifier's name. 
// Add it as a known identifier // and generate its space in assembly id = addglob(Text, type, S_VARIABLE); genglobsym(id); // Get the trailing semicolon semi(); } // For now we have a very simplistic function definition grammar // // function_declaration: 'void' identifier '(' ')' compound_statement ; // // Parse the declaration of a simplistic function struct ASTnode *function_declaration(void) { struct ASTnode *tree; int nameslot; // Find the 'void', the identifier, and the '(' ')'. // For now, do nothing with them match(T_VOID, "void"); ident(); nameslot = addglob(Text, P_VOID, S_FUNCTION); lparen(); rparen(); // Get the AST tree for the compound statement tree = compound_statement(); // Return an A_FUNCTION node which has the function's nameslot // and the compound statement sub-tree return (mkastunary(A_FUNCTION, P_VOID, tree, nameslot)); } ================================================ FILE: 12_Types_pt1/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, int intvalue); struct ASTnode *mkastleaf(int op, int type, int intvalue); struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, int intvalue); // gen.c int genAST(struct ASTnode *n, int reg, int parentASTop); void genpreamble(); void genpostamble(); void genfreeregs(); void genprintint(int reg); void genglobsym(int id); // cg.c void freeall_registers(void); void cgpreamble(); void cgfuncpreamble(char *name); void cgfuncpostamble(); int cgloadint(int value, int type); int cgloadglob(int id); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdiv(int r1, int r2); void cgprintint(int r); int cgstorglob(int r, int id); void cgglobsym(int id); int cgcompare_and_set(int ASTop, int r1, int r2); int 
cgcompare_and_jump(int ASTop, int r1, int r2, int label); void cglabel(int l); void cgjump(int l); int cgwiden(int r, int oldtype, int newtype); // expr.c struct ASTnode *binexpr(int ptp); // stmt.c struct ASTnode *compound_statement(void); // misc.c void match(int t, char *what); void semi(void); void lbrace(void); void rbrace(void); void lparen(void); void rparen(void); void ident(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c int findglob(char *s); int addglob(char *name, int type, int stype); // decl.c void var_declaration(void); struct ASTnode *function_declaration(void); // types.c int type_compatible(int *left, int *right, int onlyright); ================================================ FILE: 12_Types_pt1/defs.h ================================================ #include #include #include #include // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 #define TEXTLEN 512 // Length of symbols in input #define NSYMBOLS 1024 // Number of symbol table entries // Token types enum { T_EOF, T_PLUS, T_MINUS, T_STAR, T_SLASH, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, T_INTLIT, T_SEMI, T_ASSIGN, T_IDENT, T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, // Keywords T_PRINT, T_INT, T_IF, T_ELSE, T_WHILE, T_FOR, T_VOID, T_CHAR }; // Token structure struct token { int token; // Token type, from the enum list above int intvalue; // For T_INTLIT, the integer value }; // AST node types. 
The first few line up // with the related tokens enum { A_ADD = 1, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_INTLIT, A_IDENT, A_LVIDENT, A_ASSIGN, A_PRINT, A_GLUE, A_IF, A_WHILE, A_FUNCTION, A_WIDEN }; // Primitive types enum { P_NONE, P_VOID, P_CHAR, P_INT }; // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; union { int intvalue; // For A_INTLIT, the integer value int id; // For A_IDENT, the symbol slot number } v; // For A_FUNCTION, the symbol slot number }; #define NOREG -1 // Use NOREG when the AST generation // functions have no register to return // Structural types enum { S_VARIABLE, S_FUNCTION }; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol int stype; // Structural type for the symbol }; ================================================ FILE: 12_Types_pt1/expr.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of expressions // Copyright (c) 2019 Warren Toomey, GPL3 // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; int id; switch (Token.token) { case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. 
// Make it a P_CHAR if it's within the P_CHAR range if ((Token.intvalue) >= 0 && (Token.intvalue < 256)) n = mkastleaf(A_INTLIT, P_CHAR, Token.intvalue); else n = mkastleaf(A_INTLIT, P_INT, Token.intvalue); break; case T_IDENT: // Check that this identifier exists id = findglob(Text); if (id == -1) fatals("Unknown variable", Text); // Make a leaf AST node for it n = mkastleaf(A_IDENT, Gsym[id].type, id); break; default: fatald("Syntax error, token", Token.token); } // Scan in the next token and return the leaf node scan(&Token); return (n); } // Convert a binary operator token into an AST operation. // We rely on a 1:1 mapping from token to AST operation static int arithop(int tokentype) { if (tokentype > T_EOF && tokentype < T_INTLIT) return (tokentype); fatald("Syntax error, token", tokentype); } // Operator precedence for each token. Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 10, // T_EOF, T_PLUS, T_MINUS 20, 20, // T_STAR, T_SLASH 30, 30, // T_EQ, T_NE 40, 40, 40, 40 // T_LT, T_GT, T_LE, T_GE }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec = OpPrec[tokentype]; if (prec == 0) fatald("Syntax error, token", tokentype); return (prec); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; int lefttype, righttype; int tokentype; // Get the primary tree on the left. // Fetch the next token at the same time. 
left = primary(); // If we hit a semicolon or ')', return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN) return (left); // While the precedence of this token is // more than that of the previous token precedence while (op_precedence(tokentype) > ptp) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Ensure the two types are compatible. lefttype = left->type; righttype = right->type; if (!type_compatible(&lefttype, &righttype, 0)) fatal("Incompatible types"); // Widen either side if required. type vars are A_WIDEN now if (lefttype) left = mkastunary(lefttype, right->type, left, 0); if (righttype) right = mkastunary(righttype, left->type, right, 0); // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. left = mkastnode(arithop(tokentype), left->type, left, NULL, right, 0); // Update the details of the current token. // If we hit a semicolon or ')', return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN) return (left); } // Return the tree we have when the precedence // is the same or lower return (left); } ================================================ FILE: 12_Types_pt1/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number static int label(void) { static int id = 1; return (id++); } // Generate the code for an IF statement // and an optional ELSE clause static int genIF(struct ASTnode *n) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. // When there is no ELSE clause, Lfalse _is_ // the ending label! 
Lfalse = label(); if (n->right) Lend = label(); // Generate the condition code followed // by a jump to the false label. // We cheat by sending the Lfalse label as a register. genAST(n->left, Lfalse, n->op); genfreeregs(); // Generate the true compound statement genAST(n->mid, NOREG, n->op); genfreeregs(); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOREG, n->op); genfreeregs(); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement // and an optional ELSE clause static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = label(); Lend = label(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. // We cheat by sending the Lfalse label as a register. genAST(n->left, Lend, n->op); genfreeregs(); // Generate the compound statement for the body genAST(n->right, NOREG, n->op); genfreeregs(); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Given an AST, the register (if any) that holds // the previous rvalue, and the AST op of the parent, // generate assembly code recursively. 
// Return the register id with the tree's final value int genAST(struct ASTnode *n, int reg, int parentASTop) { int leftreg, rightreg; // We now have specific AST node handling at the top switch (n->op) { case A_IF: return (genIF(n)); case A_WHILE: return (genWHILE(n)); case A_GLUE: // Do each child statement, and free the // registers after each child genAST(n->left, NOREG, n->op); genfreeregs(); genAST(n->right, NOREG, n->op); genfreeregs(); return (NOREG); case A_FUNCTION: // Generate the function's preamble before the code cgfuncpreamble(Gsym[n->v.id].name); genAST(n->left, NOREG, n->op); cgfuncpostamble(); return (NOREG); } // General AST node handling below // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, NOREG, n->op); if (n->right) rightreg = genAST(n->right, leftreg, n->op); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdiv(leftreg, rightreg)); case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, compare registers // and set one to 1 or 0 based on the comparison. 
if (parentASTop == A_IF || parentASTop == A_WHILE) return (cgcompare_and_jump(n->op, leftreg, rightreg, reg)); else return (cgcompare_and_set(n->op, leftreg, rightreg)); case A_INTLIT: return (cgloadint(n->v.intvalue, n->type)); case A_IDENT: return (cgloadglob(n->v.id)); case A_LVIDENT: return (cgstorglob(reg, n->v.id)); case A_ASSIGN: // The work has already been done, return the result return (rightreg); case A_PRINT: // Print the left-child's value // and return no register genprintint(leftreg); genfreeregs(); return (NOREG); case A_WIDEN: // Widen the child's type to the parent's type return (cgwiden(leftreg, n->left->type, n->type)); default: fatald("Unknown AST operator", n->op); } } void genpreamble() { cgpreamble(); } void genfreeregs() { freeall_registers(); } void genprintint(int reg) { cgprintint(reg); } void genglobsym(int id) { cgglobsym(id); } ================================================ FILE: 12_Types_pt1/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Initialise global variables static void init() { Line = 1; Putback = '\n'; } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s infile\n", prog); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. 
void main(int argc, char *argv[]) { struct ASTnode *tree; if (argc != 2) usage(argv[0]); init(); // Open up the input file if ((Infile = fopen(argv[1], "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", argv[1], strerror(errno)); exit(1); } // Create the output file if ((Outfile = fopen("out.s", "w")) == NULL) { fprintf(stderr, "Unable to create out.s: %s\n", strerror(errno)); exit(1); } scan(&Token); // Get the first token from the input genpreamble(); // Output the preamble while (1) { // Parse a function and tree = function_declaration(); genAST(tree, NOREG, 0); // generate the assembly code for it if (Token.token == T_EOF) // Stop when we have reached EOF break; } fclose(Outfile); // Close the output file and exit exit(0); } ================================================ FILE: 12_Types_pt1/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current token is t, // and fetch the next token. 
Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d\n", s, Line); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d\n", s1, s2, Line); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d\n", s, d, Line); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d\n", s, c, Line); exit(1); } ================================================ FILE: 12_Types_pt1/scan.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { char *p; p = strchr(s, c); return (p ? p - s : -1); } // Get the next character from the input file. static int next(void) { int c; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return (c); } c = fgetc(Infile); // Read from input file if ('\n' == c) Line++; // Increment line count return (c); } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. 
static int skip(void) { int c; c = next(); while (' ' == c || '\t' == c || '\n' == c || '\r' == c || '\f' == c) { c = next(); } return (c); } // Scan and return an integer literal // value from the input file. static int scanint(int c) { int k, val = 0; // Convert each character into an int value while ((k = chrpos("0123456789", c)) >= 0) { val = val * 10 + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan an identifier from the input file and // store it in buf[]. Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { fatal("Identifier too long"); } else if (i < lim - 1) { buf[i++] = c; } c = next(); } // We hit a non-valid character, put it back. // NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. static int keyword(char *s) { switch (*s) { case 'c': if (!strcmp(s, "char")) return (T_CHAR); break; case 'e': if (!strcmp(s, "else")) return (T_ELSE); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'p': if (!strcmp(s, "print")) return (T_PRINT); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; case 'v': if (!strcmp(s, "void")) return (T_VOID); break; } return (0); } // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. 
int scan(struct token *t) {
  int c, tokentype;

  // Skip whitespace
  c = skip();

  // Determine the token based on
  // the input character
  switch (c) {
    case EOF:
      // End of input is the only case that returns 0
      t->token = T_EOF;
      return (0);
    case '+':
      t->token = T_PLUS;
      break;
    case '-':
      t->token = T_MINUS;
      break;
    case '*':
      t->token = T_STAR;
      break;
    case '/':
      t->token = T_SLASH;
      break;
    case ';':
      t->token = T_SEMI;
      break;
    case '{':
      t->token = T_LBRACE;
      break;
    case '}':
      t->token = T_RBRACE;
      break;
    case '(':
      t->token = T_LPAREN;
      break;
    case ')':
      t->token = T_RPAREN;
      break;
    case '=':
      // "==" is T_EQ; a lone '=' is T_ASSIGN and the
      // look-ahead character is put back
      if ((c = next()) == '=') {
	t->token = T_EQ;
      } else {
	putback(c);
	t->token = T_ASSIGN;
      }
      break;
    case '!':
      // Only "!=" is recognised; a lone '!' is an error
      if ((c = next()) == '=') {
	t->token = T_NE;
      } else {
	fatalc("Unrecognised character", c);
      }
      break;
    case '<':
      // "<=" is T_LE, otherwise T_LT
      if ((c = next()) == '=') {
	t->token = T_LE;
      } else {
	putback(c);
	t->token = T_LT;
      }
      break;
    case '>':
      // ">=" is T_GE, otherwise T_GT
      if ((c = next()) == '=') {
	t->token = T_GE;
      } else {
	putback(c);
	t->token = T_GT;
      }
      break;
    default:

      // If it's a digit, scan the
      // literal integer value in
      if (isdigit(c)) {
	t->intvalue = scanint(c);
	t->token = T_INTLIT;
	break;
      } else if (isalpha(c) || '_' == c) {
	// Read in a keyword or identifier
	scanident(c, Text, TEXTLEN);

	// If it's a recognised keyword, return that token.
	// The assignment inside the condition is intentional:
	// keyword() returns 0 when Text is not a keyword.
	if (tokentype = keyword(Text)) {
	  t->token = tokentype;
	  break;
	}
	// Not a recognised keyword, so it must be an identifier
	t->token = T_IDENT;
	break;
      }
      // The character isn't part of any recognised token, error
      fatalc("Unrecognised character", c);
  }

  // We found a token
  return (1);
}

================================================
FILE: 12_Types_pt1/stmt.c
================================================
#include "defs.h"
#include "data.h"
#include "decl.h"

// Parsing of statements
// Copyright (c) 2019 Warren Toomey, GPL3

// Prototypes
static struct ASTnode *single_statement(void);

// compound_statement:          // empty, i.e. no statement
//      |      statement
//      |      statement statements
//      ;
//
// statement: print_statement
//      |     declaration
//      |     assignment_statement
//      |     if_statement
//      |     while_statement
//      ;
//
// single_statement() below also accepts a for_statement.

// print_statement: 'print' expression ';'  ;
//
// Parse a PRINT statement and return its AST. The trailing ';'
// is not consumed here; compound_statement() matches it.
static struct ASTnode *print_statement(void) {
  struct ASTnode *tree;
  int lefttype, righttype;
  int reg;			// NOTE(review): never used in this function

  // Match a 'print' as the first token
  match(T_PRINT, "print");

  // Parse the following expression
  tree = binexpr(0);

  // Ensure the two types are compatible.
  // The expression is checked against P_INT.
  lefttype = P_INT;
  righttype = tree->type;
  if (!type_compatible(&lefttype, &righttype, 0))
    fatal("Incompatible types");

  // Widen the tree if required. A non-zero righttype is used as
  // the AST operation of the new parent node.
  if (righttype)
    tree = mkastunary(righttype, P_INT, tree, 0);

  // Make a print AST tree
  tree = mkastunary(A_PRINT, P_NONE, tree, 0);

  // Return the AST
  return (tree);
}

// assignment_statement: identifier '=' expression ';'   ;
//
// Parse an assignment and return its AST. The expression is the
// left child and the A_LVIDENT destination is the right child.
// The trailing ';' is not consumed here.
static struct ASTnode *assignment_statement(void) {
  struct ASTnode *left, *right, *tree;
  int lefttype, righttype;
  int id;

  // Ensure we have an identifier
  ident();

  // Check it's been defined then make a leaf node for it
  if ((id = findglob(Text)) == -1) {
    fatals("Undeclared variable", Text);
  }
  right = mkastleaf(A_LVIDENT, Gsym[id].type, id);

  // Ensure we have an equals sign
  match(T_ASSIGN, "=");

  // Parse the following expression
  left = binexpr(0);

  // Ensure the two types are compatible.
  lefttype = left->type;
  righttype = right->type;
  if (!type_compatible(&lefttype, &righttype, 1))
    fatal("Incompatible types");

  // Widen the left if required. A non-zero lefttype is used as
  // the AST operation of the new parent node.
  if (lefttype)
    left = mkastunary(lefttype, right->type, left, 0);

  // Make an assignment AST tree
  tree = mkastnode(A_ASSIGN, P_INT, left, NULL, right, 0);

  // Return the AST
  return (tree);
}

// if_statement: if_head
//      |        if_head 'else' compound_statement
//      ;
//
// if_head: 'if' '(' true_false_expression ')' compound_statement  ;
//
// Parse an IF statement including
// any optional ELSE clause
// and return its AST
static struct ASTnode *if_statement(void) {
  struct ASTnode *condAST, *trueAST, *falseAST = NULL;

  // Ensure we have 'if' '('
  match(T_IF, "if");
  lparen();

  // Parse the following expression
  // and the ')' following. Ensure
  // the tree's operation is a comparison.
  condAST = binexpr(0);

  if (condAST->op < A_EQ || condAST->op > A_GE)
    fatal("Bad comparison operator");
  rparen();

  // Get the AST for the compound statement
  trueAST = compound_statement();

  // If we have an 'else', skip it
  // and get the AST for the compound statement
  if (Token.token == T_ELSE) {
    scan(&Token);
    falseAST = compound_statement();
  }
  // Build and return the AST for this statement:
  // condition on the left, true body in the middle,
  // false body (or NULL) on the right
  return (mkastnode(A_IF, P_NONE, condAST, trueAST, falseAST, 0));
}

// while_statement: 'while' '(' true_false_expression ')' compound_statement  ;
//
// Parse a WHILE statement
// and return its AST
static struct ASTnode *while_statement(void) {
  struct ASTnode *condAST, *bodyAST;

  // Ensure we have 'while' '('
  match(T_WHILE, "while");
  lparen();

  // Parse the following expression
  // and the ')' following. Ensure
  // the tree's operation is a comparison.
  condAST = binexpr(0);
  if (condAST->op < A_EQ || condAST->op > A_GE)
    fatal("Bad comparison operator");
  rparen();

  // Get the AST for the compound statement
  bodyAST = compound_statement();

  // Build and return the AST for this statement:
  // condition on the left, body on the right
  return (mkastnode(A_WHILE, P_NONE, condAST, NULL, bodyAST, 0));
}

// for_statement: 'for' '(' preop_statement ';'
//                          true_false_expression ';'
//                          postop_statement ')' compound_statement  ;
//
// preop_statement:  statement          (for now)
// postop_statement: statement          (for now)
//
// Parse a FOR statement
// and return its AST
static struct ASTnode *for_statement(void) {
  struct ASTnode *condAST, *bodyAST;
  struct ASTnode *preopAST, *postopAST;
  struct ASTnode *tree;

  // Ensure we have 'for' '('
  match(T_FOR, "for");
  lparen();

  // Get the pre_op statement and the ';'
  preopAST = single_statement();
  semi();

  // Get the condition and the ';'
  condAST = binexpr(0);
  if (condAST->op < A_EQ || condAST->op > A_GE)
    fatal("Bad comparison operator");
  semi();

  // Get the post_op statement and the ')'
  postopAST = single_statement();
  rparen();

  // Get the compound statement which is the body
  bodyAST = compound_statement();

  // For now, all four sub-trees have to be non-NULL.
  // Later on, we'll change the semantics for when some are missing
  // NOTE(review): nothing here checks for NULL, although
  // single_statement() returns NULL for a declaration and
  // compound_statement() can return NULL as well.

  // Glue the compound statement and the postop tree
  tree = mkastnode(A_GLUE, P_NONE, bodyAST, NULL, postopAST, 0);

  // Make a WHILE loop with the condition and this new body
  tree = mkastnode(A_WHILE, P_NONE, condAST, NULL, tree, 0);

  // And glue the preop tree to the A_WHILE tree
  return (mkastnode(A_GLUE, P_NONE, preopAST, NULL, tree, 0));
}

// Parse a single statement
// and return its AST.
// The statement kind is chosen from the current token alone.
static struct ASTnode *single_statement(void) {
  switch (Token.token) {
    case T_PRINT:
      return (print_statement());
    case T_CHAR:
    case T_INT:
      var_declaration();
      return (NULL);		// No AST generated here
    case T_IDENT:
      return (assignment_statement());
    case T_IF:
      return (if_statement());
    case T_WHILE:
      return (while_statement());
    case T_FOR:
      return (for_statement());
    default:
      fatald("Syntax error, token", Token.token);
  }
}

// Parse a compound statement
// and return its AST.
// The result is NULL when no statement in the block produced a tree.
// NOTE(review): single_statement() is called before '}' is tested,
// so an empty "{ }" is reported as a syntax error even though the
// grammar comment at the top of this file lists the empty case.
struct ASTnode *compound_statement(void) {
  struct ASTnode *left = NULL;
  struct ASTnode *tree;

  // Require a left curly bracket
  lbrace();

  while (1) {
    // Parse a single statement
    tree = single_statement();

    // Some statements must be followed by a semicolon
    if (tree != NULL && (tree->op == A_PRINT || tree->op == A_ASSIGN))
      semi();

    // For each new tree, either save it in left
    // if left is empty, or glue the left and the
    // new tree together
    if (tree != NULL) {
      if (left == NULL)
	left = tree;
      else
	left = mkastnode(A_GLUE, P_NONE, left, NULL, tree, 0);
    }
    // When we hit a right curly bracket,
    // skip past it and return the AST
    if (Token.token == T_RBRACE) {
      rbrace();
      return (left);
    }
  }
}

================================================
FILE: 12_Types_pt1/sym.c
================================================
#include "defs.h"
#include "data.h"
#include "decl.h"

// Symbol table functions
// Copyright (c) 2019 Warren Toomey, GPL3

static int Globs = 0;		// Position of next free global symbol slot

// Determine if the symbol s is in the global symbol table.
// Return its slot position or -1 if not found. int findglob(char *s) { int i; for (i = 0; i < Globs; i++) { if (*s == *Gsym[i].name && !strcmp(s, Gsym[i].name)) return (i); } return (-1); } // Get the position of a new global symbol slot, or die // if we've run out of positions. static int newglob(void) { int p; if ((p = Globs++) >= NSYMBOLS) fatal("Too many global symbols"); return (p); } // Add a global symbol to the symbol table. // Also set up its type and structural type. // Return the slot number in the symbol table int addglob(char *name, int type, int stype) { int y; // If this is already in the symbol table, return the existing slot if ((y = findglob(name)) != -1) return (y); // Otherwise get a new slot, fill it in and // return the slot number y = newglob(); Gsym[y].name = strdup(name); Gsym[y].type = type; Gsym[y].stype = stype; return (y); } ================================================ FILE: 12_Types_pt1/tests/input01 ================================================ void main() { print 12 * 3; print 18 - 2 * 4; print 1 + 2 + 9 - 5/2 + 3*5; } ================================================ FILE: 12_Types_pt1/tests/input02 ================================================ void main() { int fred; int jim; fred= 5; jim= 12; print fred + jim; } ================================================ FILE: 12_Types_pt1/tests/input03 ================================================ void main() { int x; x= 1; print x; x= x + 1; print x; x= x + 1; print x; x= x + 1; print x; x= x + 1; print x; } ================================================ FILE: 12_Types_pt1/tests/input04 ================================================ void main() { int x; x= 7 < 9; print x; x= 7 <= 9; print x; x= 7 != 9; print x; x= 7 == 7; print x; x= 7 >= 7; print x; x= 7 <= 7; print x; x= 9 > 7; print x; x= 9 >= 7; print x; x= 9 != 7; print x; } ================================================ FILE: 12_Types_pt1/tests/input05 ================================================ void main() { 
int i; int j; i=6; j=12; if (i < j) { print i; } else { print j; } } ================================================ FILE: 12_Types_pt1/tests/input06 ================================================ void main() { int i; i=1; while (i <= 10) { print i; i= i + 1; } } ================================================ FILE: 12_Types_pt1/tests/input07 ================================================ void main() { int i; for (i= 1; i <= 10; i= i + 1) { print i; } } ================================================ FILE: 12_Types_pt1/tests/input08 ================================================ void main() { int i; for (i= 1; i <= 10; i= i + 1) { print i; } } ================================================ FILE: 12_Types_pt1/tests/input09 ================================================ void main() { int i; for (i= 1; i <= 10; i= i + 1) { print i; } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { print 2 * b - a; } } ================================================ FILE: 12_Types_pt1/tests/input10 ================================================ void main() { int i; char j; j= 20; print j; i= 10; print i; for (i= 1; i <= 5; i= i + 1) { print i; } for (j= 253; j != 2; j= j + 1) { print j; } } ================================================ FILE: 12_Types_pt1/tests/mktests ================================================ #!/bin/sh # Make the output files for each test if [ ! -f ../comp1 ] then echo "Need to build ../comp1 first!"; exit 1 fi for i in input* do if [ ! -f "out.$i" ] then ../comp1 $i cc -o out out.s ./out > out.$i rm -f out out.s fi done ================================================ FILE: 12_Types_pt1/tests/mktestsn ================================================ #!/bin/sh # Make the output files for each test if [ ! -f ../compn ] then echo "Need to build ../compn first!"; exit 1 fi for i in input* do if [ ! 
-f "out.$i" ] then ../compn $i nasm -f elf64 out.s cc -no-pie -o out out.o ./out > out.$i rm -f out out.s fi done ================================================ FILE: 12_Types_pt1/tests/out.input01 ================================================ 36 10 25 ================================================ FILE: 12_Types_pt1/tests/out.input02 ================================================ 17 ================================================ FILE: 12_Types_pt1/tests/out.input03 ================================================ 1 2 3 4 5 ================================================ FILE: 12_Types_pt1/tests/out.input04 ================================================ 1 1 1 1 1 1 1 1 1 ================================================ FILE: 12_Types_pt1/tests/out.input05 ================================================ 6 ================================================ FILE: 12_Types_pt1/tests/out.input06 ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 12_Types_pt1/tests/out.input07 ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 12_Types_pt1/tests/out.input08 ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 12_Types_pt1/tests/out.input09 ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 12_Types_pt1/tests/out.input10 ================================================ 20 10 1 2 3 4 5 253 254 255 0 1 ================================================ FILE: 12_Types_pt1/tests/runtests ================================================ #!/bin/sh # Run each test and compare # against known good output if [ ! -f ../comp1 ] then echo "Need to build ../comp1 first!"; exit 1 fi for i in input* do if [ ! -f "out.$i" ] then echo "Can't run test on $i, no output file!" 
else echo -n $i ../comp1 $i cc -o out out.s ./out > trial.$i cmp -s "out.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo else echo ": OK" fi rm -f out out.s "trial.$i" fi done ================================================ FILE: 12_Types_pt1/tests/runtestsn ================================================ #!/bin/sh # Run each test and compare # against known good output if [ ! -f ../compn ] then echo "Need to build ../compn first!"; exit 1 fi for i in input* do if [ ! -f "out.$i" ] then echo "Can't run test on $i, no output file!" else echo -n $i ../compn $i nasm -f elf64 out.s cc -no-pie -o out out.o ./out > trial.$i cmp -s "out.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo else echo ": OK" fi rm -f out out.o out.s "trial.$i" fi done ================================================ FILE: 12_Types_pt1/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->type = type; n->left = left; n->mid = mid; n->right = right; n->v.intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, int intvalue) { return (mkastnode(op, type, NULL, NULL, NULL, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, int intvalue) { return (mkastnode(op, type, left, NULL, NULL, intvalue)); } ================================================ FILE: 12_Types_pt1/types.c 
================================================ #include "defs.h" #include "data.h" #include "decl.h" // Types and type handling // Copyright (c) 2019 Warren Toomey, GPL3 // Given two primitive types, // return true if they are compatible, // false otherwise. Also return either // zero or an A_WIDEN operation if one // has to be widened to match the other. // If onlyright is true, only widen left to right. int type_compatible(int *left, int *right, int onlyright) { // Voids not compatible with anything if ((*left == P_VOID) || (*right == P_VOID)) return (0); // Same types, they are compatible if (*left == *right) { *left = *right = 0; return (1); } // Widen P_CHARs to P_INTs as required if ((*left == P_CHAR) && (*right == P_INT)) { *left = A_WIDEN; *right = 0; return (1); } if ((*left == P_INT) && (*right == P_CHAR)) { if (onlyright) return (0); *left = 0; *right = A_WIDEN; return (1); } // Anything remaining is compatible *left = *right = 0; return (1); } ================================================ FILE: 13_Functions_pt2/Makefile ================================================ SRCS= cg.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c sym.c tree.c types.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c sym.c tree.c types.c comp1: $(SRCS) cc -o comp1 -g $(SRCS) compn: $(SRCN) cc -o compn -g $(SRCN) clean: rm -f comp1 compn *.o *.s out test: comp1 tests/runtests (cd tests; chmod +x runtests; ./runtests) test14: comp1 tests/input14 lib/printint.c ./comp1 tests/input14 cc -o out out.s lib/printint.c ./out testn: compn tests/runtestsn (cd tests; chmod +x runtestsn; ./runtestsn) test14n: compn tests/input14 lib/printint.c ./compn tests/input14 nasm -f elf64 out.s cc -no-pie -o out lib/printint.c out.o ./out ================================================ FILE: 13_Functions_pt2/Readme.md ================================================ # Part 13: Functions, part 2 In this part of our compiler writing journey, I want to add the ability to call 
functions and return a value. Specifically: + define a function, which we already have, + call a function with a single value which for now cannot be used, + return a value from a function, + use a function call as both a statement and also an expression, and + ensure that void functions never return a value and non-void functions must return a value. I've just got this working. I found that I spent most of my time dealing with types. So, on with the writeup. ## New Keywords and Tokens I've been using 8-byte (64-bit) `int`s in the compiler so far, but I've realised that Gcc treats `int`s as four bytes (32 bits) wide. Therefore, I've decided to introduce the `long` type. So now: + `char` is one byte wide + `int` is four bytes (32 bits) wide + `long` is eight bytes (64 bits) wide We also need the ability to 'return', so we have new keywords 'long' and 'return', and associated tokens T_LONG and T_RETURN. ## Parsing Function Calls For now, the BNF syntax that I'm using for a function call is: ``` function_call: identifier '(' expression ')' ; ``` The function has a name followed by a pair of parentheses. Inside the parentheses we must have exactly one argument. I want this to be used as both an expression and also as a standalone statement. So we'll start with the function call parser, `funccall()` in `expr.c`. When we get called, the identifier has already been scanned in and the function's name is in the `Text` global variable: ```c // Parse a function call with a single expression // argument and return its AST struct ASTnode *funccall(void) { struct ASTnode *tree; int id; // Check that the identifier has been defined, // then make a leaf node for it. XXX Add structural type test if ((id = findglob(Text)) == -1) { fatals("Undeclared function", Text); } // Get the '(' lparen(); // Parse the following expression tree = binexpr(0); // Build the function call AST node. Store the // function's return type as this node's type. 
// Also record the function's symbol-id tree = mkastunary(A_FUNCCALL, Gsym[id].type, tree, id); // Get the ')' rparen(); return (tree); } ``` I've left a reminder comment: *Add structural type test*. When a function or a variable is declared, the symbol table is marked with the structural type S_FUNCTION and S_VARIABLE, respectively. I should add code here to confirm the identifier is really an S_FUNCTION. We build a new unary AST node, A_FUNCCALL. The child is the single expression to pass as the argument. We store the function's symbol-id in the node, and we also record the function's return type. ## But I Don't Want That Token Any More! There is a parsing problem. We have to distinguish between: ``` x= fred + jim; x= fred(5) + jim; ``` We need to look ahead one token to see if there is a '('. If there is, we have a function call. But by doing so, we lose the existing token. To solve this problem, I've modified the scanner so that we can put back an unwanted token: this will be returned when we get the next token instead of a brand-new token. The new code in `scan.c` is: ```c // A pointer to a rejected token static struct token *Rejtoken = NULL; // Reject the token that we just scanned void reject_token(struct token *t) { if (Rejtoken != NULL) fatal("Can't reject token twice"); Rejtoken = t; } // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. int scan(struct token *t) { int c, tokentype; // If we have any rejected token, return it if (Rejtoken != NULL) { t = Rejtoken; Rejtoken = NULL; return (1); } // Continue on with the normal scanning ... } ``` ## Calling a Function as an Expression So now we can look at where, in `expr.c` we need to differentiate between a variable name and a function call: it's in `primary()`. The new code is: ```c // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; int id; switch (Token.token) { ... 
case T_IDENT: // This could be a variable or a function call. // Scan in the next token to find out scan(&Token); // It's a '(', so a function call if (Token.token == T_LPAREN) return (funccall()); // Not a function call, so reject the new token reject_token(&Token); // Continue on with normal variable parsing ... } ``` ## Calling a Function as a Statement We have essentially the same problem when we try to call a function as a statement. Here, we have to distinguish between: ``` fred = 2; fred(18); ``` Thus, the new statement code in `stmt.c` is similar to the above: ```c // Parse an assignment statement and return its AST static struct ASTnode *assignment_statement(void) { struct ASTnode *left, *right, *tree; int lefttype, righttype; int id; // Ensure we have an identifier ident(); // This could be a variable or a function call. // If next token is '(', it's a function call if (Token.token == T_LPAREN) return (funccall()); // Not a function call, on with an assignment then! ... } ``` We can get away with not rejecting the "unwanted" token here, because there *has* to be either an '=' or a '(' next: we can write the parser code knowing this is true. ## Parsing a Return Statement In BNF, our return statement looks like: ``` return_statement: 'return' '(' expression ')' ; ``` The parsing is easy: 'return', '(', call `binexpr()`, ')', done! What is more difficult is the checking of the type, and if we even should be allowed to return at all. Somehow we need to know which function we are actually in, when we get to a return statement. I've added a global variable in `data.h`: ```c extern_ int Functionid; // Symbol id of the current function ``` and this is set up in `function_declaration()` in `decl.c`: ```c struct ASTnode *function_declaration(void) { ... // Add the function to the symbol table // and set the Functionid global nameslot = addglob(Text, type, S_FUNCTION, endlabel); Functionid = nameslot; ... 
} ``` With `Functionid` set up each time we enter a function declaration, we can get back to parsing and checking the semantics of a return statement. The new code is `return_statement()` in `stmt.c`: ```c // Parse a return statement and return its AST static struct ASTnode *return_statement(void) { struct ASTnode *tree; int returntype, functype; // Can't return a value if function returns P_VOID if (Gsym[Functionid].type == P_VOID) fatal("Can't return from a void function"); // Ensure we have 'return' '(' match(T_RETURN, "return"); lparen(); // Parse the following expression tree = binexpr(0); // Ensure this is compatible with the function's type returntype = tree->type; functype = Gsym[Functionid].type; if (!type_compatible(&returntype, &functype, 1)) fatal("Incompatible types"); // Widen the left if required. if (returntype) tree = mkastunary(returntype, functype, tree, 0); // Add on the A_RETURN node tree = mkastunary(A_RETURN, P_NONE, tree, 0); // Get the ')' rparen(); return (tree); } ``` We have a new A_RETURN AST node that returns the expression in the child tree. We use `type_compatible()` to ensure the expression matches the return type, and widen it if required. Finally, we see if the function was actually declared `void`. If it was, we cannot do a return statement in this function. ## Types Revisited I introduced `type_compatible()` in the last part of the journey and said that I wanted to refactor it. Now that I've added the `long` type, it's become necessary to do this. So here is the new version in `types.c`. You may want to revisit the commentary on it from the last part of the journey. ```c // Given two primitive types, // return true if they are compatible, // false otherwise. Also return either // zero or an A_WIDEN operation if one // has to be widened to match the other. // If onlyright is true, only widen left to right. 
int type_compatible(int *left, int *right, int onlyright) { int leftsize, rightsize; // Same types, they are compatible if (*left == *right) { *left = *right = 0; return (1); } // Get the sizes for each type leftsize = genprimsize(*left); rightsize = genprimsize(*right); // Types with zero size are // not compatible with anything if ((leftsize == 0) || (rightsize == 0)) return (0); // Widen types as required if (leftsize < rightsize) { *left = A_WIDEN; *right = 0; return (1); } if (rightsize < leftsize) { if (onlyright) return (0); *left = 0; *right = A_WIDEN; return (1); } // Anything remaining is the same size // and thus compatible *left = *right = 0; return (1); } ``` I now call `genprimsize()` in the generic code generator which calls `cgprimsize()` in `cg.c` to get the size of the various types: ```c // Array of type sizes in P_XXX order. // 0 means no size. P_NONE, P_VOID, P_CHAR, P_INT, P_LONG static int psize[] = { 0, 0, 1, 4, 8 }; // Given a P_XXX type value, return the // size of a primitive type in bytes. int cgprimsize(int type) { // Check the type is valid if (type < P_NONE || type > P_LONG) fatal("Bad type in cgprimsize()"); return (psize[type]); } ``` This makes the type sizes platform dependent; other platforms can choose different type sizes. It probably means my code to mark a P_INTLIT as a `char` not an `int` will need to be refactored: ```c if ((Token.intvalue) >= 0 && (Token.intvalue < 256)) ``` ## Ensuring Non-Void Functions Return a Value We've just ensured that void functions can't return a value. Now how do we ensure that non-void functions will always return a value? To do this, we have to ensure that the last statement in the function is a return statement. Down at the bottom of `function_declaration()` in `decl.c`, I now have: ```c struct ASTnode *tree, *finalstmt; ... 
// If the function type isn't P_VOID, check that // the last AST operation in the compound statement // was a return statement if (type != P_VOID) { finalstmt = (tree->op == A_GLUE) ? tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); } ``` The wrinkle is that, if the function has exactly one statement, there is no A_GLUE AST node and there is only a left child in the tree which is the compound statement. At this point, we can: + declare a function, store its type, and record we are in that function + make a function call (either as an expression or a statement) with a single argument + return from a non-void function (only), and enforce that the last statement in a non-void function is a return statement + check and widen the expression being returned so as to match the function's type definition Our AST tree now has A_RETURN and A_FUNCCALL nodes to deal with the return statements and function calls. Let's now see how they generate the assembly output. ## Why a Single Argument? You might, at this point, be asking: why do you want to have a single function argument, especially as that argument isn't available to the function? The answer is that I want to replace the `print x;` statement in our language with a real function call: `printint(x);`. To do this, we can compile a real C function `printint()` and link it with the output from our compiler. ## The New AST Nodes There is not much new code in `genAST()` in `gen.c`: ```c case A_RETURN: cgreturn(leftreg, Functionid); return (NOREG); case A_FUNCCALL: return (cgcall(leftreg, n->v.id)); ``` A_RETURN doesn't return a value as it's not an expression. A_FUNCCALL is an expression of course. ## Changes in the x86-64 Output All the new code generation work is in the platform-specific code generator, `cg.c`. Let's have a look at this. 
### New Types Firstly, we now have `char`, `int` and `long`, and the x86-64 requires us to use the right register names for each type: ```c // List of available registers and their names. static int freereg[4]; static char *reglist[4] = { "%r8", "%r9", "%r10", "%r11" }; static char *breglist[4] = { "%r8b", "%r9b", "%r10b", "%r11b" }; static char *dreglist[4] = { "%r8d", "%r9d", "%r10d", "%r11d" }; ``` ### Defining, Loading and Storing Variables Variables now have three possible types. The code we generate needs to reflect this. Here are the changed functions: ```c // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Gsym[id].type); fprintf(Outfile, "\t.comm\t%s,%d,%d\n", Gsym[id].name, typesize, typesize); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Print out the code to initialise it switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzbq\t%s(\%%rip), %s\n", Gsym[id].name, reglist[r]); break; case P_INT: fprintf(Outfile, "\tmovzbl\t%s(\%%rip), %s\n", Gsym[id].name, reglist[r]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s(\%%rip), %s\n", Gsym[id].name, reglist[r]); break; default: fatald("Bad type in cgloadglob:", Gsym[id].type); } return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %s(\%%rip)\n", breglist[r], Gsym[id].name); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %s(\%%rip)\n", dreglist[r], Gsym[id].name); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %s(\%%rip)\n", reglist[r], Gsym[id].name); break; default: fatald("Bad type in cgloadglob:", Gsym[id].type); } return (r); } ``` ### Function Calls To call a function with one argument, we need to copy the register with the argument value into `%rdi`. 
On return, we need to copy the returned value from `%rax` into the register that will have this new value: ```c // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { // Get a new register int outr = alloc_register(); fprintf(Outfile, "\tmovq\t%s, %%rdi\n", reglist[r]); fprintf(Outfile, "\tcall\t%s\n", Gsym[id].name); fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]); free_register(r); return (outr); } ``` ### Function Returns To return from a function from any point in the function's execution, we need to jump to a label right at the bottom of the function. I've added code in `function_declaration()` to make a label and store it in the symbol table. As the return value leaves in the `%rax` register, we need to copy into this register before we jump to the end label: ```c // Generate code to return a value from a function void cgreturn(int reg, int id) { // Generate code depending on the function's type switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzbl\t%s, %%eax\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %%eax\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", Gsym[id].type); } cgjump(Gsym[id].endlabel); } ``` ### Changes to the Function Preamble and Postamble There are no changes to the preamble, but previously we were setting `%rax` to zero on the return. We have to remove this bit of code: ```c // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Gsym[id].endlabel); fputs("\tpopq %rbp\n" "\tret\n", Outfile); } ``` ### Changes to the Initial Preamble Up to now, I've been manually inserting an assembly version of `printint()` at the beginning of our assembly output. We no longer need this, as we can compile a real C function `printint()` and link it with the output from our compiler. 
## Testing the Changes There is a new test program, `tests/input14`: ```c int fred() { return(20); } void main() { int result; printint(10); result= fred(15); printint(result); printint(fred(15)+10); return(0); } ``` We firstly print 10, then call `fred()` which returns 20 and print this out. Finally, we call `fred()` again, add its return value to 10 and print out 30. This demonstrates function calls with a single value, and function returns. Here are the test results: ``` cc -o comp1 -g cg.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c sym.c tree.c types.c ./comp1 tests/input14 cc -o out out.s lib/printint.c ./out; true 10 20 30 ``` Note that we link our assembly output with `lib/printint.c`: ```c #include <stdio.h> void printint(long x) { printf("%ld\n", x); } ``` ## So Nearly C Now With this change, we can do this: ``` $ cat lib/printint.c tests/input14 > input14.c $ cc -o out input14.c $ ./out 10 20 30 ``` In other words, our language is enough of a subset of C that we can compile it with other C functions to get an executable. Excellent! ## Conclusion and What's Next We've just added a simple version of function calls, function returns plus a new data type. As I expected, it wasn't trivial but I think the changes are mostly sensible. In the next part of our compiler writing journey, we will port our compiler to a new hardware platform, the ARM CPU on a Raspberry Pi. [Next step](../14_ARM_Platform/Readme.md) ================================================ FILE: 13_Functions_pt2/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. 
// We need a list of byte and doubleword registers, too static int freereg[4]; static char *reglist[4] = { "%r8", "%r9", "%r10", "%r11" }; static char *breglist[4] = { "%r8b", "%r9b", "%r10b", "%r11b" }; static char *dreglist[4] = { "%r8d", "%r9d", "%r10d", "%r11d" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Gsym[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, @function\n" "%s:\n" "\tpushq\t%%rbp\n" "\tmovq\t%%rsp, %%rbp\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Gsym[id].endlabel); fputs("\tpopq %rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]); return (r); } // Load a value from a variable into a register. 
// Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Print out the code to initialise it switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzbq\t%s(\%%rip), %s\n", Gsym[id].name, reglist[r]); break; case P_INT: fprintf(Outfile, "\tmovzbl\t%s(\%%rip), %s\n", Gsym[id].name, reglist[r]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s(\%%rip), %s\n", Gsym[id].name, reglist[r]); break; default: fatald("Bad type in cgloadglob:", Gsym[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); free_register(r2); return (r1); } // Call printint() with the given register void cgprintint(int r) { fprintf(Outfile, "\tmovq\t%s, %%rdi\n", reglist[r]); fprintf(Outfile, "\tcall\tprintint\n"); free_register(r); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { // Get a new register int outr = alloc_register(); fprintf(Outfile, "\tmovq\t%s, %%rdi\n", reglist[r]); fprintf(Outfile, "\tcall\t%s\n", Gsym[id].name); 
fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]); free_register(r); return (outr); } // Store a register's value into a variable int cgstorglob(int r, int id) { switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %s(\%%rip)\n", breglist[r], Gsym[id].name); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %s(\%%rip)\n", dreglist[r], Gsym[id].name); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %s(\%%rip)\n", reglist[r], Gsym[id].name); break; default: fatald("Bad type in cgloadglob:", Gsym[id].type); } return (r); } // Array of type sizes in P_XXX order. // 0 means no size. static int psize[] = { 0, 0, 1, 4, 8 }; // Given a P_XXX type value, return the // size of a primitive type in bytes. int cgprimsize(int type) { // Check the type is valid if (type < P_NONE || type > P_LONG) fatal("Bad type in cgprimsize()"); return (psize[type]); } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Gsym[id].type); fprintf(Outfile, "\t.comm\t%s,%d,%d\n", Gsym[id].name, typesize, typesize); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { // Generate code depending on the function's type switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzbl\t%s, %%eax\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %%eax\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", Gsym[id].type); } cgjump(Gsym[id].endlabel); } ================================================ FILE: 13_Functions_pt2/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for 
x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. // We need a list of byte and doubleword registers, too static int freereg[4]; static char *reglist[4] = { "r8", "r9", "r10", "r11" }; static char *breglist[4] = { "r8b", "r9b", "r10b", "r11b" }; static char *dreglist[4] = { "r8d", "r9d", "r10d", "r11d" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\textern\tprintint\n", Outfile); } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Gsym[id].name; fprintf(Outfile, "\tsection\t.text\n" "\tglobal\t%s\n" "%s:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n", name, name); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Gsym[id].endlabel); fputs("\tpop rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Load a value from a variable into a register. 
// Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Print out the code to initialise it switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], Gsym[id].name); break; case P_INT: fprintf(Outfile, "\txor\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tmov\t%s, dword [%s]\n", dreglist[r], Gsym[id].name); break; case P_LONG: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], Gsym[id].name); break; default: fatald("Bad type in cgloadglob:", Gsym[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } // Call printint() with the given register void cgprintint(int r) { fprintf(Outfile, "\tmov\trdi, %s\n", reglist[r]); fprintf(Outfile, "\tcall\tprintint\n"); free_register(r); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { // Get a new register int outr = alloc_register(); fprintf(Outfile, "\tmov\trdi, %s\n", reglist[r]); 
fprintf(Outfile, "\tcall\t%s\n", Gsym[id].name); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[outr]); free_register(r); return (outr); } // Store a register's value into a variable int cgstorglob(int r, int id) { switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], %s\n", Gsym[id].name, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", Gsym[id].name, dreglist[r]); break; case P_LONG: fprintf(Outfile, "\tmov\t[%s], %s\n", Gsym[id].name, reglist[r]); break; default: fatald("Bad type in cgloadglob:", Gsym[id].type); } return (r); } // Array of type sizes in P_XXX order. // 0 means no size. static int psize[] = { 0, 0, 1, 4, 8 }; // Given a P_XXX type value, return the // size of a primitive type in bytes. int cgprimsize(int type) { // Check the type is valid if (type < P_NONE || type > P_LONG) fatal("Bad type in cgprimsize()"); return (psize[type]); } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Gsym[id].type); fprintf(Outfile, "\tcommon\t%s %d:%d\n", Gsym[id].name, typesize, typesize); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { // Generate code depending on the function's type switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzx\teax, %s\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmov\teax, %s\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", Gsym[id].type); } cgjump(Gsym[id].endlabel); } ================================================ FILE: 13_Functions_pt2/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 
2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Putback; // Character put back by scanner extern_ int Functionid; // Symbol id of the current function extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ struct token Token; // Last token scanned extern_ char Text[TEXTLEN + 1]; // Last identifier scanned extern_ struct symtable Gsym[NSYMBOLS]; // Global symbol table ================================================ FILE: 13_Functions_pt2/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 // Parse the current token and // return a primitive type enum value int parse_type(int t) { if (t == T_CHAR) return (P_CHAR); if (t == T_INT) return (P_INT); if (t == T_LONG) return (P_LONG); if (t == T_VOID) return (P_VOID); fatald("Illegal type, token", t); } // variable_declaration: type identifier ';' ; // // Parse the declaration of a variable void var_declaration(void) { int id, type; // Get the type of the variable, then the identifier type = parse_type(Token.token); scan(&Token); ident(); // Text now has the identifier's name. 
// Add it as a known identifier // and generate its space in assembly id = addglob(Text, type, S_VARIABLE, 0); genglobsym(id); // Get the trailing semicolon semi(); } // // function_declaration: type identifier '(' ')' compound_statement ; // // Parse the declaration of a simplistic function struct ASTnode *function_declaration(void) { struct ASTnode *tree, *finalstmt; int nameslot, type, endlabel; // Get the type of the variable, then the identifier type = parse_type(Token.token); scan(&Token); ident(); // Get a label-id for the end label, add the function // to the symbol table, and set the Functionid global // to the function's symbol-id endlabel = genlabel(); nameslot = addglob(Text, type, S_FUNCTION, endlabel); Functionid = nameslot; // Scan in the parentheses lparen(); rparen(); // Get the AST tree for the compound statement tree = compound_statement(); // If the function type isn't P_VOID, check that // the last AST operation in the compound statement // was a return statement if (type != P_VOID) { finalstmt = (tree->op == A_GLUE) ? 
tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); } // Return an A_FUNCTION node which has the function's nameslot // and the compound statement sub-tree return (mkastunary(A_FUNCTION, type, tree, nameslot)); } ================================================ FILE: 13_Functions_pt2/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c void reject_token(struct token *t); int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, int intvalue); struct ASTnode *mkastleaf(int op, int type, int intvalue); struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, int intvalue); // gen.c int genlabel(void); int genAST(struct ASTnode *n, int reg, int parentASTop); void genpreamble(); void genpostamble(); void genfreeregs(); void genprintint(int reg); void genglobsym(int id); int genprimsize(int type); void genreturn(int reg, int id); // cg.c void freeall_registers(void); void cgpreamble(); void cgfuncpreamble(int id); void cgfuncpostamble(int id); int cgloadint(int value, int type); int cgloadglob(int id); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdiv(int r1, int r2); void cgprintint(int r); int cgcall(int r, int id); int cgstorglob(int r, int id); void cgglobsym(int id); int cgcompare_and_set(int ASTop, int r1, int r2); int cgcompare_and_jump(int ASTop, int r1, int r2, int label); void cglabel(int l); void cgjump(int l); int cgwiden(int r, int oldtype, int newtype); int cgprimsize(int type); void cgreturn(int reg, int id); // expr.c struct ASTnode *funccall(void); struct ASTnode *binexpr(int ptp); // stmt.c struct ASTnode *compound_statement(void); // misc.c void match(int t, char *what); void semi(void); void lbrace(void); void rbrace(void); void lparen(void); 
void rparen(void); void ident(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c int findglob(char *s); int addglob(char *name, int type, int stype, int endlabel); // decl.c void var_declaration(void); struct ASTnode *function_declaration(void); // types.c int type_compatible(int *left, int *right, int onlyright); ================================================ FILE: 13_Functions_pt2/defs.h ================================================ #include #include #include #include // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 #define TEXTLEN 512 // Length of symbols in input #define NSYMBOLS 1024 // Number of symbol table entries // Token types enum { T_EOF, // Operators T_PLUS, T_MINUS, T_STAR, T_SLASH, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, // Type keywords T_VOID, T_CHAR, T_INT, T_LONG, // Structural tokens T_INTLIT, T_SEMI, T_ASSIGN, T_IDENT, T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, // Other keywords T_PRINT, T_IF, T_ELSE, T_WHILE, T_FOR, T_RETURN }; // Token structure struct token { int token; // Token type, from the enum list above int intvalue; // For T_INTLIT, the integer value }; // AST node types. 
The first few line up // with the related tokens enum { A_ADD = 1, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_INTLIT, A_IDENT, A_LVIDENT, A_ASSIGN, A_PRINT, A_GLUE, A_IF, A_WHILE, A_FUNCTION, A_WIDEN, A_RETURN, A_FUNCCALL }; // Primitive types enum { P_NONE, P_VOID, P_CHAR, P_INT, P_LONG }; // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; union { // For A_INTLIT, the integer value int intvalue; // For A_IDENT, the symbol slot number int id; // For A_FUNCTION, the symbol slot number } v; // For A_FUNCCALL, the symbol slot number }; #define NOREG -1 // Use NOREG when the AST generation // functions have no register to return // Structural types enum { S_VARIABLE, S_FUNCTION }; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol int stype; // Structural type for the symbol int endlabel; // For S_FUNCTIONs, the end label }; ================================================ FILE: 13_Functions_pt2/expr.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of expressions // Copyright (c) 2019 Warren Toomey, GPL3 // Parse a function call with a single expression // argument and return its AST struct ASTnode *funccall(void) { struct ASTnode *tree; int id; // Check that the identifier has been defined, // then make a leaf node for it. XXX Add structural type test if ((id = findglob(Text)) == -1) { fatals("Undeclared function", Text); } // Get the '(' lparen(); // Parse the following expression tree = binexpr(0); // Build the function call AST node. Store the // function's return type as this node's type. 
// Also record the function's symbol-id tree = mkastunary(A_FUNCCALL, Gsym[id].type, tree, id); // Get the ')' rparen(); return (tree); } // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; int id; switch (Token.token) { case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. // Make it a P_CHAR if it's within the P_CHAR range if ((Token.intvalue) >= 0 && (Token.intvalue < 256)) n = mkastleaf(A_INTLIT, P_CHAR, Token.intvalue); else n = mkastleaf(A_INTLIT, P_INT, Token.intvalue); break; case T_IDENT: // This could be a variable or a function call. // Scan in the next token to find out scan(&Token); // It's a '(', so a function call if (Token.token == T_LPAREN) return (funccall()); // Not a function call, so reject the new token reject_token(&Token); // Check that the variable exists. XXX Add structural type test id = findglob(Text); if (id == -1) fatals("Unknown variable", Text); // Make a leaf AST node for it n = mkastleaf(A_IDENT, Gsym[id].type, id); break; default: fatald("Syntax error, token", Token.token); } // Scan in the next token and return the leaf node scan(&Token); return (n); } // Convert a binary operator token into an AST operation. // We rely on a 1:1 mapping from token to AST operation static int arithop(int tokentype) { if (tokentype > T_EOF && tokentype < T_INTLIT) return (tokentype); fatald("Syntax error, token", tokentype); } // Operator precedence for each token. Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 10, // T_EOF, T_PLUS, T_MINUS 20, 20, // T_STAR, T_SLASH 30, 30, // T_EQ, T_NE 40, 40, 40, 40 // T_LT, T_GT, T_LE, T_GE }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec = OpPrec[tokentype]; if (prec == 0) fatald("Syntax error, token", tokentype); return (prec); } // Return an AST tree whose root is a binary operator. 
// Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; int lefttype, righttype; int tokentype; // Get the primary tree on the left. // Fetch the next token at the same time. left = primary(); // If we hit a semicolon or ')', return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN) return (left); // While the precedence of this token is // more than that of the previous token precedence while (op_precedence(tokentype) > ptp) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Ensure the two types are compatible. lefttype = left->type; righttype = right->type; if (!type_compatible(&lefttype, &righttype, 0)) fatal("Incompatible types"); // Widen either side if required. type vars are A_WIDEN now if (lefttype) left = mkastunary(lefttype, right->type, left, 0); if (righttype) right = mkastunary(righttype, left->type, right, 0); // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. left = mkastnode(arithop(tokentype), left->type, left, NULL, right, 0); // Update the details of the current token. 
// If we hit a semicolon or ')', return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN) return (left); } // Return the tree we have when the precedence // is the same or lower return (left); } ================================================ FILE: 13_Functions_pt2/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number int genlabel(void) { static int id = 1; return (id++); } // Generate the code for an IF statement // and an optional ELSE clause static int genIF(struct ASTnode *n) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. // When there is no ELSE clause, Lfalse _is_ // the ending label! Lfalse = genlabel(); if (n->right) Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. // We cheat by sending the Lfalse label as a register. genAST(n->left, Lfalse, n->op); genfreeregs(); // Generate the true compound statement genAST(n->mid, NOREG, n->op); genfreeregs(); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOREG, n->op); genfreeregs(); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement // and an optional ELSE clause static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = genlabel(); Lend = genlabel(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. // We cheat by sending the Lfalse label as a register. 
genAST(n->left, Lend, n->op); genfreeregs(); // Generate the compound statement for the body genAST(n->right, NOREG, n->op); genfreeregs(); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Given an AST, the register (if any) that holds // the previous rvalue, and the AST op of the parent, // generate assembly code recursively. // Return the register id with the tree's final value int genAST(struct ASTnode *n, int reg, int parentASTop) { int leftreg, rightreg; // We now have specific AST node handling at the top switch (n->op) { case A_IF: return (genIF(n)); case A_WHILE: return (genWHILE(n)); case A_GLUE: // Do each child statement, and free the // registers after each child genAST(n->left, NOREG, n->op); genfreeregs(); genAST(n->right, NOREG, n->op); genfreeregs(); return (NOREG); case A_FUNCTION: // Generate the function's preamble before the code cgfuncpreamble(n->v.id); genAST(n->left, NOREG, n->op); cgfuncpostamble(n->v.id); return (NOREG); } // General AST node handling below // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, NOREG, n->op); if (n->right) rightreg = genAST(n->right, leftreg, n->op); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdiv(leftreg, rightreg)); case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, compare registers // and set one to 1 or 0 based on the comparison. 
if (parentASTop == A_IF || parentASTop == A_WHILE) return (cgcompare_and_jump(n->op, leftreg, rightreg, reg)); else return (cgcompare_and_set(n->op, leftreg, rightreg)); case A_INTLIT: return (cgloadint(n->v.intvalue, n->type)); case A_IDENT: return (cgloadglob(n->v.id)); case A_LVIDENT: return (cgstorglob(reg, n->v.id)); case A_ASSIGN: // The work has already been done, return the result return (rightreg); case A_PRINT: // Print the left-child's value // and return no register genprintint(leftreg); genfreeregs(); return (NOREG); case A_WIDEN: // Widen the child's type to the parent's type return (cgwiden(leftreg, n->left->type, n->type)); case A_RETURN: cgreturn(leftreg, Functionid); return (NOREG); case A_FUNCCALL: return (cgcall(leftreg, n->v.id)); default: fatald("Unknown AST operator", n->op); } } void genpreamble() { cgpreamble(); } void genfreeregs() { freeall_registers(); } void genprintint(int reg) { cgprintint(reg); } void genglobsym(int id) { cgglobsym(id); } int genprimsize(int type) { return (cgprimsize(type)); } ================================================ FILE: 13_Functions_pt2/lib/printint.c ================================================ #include void printint(long x) { printf("%ld\n", x); } ================================================ FILE: 13_Functions_pt2/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Initialise global variables static void init() { Line = 1; Putback = '\n'; } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s infile\n", prog); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. 
void main(int argc, char *argv[]) { struct ASTnode *tree; if (argc != 2) usage(argv[0]); init(); // Open up the input file if ((Infile = fopen(argv[1], "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", argv[1], strerror(errno)); exit(1); } // Create the output file if ((Outfile = fopen("out.s", "w")) == NULL) { fprintf(stderr, "Unable to create out.s: %s\n", strerror(errno)); exit(1); } // For now, ensure that void printint() is defined addglob("printint", P_CHAR, S_FUNCTION, 0); scan(&Token); // Get the first token from the input genpreamble(); // Output the preamble while (1) { // Parse a function and tree = function_declaration(); genAST(tree, NOREG, 0); // generate the assembly code for it if (Token.token == T_EOF) // Stop when we have reached EOF break; } fclose(Outfile); // Close the output file and exit exit(0); } ================================================ FILE: 13_Functions_pt2/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current token is t, // and fetch the next token. 
Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d\n", s, Line); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d\n", s1, s2, Line); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d\n", s, d, Line); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d\n", s, c, Line); exit(1); } ================================================ FILE: 13_Functions_pt2/scan.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { char *p; p = strchr(s, c); return (p ? p - s : -1); } // Get the next character from the input file. static int next(void) { int c; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return (c); } c = fgetc(Infile); // Read from input file if ('\n' == c) Line++; // Increment line count return (c); } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. 
static int skip(void) {
  int ch;

  // Keep reading until something other than
  // blank space turns up
  do {
    ch = next();
  } while (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r'
	   || ch == '\f');

  return (ch);
}

// Read a decimal integer literal whose first character
// is c, and return its value. The first character that
// is not a digit is handed back to the input.
static int scanint(int c) {
  int total = 0;
  int digit;

  // Accumulate the value one decimal digit at a time
  for (digit = chrpos("0123456789", c); digit >= 0;
       digit = chrpos("0123456789", c)) {
    total = total * 10 + digit;
    c = next();
  }

  // c is not part of the number: return it to the input
  putback(c);
  return (total);
}

// Read an identifier whose first character is c into buf[],
// which can hold lim characters including the terminating NUL.
// Return the number of characters stored.
static int scanident(int c, char *buf, int lim) {
  int len = 0;

  // Letters, digits and underscores make up an identifier
  for (; isalpha(c) || isdigit(c) || '_' == c; c = next()) {
    // Give up once the buffer is full,
    // otherwise store the character
    if (len == lim - 1) {
      fatal("Identifier too long");
    } else if (len < lim - 1) {
      buf[len++] = c;
    }
  }

  // c ended the identifier: return it to the input,
  // then terminate the string
  putback(c);
  buf[len] = '\0';
  return (len);
}

// Given a word from the input, return the matching
// keyword token number or 0 if it's not a keyword.
// Switch on the first letter so that we don't have
// to waste time strcmp()ing against all the keywords.
static int keyword(char *s) { switch (*s) { case 'c': if (!strcmp(s, "char")) return (T_CHAR); break; case 'e': if (!strcmp(s, "else")) return (T_ELSE); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'l': if (!strcmp(s, "long")) return (T_LONG); break; case 'p': if (!strcmp(s, "print")) return (T_PRINT); break; case 'r': if (!strcmp(s, "return")) return (T_RETURN); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; case 'v': if (!strcmp(s, "void")) return (T_VOID); break; } return (0); } // A pointer to a rejected token static struct token *Rejtoken = NULL; // Reject the token that we just scanned void reject_token(struct token *t) { if (Rejtoken != NULL) fatal("Can't reject token twice"); Rejtoken = t; } // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. int scan(struct token *t) { int c, tokentype; // If we have any rejected token, return it if (Rejtoken != NULL) { t = Rejtoken; Rejtoken = NULL; return (1); } // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': t->token = T_PLUS; break; case '-': t->token = T_MINUS; break; case '*': t->token = T_STAR; break; case '/': t->token = T_SLASH; break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { fatalc("Unrecognised character", c); } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else { putback(c); t->token = T_LT; } break; case '>': if ((c = next()) == '=') { t->token = T_GE; } else { putback(c); t->token = T_GT; } break; 
default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if (tokentype = keyword(Text)) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token return (1); } ================================================ FILE: 13_Functions_pt2/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // Prototypes static struct ASTnode *single_statement(void); // compound_statement: // empty, i.e. no statement // | statement // | statement statements // ; // // statement: print_statement // | declaration // | assignment_statement // | function_call // | if_statement // | while_statement // | for_statement // | return_statement // ; // print_statement: 'print' expression ';' ; // static struct ASTnode *print_statement(void) { struct ASTnode *tree; int lefttype, righttype; int reg; // Match a 'print' as the first token match(T_PRINT, "print"); // Parse the following expression tree = binexpr(0); // Ensure the two types are compatible. lefttype = P_INT; righttype = tree->type; if (!type_compatible(&lefttype, &righttype, 0)) fatal("Incompatible types"); // Widen the tree if required. 
if (righttype) tree = mkastunary(righttype, P_INT, tree, 0); // Make an print AST tree tree = mkastunary(A_PRINT, P_NONE, tree, 0); // Return the AST return (tree); } // assignment_statement: identifier '=' expression ';' ; // // Parse an assignment statement and return its AST static struct ASTnode *assignment_statement(void) { struct ASTnode *left, *right, *tree; int lefttype, righttype; int id; // Ensure we have an identifier ident(); // This could be a variable or a function call. // If next token is '(', it's a function call if (Token.token == T_LPAREN) return (funccall()); // Not a function call, on with an assignment then! // Check the identifier has been defined then make a leaf node for it // XXX Add structural type test if ((id = findglob(Text)) == -1) { fatals("Undeclared variable", Text); } right = mkastleaf(A_LVIDENT, Gsym[id].type, id); // Ensure we have an equals sign match(T_ASSIGN, "="); // Parse the following expression left = binexpr(0); // Ensure the two types are compatible. lefttype = left->type; righttype = right->type; if (!type_compatible(&lefttype, &righttype, 1)) fatal("Incompatible types"); // Widen the left if required. if (lefttype) left = mkastunary(lefttype, right->type, left, 0); // Make an assignment AST tree tree = mkastnode(A_ASSIGN, P_INT, left, NULL, right, 0); // Return the AST return (tree); } // if_statement: if_head // | if_head 'else' compound_statement // ; // // if_head: 'if' '(' true_false_expression ')' compound_statement ; // // Parse an IF statement including any // optional ELSE clause and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Ensure // the tree's operation is a comparison. 
condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) fatal("Bad comparison operator"); rparen(); // Get the AST for the compound statement trueAST = compound_statement(); // If we have an 'else', skip it // and get the AST for the compound statement if (Token.token == T_ELSE) { scan(&Token); falseAST = compound_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, P_NONE, condAST, trueAST, falseAST, 0)); } // while_statement: 'while' '(' true_false_expression ')' compound_statement ; // // Parse a WHILE statement and return its AST static struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Ensure // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) fatal("Bad comparison operator"); rparen(); // Get the AST for the compound statement bodyAST = compound_statement(); // Build and return the AST for this statement return (mkastnode(A_WHILE, P_NONE, condAST, NULL, bodyAST, 0)); } // for_statement: 'for' '(' preop_statement ';' // true_false_expression ';' // postop_statement ')' compound_statement ; // // preop_statement: statement (for now) // postop_statement: statement (for now) // // Parse a FOR statement and return its AST static struct ASTnode *for_statement(void) { struct ASTnode *condAST, *bodyAST; struct ASTnode *preopAST, *postopAST; struct ASTnode *tree; // Ensure we have 'for' '(' match(T_FOR, "for"); lparen(); // Get the pre_op statement and the ';' preopAST = single_statement(); semi(); // Get the condition and the ';' condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) fatal("Bad comparison operator"); semi(); // Get the post_op statement and the ')' postopAST = single_statement(); rparen(); // Get the compound statement which is the body bodyAST = compound_statement(); // For now, all four 
sub-trees have to be non-NULL. // Later on, we'll change the semantics for when some are missing // Glue the compound statement and the postop tree tree = mkastnode(A_GLUE, P_NONE, bodyAST, NULL, postopAST, 0); // Make a WHILE loop with the condition and this new body tree = mkastnode(A_WHILE, P_NONE, condAST, NULL, tree, 0); // And glue the preop tree to the A_WHILE tree return (mkastnode(A_GLUE, P_NONE, preopAST, NULL, tree, 0)); } // return_statement: 'return' '(' expression ')' ; // // Parse a return statement and return its AST static struct ASTnode *return_statement(void) { struct ASTnode *tree; int returntype, functype; // Can't return a value if function returns P_VOID if (Gsym[Functionid].type == P_VOID) fatal("Can't return from a void function"); // Ensure we have 'return' '(' match(T_RETURN, "return"); lparen(); // Parse the following expression tree = binexpr(0); // Ensure this is compatible with the function's type returntype = tree->type; functype = Gsym[Functionid].type; if (!type_compatible(&returntype, &functype, 1)) fatal("Incompatible types"); // Widen the left if required. 
if (returntype) tree = mkastunary(returntype, functype, tree, 0); // Add on the A_RETURN node tree = mkastunary(A_RETURN, P_NONE, tree, 0); // Get the ')' rparen(); return (tree); } // Parse a single statement // and return its AST static struct ASTnode *single_statement(void) { switch (Token.token) { case T_PRINT: return (print_statement()); case T_CHAR: case T_INT: case T_LONG: var_declaration(); return (NULL); // No AST generated here case T_IDENT: return (assignment_statement()); case T_IF: return (if_statement()); case T_WHILE: return (while_statement()); case T_FOR: return (for_statement()); case T_RETURN: return (return_statement()); default: fatald("Syntax error, token", Token.token); } } // Parse a compound statement // and return its AST struct ASTnode *compound_statement(void) { struct ASTnode *left = NULL; struct ASTnode *tree; // Require a left curly bracket lbrace(); while (1) { // Parse a single statement tree = single_statement(); // Some statements must be followed by a semicolon if (tree != NULL && (tree->op == A_PRINT || tree->op == A_ASSIGN || tree->op == A_RETURN || tree->op == A_FUNCCALL)) semi(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, P_NONE, left, NULL, tree, 0); } // When we hit a right curly bracket, // skip past it and return the AST if (Token.token == T_RBRACE) { rbrace(); return (left); } } } ================================================ FILE: 13_Functions_pt2/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 static int Globs = 0; // Position of next free global symbol slot // Determine if the symbol s is in the global symbol table. // Return its slot position or -1 if not found. 
int findglob(char *s) { int i; for (i = 0; i < Globs; i++) { if (*s == *Gsym[i].name && !strcmp(s, Gsym[i].name)) return (i); } return (-1); } // Get the position of a new global symbol slot, or die // if we've run out of positions. static int newglob(void) { int p; if ((p = Globs++) >= NSYMBOLS) fatal("Too many global symbols"); return (p); } // Add a global symbol to the symbol table. // Also set up its type and structural type. // Return the slot number in the symbol table int addglob(char *name, int type, int stype, int endlabel) { int y; // If this is already in the symbol table, return the existing slot if ((y = findglob(name)) != -1) return (y); // Otherwise get a new slot, fill it in and // return the slot number y = newglob(); Gsym[y].name = strdup(name); Gsym[y].type = type; Gsym[y].stype = stype; Gsym[y].endlabel = endlabel; return (y); } ================================================ FILE: 13_Functions_pt2/tests/input01 ================================================ void main() { print 12 * 3; print 18 - 2 * 4; print 1 + 2 + 9 - 5/2 + 3*5; } ================================================ FILE: 13_Functions_pt2/tests/input02 ================================================ void main() { int fred; int jim; fred= 5; jim= 12; print fred + jim; } ================================================ FILE: 13_Functions_pt2/tests/input03 ================================================ void main() { int x; x= 1; print x; x= x + 1; print x; x= x + 1; print x; x= x + 1; print x; x= x + 1; print x; } ================================================ FILE: 13_Functions_pt2/tests/input04 ================================================ void main() { int x; x= 7 < 9; print x; x= 7 <= 9; print x; x= 7 != 9; print x; x= 7 == 7; print x; x= 7 >= 7; print x; x= 7 <= 7; print x; x= 9 > 7; print x; x= 9 >= 7; print x; x= 9 != 7; print x; } ================================================ FILE: 13_Functions_pt2/tests/input05 ================================================ 
void main() { int i; int j; i=6; j=12; if (i < j) { print i; } else { print j; } } ================================================ FILE: 13_Functions_pt2/tests/input06 ================================================ void main() { int i; i=1; while (i <= 10) { print i; i= i + 1; } } ================================================ FILE: 13_Functions_pt2/tests/input07 ================================================ void main() { int i; for (i= 1; i <= 10; i= i + 1) { print i; } } ================================================ FILE: 13_Functions_pt2/tests/input08 ================================================ void main() { int i; for (i= 1; i <= 10; i= i + 1) { print i; } } ================================================ FILE: 13_Functions_pt2/tests/input09 ================================================ void main() { int i; for (i= 1; i <= 10; i= i + 1) { print i; } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { print 2 * b - a; } } ================================================ FILE: 13_Functions_pt2/tests/input10 ================================================ void main() { int i; char j; j= 20; print j; i= 10; print i; for (i= 1; i <= 5; i= i + 1) { print i; } for (j= 253; j != 2; j= j + 1) { print j; } } ================================================ FILE: 13_Functions_pt2/tests/input11 ================================================ int main() { int i; char j; long k; i= 10; print i; j= 20; print j; k= 30; print k; for (i= 1; i <= 5; i= i + 1) { print i; } for (j= 253; j != 4; j= j + 1) { print j; } for (k= 1; k <= 5; k= k + 1) { print k; } return(i); print 12345; return(3); } ================================================ FILE: 13_Functions_pt2/tests/input12 ================================================ int fred() { return(5); } void main() { int x; x= fred(2); print x; } ================================================ FILE: 13_Functions_pt2/tests/input13 ================================================ int fred() { 
return(56); } void main() { int dummy; int result; dummy= printint(23); result= fred(10); dummy= printint(result); }
================================================ FILE: 13_Functions_pt2/tests/input14 ================================================
int fred() { return(20); } int main() { int result; printint(10); result= fred(15); printint(result); printint(fred(15)+10); return(0); }
================================================ FILE: 13_Functions_pt2/tests/mktests ================================================
#!/bin/sh
# Make the output files for each test.
# For every input* file that has no out.<input> file yet: compile it
# with ../comp1, link it with the printint() helper, run the program
# and save what it prints as the known good output.

if [ ! -f ../comp1 ]
then echo "Need to build ../comp1 first!"; exit 1
fi

for i in input*
do if [ ! -f "out.$i" ]
   then
     # Only record output when both build steps worked; otherwise a
     # stale or missing ./out would be saved as the "known good" result
     if ../comp1 "$i" && cc -o out out.s ../lib/printint.c
     then ./out > "out.$i"
     else echo "Unable to build $i, no output file made"
     fi
     rm -f out out.s
   fi
done
================================================ FILE: 13_Functions_pt2/tests/mktestsn ================================================
#!/bin/sh
# Make the output files for each test

if [ ! -f ../compn ]
then echo "Need to build ../compn first!"; exit 1
fi

for i in input*
do if [ !
-f "out.$i" ] then ../compn $i nasm -f elf64 out.s cc -no-pie -o out out.o ../lib/printint.c ./out > out.$i rm -f out out.s fi done ================================================ FILE: 13_Functions_pt2/tests/out.input01 ================================================ 36 10 25 ================================================ FILE: 13_Functions_pt2/tests/out.input02 ================================================ 17 ================================================ FILE: 13_Functions_pt2/tests/out.input03 ================================================ 1 2 3 4 5 ================================================ FILE: 13_Functions_pt2/tests/out.input04 ================================================ 1 1 1 1 1 1 1 1 1 ================================================ FILE: 13_Functions_pt2/tests/out.input05 ================================================ 6 ================================================ FILE: 13_Functions_pt2/tests/out.input06 ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 13_Functions_pt2/tests/out.input07 ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 13_Functions_pt2/tests/out.input08 ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 13_Functions_pt2/tests/out.input09 ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 13_Functions_pt2/tests/out.input10 ================================================ 20 10 1 2 3 4 5 253 254 255 0 1 ================================================ FILE: 13_Functions_pt2/tests/out.input11 ================================================ 10 20 30 1 2 3 4 5 253 254 255 0 1 2 3 1 2 3 4 5 ================================================ FILE: 13_Functions_pt2/tests/out.input12 
================================================ 5 ================================================ FILE: 13_Functions_pt2/tests/out.input13 ================================================ 23 56 ================================================ FILE: 13_Functions_pt2/tests/out.input14 ================================================ 10 20 30 ================================================ FILE: 13_Functions_pt2/tests/runtests ================================================ #!/bin/sh # Run each test and compare # against known good output if [ ! -f ../comp1 ] then echo "Need to build ../comp1 first!"; exit 1 fi for i in input* do if [ ! -f "out.$i" ] then echo "Can't run test on $i, no output file!" else echo -n $i ../comp1 $i cc -o out out.s ../lib/printint.c ./out > trial.$i cmp -s "out.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo else echo ": OK" fi rm -f out out.s "trial.$i" fi done ================================================ FILE: 13_Functions_pt2/tests/runtestsn ================================================ #!/bin/sh # Run each test and compare # against known good output if [ ! -f ../compn ] then echo "Need to build ../compn first!"; exit 1 fi for i in input* do if [ ! -f "out.$i" ] then echo "Can't run test on $i, no output file!" else echo -n $i ../compn $i nasm -f elf64 out.s cc -no-pie -o out out.o ../lib/printint.c ./out > trial.$i cmp -s "out.$i" "trial.$i" if [ "$?" 
-eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo else echo ": OK" fi rm -f out out.o out.s "trial.$i" fi done ================================================ FILE: 13_Functions_pt2/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->type = type; n->left = left; n->mid = mid; n->right = right; n->v.intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, int intvalue) { return (mkastnode(op, type, NULL, NULL, NULL, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, int intvalue) { return (mkastnode(op, type, left, NULL, NULL, intvalue)); } ================================================ FILE: 13_Functions_pt2/types.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Types and type handling // Copyright (c) 2019 Warren Toomey, GPL3 // Given two primitive types, // return true if they are compatible, // false otherwise. Also return either // zero or an A_WIDEN operation if one // has to be widened to match the other. // If onlyright is true, only widen left to right. 
int type_compatible(int *left, int *right, int onlyright) { int leftsize, rightsize; // Same types, they are compatible if (*left == *right) { *left = *right = 0; return (1); } // Get the sizes for each type leftsize = genprimsize(*left); rightsize = genprimsize(*right); // Types with zero size are not // not compatible with anything if ((leftsize == 0) || (rightsize == 0)) return (0); // Widen types as required if (leftsize < rightsize) { *left = A_WIDEN; *right = 0; return (1); } if (rightsize < leftsize) { if (onlyright) return (0); *left = 0; *right = A_WIDEN; return (1); } // Anything remaining is the same size // and thus compatible *left = *right = 0; return (1); } ================================================ FILE: 14_ARM_Platform/Makefile ================================================ SRCS= cg.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c \ sym.c tree.c types.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c \ sym.c tree.c types.c ARMSRCS= cg_arm.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c \ sym.c tree.c types.c comp1: $(SRCS) cc -o comp1 -g -Wall $(SRCS) compn: $(SRCN) cc -o compn -g $(SRCN) comp1arm: $(ARMSRCS) cc -o comp1arm -g -Wall $(ARMSRCS) cp comp1arm comp1 clean: rm -f comp1 comp1arm compn *.o *.s out test: comp1 tests/runtests (cd tests; chmod +x runtests; ./runtests) armtest: comp1arm tests/runtests (cd tests; chmod +x runtests; ./runtests) testn: compn tests/runtestsn (cd tests; chmod +x runtestsn; ./runtestsn) test14: comp1 tests/input14 lib/printint.c ./comp1 tests/input14 cc -o out out.s lib/printint.c ./out armtest14: comp1arm tests/input14 lib/printint.c ./comp1 tests/input14 cc -o out out.s lib/printint.c ./out test14n: compn tests/input14 lib/printint.c ./compn tests/input14 nasm -f elf64 out.s cc -no-pie -o out lib/printint.c out.o ./out ================================================ FILE: 14_ARM_Platform/Readme.md ================================================ # Part 14: Generating ARM Assembly Code In 
this part of our compiler writing journey, I've ported the compiler over to the ARM CPU on the [Raspberry Pi 4](https://en.wikipedia.org/wiki/Raspberry_Pi). I should preface this section by saying that, while I know MIPS assembly language quite well, I only knew a bit of x86-32 assembly language when I started this journey, and nothing about x86-64 nor ARM assembly language. What I've been doing along the way is compiling example C programs down to an assembler with various C compilers to see what sort of assembly language they produce. That's what I've done here to write the ARM output for this compiler. ## The Major Differences Firstly, ARM is a RISC CPU and x86-64 is a CISC CPU. There are fewer addressing modes on the ARM when compared to the x86-64. There are also other interesting constraints that occur when generating ARM assembly code. So I will start with the major differences, and leave the main similarities to later. ### ARM Registers ARM has heaps more registers than x86-64. That said, I'm sticking with four registers to allocate: `r4`,`r5`, `r6` and `r7`. We will see that `r0` and `r3` get used for other things below. ### Addressing Global Variables On x86-64, we only have to declare a global variable with a line like: ``` .comm i,4,4 # int variable .comm j,1,1 # char variable ``` and, later, we can load and store to these variables easily: ``` movb %r8b, j(%rip) # Store to j movl %r8d, i(%rip) # Store to i movzbl i(%rip), %r8 # Load from i movzbq j(%rip), %r8 # Load from j ``` With ARM, we have to manually allocate space for all global variables in our program postamble: ``` .comm i,4,4 .comm j,1,1 ... 
.L2: .word i .word j ``` To access these, we need to load a register with the address of each variable, and load a second register from that address: ``` ldr r3, .L2+0 ldr r4, [r3] # Load i ldr r3, .L2+4 ldr r4, [r3] # Load j ``` Stores to variables are similar: ``` mov r4, #20 ldr r3, .L2+4 strb r4, [r3] # j= 20 mov r4, #10 ldr r3, .L2+0 str r4, [r3] # i= 10 ``` There is now this code in `cgpostamble()` to generate the table of .words: ```c // Print out the global variables fprintf(Outfile, ".L2:\n"); for (int i = 0; i < Globs; i++) { if (Gsym[i].stype == S_VARIABLE) fprintf(Outfile, "\t.word %s\n", Gsym[i].name); } ``` This also means that we need to determine the offset from `.L2` for each global variable. Following the KISS principle, I manually calculate the offset each time I want to load `r3` with the address of a variable. Yes, I should calculate each offset once and store it somewhere; later! ```c // Determine the offset of a variable from the .L2 // label. Yes, this is inefficient code. static void set_var_offset(int id) { int offset = 0; // Walk the symbol table up to id. // Find S_VARIABLEs and add on 4 until // we get to our variable for (int i = 0; i < id; i++) { if (Gsym[i].stype == S_VARIABLE) offset += 4; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L2+%d\n", offset); } ``` ### Loading Int Literals An integer literal in a `mov` instruction has to fit the ARM immediate encoding: an 8-bit value rotated right by an even number of bits. Thus, we can't put arbitrary large integer literals into a single instruction. The answer is to store the literal values in memory, like variables. So I keep a list of previously-used literal values. In the postamble, I output them following the `.L3` label. And, like variables, I walk this list to determine the offset of any literal from the `.L3` label: ```c // We have to store large integer literal values in memory.
// Keep a list of them which will be output in the postamble #define MAXINTS 1024 int Intlist[MAXINTS]; static int Intslot = 0; // Determine the offset of a large integer // literal from the .L3 label. If the integer // isn't in the list, add it. static void set_int_offset(int val) { int offset = -1; // See if it is already there for (int i = 0; i < Intslot; i++) { if (Intlist[i] == val) { offset = 4 * i; break; } } // Not in the list, so add it if (offset == -1) { offset = 4 * Intslot; if (Intslot == MAXINTS) fatal("Out of int slots in set_int_offset()"); Intlist[Intslot++] = val; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L3+%d\n", offset); } ``` ### The Function Preamble I'm going to give you the function preamble, but I am not completely sure what each instruction does. Here it is for `int main(int x)`: ``` .text .globl main .type main, %function main: push {fp, lr} # Save the frame pointer and link register add fp, sp, #4 # Set the frame pointer to sp+4 sub sp, sp, #8 # Lower the stack pointer by 8 str r0, [fp, #-8] # Save the argument as a local var? ``` and here's the function postamble to return a single value: ``` sub sp, fp, #4 # Point sp back at the saved fp and lr pop {fp, pc} # Restore the frame pointer and return via the saved lr ``` ### Comparisons Returning 0 or 1 With the x86-64 there's an instruction to set a register to 0 or 1 based on the comparison being true, e.g. `sete`, but then we have to zero-fill the rest of the register with `movzbq`. With the ARM, we run two separate instructions which set a register to a value if the condition we want is true or false, e.g. ``` moveq r4, #1 # Set r4 to 1 if values were equal movne r4, #0 # Set r4 to 0 if values were not equal ``` ## A Comparison of Similar x86-64 and ARM Assembly Output I think that's all the major differences out of the road. So below is a comparison of the `cgXXX()` operation, any specific type for that operation, and an example x86-64 and ARM instruction sequence to perform it.
| Operation(type) | x86-64 Version | ARM Version | |-----------------|----------------|-------------| cgloadint() | movq $12, %r8 | mov r4, #12 | cgloadglob(char) | movzbq foo(%rip), %r8 | ldr r3, .L2+4 | | | | ldr r4, [r3] | cgloadglob(int) | movzbl foo(%rip), %r8 | ldr r3, .L2+4 | | | | ldr r4, [r3] | cgloadglob(long) | movq foo(%rip), %r8 | ldr r3, .L2+4 | | | | ldr r4, [r3] | int cgadd() | addq %r8, %r9 | add r4, r4, r5 | int cgsub() | subq %r8, %r9 | sub r4, r4, r5 | int cgmul() | imulq %r8, %r9 | mul r4, r4, r5 | int cgdiv() | movq %r8,%rax | mov r0, r4 | | | cqo | mov r1, r5 | | | idivq %r9 | bl __aeabi_idiv | | | movq %rax,%r8 | mov r4, r0 | cgprintint() | movq %r8, %rdi | mov r0, r4 | | | call printint | bl printint | | | | nop | cgcall() | movq %r8, %rdi | mov r0, r4 | | | call foo | bl foo | | | movq %rax, %r8 | mov r4, r0 | cgstorglob(char) | movb %r8, foo(%rip) | ldr r3, .L2+4 | | | | strb r4, [r3] | cgstorglob(int) | movl %r8, foo(%rip) | ldr r3, .L2+4 | | | | str r4, [r3] | cgstorglob(long) | movq %r8, foo(%rip) | ldr r3, .L2+4 | | | | str r4, [r3] | cgcompare_and_set() | cmpq %r8, %r9 | cmp r4, r5 | | | sete %r8 | moveq r4, #1 | | | movzbq %r8, %r8 | movne r4, #0 | cgcompare_and_jump() | cmpq %r8, %r9 | cmp r4, r5 | | | je L2 | beq L2 | cgreturn(char) | movzbl %r8, %eax | mov r0, r4 | | | jmp L2 | b L2 | cgreturn(int) | movl %r8, %eax | mov r0, r4 | | | jmp L2 | b L2 | cgreturn(long) | movq %r8, %rax | mov r0, r4 | | | jmp L2 | b L2 | ## Testing the ARM Code Generator If you copy the compiler from this part of the journey to a Raspberry Pi 3 or 4, you should be able to do: ``` $ make armtest cc -o comp1arm -g -Wall cg_arm.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c sym.c tree.c types.c cp comp1arm comp1 (cd tests; chmod +x runtests; ./runtests) input01: OK input02: OK input03: OK input04: OK input05: OK input06: OK input07: OK input08: OK input09: OK input10: OK input11: OK input12: OK input13: OK input14: OK $ make armtest14 ./comp1
tests/input14 cc -o out out.s lib/printint.c ./out 10 20 30 ``` ## Conclusion and What's Next It did take me a bit of head scratching to get the ARM version of the code generator `cg_arm.c` to correctly compile all of the test inputs. It was mostly straight-forward, I just wasn't familiar with the architecture and instruction set. It should be relatively easy to port the compiler to a platform with 3 or 4 registers, 2 or so data sizes and a stack (and stack frames). As we go forward, I'll try to keep both `cg.c` and `cg_arm.c` functionally in sync. In the next part of our compiler writing journey, we will add the `char` pointer to the language, as well as the '*' and '&' unary operators. [Next step](../15_Pointers_pt1/Readme.md)
================================================ FILE: 14_ARM_Platform/cg.c ================================================
#include "defs.h"
#include "data.h"
#include "decl.h"

// Code generator for x86-64
// Copyright (c) 2019 Warren Toomey, GPL3

// The four allocatable registers: a free/in-use flag for each,
// plus its quadword, byte and doubleword names
static int freereg[4];
static char *reglist[4] = { "%r8", "%r9", "%r10", "%r11" };
static char *breglist[4] = { "%r8b", "%r9b", "%r10b", "%r11b" };
static char *dreglist[4] = { "%r8d", "%r9d", "%r10d", "%r11d" };

// Mark every register as free
void freeall_registers(void) {
  int reg;

  for (reg = 0; reg < 4; reg++)
    freereg[reg] = 1;
}

// Hand out the lowest-numbered free register and mark
// it as in use. Die if every register is taken.
static int alloc_register(void) {
  int reg = 0;

  // Skip over the registers which are in use
  while (reg < 4 && !freereg[reg])
    reg++;

  if (reg == 4) {
    fatal("Out of registers");
    return (NOREG);		// Keep -Wall happy
  }

  freereg[reg] = 0;
  return (reg);
}

// Return a register to the list of available registers.
// Check to see if it's not already there.
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Gsym[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, @function\n" "%s:\n" "\tpushq\t%%rbp\n" "\tmovq\t%%rsp, %%rbp\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Gsym[id].endlabel); fputs("\tpopq %rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]); return (r); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Print out the code to initialise it switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzbq\t%s(\%%rip), %s\n", Gsym[id].name, reglist[r]); break; case P_INT: fprintf(Outfile, "\tmovzbl\t%s(\%%rip), %s\n", Gsym[id].name, reglist[r]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s(\%%rip), %s\n", Gsym[id].name, reglist[r]); break; default: fatald("Bad type in cgloadglob:", Gsym[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } // Multiply two registers 
together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); free_register(r2); return (r1); } // Call printint() with the given register void cgprintint(int r) { fprintf(Outfile, "\tmovq\t%s, %%rdi\n", reglist[r]); fprintf(Outfile, "\tcall\tprintint\n"); free_register(r); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { // Get a new register int outr = alloc_register(); fprintf(Outfile, "\tmovq\t%s, %%rdi\n", reglist[r]); fprintf(Outfile, "\tcall\t%s\n", Gsym[id].name); fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]); free_register(r); return (outr); } // Store a register's value into a variable int cgstorglob(int r, int id) { switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %s(\%%rip)\n", breglist[r], Gsym[id].name); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %s(\%%rip)\n", dreglist[r], Gsym[id].name); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %s(\%%rip)\n", reglist[r], Gsym[id].name); break; default: fatald("Bad type in cgloadglob:", Gsym[id].type); } return (r); } // Array of type sizes in P_XXX order. // 0 means no size. static int psize[] = { 0, 0, 1, 4, 8 }; // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { // Check the type is valid if (type < P_NONE || type > P_LONG) fatal("Bad type in cgprimsize()"); return (psize[type]); } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Gsym[id].type); fprintf(Outfile, "\t.comm\t%s,%d,%d\n", Gsym[id].name, typesize, typesize); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { // Generate code depending on the function's type switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzbl\t%s, %%eax\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %%eax\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", Gsym[id].type); } cgjump(Gsym[id].endlabel); } ================================================ FILE: 14_ARM_Platform/cg_arm.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for ARMv6 on Raspberry Pi // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. static int freereg[4]; static char *reglist[4] = { "r4", "r5", "r6", "r7" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // We have to store large integer literal values in memory. // Keep a list of them which will be output in the postamble #define MAXINTS 1024 int Intlist[MAXINTS]; static int Intslot = 0; // Determine the offset of a large integer // literal from the .L3 label. If the integer // isn't in the list, add it. static void set_int_offset(int val) { int offset = -1; // See if it is already there for (int i = 0; i < Intslot; i++) { if (Intlist[i] == val) { offset = 4 * i; break; } } // Not in the list, so add it if (offset == -1) { offset = 4 * Intslot; if (Intslot == MAXINTS) fatal("Out of int slots in set_int_offset()"); Intlist[Intslot++] = val; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L3+%d\n", offset); } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Print out the assembly postamble void cgpostamble() { // Print out the global variables fprintf(Outfile, ".L2:\n"); for (int i = 0; i < Globs; i++) { if (Gsym[i].stype == S_VARIABLE) fprintf(Outfile, "\t.word %s\n", Gsym[i].name); } // Print out the integer literals fprintf(Outfile, ".L3:\n"); for (int i = 0; i < Intslot; i++) { fprintf(Outfile, "\t.word %d\n", Intlist[i]); } } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Gsym[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, \%%function\n" "%s:\n" "\tpush\t{fp, lr}\n" "\tadd\tfp, sp, #4\n" "\tsub\tsp, sp, #8\n" "\tstr\tr0, [fp, #-8]\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Gsym[id].endlabel); fputs("\tsub\tsp, fp, #4\n" "\tpop\t{fp, pc}\n" "\t.align\t2\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. 
int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); // If the literal value is small, do it with one instruction if (value <= 1000) fprintf(Outfile, "\tmov\t%s, #%d\n", reglist[r], value); else { set_int_offset(value); fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); } return (r); } // Determine the offset of a variable from the .L2 // label. Yes, this is inefficient code. static void set_var_offset(int id) { int offset = 0; // Walk the symbol table up to id. // Find S_VARIABLEs and add on 4 until // we get to our variable for (int i = 0; i < id; i++) { if (Gsym[i].stype == S_VARIABLE) offset += 4; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L2+%d\n", offset); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s, %s\n", reglist[r1], reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\tmul\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { // To do a divide: r0 holds the dividend, r1 holds the divisor. // The quotient is in r0. 
fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r1]); fprintf(Outfile, "\tmov\tr1, %s\n", reglist[r2]); fprintf(Outfile, "\tbl\t__aeabi_idiv\n"); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r1]); free_register(r2); return (r1); } // Call printint() with the given register void cgprintint(int r) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]); fprintf(Outfile, "\tbl\tprintint\n"); fprintf(Outfile, "\tnop\n"); free_register(r); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]); fprintf(Outfile, "\tbl\t%s\n", Gsym[id].name); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r]); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { // Get the offset to the variable set_var_offset(id); switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: fprintf(Outfile, "\tstr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgloadglob:", Gsym[id].type); } return (r); } // Array of type sizes in P_XXX order. // 0 means no size. static int psize[] = { 0, 0, 1, 4, 4 }; // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { // Check the type is valid if (type < P_NONE || type > P_LONG) fatal("Bad type in cgprimsize()"); return (psize[type]); } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Gsym[id].type); fprintf(Outfile, "\t.comm\t%s,%d,%d\n", Gsym[id].name, typesize, typesize); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "moveq", "movne", "movlt", "movgt", "movle", "movge" }; // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "movne", "moveq", "movge", "movle", "movgt", "movlt" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #1\n", cmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #0\n", invcmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\tuxtb\t%s, %s\n", reglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tb\tL%d\n", l); } // List of inverted branch instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *brlist[] = { "bne", "beq", "bge", "ble", "bgt", "blt" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", brlist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[reg]); cgjump(Gsym[id].endlabel); } ================================================ FILE: 14_ARM_Platform/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. // We need a list of byte and doubleword registers, too static int freereg[4]; static char *reglist[4] = { "r8", "r9", "r10", "r11" }; static char *breglist[4] = { "r8b", "r9b", "r10b", "r11b" }; static char *dreglist[4] = { "r8d", "r9d", "r10d", "r11d" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return(NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\textern\tprintint\n", Outfile); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Gsym[id].name; fprintf(Outfile, "\tsection\t.text\n" "\tglobal\t%s\n" "%s:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n", name, name); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Gsym[id].endlabel); fputs("\tpop rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Print out the code to initialise it switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], Gsym[id].name); break; case P_INT: fprintf(Outfile, "\txor\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tmov\t%s, dword [%s]\n", dreglist[r], Gsym[id].name); break; case P_LONG: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], Gsym[id].name); break; default: fatald("Bad type in cgloadglob:", Gsym[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // 
Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } // Call printint() with the given register void cgprintint(int r) { fprintf(Outfile, "\tmov\trdi, %s\n", reglist[r]); fprintf(Outfile, "\tcall\tprintint\n"); free_register(r); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { // Get a new register int outr = alloc_register(); fprintf(Outfile, "\tmov\trdi, %s\n", reglist[r]); fprintf(Outfile, "\tcall\t%s\n", Gsym[id].name); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[outr]); free_register(r); return (outr); } // Store a register's value into a variable int cgstorglob(int r, int id) { switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], %s\n", Gsym[id].name, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", Gsym[id].name, dreglist[r]); break; case P_LONG: fprintf(Outfile, "\tmov\t[%s], %s\n", Gsym[id].name, reglist[r]); break; default: fatald("Bad type in cgloadglob:", Gsym[id].type); } return (r); } // Array of type sizes in P_XXX order. // 0 means no size. static int psize[] = { 0, 0, 1, 4, 8 }; // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { // Check the type is valid if (type < P_NONE || type > P_LONG) fatal("Bad type in cgprimsize()"); return (psize[type]); } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Gsym[id].type); fprintf(Outfile, "\tcommon\t%s %d:%d\n", Gsym[id].name, typesize, typesize); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { // Generate code depending on the function's type switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzx\teax, %s\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmov\teax, %s\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", Gsym[id].type); } cgjump(Gsym[id].endlabel); } ================================================ FILE: 14_ARM_Platform/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Putback; // Character put back by scanner extern_ int Functionid; // Symbol id of the current function extern_ int Globs; // Position of next free global symbol slot extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ struct token Token; // Last token scanned extern_ char Text[TEXTLEN + 1]; // Last identifier scanned extern_ struct symtable Gsym[NSYMBOLS]; // Global symbol table ================================================ FILE: 14_ARM_Platform/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 // Parse the current token and // 
return a primitive type enum value int parse_type(int t) { if (t == T_CHAR) return (P_CHAR); if (t == T_INT) return (P_INT); if (t == T_LONG) return (P_LONG); if (t == T_VOID) return (P_VOID); fatald("Illegal type, token", t); return(0); // Keep -Wall happy } // variable_declaration: type identifier ';' ; // // Parse the declaration of a variable void var_declaration(void) { int id, type; // Get the type of the variable, then the identifier type = parse_type(Token.token); scan(&Token); ident(); // Text now has the identifier's name. // Add it as a known identifier // and generate its space in assembly id = addglob(Text, type, S_VARIABLE, 0); genglobsym(id); // Get the trailing semicolon semi(); } // // function_declaration: type identifier '(' ')' compound_statement ; // // Parse the declaration of a simplistic function struct ASTnode *function_declaration(void) { struct ASTnode *tree, *finalstmt; int nameslot, type, endlabel; // Get the type of the variable, then the identifier type = parse_type(Token.token); scan(&Token); ident(); // Get a label-id for the end label, add the function // to the symbol table, and set the Functionid global // to the function's symbol-id endlabel = genlabel(); nameslot = addglob(Text, type, S_FUNCTION, endlabel); Functionid = nameslot; // Scan in the parentheses lparen(); rparen(); // Get the AST tree for the compound statement tree = compound_statement(); // If the function type isn't P_VOID, check that // the last AST operation in the compound statement // was a return statement if (type != P_VOID) { finalstmt = (tree->op == A_GLUE) ? 
tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); } // Return an A_FUNCTION node which has the function's nameslot // and the compound statement sub-tree return (mkastunary(A_FUNCTION, type, tree, nameslot)); } ================================================ FILE: 14_ARM_Platform/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c void reject_token(struct token *t); int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, int intvalue); struct ASTnode *mkastleaf(int op, int type, int intvalue); struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, int intvalue); // gen.c int genlabel(void); int genAST(struct ASTnode *n, int reg, int parentASTop); void genpreamble(); void genpostamble(); void genfreeregs(); void genprintint(int reg); void genglobsym(int id); int genprimsize(int type); void genreturn(int reg, int id); // cg.c void freeall_registers(void); void cgpreamble(); void cgpostamble(); void cgfuncpreamble(int id); void cgfuncpostamble(int id); int cgloadint(int value, int type); int cgloadglob(int id); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdiv(int r1, int r2); void cgprintint(int r); int cgcall(int r, int id); int cgstorglob(int r, int id); void cgglobsym(int id); int cgcompare_and_set(int ASTop, int r1, int r2); int cgcompare_and_jump(int ASTop, int r1, int r2, int label); void cglabel(int l); void cgjump(int l); int cgwiden(int r, int oldtype, int newtype); int cgprimsize(int type); void cgreturn(int reg, int id); // expr.c struct ASTnode *funccall(void); struct ASTnode *binexpr(int ptp); // stmt.c struct ASTnode *compound_statement(void); // misc.c void match(int t, char *what); void semi(void); void lbrace(void); void rbrace(void); 
void lparen(void); void rparen(void); void ident(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c int findglob(char *s); int addglob(char *name, int type, int stype, int endlabel); // decl.c void var_declaration(void); struct ASTnode *function_declaration(void); // types.c int type_compatible(int *left, int *right, int onlyright); ================================================ FILE: 14_ARM_Platform/defs.h ================================================ #include #include #include #include // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 #define TEXTLEN 512 // Length of symbols in input #define NSYMBOLS 1024 // Number of symbol table entries // Token types enum { T_EOF, // Operators T_PLUS, T_MINUS, T_STAR, T_SLASH, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, // Type keywords T_VOID, T_CHAR, T_INT, T_LONG, // Structural tokens T_INTLIT, T_SEMI, T_ASSIGN, T_IDENT, T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, // Other keywords T_PRINT, T_IF, T_ELSE, T_WHILE, T_FOR, T_RETURN }; // Token structure struct token { int token; // Token type, from the enum list above int intvalue; // For T_INTLIT, the integer value }; // AST node types. 
The first few line up // with the related tokens enum { A_ADD = 1, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_INTLIT, A_IDENT, A_LVIDENT, A_ASSIGN, A_PRINT, A_GLUE, A_IF, A_WHILE, A_FUNCTION, A_WIDEN, A_RETURN, A_FUNCCALL }; // Primitive types enum { P_NONE, P_VOID, P_CHAR, P_INT, P_LONG }; // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; union { // For A_INTLIT, the integer value int intvalue; // For A_IDENT, the symbol slot number int id; // For A_FUNCTION, the symbol slot number } v; // For A_FUNCCALL, the symbol slot number }; #define NOREG -1 // Use NOREG when the AST generation // functions have no register to return // Structural types enum { S_VARIABLE, S_FUNCTION }; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol int stype; // Structural type for the symbol int endlabel; // For S_FUNCTIONs, the end label }; ================================================ FILE: 14_ARM_Platform/expr.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of expressions // Copyright (c) 2019 Warren Toomey, GPL3 // Parse a function call with a single expression // argument and return its AST struct ASTnode *funccall(void) { struct ASTnode *tree; int id; // Check that the identifier has been defined, // then make a leaf node for it. XXX Add structural type test if ((id = findglob(Text)) == -1) { fatals("Undeclared function", Text); } // Get the '(' lparen(); // Parse the following expression tree = binexpr(0); // Build the function call AST node. Store the // function's return type as this node's type. 
// Also record the function's symbol-id tree = mkastunary(A_FUNCCALL, Gsym[id].type, tree, id); // Get the ')' rparen(); return (tree); } // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; int id; switch (Token.token) { case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. // Make it a P_CHAR if it's within the P_CHAR range if ((Token.intvalue) >= 0 && (Token.intvalue < 256)) n = mkastleaf(A_INTLIT, P_CHAR, Token.intvalue); else n = mkastleaf(A_INTLIT, P_INT, Token.intvalue); break; case T_IDENT: // This could be a variable or a function call. // Scan in the next token to find out scan(&Token); // It's a '(', so a function call if (Token.token == T_LPAREN) return (funccall()); // Not a function call, so reject the new token reject_token(&Token); // Check that the variable exists. XXX Add structural type test id = findglob(Text); if (id == -1) fatals("Unknown variable", Text); // Make a leaf AST node for it n = mkastleaf(A_IDENT, Gsym[id].type, id); break; default: fatald("Syntax error, token", Token.token); } // Scan in the next token and return the leaf node scan(&Token); return (n); } // Convert a binary operator token into an AST operation. // We rely on a 1:1 mapping from token to AST operation static int arithop(int tokentype) { if (tokentype > T_EOF && tokentype < T_INTLIT) return (tokentype); fatald("Syntax error, token", tokentype); return(0); // Keep -Wall happy } // Operator precedence for each token. Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 10, // T_EOF, T_PLUS, T_MINUS 20, 20, // T_STAR, T_SLASH 30, 30, // T_EQ, T_NE 40, 40, 40, 40 // T_LT, T_GT, T_LE, T_GE }; // Check that we have a binary operator and // return its precedence. 
static int op_precedence(int tokentype) { int prec = OpPrec[tokentype]; if (prec == 0) fatald("Syntax error, token", tokentype); return (prec); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; int lefttype, righttype; int tokentype; // Get the primary tree on the left. // Fetch the next token at the same time. left = primary(); // If we hit a semicolon or ')', return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN) return (left); // While the precedence of this token is // more than that of the previous token precedence while (op_precedence(tokentype) > ptp) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Ensure the two types are compatible. lefttype = left->type; righttype = right->type; if (!type_compatible(&lefttype, &righttype, 0)) fatal("Incompatible types"); // Widen either side if required. type vars are A_WIDEN now if (lefttype) left = mkastunary(lefttype, right->type, left, 0); if (righttype) right = mkastunary(righttype, left->type, right, 0); // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. left = mkastnode(arithop(tokentype), left->type, left, NULL, right, 0); // Update the details of the current token. 
// If we hit a semicolon or ')', return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN) return (left); } // Return the tree we have when the precedence // is the same or lower return (left); } ================================================ FILE: 14_ARM_Platform/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number int genlabel(void) { static int id = 1; return (id++); } // Generate the code for an IF statement // and an optional ELSE clause static int genIF(struct ASTnode *n) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. // When there is no ELSE clause, Lfalse _is_ // the ending label! Lfalse = genlabel(); if (n->right) Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. // We cheat by sending the Lfalse label as a register. genAST(n->left, Lfalse, n->op); genfreeregs(); // Generate the true compound statement genAST(n->mid, NOREG, n->op); genfreeregs(); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOREG, n->op); genfreeregs(); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement // and an optional ELSE clause static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = genlabel(); Lend = genlabel(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. // We cheat by sending the Lfalse label as a register. 
genAST(n->left, Lend, n->op); genfreeregs(); // Generate the compound statement for the body genAST(n->right, NOREG, n->op); genfreeregs(); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Given an AST, the register (if any) that holds // the previous rvalue, and the AST op of the parent, // generate assembly code recursively. // Return the register id with the tree's final value int genAST(struct ASTnode *n, int reg, int parentASTop) { int leftreg, rightreg; // We now have specific AST node handling at the top switch (n->op) { case A_IF: return (genIF(n)); case A_WHILE: return (genWHILE(n)); case A_GLUE: // Do each child statement, and free the // registers after each child genAST(n->left, NOREG, n->op); genfreeregs(); genAST(n->right, NOREG, n->op); genfreeregs(); return (NOREG); case A_FUNCTION: // Generate the function's preamble before the code cgfuncpreamble(n->v.id); genAST(n->left, NOREG, n->op); cgfuncpostamble(n->v.id); return (NOREG); } // General AST node handling below // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, NOREG, n->op); if (n->right) rightreg = genAST(n->right, leftreg, n->op); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdiv(leftreg, rightreg)); case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, compare registers // and set one to 1 or 0 based on the comparison. 
if (parentASTop == A_IF || parentASTop == A_WHILE) return (cgcompare_and_jump(n->op, leftreg, rightreg, reg)); else return (cgcompare_and_set(n->op, leftreg, rightreg)); case A_INTLIT: return (cgloadint(n->v.intvalue, n->type)); case A_IDENT: return (cgloadglob(n->v.id)); case A_LVIDENT: return (cgstorglob(reg, n->v.id)); case A_ASSIGN: // The work has already been done, return the result return (rightreg); case A_PRINT: // Print the left-child's value // and return no register genprintint(leftreg); genfreeregs(); return (NOREG); case A_WIDEN: // Widen the child's type to the parent's type return (cgwiden(leftreg, n->left->type, n->type)); case A_RETURN: cgreturn(leftreg, Functionid); return (NOREG); case A_FUNCCALL: return (cgcall(leftreg, n->v.id)); default: fatald("Unknown AST operator", n->op); } return (NOREG); // Keep -Wall happy } void genpreamble() { cgpreamble(); } void genpostamble() { cgpostamble(); } void genfreeregs() { freeall_registers(); } void genprintint(int reg) { cgprintint(reg); } void genglobsym(int id) { cgglobsym(id); } int genprimsize(int type) { return (cgprimsize(type)); } ================================================ FILE: 14_ARM_Platform/lib/printint.c ================================================ #include void printint(long x) { printf("%ld\n", x); } ================================================ FILE: 14_ARM_Platform/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Initialise global variables static void init() { Line = 1; Putback = '\n'; Globs = 0; } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s infile\n", prog); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. 
int main(int argc, char *argv[]) { struct ASTnode *tree; if (argc != 2) usage(argv[0]); init(); // Open up the input file if ((Infile = fopen(argv[1], "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", argv[1], strerror(errno)); exit(1); } // Create the output file if ((Outfile = fopen("out.s", "w")) == NULL) { fprintf(stderr, "Unable to create out.s: %s\n", strerror(errno)); exit(1); } // For now, ensure that void printint() is defined addglob("printint", P_CHAR, S_FUNCTION, 0); scan(&Token); // Get the first token from the input genpreamble(); // Output the preamble while (1) { // Parse a function and tree = function_declaration(); genAST(tree, NOREG, 0); // generate the assembly code for it if (Token.token == T_EOF) // Stop when we have reached EOF break; } genpostamble(); fclose(Outfile); // Close the output file and exit return(0); } ================================================ FILE: 14_ARM_Platform/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current token is t, // and fetch the next token. 
Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d\n", s, Line); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d\n", s1, s2, Line); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d\n", s, d, Line); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d\n", s, c, Line); exit(1); } ================================================ FILE: 14_ARM_Platform/scan.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { char *p; p = strchr(s, c); return (p ? p - s : -1); } // Get the next character from the input file. static int next(void) { int c; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return (c); } c = fgetc(Infile); // Read from input file if ('\n' == c) Line++; // Increment line count return (c); } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. 
static int skip(void) {
  int ch;

  // Keep pulling characters until one arrives
  // that is not a space, tab, newline, CR or FF
  do {
    ch = next();
  } while (' ' == ch || '\t' == ch || '\n' == ch || '\r' == ch || '\f' == ch);

  return (ch);
}

// Scan and return an integer literal value from the input file.
// c is the first character of the literal, already read in.
static int scanint(int c) {
  int digit;
  int total = 0;

  // Fold each decimal digit into the running total
  for (;;) {
    digit = chrpos("0123456789", c);
    if (digit < 0)
      break;
    total = total * 10 + digit;
    c = next();
  }

  // The character that stopped us is not ours: put it back
  putback(c);
  return (total);
}

// Scan an identifier from the input file into buf[], which can
// hold lim characters including the terminating NUL. c is the
// first character, already read in. Return the identifier's length.
static int scanident(int c, char *buf, int lim) {
  int len = 0;

  // Digits, letters and underscores are all allowed
  for (; isalpha(c) || isdigit(c) || '_' == c; c = next()) {
    // Complain once the buffer is full,
    // otherwise store the character
    if (len == lim - 1)
      fatal("Identifier too long");
    else if (len < lim - 1)
      buf[len++] = c;
  }

  // Give back the character that ended the identifier,
  // then NUL-terminate buf[] and report the length
  putback(c);
  buf[len] = '\0';
  return (len);
}

// Given a word from the input, return the matching
// keyword token number or 0 if it's not a keyword.
// Switch on the first letter so that we don't have
// to waste time strcmp()ing against all the keywords.
static int keyword(char *s) { switch (*s) { case 'c': if (!strcmp(s, "char")) return (T_CHAR); break; case 'e': if (!strcmp(s, "else")) return (T_ELSE); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'l': if (!strcmp(s, "long")) return (T_LONG); break; case 'p': if (!strcmp(s, "print")) return (T_PRINT); break; case 'r': if (!strcmp(s, "return")) return (T_RETURN); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; case 'v': if (!strcmp(s, "void")) return (T_VOID); break; } return (0); } // A pointer to a rejected token static struct token *Rejtoken = NULL; // Reject the token that we just scanned void reject_token(struct token *t) { if (Rejtoken != NULL) fatal("Can't reject token twice"); Rejtoken = t; } // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. int scan(struct token *t) { int c, tokentype; // If we have any rejected token, return it if (Rejtoken != NULL) { t = Rejtoken; Rejtoken = NULL; return (1); } // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': t->token = T_PLUS; break; case '-': t->token = T_MINUS; break; case '*': t->token = T_STAR; break; case '/': t->token = T_SLASH; break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { fatalc("Unrecognised character", c); } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else { putback(c); t->token = T_LT; } break; case '>': if ((c = next()) == '=') { t->token = T_GE; } else { putback(c); t->token = T_GT; } break; 
default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if ((tokentype = keyword(Text)) != 0) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token return (1); } ================================================ FILE: 14_ARM_Platform/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // Prototypes static struct ASTnode *single_statement(void); // compound_statement: // empty, i.e. no statement // | statement // | statement statements // ; // // statement: print_statement // | declaration // | assignment_statement // | function_call // | if_statement // | while_statement // | for_statement // | return_statement // ; // print_statement: 'print' expression ';' ; // static struct ASTnode *print_statement(void) { struct ASTnode *tree; int lefttype, righttype; // Match a 'print' as the first token match(T_PRINT, "print"); // Parse the following expression tree = binexpr(0); // Ensure the two types are compatible. lefttype = P_INT; righttype = tree->type; if (!type_compatible(&lefttype, &righttype, 0)) fatal("Incompatible types"); // Widen the tree if required. 
if (righttype) tree = mkastunary(righttype, P_INT, tree, 0); // Make an print AST tree tree = mkastunary(A_PRINT, P_NONE, tree, 0); // Return the AST return (tree); } // assignment_statement: identifier '=' expression ';' ; // // Parse an assignment statement and return its AST static struct ASTnode *assignment_statement(void) { struct ASTnode *left, *right, *tree; int lefttype, righttype; int id; // Ensure we have an identifier ident(); // This could be a variable or a function call. // If next token is '(', it's a function call if (Token.token == T_LPAREN) return (funccall()); // Not a function call, on with an assignment then! // Check the identifier has been defined then make a leaf node for it // XXX Add structural type test if ((id = findglob(Text)) == -1) { fatals("Undeclared variable", Text); } right = mkastleaf(A_LVIDENT, Gsym[id].type, id); // Ensure we have an equals sign match(T_ASSIGN, "="); // Parse the following expression left = binexpr(0); // Ensure the two types are compatible. lefttype = left->type; righttype = right->type; if (!type_compatible(&lefttype, &righttype, 1)) fatal("Incompatible types"); // Widen the left if required. if (lefttype) left = mkastunary(lefttype, right->type, left, 0); // Make an assignment AST tree tree = mkastnode(A_ASSIGN, P_INT, left, NULL, right, 0); // Return the AST return (tree); } // if_statement: if_head // | if_head 'else' compound_statement // ; // // if_head: 'if' '(' true_false_expression ')' compound_statement ; // // Parse an IF statement including any // optional ELSE clause and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Ensure // the tree's operation is a comparison. 
condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) fatal("Bad comparison operator"); rparen(); // Get the AST for the compound statement trueAST = compound_statement(); // If we have an 'else', skip it // and get the AST for the compound statement if (Token.token == T_ELSE) { scan(&Token); falseAST = compound_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, P_NONE, condAST, trueAST, falseAST, 0)); } // while_statement: 'while' '(' true_false_expression ')' compound_statement ; // // Parse a WHILE statement and return its AST static struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Ensure // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) fatal("Bad comparison operator"); rparen(); // Get the AST for the compound statement bodyAST = compound_statement(); // Build and return the AST for this statement return (mkastnode(A_WHILE, P_NONE, condAST, NULL, bodyAST, 0)); } // for_statement: 'for' '(' preop_statement ';' // true_false_expression ';' // postop_statement ')' compound_statement ; // // preop_statement: statement (for now) // postop_statement: statement (for now) // // Parse a FOR statement and return its AST static struct ASTnode *for_statement(void) { struct ASTnode *condAST, *bodyAST; struct ASTnode *preopAST, *postopAST; struct ASTnode *tree; // Ensure we have 'for' '(' match(T_FOR, "for"); lparen(); // Get the pre_op statement and the ';' preopAST = single_statement(); semi(); // Get the condition and the ';' condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) fatal("Bad comparison operator"); semi(); // Get the post_op statement and the ')' postopAST = single_statement(); rparen(); // Get the compound statement which is the body bodyAST = compound_statement(); // For now, all four 
sub-trees have to be non-NULL. // Later on, we'll change the semantics for when some are missing // Glue the compound statement and the postop tree tree = mkastnode(A_GLUE, P_NONE, bodyAST, NULL, postopAST, 0); // Make a WHILE loop with the condition and this new body tree = mkastnode(A_WHILE, P_NONE, condAST, NULL, tree, 0); // And glue the preop tree to the A_WHILE tree return (mkastnode(A_GLUE, P_NONE, preopAST, NULL, tree, 0)); } // return_statement: 'return' '(' expression ')' ; // // Parse a return statement and return its AST static struct ASTnode *return_statement(void) { struct ASTnode *tree; int returntype, functype; // Can't return a value if function returns P_VOID if (Gsym[Functionid].type == P_VOID) fatal("Can't return from a void function"); // Ensure we have 'return' '(' match(T_RETURN, "return"); lparen(); // Parse the following expression tree = binexpr(0); // Ensure this is compatible with the function's type returntype = tree->type; functype = Gsym[Functionid].type; if (!type_compatible(&returntype, &functype, 1)) fatal("Incompatible types"); // Widen the left if required. 
if (returntype) tree = mkastunary(returntype, functype, tree, 0); // Add on the A_RETURN node tree = mkastunary(A_RETURN, P_NONE, tree, 0); // Get the ')' rparen(); return (tree); } // Parse a single statement // and return its AST static struct ASTnode *single_statement(void) { switch (Token.token) { case T_PRINT: return (print_statement()); case T_CHAR: case T_INT: case T_LONG: var_declaration(); return (NULL); // No AST generated here case T_IDENT: return (assignment_statement()); case T_IF: return (if_statement()); case T_WHILE: return (while_statement()); case T_FOR: return (for_statement()); case T_RETURN: return (return_statement()); default: fatald("Syntax error, token", Token.token); } return(NULL); // Keep -Wall happy } // Parse a compound statement // and return its AST struct ASTnode *compound_statement(void) { struct ASTnode *left = NULL; struct ASTnode *tree; // Require a left curly bracket lbrace(); while (1) { // Parse a single statement tree = single_statement(); // Some statements must be followed by a semicolon if (tree != NULL && (tree->op == A_PRINT || tree->op == A_ASSIGN || tree->op == A_RETURN || tree->op == A_FUNCCALL)) semi(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, P_NONE, left, NULL, tree, 0); } // When we hit a right curly bracket, // skip past it and return the AST if (Token.token == T_RBRACE) { rbrace(); return (left); } } } ================================================ FILE: 14_ARM_Platform/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 // Determine if the symbol s is in the global symbol table. // Return its slot position or -1 if not found. 
int findglob(char *s) { int i; for (i = 0; i < Globs; i++) { if (*s == *Gsym[i].name && !strcmp(s, Gsym[i].name)) return (i); } return (-1); } // Get the position of a new global symbol slot, or die // if we've run out of positions. static int newglob(void) { int p; if ((p = Globs++) >= NSYMBOLS) fatal("Too many global symbols"); return (p); } // Add a global symbol to the symbol table. // Also set up its type and structural type. // Return the slot number in the symbol table int addglob(char *name, int type, int stype, int endlabel) { int y; // If this is already in the symbol table, return the existing slot if ((y = findglob(name)) != -1) return (y); // Otherwise get a new slot, fill it in and // return the slot number y = newglob(); Gsym[y].name = strdup(name); Gsym[y].type = type; Gsym[y].stype = stype; Gsym[y].endlabel = endlabel; return (y); } ================================================ FILE: 14_ARM_Platform/tests/input01 ================================================ void main() { print 12 * 3; print 18 - 2 * 4; print 1 + 2 + 9 - 5/2 + 3*5; } ================================================ FILE: 14_ARM_Platform/tests/input02 ================================================ void main() { int fred; int jim; fred= 5; jim= 12; print fred + jim; } ================================================ FILE: 14_ARM_Platform/tests/input03 ================================================ void main() { int x; x= 1; print x; x= x + 1; print x; x= x + 1; print x; x= x + 1; print x; x= x + 1; print x; } ================================================ FILE: 14_ARM_Platform/tests/input04 ================================================ void main() { int x; x= 7 < 9; print x; x= 7 <= 9; print x; x= 7 != 9; print x; x= 7 == 7; print x; x= 7 >= 7; print x; x= 7 <= 7; print x; x= 9 > 7; print x; x= 9 >= 7; print x; x= 9 != 7; print x; } ================================================ FILE: 14_ARM_Platform/tests/input05 ================================================ void 
main() { int i; int j; i=6; j=12; if (i < j) { print i; } else { print j; } } ================================================ FILE: 14_ARM_Platform/tests/input06 ================================================ void main() { int i; i=1; while (i <= 10) { print i; i= i + 1; } } ================================================ FILE: 14_ARM_Platform/tests/input07 ================================================ void main() { int i; for (i= 1; i <= 10; i= i + 1) { print i; } } ================================================ FILE: 14_ARM_Platform/tests/input08 ================================================ void main() { int i; for (i= 1; i <= 10; i= i + 1) { print i; } } ================================================ FILE: 14_ARM_Platform/tests/input09 ================================================ void main() { int i; for (i= 1; i <= 10; i= i + 1) { print i; } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { print 2 * b - a; } } ================================================ FILE: 14_ARM_Platform/tests/input10 ================================================ void main() { int i; char j; j= 20; print j; i= 10; print i; for (i= 1; i <= 5; i= i + 1) { print i; } for (j= 253; j != 2; j= j + 1) { print j; } } ================================================ FILE: 14_ARM_Platform/tests/input11 ================================================ int main() { int i; char j; long k; i= 10; print i; j= 20; print j; k= 30; print k; for (i= 1; i <= 5; i= i + 1) { print i; } for (j= 253; j != 4; j= j + 1) { print j; } for (k= 1; k <= 5; k= k + 1) { print k; } return(i); print 12345; return(3); } ================================================ FILE: 14_ARM_Platform/tests/input12 ================================================ int fred() { return(5); } void main() { int x; x= fred(2); print x; } ================================================ FILE: 14_ARM_Platform/tests/input13 ================================================ int fred() { return(56); } void 
main() { int dummy; int result; dummy= printint(23); result= fred(10); dummy= printint(result); } ================================================ FILE: 14_ARM_Platform/tests/input14 ================================================ int fred() { return(20); } int main() { int result; printint(10); result= fred(15); printint(result); printint(fred(15)+10); return(0); } ================================================ FILE: 14_ARM_Platform/tests/mktests ================================================ #!/bin/sh # Make the output files for each test if [ ! -f ../comp1 ] then echo "Need to build ../comp1 first!"; exit 1 fi for i in input* do if [ ! -f "out.$i" ] then ../comp1 $i cc -o out out.s ../lib/printint.c ./out > out.$i rm -f out out.s fi done ================================================ FILE: 14_ARM_Platform/tests/mktestsn ================================================ #!/bin/sh # Make the output files for each test if [ ! -f ../compn ] then echo "Need to build ../compn first!"; exit 1 fi for i in input* do if [ ! 
-f "out.$i" ] then ../compn $i nasm -f elf64 out.s cc -no-pie -o out out.o ../lib/printint.c ./out > out.$i rm -f out out.s fi done ================================================ FILE: 14_ARM_Platform/tests/out.input01 ================================================ 36 10 25 ================================================ FILE: 14_ARM_Platform/tests/out.input02 ================================================ 17 ================================================ FILE: 14_ARM_Platform/tests/out.input03 ================================================ 1 2 3 4 5 ================================================ FILE: 14_ARM_Platform/tests/out.input04 ================================================ 1 1 1 1 1 1 1 1 1 ================================================ FILE: 14_ARM_Platform/tests/out.input05 ================================================ 6 ================================================ FILE: 14_ARM_Platform/tests/out.input06 ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 14_ARM_Platform/tests/out.input07 ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 14_ARM_Platform/tests/out.input08 ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 14_ARM_Platform/tests/out.input09 ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 14_ARM_Platform/tests/out.input10 ================================================ 20 10 1 2 3 4 5 253 254 255 0 1 ================================================ FILE: 14_ARM_Platform/tests/out.input11 ================================================ 10 20 30 1 2 3 4 5 253 254 255 0 1 2 3 1 2 3 4 5 ================================================ FILE: 14_ARM_Platform/tests/out.input12 ================================================ 5 
================================================ FILE: 14_ARM_Platform/tests/out.input13 ================================================ 23 56 ================================================ FILE: 14_ARM_Platform/tests/out.input14 ================================================ 10 20 30 ================================================ FILE: 14_ARM_Platform/tests/runtests ================================================ #!/bin/sh # Run each test and compare # against known good output if [ ! -f ../comp1 ] then echo "Need to build ../comp1 first!"; exit 1 fi for i in input* do if [ ! -f "out.$i" ] then echo "Can't run test on $i, no output file!" else echo -n $i ../comp1 $i cc -o out out.s ../lib/printint.c ./out > trial.$i cmp -s "out.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo else echo ": OK" fi rm -f out out.s "trial.$i" fi done ================================================ FILE: 14_ARM_Platform/tests/runtestsn ================================================ #!/bin/sh # Run each test and compare # against known good output if [ ! -f ../compn ] then echo "Need to build ../compn first!"; exit 1 fi for i in input* do if [ ! -f "out.$i" ] then echo "Can't run test on $i, no output file!" else echo -n $i ../compn $i nasm -f elf64 out.s cc -no-pie -o out out.o ../lib/printint.c ./out > trial.$i cmp -s "out.$i" "trial.$i" if [ "$?" 
-eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo else echo ": OK" fi rm -f out out.o out.s "trial.$i" fi done ================================================ FILE: 14_ARM_Platform/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->type = type; n->left = left; n->mid = mid; n->right = right; n->v.intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, int intvalue) { return (mkastnode(op, type, NULL, NULL, NULL, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, int intvalue) { return (mkastnode(op, type, left, NULL, NULL, intvalue)); } ================================================ FILE: 14_ARM_Platform/types.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Types and type handling // Copyright (c) 2019 Warren Toomey, GPL3 // Given two primitive types, // return true if they are compatible, // false otherwise. Also return either // zero or an A_WIDEN operation if one // has to be widened to match the other. // If onlyright is true, only widen left to right. 
int type_compatible(int *left, int *right, int onlyright) { int leftsize, rightsize; // Same types, they are compatible if (*left == *right) { *left = *right = 0; return (1); } // Get the sizes for each type leftsize = genprimsize(*left); rightsize = genprimsize(*right); // Types with zero size are not // not compatible with anything if ((leftsize == 0) || (rightsize == 0)) return (0); // Widen types as required if (leftsize < rightsize) { *left = A_WIDEN; *right = 0; return (1); } if (rightsize < leftsize) { if (onlyright) return (0); *left = 0; *right = A_WIDEN; return (1); } // Anything remaining is the same size // and thus compatible *left = *right = 0; return (1); } ================================================ FILE: 15_Pointers_pt1/Makefile ================================================ SRCS= cg.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c \ sym.c tree.c types.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c \ sym.c tree.c types.c ARMSRCS= cg_arm.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c \ sym.c tree.c types.c comp1: $(SRCS) cc -o comp1 -g -Wall $(SRCS) compn: $(SRCN) cc -o compn -g -Wall $(SRCN) comp1arm: $(ARMSRCS) cc -o comp1arm -g -Wall $(ARMSRCS) cp comp1arm comp1 clean: rm -f comp1 comp1arm compn *.o *.s out test: comp1 tests/runtests (cd tests; chmod +x runtests; ./runtests) armtest: comp1arm tests/runtests (cd tests; chmod +x runtests; ./runtests) testn: compn tests/runtestsn (cd tests; chmod +x runtestsn; ./runtestsn) test15: comp1 tests/input15.c lib/printint.c ./comp1 tests/input15.c cc -o out out.s lib/printint.c ./out armtest15: comp1arm tests/input15.c lib/printint.c ./comp1 tests/input15.c cc -o out out.s lib/printint.c ./out test15n: compn tests/input15.c lib/printint.c ./compn tests/input15.c nasm -f elf64 out.s cc -no-pie -o out lib/printint.c out.o ./out ================================================ FILE: 15_Pointers_pt1/Readme.md ================================================ # Part 15: Pointers, part 1 
In this part of our compiler writing journey, I want to begin the work to add pointers to our language. In particular, I want to add this: + Declaration of pointer variables + Assignment of an address to a pointer + Dereferencing a pointer to get the value it points at Given that this is a work in progress, I'm sure I will implement a simplistic version that works for now, but later on I will have to change or extend it to be more general. ## New Keywords and Tokens There are no new keywords this time, only two new tokens: + '&', T_AMPER, and + '&&', T_LOGAND We don't need T_LOGAND yet, but I might as well add this code to `scan()` now: ```c case '&': if ((c = next()) == '&') { t->token = T_LOGAND; } else { putback(c); t->token = T_AMPER; } break; ``` ## New Code for Types I've added some new primitive types to the language (in `defs.h`): ```c // Primitive types enum { P_NONE, P_VOID, P_CHAR, P_INT, P_LONG, P_VOIDPTR, P_CHARPTR, P_INTPTR, P_LONGPTR }; ``` We will have new unary prefix operators: + '&' to get the address of an identifier, and + '*' to dereference a pointer and get the value it points at. The type of expression that each operator produces is different to the type that each works on.
We need a couple of functions in `types.c` to make the type change: ```c // Given a primitive type, return // the type which is a pointer to it int pointer_to(int type) { int newtype; switch (type) { case P_VOID: newtype = P_VOIDPTR; break; case P_CHAR: newtype = P_CHARPTR; break; case P_INT: newtype = P_INTPTR; break; case P_LONG: newtype = P_LONGPTR; break; default: fatald("Unrecognised in pointer_to: type", type); } return (newtype); } // Given a primitive pointer type, return // the type which it points to int value_at(int type) { int newtype; switch (type) { case P_VOIDPTR: newtype = P_VOID; break; case P_CHARPTR: newtype = P_CHAR; break; case P_INTPTR: newtype = P_INT; break; case P_LONGPTR: newtype = P_LONG; break; default: fatald("Unrecognised in value_at: type", type); } return (newtype); } ``` Now, where are we going to use these functions? ## Declaring Pointer Variables We want to be able to declare scalar variables and pointer variables, e.g. ```c char a; char *b; int d; int *e; ``` We already have a function `parse_type()` in `decl.c` that converts the type keyword to a type. Let's extend it to scan the following token and change the type if the next token is a '*': ```c // Parse the current token and return // a primitive type enum value. Also // scan in the next token int parse_type(void) { int type; switch (Token.token) { case T_VOID: type = P_VOID; break; case T_CHAR: type = P_CHAR; break; case T_INT: type = P_INT; break; case T_LONG: type = P_LONG; break; default: fatald("Illegal type, token", Token.token); } // Scan in one or more further '*' tokens // and determine the correct pointer type while (1) { scan(&Token); if (Token.token != T_STAR) break; type = pointer_to(type); } // We leave with the next token already scanned return (type); } ``` This will allow the programmer to try to do: ```c char *****fred; ``` This will fail because `pointer_to()` can't convert a P_CHARPTR to a P_CHARPTRPTR (yet). 
But the code in `parse_type()` is ready to do it! The code in `var_declaration()` now quite happily parses pointer variable declarations: ```c // Parse the declaration of a variable void var_declaration(void) { int id, type; // Get the type of the variable // which also scans in the identifier type = parse_type(); ident(); ... } ``` ### Prefix Operators '*' and '&' With declarations out of the road, let's now look at parsing expressions where '*' and '&' are operators that come before an expression. The BNF grammar looks like this: ``` prefix_expression: primary | '*' prefix_expression | '&' prefix_expression ; ``` Technically this allows: ``` x= ***y; a= &&&b; ``` To prevent impossible uses of the two operators, we add in some semantic checking. Here's the code: ```c // Parse a prefix expression and return // a sub-tree representing it. struct ASTnode *prefix(void) { struct ASTnode *tree; switch (Token.token) { case T_AMPER: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Ensure that it's an identifier if (tree->op != A_IDENT) fatal("& operator must be followed by an identifier"); // Now change the operator to A_ADDR and the type to // a pointer to the original type tree->op = A_ADDR; tree->type = pointer_to(tree->type); break; case T_STAR: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's either another deref or an // identifier if (tree->op != A_IDENT && tree->op != A_DEREF) fatal("* operator must be followed by an identifier or *"); // Prepend an A_DEREF operation to the tree tree = mkastunary(A_DEREF, value_at(tree->type), tree, 0); break; default: tree = primary(); } return (tree); } ``` We're still doing recursive descent, but we also put error checks in to prevent input mistakes. 
Right now, the limitations in `value_at()` will prevent more than one '*' operator in a row, but later on when we change `value_at()`, we won't have to come back and change `prefix()`. Note that `prefix()` also calls `primary()` when it doesn't see a '*' or '&' operator. That allows us to change our existing code in `binexpr()`: ```c struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; int lefttype, righttype; int tokentype; // Get the tree on the left. // Fetch the next token at the same time. // Used to be a call to primary(). left = prefix(); ... } ``` ## New AST Node Types Up in `prefix()` I introduced two new AST node types (declared in `defs.h`): + A_DEREF: Dereference the pointer in the child node + A_ADDR: Get the address of the identifier in this node Note that the A_ADDR node isn't a parent node. For the expression `&fred`, the code in `prefix()` replaces the A_IDENT in the "fred" node with the A_ADDR node type. ## Generating the New Assembly Code In our generic code generator, `gen.c`, there are only a few new lines to `genAST()`: ```c case A_ADDR: return (cgaddress(n->v.id)); case A_DEREF: return (cgderef(leftreg, n->left->type)); ``` The A_ADDR node generates the code to load the address of the `n->v.id` identifier into a register. The A_DEREF node takes the pointer address in `leftreg`, and its associated type, and returns a register with the value at this address. ### x86-64 Implementation I worked out the following assembly output by reviewing the assembly code generated by other compilers. It might not be correct! ```c // Generate code to load the address of a global // identifier into a variable.
Return a new register int cgaddress(int id) { int r = alloc_register(); fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", Gsym[id].name, reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tmovzbq\t(%s), %s\n", reglist[r], reglist[r]); break; case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; } return (r); } ``` The `leaq` instruction loads the address of the named identifier. In the second function, the `(%r8)` syntax loads the value that register `%r8` points to. ## Testing the New Functionality Here's our new test file, `tests/input15.c` and the result when we compile it: ```c int main() { char a; char *b; char c; int d; int *e; int f; a= 18; printint(a); b= &a; c= *b; printint(c); d= 12; printint(d); e= &d; f= *e; printint(f); return(0); } ``` ``` $ make test15 cc -o comp1 -g -Wall cg.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c sym.c tree.c types.c ./comp1 tests/input15.c cc -o out out.s lib/printint.c ./out 18 18 12 12 ``` I decided to change our test files to end with the `.c` suffix, now that they are actually C programs. I also changed the `tests/mktests` script to generate the *correct* results by using a "real" compiler to compile our test files. ## Conclusion and What's Next Well, we have the start of pointers implemented. They are not completely correct yet. For example, if I write this code: ```c int main() { int x; int y; int *iptr; x= 10; y= 20; iptr= &x + 1; printint( *iptr); } ``` it should print 20 because `&x + 1` should address one `int` past `x`, i.e. `y`. This is eight bytes away from `x`. However, our compiler simply adds one to the address of `x`, which is incorrect. I'll have to work out how to fix this. In the next part of our compiler writing journey, we will try to fix this problem.
[Next step](../16_Global_Vars/Readme.md) ================================================ FILE: 15_Pointers_pt1/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. // We need a list of byte and doubleword registers, too static int freereg[4]; static char *reglist[4] = { "%r8", "%r9", "%r10", "%r11" }; static char *breglist[4] = { "%r8b", "%r9b", "%r10b", "%r11b" }; static char *dreglist[4] = { "%r8d", "%r9d", "%r10d", "%r11d" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Gsym[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, @function\n" "%s:\n" "\tpushq\t%%rbp\n" "\tmovq\t%%rsp, %%rbp\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Gsym[id].endlabel); fputs("\tpopq %rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. 
int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]); return (r); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Print out the code to initialise it switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzbq\t%s(%%rip), %s\n", Gsym[id].name, reglist[r]); break; case P_INT: fprintf(Outfile, "\tmovzbl\t%s(\%%rip), %s\n", Gsym[id].name, reglist[r]); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tmovq\t%s(\%%rip), %s\n", Gsym[id].name, reglist[r]); break; default: fatald("Bad type in cgloadglob:", Gsym[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); free_register(r2); return (r1); } // Call printint() with the given register void cgprintint(int r) { fprintf(Outfile, "\tmovq\t%s, %%rdi\n", reglist[r]); fprintf(Outfile, "\tcall\tprintint\n"); free_register(r); } // Call a function with one 
argument from the given register // Return the register with the result int cgcall(int r, int id) { // Get a new register int outr = alloc_register(); fprintf(Outfile, "\tmovq\t%s, %%rdi\n", reglist[r]); fprintf(Outfile, "\tcall\t%s\n", Gsym[id].name); fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]); free_register(r); return (outr); } // Store a register's value into a variable int cgstorglob(int r, int id) { switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %s(\%%rip)\n", breglist[r], Gsym[id].name); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %s(\%%rip)\n", dreglist[r], Gsym[id].name); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tmovq\t%s, %s(\%%rip)\n", reglist[r], Gsym[id].name); break; default: fatald("Bad type in cgloadglob:", Gsym[id].type); } return (r); } // Array of type sizes in P_XXX order. // 0 means no size. static int psize[] = { 0, 0, 1, 4, 8, 8, 8, 8 }; // Given a P_XXX type value, return the // size of a primitive type in bytes. int cgprimsize(int type) { // Check the type is valid if (type < P_NONE || type > P_LONGPTR) fatal("Bad type in cgprimsize()"); return (psize[type]); } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Gsym[id].type); fprintf(Outfile, "\t.comm\t%s,%d,%d\n", Gsym[id].name, typesize, typesize); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { // Generate code depending on the function's type switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzbl\t%s, %%eax\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %%eax\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", Gsym[id].type); } cgjump(Gsym[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. 
Return a new register int cgaddress(int id) { int r = alloc_register(); fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", Gsym[id].name, reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tmovzbq\t(%s), %s\n", reglist[r], reglist[r]); break; case P_INTPTR: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; case P_LONGPTR: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; } return (r); } ================================================ FILE: 15_Pointers_pt1/cg_arm.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for ARMv6 on Raspberry Pi // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. static int freereg[4]; static char *reglist[4] = { "r4", "r5", "r6", "r7" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // We have to store large integer literal values in memory. // Keep a list of them which will be output in the postamble #define MAXINTS 1024 int Intlist[MAXINTS]; static int Intslot = 0; // Determine the offset of a large integer // literal from the .L3 label. If the integer // isn't in the list, add it. 
static void set_int_offset(int val) { int offset = -1; // See if it is already there for (int i = 0; i < Intslot; i++) { if (Intlist[i] == val) { offset = 4 * i; break; } } // Not in the list, so add it if (offset == -1) { offset = 4 * Intslot; if (Intslot == MAXINTS) fatal("Out of int slots in set_int_offset()"); Intlist[Intslot++] = val; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L3+%d\n", offset); } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Print out the assembly postamble void cgpostamble() { // Print out the global variables fprintf(Outfile, ".L2:\n"); for (int i = 0; i < Globs; i++) { if (Gsym[i].stype == S_VARIABLE) fprintf(Outfile, "\t.word %s\n", Gsym[i].name); } // Print out the integer literals fprintf(Outfile, ".L3:\n"); for (int i = 0; i < Intslot; i++) { fprintf(Outfile, "\t.word %d\n", Intlist[i]); } } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Gsym[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, \%%function\n" "%s:\n" "\tpush\t{fp, lr}\n" "\tadd\tfp, sp, #4\n" "\tsub\tsp, sp, #8\n" "\tstr\tr0, [fp, #-8]\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Gsym[id].endlabel); fputs("\tsub\tsp, fp, #4\n" "\tpop\t{fp, pc}\n" "\t.align\t2\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); // If the literal value is small, do it with one instruction if (value <= 1000) fprintf(Outfile, "\tmov\t%s, #%d\n", reglist[r], value); else { set_int_offset(value); fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); } return (r); } // Determine the offset of a variable from the .L2 // label. Yes, this is inefficient code. static void set_var_offset(int id) { int offset = 0; // Walk the symbol table up to id. 
// Find S_VARIABLEs and add on 4 until // we get to our variable for (int i = 0; i < id; i++) { if (Gsym[i].stype == S_VARIABLE) offset += 4; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L2+%d\n", offset); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tldrb\t%s, [r3]\n", reglist[r]); break; default: fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); break; } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s, %s\n", reglist[r1], reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\tmul\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { // To do a divide: r0 holds the dividend, r1 holds the divisor. // The quotient is in r0. 
fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r1]); fprintf(Outfile, "\tmov\tr1, %s\n", reglist[r2]); fprintf(Outfile, "\tbl\t__aeabi_idiv\n"); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r1]); free_register(r2); return (r1); } // Call printint() with the given register void cgprintint(int r) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]); fprintf(Outfile, "\tbl\tprintint\n"); fprintf(Outfile, "\tnop\n"); free_register(r); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]); fprintf(Outfile, "\tbl\t%s\n", Gsym[id].name); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r]); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { // Get the offset to the variable set_var_offset(id); switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tstr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgstorglob:", Gsym[id].type); } return (r); } // Array of type sizes in P_XXX order. // 0 means no size. static int psize[] = { 0, 0, 1, 4, 4, 4, 4, 4 }; // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { // Check the type is valid if (type < P_NONE || type > P_LONGPTR) fatal("Bad type in cgprimsize()"); return (psize[type]); } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Gsym[id].type); fprintf(Outfile, "\t.comm\t%s,%d,%d\n", Gsym[id].name, typesize, typesize); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "moveq", "movne", "movlt", "movgt", "movle", "movge" }; // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "movne", "moveq", "movge", "movle", "movgt", "movlt" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #1\n", cmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #0\n", invcmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\tuxtb\t%s, %s\n", reglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tb\tL%d\n", l); } // List of inverted branch instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *brlist[] = { "bne", "beq", "bge", "ble", "bgt", "blt" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", brlist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[reg]); cgjump(Gsym[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. Return a new register int cgaddress(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); fprintf(Outfile, "\tmov\t%s, r3\n", reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tldrb\t%s, [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: fprintf(Outfile, "\tldr\t%s, [%s]\n", reglist[r], reglist[r]); break; case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } ================================================ FILE: 15_Pointers_pt1/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. 
// We need a list of byte and doubleword registers, too static int freereg[4]; static char *reglist[4] = { "r8", "r9", "r10", "r11" }; static char *breglist[4] = { "r8b", "r9b", "r10b", "r11b" }; static char *dreglist[4] = { "r8d", "r9d", "r10d", "r11d" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\textern\tprintint\n", Outfile); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Gsym[id].name; fprintf(Outfile, "\tsection\t.text\n" "\tglobal\t%s\n" "%s:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n", name, name); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Gsym[id].endlabel); fputs("\tpop rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Load a value from a variable into a register. 
// Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Print out the code to initialise it switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], Gsym[id].name); break; case P_INT: fprintf(Outfile, "\txor\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tmov\t%s, dword [%s]\n", dreglist[r], Gsym[id].name); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], Gsym[id].name); break; default: fatald("Bad type in cgloadglob:", Gsym[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } // Call printint() with the given register void cgprintint(int r) { fprintf(Outfile, "\tmov\trdi, %s\n", reglist[r]); fprintf(Outfile, "\tcall\tprintint\n"); free_register(r); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { // Get a new register int outr = alloc_register(); 
fprintf(Outfile, "\tmov\trdi, %s\n", reglist[r]); fprintf(Outfile, "\tcall\t%s\n", Gsym[id].name); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[outr]); free_register(r); return (outr); } // Store a register's value into a variable int cgstorglob(int r, int id) { switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], %s\n", Gsym[id].name, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", Gsym[id].name, dreglist[r]); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tmov\t[%s], %s\n", Gsym[id].name, reglist[r]); break; default: fatald("Bad type in cgloadglob:", Gsym[id].type); } return (r); } // Array of type sizes in P_XXX order. // 0 means no size. static int psize[] = { 0, 0, 1, 4, 8, 8, 8, 8 }; // Given a P_XXX type value, return the // size of a primitive type in bytes. int cgprimsize(int type) { // Check the type is valid if (type < P_NONE || type > P_LONGPTR) fatal("Bad type in cgprimsize()"); return (psize[type]); } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Gsym[id].type); fprintf(Outfile, "\tcommon\t%s %d:%d\n", Gsym[id].name, typesize, typesize); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { // Generate code depending on the function's type switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzx\teax, %s\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmov\teax, %s\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", Gsym[id].type); } cgjump(Gsym[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. 
Return a new register int cgaddress(int id) { int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r], Gsym[id].name); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: fprintf(Outfile, "\tmovzx\t%s, word [%s]\n", reglist[r], reglist[r]); break; case P_LONGPTR: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } ================================================ FILE: 15_Pointers_pt1/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Putback; // Character put back by scanner extern_ int Functionid; // Symbol id of the current function extern_ int Globs; // Position of next free global symbol slot extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ struct token Token; // Last token scanned extern_ char Text[TEXTLEN + 1]; // Last identifier scanned extern_ struct symtable Gsym[NSYMBOLS]; // Global symbol table ================================================ FILE: 15_Pointers_pt1/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 // Parse the current token and return // a primitive type enum value. 
Also // scan in the next token int parse_type(void) { int type; switch (Token.token) { case T_VOID: type = P_VOID; break; case T_CHAR: type = P_CHAR; break; case T_INT: type = P_INT; break; case T_LONG: type = P_LONG; break; default: fatald("Illegal type, token", Token.token); } // Scan in one or more further '*' tokens // and determine the correct pointer type while (1) { scan(&Token); if (Token.token != T_STAR) break; type = pointer_to(type); } // We leave with the next token already scanned return (type); } // variable_declaration: type identifier ';' ; // // Parse the declaration of a variable void var_declaration(void) { int id, type; // Get the type of the variable // which also scans in the identifier type = parse_type(); ident(); // Text now has the identifier's name. // Add it as a known identifier // and generate its space in assembly id = addglob(Text, type, S_VARIABLE, 0); genglobsym(id); // Get the trailing semicolon semi(); } // // function_declaration: type identifier '(' ')' compound_statement ; // // Parse the declaration of a simplistic function struct ASTnode *function_declaration(void) { struct ASTnode *tree, *finalstmt; int nameslot, type, endlabel; // Get the type of the variable // which also scans in the identifier type = parse_type(); ident(); // Get a label-id for the end label, add the function // to the symbol table, and set the Functionid global // to the function's symbol-id endlabel = genlabel(); nameslot = addglob(Text, type, S_FUNCTION, endlabel); Functionid = nameslot; // Scan in the parentheses lparen(); rparen(); // Get the AST tree for the compound statement tree = compound_statement(); // If the function type isn't P_VOID .. if (type != P_VOID) { // Error if no statements in the function if (tree == NULL) fatal("No statements in function with non-void type"); // Check that the last AST operation in the // compound statement was a return statement finalstmt = (tree->op == A_GLUE) ? 
tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); } // Return an A_FUNCTION node which has the function's nameslot // and the compound statement sub-tree return (mkastunary(A_FUNCTION, type, tree, nameslot)); } ================================================ FILE: 15_Pointers_pt1/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c void reject_token(struct token *t); int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, int intvalue); struct ASTnode *mkastleaf(int op, int type, int intvalue); struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, int intvalue); // gen.c int genlabel(void); int genAST(struct ASTnode *n, int reg, int parentASTop); void genpreamble(); void genpostamble(); void genfreeregs(); void genprintint(int reg); void genglobsym(int id); int genprimsize(int type); void genreturn(int reg, int id); // cg.c void freeall_registers(void); void cgpreamble(); void cgpostamble(); void cgfuncpreamble(int id); void cgfuncpostamble(int id); int cgloadint(int value, int type); int cgloadglob(int id); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdiv(int r1, int r2); void cgprintint(int r); int cgcall(int r, int id); int cgstorglob(int r, int id); void cgglobsym(int id); int cgcompare_and_set(int ASTop, int r1, int r2); int cgcompare_and_jump(int ASTop, int r1, int r2, int label); void cglabel(int l); void cgjump(int l); int cgwiden(int r, int oldtype, int newtype); int cgprimsize(int type); void cgreturn(int reg, int id); int cgaddress(int id); int cgderef(int r, int type); // expr.c struct ASTnode *funccall(void); struct ASTnode *binexpr(int ptp); // stmt.c struct ASTnode *compound_statement(void); // misc.c void match(int t, char *what); 
void semi(void); void lbrace(void); void rbrace(void); void lparen(void); void rparen(void); void ident(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c int findglob(char *s); int addglob(char *name, int type, int stype, int endlabel); // decl.c void var_declaration(void); struct ASTnode *function_declaration(void); // types.c int type_compatible(int *left, int *right, int onlyright); int pointer_to(int type); int value_at(int type); ================================================ FILE: 15_Pointers_pt1/defs.h ================================================ #include #include #include #include // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 #define TEXTLEN 512 // Length of symbols in input #define NSYMBOLS 1024 // Number of symbol table entries // Token types enum { T_EOF, // Operators T_PLUS, T_MINUS, T_STAR, T_SLASH, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, // Type keywords T_VOID, T_CHAR, T_INT, T_LONG, // Structural tokens T_INTLIT, T_SEMI, T_ASSIGN, T_IDENT, T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, T_AMPER, T_LOGAND, // Other keywords T_PRINT, T_IF, T_ELSE, T_WHILE, T_FOR, T_RETURN }; // Token structure struct token { int token; // Token type, from the enum list above int intvalue; // For T_INTLIT, the integer value }; // AST node types. 
The first few line up // with the related tokens enum { A_ADD = 1, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_INTLIT, A_IDENT, A_LVIDENT, A_ASSIGN, A_PRINT, A_GLUE, A_IF, A_WHILE, A_FUNCTION, A_WIDEN, A_RETURN, A_FUNCCALL, A_DEREF, A_ADDR }; // Primitive types enum { P_NONE, P_VOID, P_CHAR, P_INT, P_LONG, P_VOIDPTR, P_CHARPTR, P_INTPTR, P_LONGPTR }; // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; union { // For A_INTLIT, the integer value int intvalue; // For A_IDENT, the symbol slot number int id; // For A_FUNCTION, the symbol slot number } v; // For A_FUNCCALL, the symbol slot number }; #define NOREG -1 // Use NOREG when the AST generation // functions have no register to return // Structural types enum { S_VARIABLE, S_FUNCTION }; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol int stype; // Structural type for the symbol int endlabel; // For S_FUNCTIONs, the end label }; ================================================ FILE: 15_Pointers_pt1/expr.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of expressions // Copyright (c) 2019 Warren Toomey, GPL3 // Parse a function call with a single expression // argument and return its AST struct ASTnode *funccall(void) { struct ASTnode *tree; int id; // Check that the identifier has been defined, // then make a leaf node for it. XXX Add structural type test if ((id = findglob(Text)) == -1) { fatals("Undeclared function", Text); } // Get the '(' lparen(); // Parse the following expression tree = binexpr(0); // Build the function call AST node. Store the // function's return type as this node's type. 
// Also record the function's symbol-id tree = mkastunary(A_FUNCCALL, Gsym[id].type, tree, id); // Get the ')' rparen(); return (tree); } // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; int id; switch (Token.token) { case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. // Make it a P_CHAR if it's within the P_CHAR range if ((Token.intvalue) >= 0 && (Token.intvalue < 256)) n = mkastleaf(A_INTLIT, P_CHAR, Token.intvalue); else n = mkastleaf(A_INTLIT, P_INT, Token.intvalue); break; case T_IDENT: // This could be a variable or a function call. // Scan in the next token to find out scan(&Token); // It's a '(', so a function call if (Token.token == T_LPAREN) return (funccall()); // Not a function call, so reject the new token reject_token(&Token); // Check that the variable exists. XXX Add structural type test id = findglob(Text); if (id == -1) fatals("Unknown variable", Text); // Make a leaf AST node for it n = mkastleaf(A_IDENT, Gsym[id].type, id); break; default: fatald("Syntax error, token", Token.token); } // Scan in the next token and return the leaf node scan(&Token); return (n); } // Convert a binary operator token into an AST operation. // We rely on a 1:1 mapping from token to AST operation static int arithop(int tokentype) { if (tokentype > T_EOF && tokentype < T_INTLIT) return (tokentype); fatald("Syntax error, token", tokentype); return (0); // Keep -Wall happy } // Operator precedence for each token. Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 10, // T_EOF, T_PLUS, T_MINUS 20, 20, // T_STAR, T_SLASH 30, 30, // T_EQ, T_NE 40, 40, 40, 40 // T_LT, T_GT, T_LE, T_GE }; // Check that we have a binary operator and // return its precedence. 
static int op_precedence(int tokentype) { int prec = OpPrec[tokentype]; if (prec == 0) fatald("Syntax error, token", tokentype); return (prec); } // prefix_expression: primary // | '*' prefix_expression // | '&' prefix_expression // ; // Parse a prefix expression and return // a sub-tree representing it. struct ASTnode *prefix(void) { struct ASTnode *tree; switch (Token.token) { case T_AMPER: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Ensure that it's an identifier if (tree->op != A_IDENT) fatal("& operator must be followed by an identifier"); // Now change the operator to A_ADDR and the type to // a pointer to the original type tree->op = A_ADDR; tree->type = pointer_to(tree->type); break; case T_STAR: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's either another deref or an // identifier if (tree->op != A_IDENT && tree->op != A_DEREF) fatal("* operator must be followed by an identifier or *"); // Prepend an A_DEREF operation to the tree tree = mkastunary(A_DEREF, value_at(tree->type), tree, 0); break; default: tree = primary(); } return (tree); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; int lefttype, righttype; int tokentype; // Get the tree on the left. // Fetch the next token at the same time. 
left = prefix(); // If we hit a semicolon or ')', return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN) return (left); // While the precedence of this token is // more than that of the previous token precedence while (op_precedence(tokentype) > ptp) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Ensure the two types are compatible. lefttype = left->type; righttype = right->type; if (!type_compatible(&lefttype, &righttype, 0)) fatal("Incompatible types"); // Widen either side if required. type vars are A_WIDEN now if (lefttype) left = mkastunary(lefttype, right->type, left, 0); if (righttype) right = mkastunary(righttype, left->type, right, 0); // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. left = mkastnode(arithop(tokentype), left->type, left, NULL, right, 0); // Update the details of the current token. // If we hit a semicolon or ')', return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN) return (left); } // Return the tree we have when the precedence // is the same or lower return (left); } ================================================ FILE: 15_Pointers_pt1/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number int genlabel(void) { static int id = 1; return (id++); } // Generate the code for an IF statement // and an optional ELSE clause static int genIF(struct ASTnode *n) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. // When there is no ELSE clause, Lfalse _is_ // the ending label! 
Lfalse = genlabel(); if (n->right) Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. // We cheat by sending the Lfalse label as a register. genAST(n->left, Lfalse, n->op); genfreeregs(); // Generate the true compound statement genAST(n->mid, NOREG, n->op); genfreeregs(); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOREG, n->op); genfreeregs(); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement // and an optional ELSE clause static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = genlabel(); Lend = genlabel(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. // We cheat by sending the Lfalse label as a register. genAST(n->left, Lend, n->op); genfreeregs(); // Generate the compound statement for the body genAST(n->right, NOREG, n->op); genfreeregs(); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Given an AST, the register (if any) that holds // the previous rvalue, and the AST op of the parent, // generate assembly code recursively. 
// Return the register id with the tree's final value int genAST(struct ASTnode *n, int reg, int parentASTop) { int leftreg, rightreg; // We now have specific AST node handling at the top switch (n->op) { case A_IF: return (genIF(n)); case A_WHILE: return (genWHILE(n)); case A_GLUE: // Do each child statement, and free the // registers after each child genAST(n->left, NOREG, n->op); genfreeregs(); genAST(n->right, NOREG, n->op); genfreeregs(); return (NOREG); case A_FUNCTION: // Generate the function's preamble before the code cgfuncpreamble(n->v.id); genAST(n->left, NOREG, n->op); cgfuncpostamble(n->v.id); return (NOREG); } // General AST node handling below // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, NOREG, n->op); if (n->right) rightreg = genAST(n->right, leftreg, n->op); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdiv(leftreg, rightreg)); case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, compare registers // and set one to 1 or 0 based on the comparison. 
if (parentASTop == A_IF || parentASTop == A_WHILE) return (cgcompare_and_jump(n->op, leftreg, rightreg, reg)); else return (cgcompare_and_set(n->op, leftreg, rightreg)); case A_INTLIT: return (cgloadint(n->v.intvalue, n->type)); case A_IDENT: return (cgloadglob(n->v.id)); case A_LVIDENT: return (cgstorglob(reg, n->v.id)); case A_ASSIGN: // The work has already been done, return the result return (rightreg); case A_PRINT: // Print the left-child's value // and return no register genprintint(leftreg); genfreeregs(); return (NOREG); case A_WIDEN: // Widen the child's type to the parent's type return (cgwiden(leftreg, n->left->type, n->type)); case A_RETURN: cgreturn(leftreg, Functionid); return (NOREG); case A_FUNCCALL: return (cgcall(leftreg, n->v.id)); case A_ADDR: return (cgaddress(n->v.id)); case A_DEREF: return (cgderef(leftreg, n->left->type)); default: fatald("Unknown AST operator", n->op); } return (NOREG); // Keep -Wall happy } void genpreamble() { cgpreamble(); } void genpostamble() { cgpostamble(); } void genfreeregs() { freeall_registers(); } void genprintint(int reg) { cgprintint(reg); } void genglobsym(int id) { cgglobsym(id); } int genprimsize(int type) { return (cgprimsize(type)); } ================================================ FILE: 15_Pointers_pt1/lib/printint.c ================================================ #include void printint(long x) { printf("%ld\n", x); } ================================================ FILE: 15_Pointers_pt1/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Initialise global variables static void init() { Line = 1; Putback = '\n'; Globs = 0; } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s infile\n", prog); exit(1); } // Main program: check arguments and print a usage // if we don't 
have an argument. Open up the input // file and call scanfile() to scan the tokens in it. int main(int argc, char *argv[]) { struct ASTnode *tree; if (argc != 2) usage(argv[0]); init(); // Open up the input file if ((Infile = fopen(argv[1], "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", argv[1], strerror(errno)); exit(1); } // Create the output file if ((Outfile = fopen("out.s", "w")) == NULL) { fprintf(stderr, "Unable to create out.s: %s\n", strerror(errno)); exit(1); } // For now, ensure that void printint() is defined addglob("printint", P_CHAR, S_FUNCTION, 0); scan(&Token); // Get the first token from the input genpreamble(); // Output the preamble while (1) { // Parse a function and tree = function_declaration(); genAST(tree, NOREG, 0); // generate the assembly code for it if (Token.token == T_EOF) // Stop when we have reached EOF break; } genpostamble(); fclose(Outfile); // Close the output file and exit return (0); } ================================================ FILE: 15_Pointers_pt1/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current token is t, // and fetch the next token. 
Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d\n", s, Line); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d\n", s1, s2, Line); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d\n", s, d, Line); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d\n", s, c, Line); exit(1); } ================================================ FILE: 15_Pointers_pt1/scan.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { char *p; p = strchr(s, c); return (p ? p - s : -1); } // Get the next character from the input file. static int next(void) { int c; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return (c); } c = fgetc(Infile); // Read from input file if ('\n' == c) Line++; // Increment line count return (c); } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. 
static int skip(void) {
  int ch;

  // Keep reading until something other than
  // space, tab, newline, CR or form feed turns up
  do {
    ch = next();
  } while (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' || ch == '\f');
  return (ch);
}

// Scan an integer literal from the input file.
// c is its first character. The decimal value is returned
// and the first non-digit character is put back.
static int scanint(int c) {
  int digit;
  int total = 0;

  // Accumulate digits for as long as c is found in the digit string
  for (;;) {
    digit = chrpos("0123456789", c);
    if (digit < 0)
      break;
    total = total * 10 + digit;
    c = next();
  }

  // c is not part of the literal, so return it to the input
  putback(c);
  return (total);
}

// Scan an identifier from the input file into buf[],
// which has room for lim characters. c is the first character.
// Returns the length of the identifier.
static int scanident(int c, char *buf, int lim) {
  int len = 0;

  // Letters, digits and underscores belong to the identifier
  for (; isalnum(c) || c == '_'; c = next()) {
    // Die when the length limit is reached,
    // otherwise store the character
    if (len == lim - 1)
      fatal("Identifier too long");
    else if (len < lim - 1)
      buf[len++] = c;
  }

  // Return the terminating character to the input
  // and NUL-terminate what was collected
  putback(c);
  buf[len] = '\0';
  return (len);
}

// Given a word from the input, return the matching
// keyword token number or 0 if it's not a keyword.
// Switch on the first letter so that we don't have
// to waste time strcmp()ing against all the keywords.
static int keyword(char *s) { switch (*s) { case 'c': if (!strcmp(s, "char")) return (T_CHAR); break; case 'e': if (!strcmp(s, "else")) return (T_ELSE); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'l': if (!strcmp(s, "long")) return (T_LONG); break; case 'p': if (!strcmp(s, "print")) return (T_PRINT); break; case 'r': if (!strcmp(s, "return")) return (T_RETURN); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; case 'v': if (!strcmp(s, "void")) return (T_VOID); break; } return (0); } // A pointer to a rejected token static struct token *Rejtoken = NULL; // Reject the token that we just scanned void reject_token(struct token *t) { if (Rejtoken != NULL) fatal("Can't reject token twice"); Rejtoken = t; } // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. int scan(struct token *t) { int c, tokentype; // If we have any rejected token, return it if (Rejtoken != NULL) { t = Rejtoken; Rejtoken = NULL; return (1); } // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': t->token = T_PLUS; break; case '-': t->token = T_MINUS; break; case '*': t->token = T_STAR; break; case '/': t->token = T_SLASH; break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { fatalc("Unrecognised character", c); } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else { putback(c); t->token = T_LT; } break; case '>': if ((c = next()) == '=') { t->token = T_GE; } else { putback(c); t->token = T_GT; } break; 
case '&': if ((c = next()) == '&') { t->token = T_LOGAND; } else { putback(c); t->token = T_AMPER; } break; default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if ((tokentype = keyword(Text)) != 0) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token return (1); } ================================================ FILE: 15_Pointers_pt1/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // Prototypes static struct ASTnode *single_statement(void); // compound_statement: // empty, i.e. no statement // | statement // | statement statements // ; // // statement: print_statement // | declaration // | assignment_statement // | function_call // | if_statement // | while_statement // | for_statement // | return_statement // ; // print_statement: 'print' expression ';' ; // static struct ASTnode *print_statement(void) { struct ASTnode *tree; int lefttype, righttype; // Match a 'print' as the first token match(T_PRINT, "print"); // Parse the following expression tree = binexpr(0); // Ensure the two types are compatible. lefttype = P_INT; righttype = tree->type; if (!type_compatible(&lefttype, &righttype, 0)) fatal("Incompatible types"); // Widen the tree if required. 
if (righttype) tree = mkastunary(righttype, P_INT, tree, 0); // Make an print AST tree tree = mkastunary(A_PRINT, P_NONE, tree, 0); // Return the AST return (tree); } // assignment_statement: identifier '=' expression ';' ; // // Parse an assignment statement and return its AST static struct ASTnode *assignment_statement(void) { struct ASTnode *left, *right, *tree; int lefttype, righttype; int id; // Ensure we have an identifier ident(); // This could be a variable or a function call. // If next token is '(', it's a function call if (Token.token == T_LPAREN) return (funccall()); // Not a function call, on with an assignment then! // Check the identifier has been defined then make a leaf node for it // XXX Add structural type test if ((id = findglob(Text)) == -1) { fatals("Undeclared variable", Text); } right = mkastleaf(A_LVIDENT, Gsym[id].type, id); // Ensure we have an equals sign match(T_ASSIGN, "="); // Parse the following expression left = binexpr(0); // Ensure the two types are compatible. lefttype = left->type; righttype = right->type; if (!type_compatible(&lefttype, &righttype, 1)) fatal("Incompatible types"); // Widen the left if required. if (lefttype) left = mkastunary(lefttype, right->type, left, 0); // Make an assignment AST tree tree = mkastnode(A_ASSIGN, P_INT, left, NULL, right, 0); // Return the AST return (tree); } // if_statement: if_head // | if_head 'else' compound_statement // ; // // if_head: 'if' '(' true_false_expression ')' compound_statement ; // // Parse an IF statement including any // optional ELSE clause and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Ensure // the tree's operation is a comparison. 
condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) fatal("Bad comparison operator"); rparen(); // Get the AST for the compound statement trueAST = compound_statement(); // If we have an 'else', skip it // and get the AST for the compound statement if (Token.token == T_ELSE) { scan(&Token); falseAST = compound_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, P_NONE, condAST, trueAST, falseAST, 0)); } // while_statement: 'while' '(' true_false_expression ')' compound_statement ; // // Parse a WHILE statement and return its AST static struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Ensure // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) fatal("Bad comparison operator"); rparen(); // Get the AST for the compound statement bodyAST = compound_statement(); // Build and return the AST for this statement return (mkastnode(A_WHILE, P_NONE, condAST, NULL, bodyAST, 0)); } // for_statement: 'for' '(' preop_statement ';' // true_false_expression ';' // postop_statement ')' compound_statement ; // // preop_statement: statement (for now) // postop_statement: statement (for now) // // Parse a FOR statement and return its AST static struct ASTnode *for_statement(void) { struct ASTnode *condAST, *bodyAST; struct ASTnode *preopAST, *postopAST; struct ASTnode *tree; // Ensure we have 'for' '(' match(T_FOR, "for"); lparen(); // Get the pre_op statement and the ';' preopAST = single_statement(); semi(); // Get the condition and the ';' condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) fatal("Bad comparison operator"); semi(); // Get the post_op statement and the ')' postopAST = single_statement(); rparen(); // Get the compound statement which is the body bodyAST = compound_statement(); // For now, all four 
sub-trees have to be non-NULL. // Later on, we'll change the semantics for when some are missing // Glue the compound statement and the postop tree tree = mkastnode(A_GLUE, P_NONE, bodyAST, NULL, postopAST, 0); // Make a WHILE loop with the condition and this new body tree = mkastnode(A_WHILE, P_NONE, condAST, NULL, tree, 0); // And glue the preop tree to the A_WHILE tree return (mkastnode(A_GLUE, P_NONE, preopAST, NULL, tree, 0)); } // return_statement: 'return' '(' expression ')' ; // // Parse a return statement and return its AST static struct ASTnode *return_statement(void) { struct ASTnode *tree; int returntype, functype; // Can't return a value if function returns P_VOID if (Gsym[Functionid].type == P_VOID) fatal("Can't return from a void function"); // Ensure we have 'return' '(' match(T_RETURN, "return"); lparen(); // Parse the following expression tree = binexpr(0); // Ensure this is compatible with the function's type returntype = tree->type; functype = Gsym[Functionid].type; if (!type_compatible(&returntype, &functype, 1)) fatal("Incompatible types"); // Widen the left if required. 
if (returntype) tree = mkastunary(returntype, functype, tree, 0); // Add on the A_RETURN node tree = mkastunary(A_RETURN, P_NONE, tree, 0); // Get the ')' rparen(); return (tree); } // Parse a single statement // and return its AST static struct ASTnode *single_statement(void) { switch (Token.token) { case T_PRINT: return (print_statement()); case T_CHAR: case T_INT: case T_LONG: var_declaration(); return (NULL); // No AST generated here case T_IDENT: return (assignment_statement()); case T_IF: return (if_statement()); case T_WHILE: return (while_statement()); case T_FOR: return (for_statement()); case T_RETURN: return (return_statement()); default: fatald("Syntax error, token", Token.token); } return (NULL); // Keep -Wall happy } // Parse a compound statement // and return its AST struct ASTnode *compound_statement(void) { struct ASTnode *left = NULL; struct ASTnode *tree; // Require a left curly bracket lbrace(); while (1) { // Parse a single statement tree = single_statement(); // Some statements must be followed by a semicolon if (tree != NULL && (tree->op == A_PRINT || tree->op == A_ASSIGN || tree->op == A_RETURN || tree->op == A_FUNCCALL)) semi(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, P_NONE, left, NULL, tree, 0); } // When we hit a right curly bracket, // skip past it and return the AST if (Token.token == T_RBRACE) { rbrace(); return (left); } } } ================================================ FILE: 15_Pointers_pt1/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 // Determine if the symbol s is in the global symbol table. // Return its slot position or -1 if not found. 
int findglob(char *s) { int i; for (i = 0; i < Globs; i++) { if (*s == *Gsym[i].name && !strcmp(s, Gsym[i].name)) return (i); } return (-1); } // Get the position of a new global symbol slot, or die // if we've run out of positions. static int newglob(void) { int p; if ((p = Globs++) >= NSYMBOLS) fatal("Too many global symbols"); return (p); } // Add a global symbol to the symbol table. // Also set up its type and structural type. // Return the slot number in the symbol table int addglob(char *name, int type, int stype, int endlabel) { int y; // If this is already in the symbol table, return the existing slot if ((y = findglob(name)) != -1) return (y); // Otherwise get a new slot, fill it in and // return the slot number y = newglob(); Gsym[y].name = strdup(name); Gsym[y].type = type; Gsym[y].stype = stype; Gsym[y].endlabel = endlabel; return (y); } ================================================ FILE: 15_Pointers_pt1/tests/input01.c ================================================ void main() { printint(12 * 3); printint(18 - 2 * 4); printint(1 + 2 + 9 - 5/2 + 3*5); } ================================================ FILE: 15_Pointers_pt1/tests/input02.c ================================================ void main() { int fred; int jim; fred= 5; jim= 12; printint(fred + jim); } ================================================ FILE: 15_Pointers_pt1/tests/input03.c ================================================ void main() { int x; x= 1; printint(x); x= x + 1; printint(x); x= x + 1; printint(x); x= x + 1; printint(x); x= x + 1; printint(x); } ================================================ FILE: 15_Pointers_pt1/tests/input04.c ================================================ void main() { int x; x= 7 < 9; printint(x); x= 7 <= 9; printint(x); x= 7 != 9; printint(x); x= 7 == 7; printint(x); x= 7 >= 7; printint(x); x= 7 <= 7; printint(x); x= 9 > 7; printint(x); x= 9 >= 7; printint(x); x= 9 != 7; printint(x); } ================================================ FILE: 
15_Pointers_pt1/tests/input05.c ================================================ void main() { int i; int j; i=6; j=12; if (i < j) { printint(i); } else { printint(j); } } ================================================ FILE: 15_Pointers_pt1/tests/input06.c ================================================ void main() { int i; i=1; while (i <= 10) { printint(i); i= i + 1; } } ================================================ FILE: 15_Pointers_pt1/tests/input07.c ================================================ void main() { int i; for (i= 1; i <= 10; i= i + 1) { printint(i); } } ================================================ FILE: 15_Pointers_pt1/tests/input08.c ================================================ void main() { int i; for (i= 1; i <= 10; i= i + 1) { printint(i); } } ================================================ FILE: 15_Pointers_pt1/tests/input09.c ================================================ void main() { int i; for (i= 1; i <= 10; i= i + 1) { printint(i); } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { printint(2 * b - a); } } ================================================ FILE: 15_Pointers_pt1/tests/input10.c ================================================ void main() { int i; char j; j= 20; printint(j); i= 10; printint(i); for (i= 1; i <= 5; i= i + 1) { printint(i); } for (j= 253; j != 2; j= j + 1) { printint(j); } } ================================================ FILE: 15_Pointers_pt1/tests/input11.c ================================================ int main() { int i; char j; long k; i= 10; printint(i); j= 20; printint(j); k= 30; printint(k); for (i= 1; i <= 5; i= i + 1) { printint(i); } for (j= 253; j != 4; j= j + 1) { printint(j); } for (k= 1; k <= 5; k= k + 1) { printint(k); } return(i); printint(12345); return(3); } ================================================ FILE: 15_Pointers_pt1/tests/input12.c ================================================ int fred() { return(5); } void main() { int x; x= fred(2); 
printint(x); } ================================================ FILE: 15_Pointers_pt1/tests/input13.c ================================================ int fred() { return(56); } void main() { int dummy; int result; dummy= printint(23); result= fred(10); dummy= printint(result); } ================================================ FILE: 15_Pointers_pt1/tests/input14.c ================================================ int fred() { return(20); } int main() { int result; printint(10); result= fred(15); printint(result); printint(fred(15)+10); return(0); } ================================================ FILE: 15_Pointers_pt1/tests/input15.c ================================================ int main() { char a; char *b; char c; int d; int *e; int f; a= 18; printint(a); b= &a; c= *b; printint(c); d= 12; printint(d); e= &d; f= *e; printint(f); return(0); } ================================================ FILE: 15_Pointers_pt1/tests/mktests ================================================ #!/bin/sh # Make the output files for each test for i in input*c do if [ ! 
-f "out.$i" ] then cc -o out $i ../lib/printint.c ./out > out.$i rm -f out fi done ================================================ FILE: 15_Pointers_pt1/tests/out.input01.c ================================================ 36 10 25 ================================================ FILE: 15_Pointers_pt1/tests/out.input02.c ================================================ 17 ================================================ FILE: 15_Pointers_pt1/tests/out.input03.c ================================================ 1 2 3 4 5 ================================================ FILE: 15_Pointers_pt1/tests/out.input04.c ================================================ 1 1 1 1 1 1 1 1 1 ================================================ FILE: 15_Pointers_pt1/tests/out.input05.c ================================================ 6 ================================================ FILE: 15_Pointers_pt1/tests/out.input06.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 15_Pointers_pt1/tests/out.input07.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 15_Pointers_pt1/tests/out.input08.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 15_Pointers_pt1/tests/out.input09.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 15_Pointers_pt1/tests/out.input10.c ================================================ 20 10 1 2 3 4 5 253 254 255 0 1 ================================================ FILE: 15_Pointers_pt1/tests/out.input11.c ================================================ 10 20 30 1 2 3 4 5 253 254 255 0 1 2 3 1 2 3 4 5 ================================================ FILE: 15_Pointers_pt1/tests/out.input12.c ================================================ 5 
================================================ FILE: 15_Pointers_pt1/tests/out.input13.c ================================================ 23 56 ================================================ FILE: 15_Pointers_pt1/tests/out.input14.c ================================================ 10 20 30 ================================================ FILE: 15_Pointers_pt1/tests/out.input15.c ================================================ 18 18 12 12 ================================================ FILE: 15_Pointers_pt1/tests/runtests ================================================ #!/bin/sh # Run each test and compare # against known good output if [ ! -f ../comp1 ] then echo "Need to build ../comp1 first!"; exit 1 fi for i in input* do if [ ! -f "out.$i" ] then echo "Can't run test on $i, no output file!" else echo -n $i ../comp1 $i cc -o out out.s ../lib/printint.c ./out > trial.$i cmp -s "out.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo else echo ": OK" fi rm -f out out.s "trial.$i" fi done ================================================ FILE: 15_Pointers_pt1/tests/runtestsn ================================================ #!/bin/sh # Run each test and compare # against known good output if [ ! -f ../compn ] then echo "Need to build ../compn first!"; exit 1 fi for i in input* do if [ ! -f "out.$i" ] then echo "Can't run test on $i, no output file!" else echo -n $i ../compn $i nasm -f elf64 out.s cc -no-pie -Wall -o out out.o ../lib/printint.c ./out > trial.$i cmp -s "out.$i" "trial.$i" if [ "$?" 
-eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo else echo ": OK" fi rm -f out out.o out.s "trial.$i" fi done ================================================ FILE: 15_Pointers_pt1/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->type = type; n->left = left; n->mid = mid; n->right = right; n->v.intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, int intvalue) { return (mkastnode(op, type, NULL, NULL, NULL, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, int intvalue) { return (mkastnode(op, type, left, NULL, NULL, intvalue)); } ================================================ FILE: 15_Pointers_pt1/types.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Types and type handling // Copyright (c) 2019 Warren Toomey, GPL3 // Given two primitive types, // return true if they are compatible, // false otherwise. Also return either // zero or an A_WIDEN operation if one // has to be widened to match the other. // If onlyright is true, only widen left to right. 
int type_compatible(int *left, int *right, int onlyright) { int leftsize, rightsize; // Same types, they are compatible if (*left == *right) { *left = *right = 0; return (1); } // Get the sizes for each type leftsize = genprimsize(*left); rightsize = genprimsize(*right); // Types with zero size are not // not compatible with anything if ((leftsize == 0) || (rightsize == 0)) return (0); // Widen types as required if (leftsize < rightsize) { *left = A_WIDEN; *right = 0; return (1); } if (rightsize < leftsize) { if (onlyright) return (0); *left = 0; *right = A_WIDEN; return (1); } // Anything remaining is the same size // and thus compatible *left = *right = 0; return (1); } // Given a primitive type, return // the type which is a pointer to it int pointer_to(int type) { int newtype; switch (type) { case P_VOID: newtype = P_VOIDPTR; break; case P_CHAR: newtype = P_CHARPTR; break; case P_INT: newtype = P_INTPTR; break; case P_LONG: newtype = P_LONGPTR; break; default: fatald("Unrecognised in pointer_to: type", type); } return (newtype); } // Given a primitive pointer type, return // the type which it points to int value_at(int type) { int newtype; switch (type) { case P_VOIDPTR: newtype = P_VOID; break; case P_CHARPTR: newtype = P_CHAR; break; case P_INTPTR: newtype = P_INT; break; case P_LONGPTR: newtype = P_LONG; break; default: fatald("Unrecognised in value_at: type", type); } return (newtype); } ================================================ FILE: 16_Global_Vars/Makefile ================================================ SRCS= cg.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c \ sym.c tree.c types.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c \ sym.c tree.c types.c ARMSRCS= cg_arm.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c \ sym.c tree.c types.c comp1: $(SRCS) cc -o comp1 -g -Wall $(SRCS) compn: $(SRCN) cc -o compn -g -Wall $(SRCN) comp1arm: $(ARMSRCS) cc -o comp1arm -g -Wall $(ARMSRCS) cp comp1arm comp1 clean: rm -f comp1 comp1arm compn 
*.o *.s out test: comp1 tests/runtests (cd tests; chmod +x runtests; ./runtests) armtest: comp1arm tests/runtests (cd tests; chmod +x runtests; ./runtests) testn: compn tests/runtestsn (cd tests; chmod +x runtestsn; ./runtestsn) test16: comp1 tests/input16.c lib/printint.c ./comp1 tests/input16.c cc -o out out.s lib/printint.c ./out armtest16: comp1arm tests/input16.c lib/printint.c ./comp1 tests/input16.c cc -o out out.s lib/printint.c ./out test16n: compn tests/input16.c lib/printint.c ./compn tests/input16.c nasm -f elf64 out.s cc -no-pie -o out lib/printint.c out.o ./out ================================================ FILE: 16_Global_Vars/Readme.md ================================================ # Part 16: Declaring Global Variables Properly I did promise to look at the issue of adding offsets to pointers, but I need to do some thinking about that first. So I've decided to move global variable declarations out of function declarations. Actually, I've also left the parsing of variable declarations inside functions, because later on we will change them to be local variable declarations. I also want to extend our grammar so that we can declare multiple variables with the same type at the same time, e.g. ```c int x, y, z; ``` ## The New BNF Grammar Here is the new BNF grammar for global declarations, both functions and variables: ``` global_declarations : global_declaration | global_declaration global_declarations ; global_declaration: function_declaration | var_declaration ; function_declaration: type identifier '(' ')' compound_statement ; var_declaration: type identifier_list ';' ; type: type_keyword opt_pointer ; type_keyword: 'void' | 'char' | 'int' | 'long' ; opt_pointer: | '*' opt_pointer ; identifier_list: identifier | identifier ',' identifier_list ; ``` Both `function_declaration` and `var_declaration` start with a `type`. This is now a `type_keyword` followed by `opt_pointer` which is zero or more '*' tokens.
After this, both `function_declaration` and `var_declaration` must be followed by one identifier. However, after the `type`, `var_declaration` is followed by an `identifier_list`, which is one or more `identifier`s separated by a ',' token. Also `var_declaration` must end with a ';' token but `function_declaration` ends with a `compound_statement` and no ';' token. ## New Tokens We now have the T_COMMA token for the ',' character in `scan.c`. ## Changes to `decl.c` We now convert the above BNF grammar into a set of recursive descent functions but, as we can do looping, we can turn some of the recursion into internal loops. ### `global_declarations()` As there are one or more global declarations, we can loop parsing each one. When we run out of tokens, we can leave the loop. ```c // Parse one or more global declarations, either // variables or functions void global_declarations(void) { struct ASTnode *tree; int type; while (1) { // We have to read past the type and identifier // to see either a '(' for a function declaration // or a ',' or ';' for a variable declaration. // Text is filled in by the ident() call. type = parse_type(); ident(); if (Token.token == T_LPAREN) { // Parse the function declaration and // generate the assembly code for it tree = function_declaration(type); genAST(tree, NOREG, 0); } else { // Parse the global variable declaration var_declaration(type); } // Stop when we have reached EOF if (Token.token == T_EOF) break; } } ``` Because, for now, we only have global variables and functions, we can scan in the type and the first identifier here. Then, we look at the next token. If it's a '(', we call `function_declaration()`. If not, we can assume that it is a `var_declaration()`. We pass the `type` in to both functions. Now that we are receiving the AST `tree` from `function_declaration()` here, we can generate the code from the AST tree immediately. This code was in `main()` but has now been moved here.
`main()` now only has to call `global_declarations()`: ```c scan(&Token); // Get the first token from the input genpreamble(); // Output the preamble global_declarations(); // Parse the global declarations genpostamble(); // Output the postamble ``` ### `var_declaration()` The parsing of functions is much the same as before, except the code to scan the type and identifier is done elsewhere, and we receive the `type` as an argument. The parsing of variables also loses the type and identifier scanning code. We can add the identifier to the global symbol table and generate the assembly code for it. But now we need to add in a loop. If there's a following ',', loop back to get the next identifier with the same type. And if there's a following ';', that's the end of the variable declarations. ```c // Parse the declaration of a list of variables. // The identifier has been scanned & we have the type void var_declaration(int type) { int id; while (1) { // Text now has the identifier's name. // Add it as a known identifier // and generate its space in assembly id = addglob(Text, type, S_VARIABLE, 0); genglobsym(id); // If the next token is a semicolon, // skip it and return. if (Token.token == T_SEMI) { scan(&Token); return; } // If the next token is a comma, skip it, // get the identifier and loop back if (Token.token == T_COMMA) { scan(&Token); ident(); continue; } fatal("Missing , or ; after identifier"); } } ``` ## Not Quite Local Variables `var_declaration()` can now parse a list of variable declarations, but it requires the type and first identifier to be pre-scanned. Thus, I've left the call to `var_declaration()` in `single_statement()` in `stmt.c`. Later on, we will modify this to declare local variables.
But for now, all of the variables in this example program are globals: ```c int d, f; int *e; int main() { int a, b, c; b= 3; c= 5; a= b + c * 10; printint(a); d= 12; printint(d); e= &d; f= *e; printint(f); return(0); } ``` ## Testing the Changes The above code is our `tests/input16.c`. As always, we can test it: ``` $ make test16 cc -o comp1 -g -Wall cg.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c sym.c tree.c types.c ./comp1 tests/input16.c cc -o out out.s lib/printint.c ./out 53 12 12 ``` ## Conclusion and What's Next In the next part of our compiler writing journey, I promise to tackle the issue of adding offsets to pointers. [Next step](../17_Scaling_Offsets/Readme.md) ================================================ FILE: 16_Global_Vars/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. // We need a list of byte and doubleword registers, too static int freereg[4]; static char *reglist[4] = { "%r8", "%r9", "%r10", "%r11" }; static char *breglist[4] = { "%r8b", "%r9b", "%r10b", "%r11b" }; static char *dreglist[4] = { "%r8d", "%r9d", "%r10d", "%r11d" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Gsym[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, @function\n" "%s:\n" "\tpushq\t%%rbp\n" "\tmovq\t%%rsp, %%rbp\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Gsym[id].endlabel); fputs("\tpopq %rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]); return (r); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Print out the code to initialise it switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzbq\t%s(%%rip), %s\n", Gsym[id].name, reglist[r]); break; case P_INT: fprintf(Outfile, "\tmovzbl\t%s(\%%rip), %s\n", Gsym[id].name, reglist[r]); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tmovq\t%s(\%%rip), %s\n", Gsym[id].name, reglist[r]); break; default: fatald("Bad type in cgloadglob:", Gsym[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); 
free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); free_register(r2); return (r1); } // Call printint() with the given register void cgprintint(int r) { fprintf(Outfile, "\tmovq\t%s, %%rdi\n", reglist[r]); fprintf(Outfile, "\tcall\tprintint\n"); free_register(r); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { // Get a new register int outr = alloc_register(); fprintf(Outfile, "\tmovq\t%s, %%rdi\n", reglist[r]); fprintf(Outfile, "\tcall\t%s\n", Gsym[id].name); fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]); free_register(r); return (outr); } // Store a register's value into a variable int cgstorglob(int r, int id) { switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %s(\%%rip)\n", breglist[r], Gsym[id].name); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %s(\%%rip)\n", dreglist[r], Gsym[id].name); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tmovq\t%s, %s(\%%rip)\n", reglist[r], Gsym[id].name); break; default: fatald("Bad type in cgloadglob:", Gsym[id].type); } return (r); } // Array of type sizes in P_XXX order. // 0 means no size. static int psize[] = { 0, 0, 1, 4, 8, 8, 8, 8 }; // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { // Check the type is valid if (type < P_NONE || type > P_LONGPTR) fatal("Bad type in cgprimsize()"); return (psize[type]); } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Gsym[id].type); fprintf(Outfile, "\t.comm\t%s,%d,%d\n", Gsym[id].name, typesize, typesize); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { // Generate code depending on the function's type switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzbl\t%s, %%eax\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %%eax\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", Gsym[id].type); } cgjump(Gsym[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. Return a new register int cgaddress(int id) { int r = alloc_register(); fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", Gsym[id].name, reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tmovzbq\t(%s), %s\n", reglist[r], reglist[r]); break; case P_INTPTR: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; case P_LONGPTR: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; } return (r); } ================================================ FILE: 16_Global_Vars/cg_arm.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for ARMv6 on Raspberry Pi // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. 
static int freereg[4]; static char *reglist[4] = { "r4", "r5", "r6", "r7" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // We have to store large integer literal values in memory. // Keep a list of them which will be output in the postamble #define MAXINTS 1024 int Intlist[MAXINTS]; static int Intslot = 0; // Determine the offset of a large integer // literal from the .L3 label. If the integer // isn't in the list, add it. 
static void set_int_offset(int val) { int offset = -1; // See if it is already there for (int i = 0; i < Intslot; i++) { if (Intlist[i] == val) { offset = 4 * i; break; } } // Not in the list, so add it if (offset == -1) { offset = 4 * Intslot; if (Intslot == MAXINTS) fatal("Out of int slots in set_int_offset()"); Intlist[Intslot++] = val; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L3+%d\n", offset); } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Print out the assembly postamble void cgpostamble() { // Print out the global variables fprintf(Outfile, ".L2:\n"); for (int i = 0; i < Globs; i++) { if (Gsym[i].stype == S_VARIABLE) fprintf(Outfile, "\t.word %s\n", Gsym[i].name); } // Print out the integer literals fprintf(Outfile, ".L3:\n"); for (int i = 0; i < Intslot; i++) { fprintf(Outfile, "\t.word %d\n", Intlist[i]); } } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Gsym[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, \%%function\n" "%s:\n" "\tpush\t{fp, lr}\n" "\tadd\tfp, sp, #4\n" "\tsub\tsp, sp, #8\n" "\tstr\tr0, [fp, #-8]\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Gsym[id].endlabel); fputs("\tsub\tsp, fp, #4\n" "\tpop\t{fp, pc}\n" "\t.align\t2\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); // If the literal value is small, do it with one instruction if (value <= 1000) fprintf(Outfile, "\tmov\t%s, #%d\n", reglist[r], value); else { set_int_offset(value); fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); } return (r); } // Determine the offset of a variable from the .L2 // label. Yes, this is inefficient code. static void set_var_offset(int id) { int offset = 0; // Walk the symbol table up to id. 
// Find S_VARIABLEs and add on 4 until // we get to our variable for (int i = 0; i < id; i++) { if (Gsym[i].stype == S_VARIABLE) offset += 4; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L2+%d\n", offset); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tldrb\t%s, [r3]\n", reglist[r]); break; default: fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); break; } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s, %s\n", reglist[r1], reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\tmul\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { // To do a divide: r0 holds the dividend, r1 holds the divisor. // The quotient is in r0. 
fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r1]); fprintf(Outfile, "\tmov\tr1, %s\n", reglist[r2]); fprintf(Outfile, "\tbl\t__aeabi_idiv\n"); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r1]); free_register(r2); return (r1); } // Call printint() with the given register void cgprintint(int r) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]); fprintf(Outfile, "\tbl\tprintint\n"); fprintf(Outfile, "\tnop\n"); free_register(r); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]); fprintf(Outfile, "\tbl\t%s\n", Gsym[id].name); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r]); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { // Get the offset to the variable set_var_offset(id); switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tstr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgstorglob:", Gsym[id].type); } return (r); } // Array of type sizes in P_XXX order. // 0 means no size. static int psize[] = { 0, 0, 1, 4, 4, 4, 4, 4 }; // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { // Check the type is valid if (type < P_NONE || type > P_LONGPTR) fatal("Bad type in cgprimsize()"); return (psize[type]); } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Gsym[id].type); fprintf(Outfile, "\t.comm\t%s,%d,%d\n", Gsym[id].name, typesize, typesize); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "moveq", "movne", "movlt", "movgt", "movle", "movge" }; // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "movne", "moveq", "movge", "movle", "movgt", "movlt" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #1\n", cmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #0\n", invcmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\tuxtb\t%s, %s\n", reglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tb\tL%d\n", l); } // List of inverted branch instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *brlist[] = { "bne", "beq", "bge", "ble", "bgt", "blt" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", brlist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[reg]); cgjump(Gsym[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. Return a new register int cgaddress(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); fprintf(Outfile, "\tmov\t%s, r3\n", reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tldrb\t%s, [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: fprintf(Outfile, "\tldr\t%s, [%s]\n", reglist[r], reglist[r]); break; case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } ================================================ FILE: 16_Global_Vars/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. 
// We need a list of byte and doubleword registers, too static int freereg[4]; static char *reglist[4] = { "r8", "r9", "r10", "r11" }; static char *breglist[4] = { "r8b", "r9b", "r10b", "r11b" }; static char *dreglist[4] = { "r8d", "r9d", "r10d", "r11d" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\textern\tprintint\n", Outfile); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Gsym[id].name; fprintf(Outfile, "\tsection\t.text\n" "\tglobal\t%s\n" "%s:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n", name, name); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Gsym[id].endlabel); fputs("\tpop rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Load a value from a variable into a register. 
// Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Print out the code to initialise it switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], Gsym[id].name); break; case P_INT: fprintf(Outfile, "\txor\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tmov\t%s, dword [%s]\n", dreglist[r], Gsym[id].name); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], Gsym[id].name); break; default: fatald("Bad type in cgloadglob:", Gsym[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } // Call printint() with the given register void cgprintint(int r) { fprintf(Outfile, "\tmov\trdi, %s\n", reglist[r]); fprintf(Outfile, "\tcall\tprintint\n"); free_register(r); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { // Get a new register int outr = alloc_register(); 
fprintf(Outfile, "\tmov\trdi, %s\n", reglist[r]); fprintf(Outfile, "\tcall\t%s\n", Gsym[id].name); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[outr]); free_register(r); return (outr); } // Store a register's value into a variable int cgstorglob(int r, int id) { switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], %s\n", Gsym[id].name, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", Gsym[id].name, dreglist[r]); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tmov\t[%s], %s\n", Gsym[id].name, reglist[r]); break; default: fatald("Bad type in cgloadglob:", Gsym[id].type); } return (r); } // Array of type sizes in P_XXX order. // 0 means no size. static int psize[] = { 0, 0, 1, 4, 8, 8, 8, 8 }; // Given a P_XXX type value, return the // size of a primitive type in bytes. int cgprimsize(int type) { // Check the type is valid if (type < P_NONE || type > P_LONGPTR) fatal("Bad type in cgprimsize()"); return (psize[type]); } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Gsym[id].type); fprintf(Outfile, "\tcommon\t%s %d:%d\n", Gsym[id].name, typesize, typesize); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { // Generate code depending on the function's type switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzx\teax, %s\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmov\teax, %s\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", Gsym[id].type); } cgjump(Gsym[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. 
Return a new register int cgaddress(int id) { int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r], Gsym[id].name); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: fprintf(Outfile, "\tmovzx\t%s, word [%s]\n", reglist[r], reglist[r]); break; case P_LONGPTR: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } ================================================ FILE: 16_Global_Vars/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Putback; // Character put back by scanner extern_ int Functionid; // Symbol id of the current function extern_ int Globs; // Position of next free global symbol slot extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ struct token Token; // Last token scanned extern_ char Text[TEXTLEN + 1]; // Last identifier scanned extern_ struct symtable Gsym[NSYMBOLS]; // Global symbol table ================================================ FILE: 16_Global_Vars/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 // global_declarations : global_declarations // | global_declaration global_declarations // ; // // global_declaration: function_declaration | var_declaration ; // // function_declaration: type identifier '(' ')' compound_statement ; // // var_declaration: type identifier_list ';' ; // // type: type_keyword opt_pointer ; // // type_keyword: 'void' | 'char' | 'int' | 'long' ; // // opt_pointer: | '*' opt_pointer ; // // identifier_list: identifier | identifier ',' identifier_list ; 
// // Parse the current token and return // a primitive type enum value. Also // scan in the next token int parse_type(void) { int type; switch (Token.token) { case T_VOID: type = P_VOID; break; case T_CHAR: type = P_CHAR; break; case T_INT: type = P_INT; break; case T_LONG: type = P_LONG; break; default: fatald("Illegal type, token", Token.token); } // Scan in one or more further '*' tokens // and determine the correct pointer type while (1) { scan(&Token); if (Token.token != T_STAR) break; type = pointer_to(type); } // We leave with the next token already scanned return (type); } // variable_declaration: type identifier ';' ; // // Parse the declaration of a list of variables. // The identifier has been scanned & we have the type void var_declaration(int type) { int id; while (1) { // Text now has the identifier's name. // Add it as a known identifier // and generate its space in assembly id = addglob(Text, type, S_VARIABLE, 0); genglobsym(id); // If the next token is a semicolon, // skip it and return. if (Token.token == T_SEMI) { scan(&Token); return; } // If the next token is a comma, skip it, // get the identifier and loop back if (Token.token == T_COMMA) { scan(&Token); ident(); continue; } fatal("Missing , or ; after identifier"); } } // // function_declaration: type identifier '(' ')' compound_statement ; // // Parse the declaration of a simplistic function. // The identifier has been scanned & we have the type struct ASTnode *function_declaration(int type) { struct ASTnode *tree, *finalstmt; int nameslot, endlabel; // Text now has the identifier's name. // Get a label-id for the end label, add the function // to the symbol table, and set the Functionid global // to the function's symbol-id endlabel = genlabel(); nameslot = addglob(Text, type, S_FUNCTION, endlabel); Functionid = nameslot; // Scan in the parentheses lparen(); rparen(); // Get the AST tree for the compound statement tree = compound_statement(); // If the function type isn't P_VOID .. 
if (type != P_VOID) { // Error if no statements in the function if (tree == NULL) fatal("No statements in function with non-void type"); // Check that the last AST operation in the // compound statement was a return statement finalstmt = (tree->op == A_GLUE) ? tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); } // Return an A_FUNCTION node which has the function's nameslot // and the compound statement sub-tree return (mkastunary(A_FUNCTION, type, tree, nameslot)); } // Parse one or more global declarations, either // variables or functions void global_declarations(void) { struct ASTnode *tree; int type; while (1) { // We have to read past the type and identifier // to see either a '(' for a function declaration // or a ',' or ';' for a variable declaration. // Text is filled in by the ident() call. type = parse_type(); ident(); if (Token.token == T_LPAREN) { // Parse the function declaration and // generate the assembly code for it tree = function_declaration(type); genAST(tree, NOREG, 0); } else { // Parse the global variable declaration var_declaration(type); } // Stop when we have reached EOF if (Token.token == T_EOF) break; } } ================================================ FILE: 16_Global_Vars/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c void reject_token(struct token *t); int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, int intvalue); struct ASTnode *mkastleaf(int op, int type, int intvalue); struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, int intvalue); // gen.c int genlabel(void); int genAST(struct ASTnode *n, int reg, int parentASTop); void genpreamble(); void genpostamble(); void genfreeregs(); void genprintint(int reg); void genglobsym(int id); int 
genprimsize(int type); void genreturn(int reg, int id); // cg.c void freeall_registers(void); void cgpreamble(); void cgpostamble(); void cgfuncpreamble(int id); void cgfuncpostamble(int id); int cgloadint(int value, int type); int cgloadglob(int id); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdiv(int r1, int r2); void cgprintint(int r); int cgcall(int r, int id); int cgstorglob(int r, int id); void cgglobsym(int id); int cgcompare_and_set(int ASTop, int r1, int r2); int cgcompare_and_jump(int ASTop, int r1, int r2, int label); void cglabel(int l); void cgjump(int l); int cgwiden(int r, int oldtype, int newtype); int cgprimsize(int type); void cgreturn(int reg, int id); int cgaddress(int id); int cgderef(int r, int type); // expr.c struct ASTnode *funccall(void); struct ASTnode *binexpr(int ptp); // stmt.c struct ASTnode *compound_statement(void); // misc.c void match(int t, char *what); void semi(void); void lbrace(void); void rbrace(void); void lparen(void); void rparen(void); void ident(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c int findglob(char *s); int addglob(char *name, int type, int stype, int endlabel); // decl.c void var_declaration(int type); struct ASTnode *function_declaration(int type); void global_declarations(void); // types.c int parse_type(void); int type_compatible(int *left, int *right, int onlyright); int pointer_to(int type); int value_at(int type); ================================================ FILE: 16_Global_Vars/defs.h ================================================ #include #include #include #include // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 #define TEXTLEN 512 // Length of symbols in input #define NSYMBOLS 1024 // Number of symbol table entries // Token types enum { T_EOF, // Operators T_PLUS, T_MINUS, T_STAR, T_SLASH, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, // Type keywords 
T_VOID, T_CHAR, T_INT, T_LONG, // Structural tokens T_INTLIT, T_SEMI, T_ASSIGN, T_IDENT, T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, T_AMPER, T_LOGAND, T_COMMA, // Other keywords T_PRINT, T_IF, T_ELSE, T_WHILE, T_FOR, T_RETURN }; // Token structure struct token { int token; // Token type, from the enum list above int intvalue; // For T_INTLIT, the integer value }; // AST node types. The first few line up // with the related tokens enum { A_ADD = 1, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_INTLIT, A_IDENT, A_LVIDENT, A_ASSIGN, A_PRINT, A_GLUE, A_IF, A_WHILE, A_FUNCTION, A_WIDEN, A_RETURN, A_FUNCCALL, A_DEREF, A_ADDR }; // Primitive types enum { P_NONE, P_VOID, P_CHAR, P_INT, P_LONG, P_VOIDPTR, P_CHARPTR, P_INTPTR, P_LONGPTR }; // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; union { // For A_INTLIT, the integer value int intvalue; // For A_IDENT, the symbol slot number int id; // For A_FUNCTION, the symbol slot number } v; // For A_FUNCCALL, the symbol slot number }; #define NOREG -1 // Use NOREG when the AST generation // functions have no register to return // Structural types enum { S_VARIABLE, S_FUNCTION }; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol int stype; // Structural type for the symbol int endlabel; // For S_FUNCTIONs, the end label }; ================================================ FILE: 16_Global_Vars/expr.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of expressions // Copyright (c) 2019 Warren Toomey, GPL3 // Parse a function call with a single expression // argument and return its AST struct ASTnode *funccall(void) { struct ASTnode *tree; int id; // Check that the 
identifier has been defined, // then make a leaf node for it. XXX Add structural type test if ((id = findglob(Text)) == -1) { fatals("Undeclared function", Text); } // Get the '(' lparen(); // Parse the following expression tree = binexpr(0); // Build the function call AST node. Store the // function's return type as this node's type. // Also record the function's symbol-id tree = mkastunary(A_FUNCCALL, Gsym[id].type, tree, id); // Get the ')' rparen(); return (tree); } // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; int id; switch (Token.token) { case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. // Make it a P_CHAR if it's within the P_CHAR range if ((Token.intvalue) >= 0 && (Token.intvalue < 256)) n = mkastleaf(A_INTLIT, P_CHAR, Token.intvalue); else n = mkastleaf(A_INTLIT, P_INT, Token.intvalue); break; case T_IDENT: // This could be a variable or a function call. // Scan in the next token to find out scan(&Token); // It's a '(', so a function call if (Token.token == T_LPAREN) return (funccall()); // Not a function call, so reject the new token reject_token(&Token); // Check that the variable exists. XXX Add structural type test id = findglob(Text); if (id == -1) fatals("Unknown variable", Text); // Make a leaf AST node for it n = mkastleaf(A_IDENT, Gsym[id].type, id); break; default: fatald("Syntax error, token", Token.token); } // Scan in the next token and return the leaf node scan(&Token); return (n); } // Convert a binary operator token into an AST operation. // We rely on a 1:1 mapping from token to AST operation static int arithop(int tokentype) { if (tokentype > T_EOF && tokentype < T_INTLIT) return (tokentype); fatald("Syntax error, token", tokentype); return (0); // Keep -Wall happy } // Operator precedence for each token. 
Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 10, // T_EOF, T_PLUS, T_MINUS 20, 20, // T_STAR, T_SLASH 30, 30, // T_EQ, T_NE 40, 40, 40, 40 // T_LT, T_GT, T_LE, T_GE }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec = OpPrec[tokentype]; if (prec == 0) fatald("Syntax error, token", tokentype); return (prec); } // prefix_expression: primary // | '*' prefix_expression // | '&' prefix_expression // ; // Parse a prefix expression and return // a sub-tree representing it. struct ASTnode *prefix(void) { struct ASTnode *tree; switch (Token.token) { case T_AMPER: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Ensure that it's an identifier if (tree->op != A_IDENT) fatal("& operator must be followed by an identifier"); // Now change the operator to A_ADDR and the type to // a pointer to the original type tree->op = A_ADDR; tree->type = pointer_to(tree->type); break; case T_STAR: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's either another deref or an // identifier if (tree->op != A_IDENT && tree->op != A_DEREF) fatal("* operator must be followed by an identifier or *"); // Prepend an A_DEREF operation to the tree tree = mkastunary(A_DEREF, value_at(tree->type), tree, 0); break; default: tree = primary(); } return (tree); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; int lefttype, righttype; int tokentype; // Get the tree on the left. // Fetch the next token at the same time. 
left = prefix(); // If we hit a semicolon or ')', return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN) return (left); // While the precedence of this token is // more than that of the previous token precedence while (op_precedence(tokentype) > ptp) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Ensure the two types are compatible. lefttype = left->type; righttype = right->type; if (!type_compatible(&lefttype, &righttype, 0)) fatal("Incompatible types"); // Widen either side if required. type vars are A_WIDEN now if (lefttype) left = mkastunary(lefttype, right->type, left, 0); if (righttype) right = mkastunary(righttype, left->type, right, 0); // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. left = mkastnode(arithop(tokentype), left->type, left, NULL, right, 0); // Update the details of the current token. // If we hit a semicolon or ')', return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN) return (left); } // Return the tree we have when the precedence // is the same or lower return (left); } ================================================ FILE: 16_Global_Vars/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number int genlabel(void) { static int id = 1; return (id++); } // Generate the code for an IF statement // and an optional ELSE clause static int genIF(struct ASTnode *n) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. // When there is no ELSE clause, Lfalse _is_ // the ending label! 
Lfalse = genlabel(); if (n->right) Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. // We cheat by sending the Lfalse label as a register. genAST(n->left, Lfalse, n->op); genfreeregs(); // Generate the true compound statement genAST(n->mid, NOREG, n->op); genfreeregs(); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOREG, n->op); genfreeregs(); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement // and an optional ELSE clause static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = genlabel(); Lend = genlabel(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. // We cheat by sending the Lfalse label as a register. genAST(n->left, Lend, n->op); genfreeregs(); // Generate the compound statement for the body genAST(n->right, NOREG, n->op); genfreeregs(); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Given an AST, the register (if any) that holds // the previous rvalue, and the AST op of the parent, // generate assembly code recursively. 
// Return the register id with the tree's final value int genAST(struct ASTnode *n, int reg, int parentASTop) { int leftreg, rightreg; // We now have specific AST node handling at the top switch (n->op) { case A_IF: return (genIF(n)); case A_WHILE: return (genWHILE(n)); case A_GLUE: // Do each child statement, and free the // registers after each child genAST(n->left, NOREG, n->op); genfreeregs(); genAST(n->right, NOREG, n->op); genfreeregs(); return (NOREG); case A_FUNCTION: // Generate the function's preamble before the code cgfuncpreamble(n->v.id); genAST(n->left, NOREG, n->op); cgfuncpostamble(n->v.id); return (NOREG); } // General AST node handling below // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, NOREG, n->op); if (n->right) rightreg = genAST(n->right, leftreg, n->op); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdiv(leftreg, rightreg)); case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, compare registers // and set one to 1 or 0 based on the comparison. 
if (parentASTop == A_IF || parentASTop == A_WHILE) return (cgcompare_and_jump(n->op, leftreg, rightreg, reg)); else return (cgcompare_and_set(n->op, leftreg, rightreg)); case A_INTLIT: return (cgloadint(n->v.intvalue, n->type)); case A_IDENT: return (cgloadglob(n->v.id)); case A_LVIDENT: return (cgstorglob(reg, n->v.id)); case A_ASSIGN: // The work has already been done, return the result return (rightreg); case A_PRINT: // Print the left-child's value // and return no register genprintint(leftreg); genfreeregs(); return (NOREG); case A_WIDEN: // Widen the child's type to the parent's type return (cgwiden(leftreg, n->left->type, n->type)); case A_RETURN: cgreturn(leftreg, Functionid); return (NOREG); case A_FUNCCALL: return (cgcall(leftreg, n->v.id)); case A_ADDR: return (cgaddress(n->v.id)); case A_DEREF: return (cgderef(leftreg, n->left->type)); default: fatald("Unknown AST operator", n->op); } return (NOREG); // Keep -Wall happy } void genpreamble() { cgpreamble(); } void genpostamble() { cgpostamble(); } void genfreeregs() { freeall_registers(); } void genprintint(int reg) { cgprintint(reg); } void genglobsym(int id) { cgglobsym(id); } int genprimsize(int type) { return (cgprimsize(type)); } ================================================ FILE: 16_Global_Vars/lib/printint.c ================================================ #include void printint(long x) { printf("%ld\n", x); } ================================================ FILE: 16_Global_Vars/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Initialise global variables static void init() { Line = 1; Putback = '\n'; Globs = 0; } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s infile\n", prog); exit(1); } // Main program: check arguments and print a usage // if we don't 
have an argument. Open up the input // file and call scanfile() to scan the tokens in it. int main(int argc, char *argv[]) { if (argc != 2) usage(argv[0]); init(); // Open up the input file if ((Infile = fopen(argv[1], "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", argv[1], strerror(errno)); exit(1); } // Create the output file if ((Outfile = fopen("out.s", "w")) == NULL) { fprintf(stderr, "Unable to create out.s: %s\n", strerror(errno)); exit(1); } // For now, ensure that void printint() is defined addglob("printint", P_CHAR, S_FUNCTION, 0); scan(&Token); // Get the first token from the input genpreamble(); // Output the preamble global_declarations(); // Parse the global declarations genpostamble(); // Output the postamble fclose(Outfile); // Close the output file and exit return (0); } ================================================ FILE: 16_Global_Vars/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current token is t, // and fetch the next token. 
Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d\n", s, Line); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d\n", s1, s2, Line); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d\n", s, d, Line); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d\n", s, c, Line); exit(1); } ================================================ FILE: 16_Global_Vars/scan.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { char *p; p = strchr(s, c); return (p ? p - s : -1); } // Get the next character from the input file. static int next(void) { int c; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return (c); } c = fgetc(Infile); // Read from input file if ('\n' == c) Line++; // Increment line count return (c); } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. 
static int skip(void) { int c; c = next(); while (' ' == c || '\t' == c || '\n' == c || '\r' == c || '\f' == c) { c = next(); } return (c); } // Scan and return an integer literal // value from the input file. static int scanint(int c) { int k, val = 0; // Convert each character into an int value while ((k = chrpos("0123456789", c)) >= 0) { val = val * 10 + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan an identifier from the input file and // store it in buf[]. Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { fatal("Identifier too long"); } else if (i < lim - 1) { buf[i++] = c; } c = next(); } // We hit a non-valid character, put it back. // NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. 
static int keyword(char *s) { switch (*s) { case 'c': if (!strcmp(s, "char")) return (T_CHAR); break; case 'e': if (!strcmp(s, "else")) return (T_ELSE); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'l': if (!strcmp(s, "long")) return (T_LONG); break; case 'p': if (!strcmp(s, "print")) return (T_PRINT); break; case 'r': if (!strcmp(s, "return")) return (T_RETURN); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; case 'v': if (!strcmp(s, "void")) return (T_VOID); break; } return (0); } // A pointer to a rejected token static struct token *Rejtoken = NULL; // Reject the token that we just scanned void reject_token(struct token *t) { if (Rejtoken != NULL) fatal("Can't reject token twice"); Rejtoken = t; } // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. int scan(struct token *t) { int c, tokentype; // If we have any rejected token, return it if (Rejtoken != NULL) { t = Rejtoken; Rejtoken = NULL; return (1); } // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': t->token = T_PLUS; break; case '-': t->token = T_MINUS; break; case '*': t->token = T_STAR; break; case '/': t->token = T_SLASH; break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case ',': t->token = T_COMMA; break; case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { fatalc("Unrecognised character", c); } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else { putback(c); t->token = T_LT; } break; case '>': if ((c = next()) == '=') { t->token = T_GE; } else { 
putback(c); t->token = T_GT; } break; case '&': if ((c = next()) == '&') { t->token = T_LOGAND; } else { putback(c); t->token = T_AMPER; } break; default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if ((tokentype = keyword(Text)) != 0) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token return (1); } ================================================ FILE: 16_Global_Vars/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // Prototypes static struct ASTnode *single_statement(void); // compound_statement: // empty, i.e. no statement // | statement // | statement statements // ; // // statement: print_statement // | declaration // | assignment_statement // | function_call // | if_statement // | while_statement // | for_statement // | return_statement // ; // print_statement: 'print' expression ';' ; // static struct ASTnode *print_statement(void) { struct ASTnode *tree; int lefttype, righttype; // Match a 'print' as the first token match(T_PRINT, "print"); // Parse the following expression tree = binexpr(0); // Ensure the two types are compatible. lefttype = P_INT; righttype = tree->type; if (!type_compatible(&lefttype, &righttype, 0)) fatal("Incompatible types"); // Widen the tree if required. 
if (righttype) tree = mkastunary(righttype, P_INT, tree, 0); // Make an print AST tree tree = mkastunary(A_PRINT, P_NONE, tree, 0); // Return the AST return (tree); } // assignment_statement: identifier '=' expression ';' ; // // Parse an assignment statement and return its AST static struct ASTnode *assignment_statement(void) { struct ASTnode *left, *right, *tree; int lefttype, righttype; int id; // Ensure we have an identifier ident(); // This could be a variable or a function call. // If next token is '(', it's a function call if (Token.token == T_LPAREN) return (funccall()); // Not a function call, on with an assignment then! // Check the identifier has been defined then make a leaf node for it // XXX Add structural type test if ((id = findglob(Text)) == -1) { fatals("Undeclared variable", Text); } right = mkastleaf(A_LVIDENT, Gsym[id].type, id); // Ensure we have an equals sign match(T_ASSIGN, "="); // Parse the following expression left = binexpr(0); // Ensure the two types are compatible. lefttype = left->type; righttype = right->type; if (!type_compatible(&lefttype, &righttype, 1)) fatal("Incompatible types"); // Widen the left if required. if (lefttype) left = mkastunary(lefttype, right->type, left, 0); // Make an assignment AST tree tree = mkastnode(A_ASSIGN, P_INT, left, NULL, right, 0); // Return the AST return (tree); } // if_statement: if_head // | if_head 'else' compound_statement // ; // // if_head: 'if' '(' true_false_expression ')' compound_statement ; // // Parse an IF statement including any // optional ELSE clause and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Ensure // the tree's operation is a comparison. 
condAST = binexpr(0);

  // The condition must be a comparison: its operator has to lie
  // in the A_EQ..A_GE range, otherwise compilation stops.
  if (condAST->op < A_EQ || condAST->op > A_GE)
    fatal("Bad comparison operator");
  rparen();

  // Get the AST for the compound statement
  trueAST = compound_statement();

  // If we have an 'else', skip it
  // and get the AST for the compound statement
  if (Token.token == T_ELSE) {
    scan(&Token);
    falseAST = compound_statement();
  }
  // Build and return the AST for this statement:
  // condition on the left, true body in the middle, false body on the right
  return (mkastnode(A_IF, P_NONE, condAST, trueAST, falseAST, 0));
}


// while_statement: 'while' '(' true_false_expression ')' compound_statement  ;
//
// Parse a WHILE statement and return its AST.
// The result is an A_WHILE node whose left child is the condition
// and whose right child is the loop body; the middle child is unused.
static struct ASTnode *while_statement(void) {
  struct ASTnode *condAST, *bodyAST;

  // Ensure we have 'while' '('
  match(T_WHILE, "while");
  lparen();

  // Parse the following expression
  // and the ')' following. Ensure
  // the tree's operation is a comparison.
  condAST = binexpr(0);
  if (condAST->op < A_EQ || condAST->op > A_GE)
    fatal("Bad comparison operator");
  rparen();

  // Get the AST for the compound statement
  bodyAST = compound_statement();

  // Build and return the AST for this statement
  return (mkastnode(A_WHILE, P_NONE, condAST, NULL, bodyAST, 0));
}

// for_statement: 'for' '(' preop_statement ';'
//                          true_false_expression ';'
//                          postop_statement ')' compound_statement  ;
//
// preop_statement:  statement          (for now)
// postop_statement: statement          (for now)
//
// Parse a FOR statement and return its AST.
// No dedicated FOR node is built; the loop is rewritten as
//
//      A_GLUE( preop, A_WHILE( cond, A_GLUE( body, postop ) ) )
//
// so it reuses whatever handles A_WHILE and A_GLUE nodes.
static struct ASTnode *for_statement(void) {
  struct ASTnode *condAST, *bodyAST;
  struct ASTnode *preopAST, *postopAST;
  struct ASTnode *tree;

  // Ensure we have 'for' '('
  match(T_FOR, "for");
  lparen();

  // Get the pre_op statement and the ';'
  preopAST = single_statement();
  semi();

  // Get the condition and the ';'
  // As with IF and WHILE, only a comparison is accepted here.
  condAST = binexpr(0);
  if (condAST->op < A_EQ || condAST->op > A_GE)
    fatal("Bad comparison operator");
  semi();

  // Get the post_op statement and the ')'
  postopAST = single_statement();
  rparen();

  // Get the compound statement which is the body
  bodyAST = compound_statement();

  // For now, all four sub-trees have to be non-NULL.
  // Later on, we'll change the semantics for when some are missing

  // Glue the compound statement and the postop tree
  tree = mkastnode(A_GLUE, P_NONE, bodyAST, NULL, postopAST, 0);

  // Make a WHILE loop with the condition and this new body
  tree = mkastnode(A_WHILE, P_NONE, condAST, NULL, tree, 0);

  // And glue the preop tree to the A_WHILE tree
  return (mkastnode(A_GLUE, P_NONE, preopAST, NULL, tree, 0));
}

// return_statement: 'return' '(' expression ')'  ;
//
// Parse a return statement and return its AST.
// The enclosing function is identified by the global Functionid;
// its declared type decides whether a value may be returned and
// whether the expression has to be widened first.
static struct ASTnode *return_statement(void) {
  struct ASTnode *tree;
  int returntype, functype;

  // Can't return a value if function returns P_VOID
  if (Gsym[Functionid].type == P_VOID)
    fatal("Can't return from a void function");

  // Ensure we have 'return' '('
  match(T_RETURN, "return");
  lparen();

  // Parse the following expression
  tree = binexpr(0);

  // Ensure this is compatible with the function's type.
  // The final argument (onlyright = 1) only permits widening the
  // expression up to the function's type, never the other way round.
  returntype = tree->type;
  functype = Gsym[Functionid].type;
  if (!type_compatible(&returntype, &functype, 1))
    fatal("Incompatible types");

  // Widen the left if required.
  // type_compatible() has overwritten returntype with either zero or
  // the AST operation to apply (A_WIDEN), so it is used as the node op.
  if (returntype)
    tree = mkastunary(returntype, functype, tree, 0);

  // Add on the A_RETURN node
  tree = mkastunary(A_RETURN, P_NONE, tree, 0);

  // Get the ')'
  rparen();
  return (tree);
}

// Parse a single statement and return its AST.
// Dispatches on the current token. Variable declarations produce no
// AST and yield NULL; any token not listed below is a fatal syntax error.
static struct ASTnode *single_statement(void) {
  int type;

  switch (Token.token) {
    case T_PRINT:
      return (print_statement());
    case T_CHAR:
    case T_INT:
    case T_LONG:

      // The beginning of a variable declaration.
      // Parse the type and get the identifier.
      // Then parse the rest of the declaration.
      // XXX: These are globals at present.
      type = parse_type();
      ident();
      var_declaration(type);
      return (NULL);		// No AST generated here
    case T_IDENT:
      return (assignment_statement());
    case T_IF:
      return (if_statement());
    case T_WHILE:
      return (while_statement());
    case T_FOR:
      return (for_statement());
    case T_RETURN:
      return (return_statement());
    default:
      fatald("Syntax error, token", Token.token);
  }
  return (NULL);		// Keep -Wall happy
}

// Parse a compound statement
// and return its AST.
// Statements are chained left-to-right with A_GLUE nodes; a block
// holding a single AST-producing statement returns that statement's
// tree directly, and a block holding only declarations returns NULL.
//
// NOTE(review): the '}' test happens only after a statement has been
// parsed, so a block closed immediately ("{ }") appears to reach
// single_statement() with '}' as the current token and end in its
// "Syntax error" branch -- assuming lbrace() leaves the token after
// '{' current. Confirm before relying on empty blocks.
struct ASTnode *compound_statement(void) {
  struct ASTnode *left = NULL;
  struct ASTnode *tree;

  // Require a left curly bracket
  lbrace();

  while (1) {
    // Parse a single statement
    tree = single_statement();

    // Some statements must be followed by a semicolon.
    // IF, WHILE and FOR end in a compound statement and
    // declarations return NULL, so none of those take this path.
    if (tree != NULL && (tree->op == A_PRINT || tree->op == A_ASSIGN ||
			 tree->op == A_RETURN || tree->op == A_FUNCCALL))
      semi();

    // For each new tree, either save it in left
    // if left is empty, or glue the left and the
    // new tree together
    if (tree != NULL) {
      if (left == NULL)
	left = tree;
      else
	left = mkastnode(A_GLUE, P_NONE, left, NULL, tree, 0);
    }
    // When we hit a right curly bracket,
    // skip past it and return the AST
    if (Token.token == T_RBRACE) {
      rbrace();
      return (left);
    }
  }
}

================================================ FILE: 16_Global_Vars/sym.c ================================================
#include "defs.h"
#include "data.h"
#include "decl.h"

// Symbol table functions
// Copyright (c) 2019 Warren Toomey, GPL3

// Determine if the symbol s is in the global symbol table.
// Return its slot position or -1 if not found.
// The search is a linear scan over the first Globs entries of Gsym[].
int findglob(char *s) {
  int i;

  for (i = 0; i < Globs; i++) {
    // Compare the first characters before calling strcmp(),
    // which skips the call for most non-matching names
    if (*s == *Gsym[i].name && !strcmp(s, Gsym[i].name))
      return (i);
  }
  return (-1);
}

// Get the position of a new global symbol slot, or die
// if we've run out of positions.
// Globs is post-incremented, so the returned slot is the old count
// and the counter is bumped even on the failing call.
static int newglob(void) {
  int p;

  if ((p = Globs++) >= NSYMBOLS)
    fatal("Too many global symbols");
  return (p);
}

// Add a global symbol to the symbol table.
// Also set up its type and structural type.
// Return the slot number in the symbol table int addglob(char *name, int type, int stype, int endlabel) { int y; // If this is already in the symbol table, return the existing slot if ((y = findglob(name)) != -1) return (y); // Otherwise get a new slot, fill it in and // return the slot number y = newglob(); Gsym[y].name = strdup(name); Gsym[y].type = type; Gsym[y].stype = stype; Gsym[y].endlabel = endlabel; return (y); } ================================================ FILE: 16_Global_Vars/tests/input01.c ================================================ void main() { printint(12 * 3); printint(18 - 2 * 4); printint(1 + 2 + 9 - 5/2 + 3*5); } ================================================ FILE: 16_Global_Vars/tests/input02.c ================================================ void main() { int fred; int jim; fred= 5; jim= 12; printint(fred + jim); } ================================================ FILE: 16_Global_Vars/tests/input03.c ================================================ void main() { int x; x= 1; printint(x); x= x + 1; printint(x); x= x + 1; printint(x); x= x + 1; printint(x); x= x + 1; printint(x); } ================================================ FILE: 16_Global_Vars/tests/input04.c ================================================ void main() { int x; x= 7 < 9; printint(x); x= 7 <= 9; printint(x); x= 7 != 9; printint(x); x= 7 == 7; printint(x); x= 7 >= 7; printint(x); x= 7 <= 7; printint(x); x= 9 > 7; printint(x); x= 9 >= 7; printint(x); x= 9 != 7; printint(x); } ================================================ FILE: 16_Global_Vars/tests/input05.c ================================================ void main() { int i; int j; i=6; j=12; if (i < j) { printint(i); } else { printint(j); } } ================================================ FILE: 16_Global_Vars/tests/input06.c ================================================ void main() { int i; i=1; while (i <= 10) { printint(i); i= i + 1; } } ================================================ FILE: 
16_Global_Vars/tests/input07.c ================================================ void main() { int i; for (i= 1; i <= 10; i= i + 1) { printint(i); } } ================================================ FILE: 16_Global_Vars/tests/input08.c ================================================ void main() { int i; for (i= 1; i <= 10; i= i + 1) { printint(i); } } ================================================ FILE: 16_Global_Vars/tests/input09.c ================================================ void main() { int i; for (i= 1; i <= 10; i= i + 1) { printint(i); } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { printint(2 * b - a); } } ================================================ FILE: 16_Global_Vars/tests/input10.c ================================================ void main() { int i; char j; j= 20; printint(j); i= 10; printint(i); for (i= 1; i <= 5; i= i + 1) { printint(i); } for (j= 253; j != 2; j= j + 1) { printint(j); } } ================================================ FILE: 16_Global_Vars/tests/input11.c ================================================ int main() { int i; char j; long k; i= 10; printint(i); j= 20; printint(j); k= 30; printint(k); for (i= 1; i <= 5; i= i + 1) { printint(i); } for (j= 253; j != 4; j= j + 1) { printint(j); } for (k= 1; k <= 5; k= k + 1) { printint(k); } return(i); printint(12345); return(3); } ================================================ FILE: 16_Global_Vars/tests/input12.c ================================================ int fred() { return(5); } void main() { int x; x= fred(2); printint(x); } ================================================ FILE: 16_Global_Vars/tests/input13.c ================================================ int fred() { return(56); } void main() { int dummy; int result; dummy= printint(23); result= fred(10); dummy= printint(result); } ================================================ FILE: 16_Global_Vars/tests/input14.c ================================================ int fred() { return(20); } int 
main() { int result; printint(10); result= fred(15); printint(result); printint(fred(15)+10); return(0); } ================================================ FILE: 16_Global_Vars/tests/input15.c ================================================ int main() { char a; char *b; char c; int d; int *e; int f; a= 18; printint(a); b= &a; c= *b; printint(c); d= 12; printint(d); e= &d; f= *e; printint(f); return(0); } ================================================ FILE: 16_Global_Vars/tests/input16.c ================================================ int d, f; int *e; int main() { int a, b, c; b= 3; c= 5; a= b + c * 10; printint(a); d= 12; printint(d); e= &d; f= *e; printint(f); return(0); } ================================================ FILE: 16_Global_Vars/tests/mktests ================================================ #!/bin/sh # Make the output files for each test for i in input*c do if [ ! -f "out.$i" ] then cc -o out $i ../lib/printint.c ./out > out.$i rm -f out fi done ================================================ FILE: 16_Global_Vars/tests/out.input01.c ================================================ 36 10 25 ================================================ FILE: 16_Global_Vars/tests/out.input02.c ================================================ 17 ================================================ FILE: 16_Global_Vars/tests/out.input03.c ================================================ 1 2 3 4 5 ================================================ FILE: 16_Global_Vars/tests/out.input04.c ================================================ 1 1 1 1 1 1 1 1 1 ================================================ FILE: 16_Global_Vars/tests/out.input05.c ================================================ 6 ================================================ FILE: 16_Global_Vars/tests/out.input06.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 16_Global_Vars/tests/out.input07.c 
================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 16_Global_Vars/tests/out.input08.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 16_Global_Vars/tests/out.input09.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 16_Global_Vars/tests/out.input10.c ================================================ 20 10 1 2 3 4 5 253 254 255 0 1 ================================================ FILE: 16_Global_Vars/tests/out.input11.c ================================================ 10 20 30 1 2 3 4 5 253 254 255 0 1 2 3 1 2 3 4 5 ================================================ FILE: 16_Global_Vars/tests/out.input12.c ================================================ 5 ================================================ FILE: 16_Global_Vars/tests/out.input13.c ================================================ 23 56 ================================================ FILE: 16_Global_Vars/tests/out.input14.c ================================================ 10 20 30 ================================================ FILE: 16_Global_Vars/tests/out.input15.c ================================================ 18 18 12 12 ================================================ FILE: 16_Global_Vars/tests/out.input16.c ================================================ 53 12 12 ================================================ FILE: 16_Global_Vars/tests/runtests ================================================ #!/bin/sh # Run each test and compare # against known good output if [ ! -f ../comp1 ] then echo "Need to build ../comp1 first!"; exit 1 fi for i in input* do if [ ! -f "out.$i" ] then echo "Can't run test on $i, no output file!" else echo -n $i ../comp1 $i cc -o out out.s ../lib/printint.c ./out > trial.$i cmp -s "out.$i" "trial.$i" if [ "$?" 
-eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo else echo ": OK" fi rm -f out out.s "trial.$i" fi done ================================================ FILE: 16_Global_Vars/tests/runtestsn ================================================ #!/bin/sh # Run each test and compare # against known good output if [ ! -f ../compn ] then echo "Need to build ../compn first!"; exit 1 fi for i in input* do if [ ! -f "out.$i" ] then echo "Can't run test on $i, no output file!" else echo -n $i ../compn $i nasm -f elf64 out.s cc -no-pie -Wall -o out out.o ../lib/printint.c ./out > trial.$i cmp -s "out.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo else echo ": OK" fi rm -f out out.o out.s "trial.$i" fi done ================================================ FILE: 16_Global_Vars/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->type = type; n->left = left; n->mid = mid; n->right = right; n->v.intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, int intvalue) { return (mkastnode(op, type, NULL, NULL, NULL, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, int intvalue) { return (mkastnode(op, type, left, NULL, NULL, intvalue)); } ================================================ FILE: 16_Global_Vars/types.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Types 
and type handling // Copyright (c) 2019 Warren Toomey, GPL3 // Given two primitive types, // return true if they are compatible, // false otherwise. Also return either // zero or an A_WIDEN operation if one // has to be widened to match the other. // If onlyright is true, only widen left to right. int type_compatible(int *left, int *right, int onlyright) { int leftsize, rightsize; // Same types, they are compatible if (*left == *right) { *left = *right = 0; return (1); } // Get the sizes for each type leftsize = genprimsize(*left); rightsize = genprimsize(*right); // Types with zero size are not // not compatible with anything if ((leftsize == 0) || (rightsize == 0)) return (0); // Widen types as required if (leftsize < rightsize) { *left = A_WIDEN; *right = 0; return (1); } if (rightsize < leftsize) { if (onlyright) return (0); *left = 0; *right = A_WIDEN; return (1); } // Anything remaining is the same size // and thus compatible *left = *right = 0; return (1); } // Given a primitive type, return // the type which is a pointer to it int pointer_to(int type) { int newtype; switch (type) { case P_VOID: newtype = P_VOIDPTR; break; case P_CHAR: newtype = P_CHARPTR; break; case P_INT: newtype = P_INTPTR; break; case P_LONG: newtype = P_LONGPTR; break; default: fatald("Unrecognised in pointer_to: type", type); } return (newtype); } // Given a primitive pointer type, return // the type which it points to int value_at(int type) { int newtype; switch (type) { case P_VOIDPTR: newtype = P_VOID; break; case P_CHARPTR: newtype = P_CHAR; break; case P_INTPTR: newtype = P_INT; break; case P_LONGPTR: newtype = P_LONG; break; default: fatald("Unrecognised in value_at: type", type); } return (newtype); } ================================================ FILE: 17_Scaling_Offsets/Makefile ================================================ SRCS= cg.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c \ sym.c tree.c types.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c \ sym.c 
tree.c types.c ARMSRCS= cg_arm.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c \ sym.c tree.c types.c comp1: $(SRCS) cc -o comp1 -g -Wall $(SRCS) compn: $(SRCN) cc -o compn -g -Wall $(SRCN) comp1arm: $(ARMSRCS) cc -o comp1arm -g -Wall $(ARMSRCS) cp comp1arm comp1 clean: rm -f comp1 comp1arm compn *.o *.s out test: comp1 tests/runtests (cd tests; chmod +x runtests; ./runtests) armtest: comp1arm tests/runtests (cd tests; chmod +x runtests; ./runtests) testn: compn tests/runtestsn (cd tests; chmod +x runtestsn; ./runtestsn) test16: comp1 tests/input16.c lib/printint.c ./comp1 tests/input16.c cc -o out out.s lib/printint.c ./out armtest16: comp1arm tests/input16.c lib/printint.c ./comp1 tests/input16.c cc -o out out.s lib/printint.c ./out test16n: compn tests/input16.c lib/printint.c ./compn tests/input16.c nasm -f elf64 out.s cc -no-pie -o out lib/printint.c out.o ./out ================================================ FILE: 17_Scaling_Offsets/Readme.md ================================================ # Part 17: Better Type Checking and Pointer Offsets A couple of parts ago, I introduced pointers and implemented some code to check type compatibility. At the time, I realised that, for code like: ```c int c; int *e; e= &c + 1; ``` the addition of one to the pointer calculated by `&c` would need to be converted into the size of `c`, to ensure we skip to the next `int` in memory after `c`. In other words, we would have to scale the integer. We need to do this for pointer, and later on we will need to do this for arrays. Consider the code: ```c int list[10]; int x= list[3]; ``` To do this, we need to find the base address of `list[]`, then add on three times the size of `int` to find the element at index position 3. At the time, I'd written a function in `types.c` called `type_compatible()` to determine if two types were compatible, and to indicate if we needed to "widen" a small integer type so that it was the same size as a larger integer type. 
This widening, though, was performed elsewhere. In fact, it ended up being done in three places in the compiler. ## A Replacement for `type_compatible()` If `type_compatible()` indicated so, we would A_WIDEN a tree to match a larger integer type. Now we need to A_SCALE a tree so that its value is scaled by the size of a type. And I want to refactor the duplicate widening code. To this end, I've thrown out `type_compatible()` and replaced it. This took quite a bit of thinking, and I probably will have to tweak or extend it again. Let's look at the design. The existing `type_compatible()`: + took two type values as arguments, plus an optional direction, + returned true if the types were compatible, + returned A_WIDEN on the left or right if either side needed to be widened, + didn't actually add the A_WIDEN node to the tree, + returned false if the types were not compatible, and + didn't handle pointer types Now let's look at the use cases for type comparisons: + when performing a binary operation on two expressions, are their types compatible and do we need to widen or scale one? + when doing a `print` statement, is the expression an integer and does it need widening? + when doing an assignment statement, does the expression need widening and does it match the lvalue type? + when doing a `return` statement, does the expression need widening and does it match the return type of the function? In only one of these use cases do we have two expressions. Therefore, I've chosen to write a new function that takes one AST tree and the type we want it to become. For the binary operation use case, we will call it twice and see what happens for each call. ## Introducing `modify_type()` `modify_type()` in `types.c` is the replacement code for `type_compatible()`. The API for the function is: ```c // Given an AST tree and a type which we want it to become, // possibly modify the tree by widening or scaling so that // it is compatible with this type. 
Return the original tree // if no changes occurred, a modified tree, or NULL if the // tree is not compatible with the given type. // If this will be part of a binary operation, the AST op is not zero. struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op); ``` Question: why do we need whatever binary operation is being performed on the tree and some other tree? The answer is that we can only add to or subtract from pointers. We can't do anything else, e.g. ```c int x; int *ptr; x= *ptr; // OK x= *(ptr + 2); // Two ints up from where ptr is pointing x= *(ptr * 4); // Does not make sense x= *(ptr / 13); // Does not make sense either ``` Here is the code for now. There are lots of specific tests, and at present I can't see a way to rationalise all the possible tests. Also, it will need to be extended later. ```c struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op) { int ltype; int lsize, rsize; ltype = tree->type; // Compare scalar int types if (inttype(ltype) && inttype(rtype)) { // Both types same, nothing to do if (ltype == rtype) return (tree); // Get the sizes for each type lsize = genprimsize(ltype); rsize = genprimsize(rtype); // Tree's size is too big if (lsize > rsize) return (NULL); // Widen to the right if (rsize > lsize) return (mkastunary(A_WIDEN, rtype, tree, 0)); } // For pointers on the left if (ptrtype(ltype)) { // OK is same type on right and not doing a binary op if (op == 0 && ltype == rtype) return (tree); } // We can scale only on A_ADD or A_SUBTRACT operation if (op == A_ADD || op == A_SUBTRACT) { // Left is int type, right is pointer type and the size // of the original type is >1: scale the left if (inttype(ltype) && ptrtype(rtype)) { rsize = genprimsize(value_at(rtype)); if (rsize > 1) return (mkastunary(A_SCALE, rtype, tree, rsize)); } } // If we get here, the types are not compatible return (NULL); } ``` The code to add the AST A_WIDEN and A_SCALE operations are now done here in one place only. 
The A_WIDEN operation converts the child's type to the parent's type. The A_SCALE operation multiplies the child's value by the size which is stored in the new `struct ASTnode` union field (in `defs.h`): ```c // Abstract Syntax Tree structure struct ASTnode { ... union { int size; // For A_SCALE, the size to scale by } v; }; ``` ## Using the New `modify_type()` API With this new API, we can remove the duplicated code to A_WIDEN which is in `stmt.c` and `expr.c`. However, this new function only takes one tree. This is fine when we indeed only have one tree. There are three calls now to `modify_type()` in `stmt.c`. They are all similar, so here is the one from `assignment_statement()`: ```c // Make the AST node for the assignment lvalue right = mkastleaf(A_LVIDENT, Gsym[id].type, id); ... // Parse the following expression left = binexpr(0); // Ensure the two types are compatible. left = modify_type(left, right->type, 0); if (left == NULL) fatal("Incompatible expression in assignment"); ``` which is so much cleaner than the code we had before. ### And in `binexpr()` ... But in `binexpr()` in `expr.c`, we now need to combine two AST trees with a binary operation like addition, multiplication etc. Here, we try to `modify_type()` each tree with the other tree's type. Now, one may widen: this also implies that the other will fail and return NULL. Thus, we can't just see if one result from `modify_type()` is NULL: we need to see both be NULL to assume a type incompatibility. Here's the new comparison code in `binexpr()`: ```c struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; struct ASTnode *ltemp, *rtemp; int ASTop; int tokentype; ... // Get the tree on the left. // Fetch the next token at the same time. left = prefix(); tokentype = Token.token; ...
// Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Ensure the two types are compatible by trying // to modify each tree to match the other's type. ASTop = arithop(tokentype); ltemp = modify_type(left, right->type, ASTop); rtemp = modify_type(right, left->type, ASTop); if (ltemp == NULL && rtemp == NULL) fatal("Incompatible types in binary expression"); // Update any trees that were widened or scaled if (ltemp != NULL) left = ltemp; if (rtemp != NULL) right = rtemp; ``` The code is a little bit messy but no more than what was previously there, and it now deals with A_SCALE as well as A_WIDEN. ## Performing the Scaling We have added the A_SCALE to the list of AST node operations in `defs.h`. Now we need to implement it. As I mentioned before, the A_SCALE operation multiplies the child's value by the size which is stored in the new `struct ASTnode` union field. For all of our integer types, this will be a power of two. Because of this fact, we can multiply the child's value with a shift left of a certain number of bits. Later on, we will have structs that have a size which isn't a power of two. So we can do a shift optimisation when the scale factor is suitable, but we also need to implement a multiply for a more general scale factor. Here is the new code that does this in `genAST()` in `gen.c`: ```c case A_SCALE: // Small optimisation: use shift if the // scale value is a known power of two switch (n->v.size) { case 2: return(cgshlconst(leftreg, 1)); case 4: return(cgshlconst(leftreg, 2)); case 8: return(cgshlconst(leftreg, 3)); default: // Load a register with the size and // multiply the leftreg by this size rightreg= cgloadint(n->v.size, P_INT); return (cgmul(leftreg, rightreg)); ``` ## Shifting Left in x86-64 Code We now need a `cgshlconst()` function to shift a register value left by a constant. When we add the C '<<' operator later, I will write a more general shift left function.
For now, we can use the `salq` instruction with an integer literal value: ```c // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsalq\t$%d, %s\n", val, reglist[r]); return(r); } ``` ## Our Test Program that Doesn't Work My test program for the scaling functionality is `tests/input16.c`: ```c int c; int d; int *e; int f; int main() { c= 12; d=18; printint(c); e= &c + 1; f= *e; printint(f); return(0); } ``` I was hoping that `d` would be placed immediately after `c` by the assembler when we generate these assembly directives: ``` .comm c,1,1 .comm d,4,4 ``` But when I compiled the assembly output and checked, they were not adjacent: ``` $ cc -o out out.s lib/printint.c $ nm -n out | grep 'B ' 0000000000201018 B d 0000000000201020 B b 0000000000201028 B f 0000000000201030 B e 0000000000201038 B c ``` `d` actually comes *before* `c`! I had to work out a way to ensure the adjacency, so I looked at the code that *SubC* generates here, and changed our compiler to now generate this: ``` .data .globl c c: .long 0 # Four byte integer .globl d d: .long 0 .globl e e: .quad 0 # Eight byte pointer .globl f f: .long 0 ``` Now when we run our `input16.c` test, `e= &c + 1; f= *e;` gets the address of the integer one up from `c` and stores that integer's value in `f`. As we declared: ```c int c; int d; ... c= 12; d=18; printint(c); e= &c + 1; f= *e; printint(f); ``` we will print out both numbers: ``` cc -o comp1 -g -Wall cg.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c sym.c tree.c types.c ./comp1 tests/input16.c cc -o out out.s lib/printint.c ./out 12 18 ``` ## Conclusion and What's Next I feel a lot happier with the code that converts between types. Behind the scenes, I wrote some test code that supplied all possible type values to `modify_type()`, as well as a binary operation and zero for the operation. I eyeballed the output and it seems to be what I want. Only time will tell. 
In the next part of our compiler writing journey, I don't know what I will do! [Next step](../18_Lvalues_Revisited/Readme.md) ================================================ FILE: 17_Scaling_Offsets/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. // We need a list of byte and doubleword registers, too static int freereg[4]; static char *reglist[4] = { "%r8", "%r9", "%r10", "%r11" }; static char *breglist[4] = { "%r8b", "%r9b", "%r10b", "%r11b" }; static char *dreglist[4] = { "%r8d", "%r9d", "%r10d", "%r11d" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Gsym[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, @function\n" "%s:\n" "\tpushq\t%%rbp\n" "\tmovq\t%%rsp, %%rbp\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Gsym[id].endlabel); fputs("\tpopq %rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. 
int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]); return (r); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Print out the code to initialise it switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzbq\t%s(%%rip), %s\n", Gsym[id].name, reglist[r]); break; case P_INT: fprintf(Outfile, "\tmovzbl\t%s(\%%rip), %s\n", Gsym[id].name, reglist[r]); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tmovq\t%s(\%%rip), %s\n", Gsym[id].name, reglist[r]); break; default: fatald("Bad type in cgloadglob:", Gsym[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); free_register(r2); return (r1); } // Call printint() with the given register void cgprintint(int r) { fprintf(Outfile, "\tmovq\t%s, %%rdi\n", reglist[r]); fprintf(Outfile, "\tcall\tprintint\n"); free_register(r); } // Call a function with one 
argument from the given register // Return the register with the result int cgcall(int r, int id) { // Get a new register int outr = alloc_register(); fprintf(Outfile, "\tmovq\t%s, %%rdi\n", reglist[r]); fprintf(Outfile, "\tcall\t%s\n", Gsym[id].name); fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]); free_register(r); return (outr); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsalq\t$%d, %s\n", val, reglist[r]); return(r); } // Store a register's value into a variable int cgstorglob(int r, int id) { switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %s(\%%rip)\n", breglist[r], Gsym[id].name); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %s(\%%rip)\n", dreglist[r], Gsym[id].name); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tmovq\t%s, %s(\%%rip)\n", reglist[r], Gsym[id].name); break; default: fatald("Bad type in cgloadglob:", Gsym[id].type); } return (r); } // Array of type sizes in P_XXX order. // 0 means no size. static int psize[] = { 0, 0, 1, 4, 8, 8, 8, 8, 8 }; // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { // Check the type is valid if (type < P_NONE || type > P_LONGPTR) fatal("Bad type in cgprimsize()"); return (psize[type]); } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Gsym[id].type); fprintf(Outfile, "\t.data\n" "\t.globl\t%s\n", Gsym[id].name); switch(typesize) { case 1: fprintf(Outfile, "%s:\t.byte\t0\n", Gsym[id].name); break; case 4: fprintf(Outfile, "%s:\t.long\t0\n", Gsym[id].name); break; case 8: fprintf(Outfile, "%s:\t.quad\t0\n", Gsym[id].name); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { // Generate code depending on the function's type switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzbl\t%s, %%eax\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %%eax\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", Gsym[id].type); } cgjump(Gsym[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. Return a new register int cgaddress(int id) { int r = alloc_register(); fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", Gsym[id].name, reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tmovzbq\t(%s), %s\n", reglist[r], reglist[r]); break; case P_INTPTR: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; case P_LONGPTR: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; } return (r); } ================================================ FILE: 17_Scaling_Offsets/cg_arm.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for ARMv6 on Raspberry Pi // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. 
static int freereg[4]; static char *reglist[4] = { "r4", "r5", "r6", "r7" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // We have to store large integer literal values in memory. // Keep a list of them which will be output in the postamble #define MAXINTS 1024 int Intlist[MAXINTS]; static int Intslot = 0; // Determine the offset of a large integer // literal from the .L3 label. If the integer // isn't in the list, add it. 
static void set_int_offset(int val) { int offset = -1; // See if it is already there for (int i = 0; i < Intslot; i++) { if (Intlist[i] == val) { offset = 4 * i; break; } } // Not in the list, so add it if (offset == -1) { offset = 4 * Intslot; if (Intslot == MAXINTS) fatal("Out of int slots in set_int_offset()"); Intlist[Intslot++] = val; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L3+%d\n", offset); } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Print out the assembly postamble void cgpostamble() { // Print out the global variables fprintf(Outfile, ".L2:\n"); for (int i = 0; i < Globs; i++) { if (Gsym[i].stype == S_VARIABLE) fprintf(Outfile, "\t.word %s\n", Gsym[i].name); } // Print out the integer literals fprintf(Outfile, ".L3:\n"); for (int i = 0; i < Intslot; i++) { fprintf(Outfile, "\t.word %d\n", Intlist[i]); } } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Gsym[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, \%%function\n" "%s:\n" "\tpush\t{fp, lr}\n" "\tadd\tfp, sp, #4\n" "\tsub\tsp, sp, #8\n" "\tstr\tr0, [fp, #-8]\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Gsym[id].endlabel); fputs("\tsub\tsp, fp, #4\n" "\tpop\t{fp, pc}\n" "\t.align\t2\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); // If the literal value is small, do it with one instruction if (value <= 1000) fprintf(Outfile, "\tmov\t%s, #%d\n", reglist[r], value); else { set_int_offset(value); fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); } return (r); } // Determine the offset of a variable from the .L2 // label. Yes, this is inefficient code. static void set_var_offset(int id) { int offset = 0; // Walk the symbol table up to id. 
// Find S_VARIABLEs and add on 4 until // we get to our variable for (int i = 0; i < id; i++) { if (Gsym[i].stype == S_VARIABLE) offset += 4; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L2+%d\n", offset); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tldrb\t%s, [r3]\n", reglist[r]); break; default: fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); break; } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s, %s\n", reglist[r1], reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\tmul\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { // To do a divide: r0 holds the dividend, r1 holds the divisor. // The quotient is in r0. 
fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r1]); fprintf(Outfile, "\tmov\tr1, %s\n", reglist[r2]); fprintf(Outfile, "\tbl\t__aeabi_idiv\n"); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r1]); free_register(r2); return (r1); } // Call printint() with the given register void cgprintint(int r) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]); fprintf(Outfile, "\tbl\tprintint\n"); fprintf(Outfile, "\tnop\n"); free_register(r); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]); fprintf(Outfile, "\tbl\t%s\n", Gsym[id].name); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r]); return (r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tlsl\t%s, %s, #%d\n", reglist[r], reglist[r], val); return(r); } // Store a register's value into a variable int cgstorglob(int r, int id) { // Get the offset to the variable set_var_offset(id); switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tstr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgstorglob:", Gsym[id].type); } return (r); } // Array of type sizes in P_XXX order. // 0 means no size. static int psize[] = { 0, 0, 1, 4, 4, 4, 4, 4 }; // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { // Check the type is valid if (type < P_NONE || type > P_LONGPTR) fatal("Bad type in cgprimsize()"); return (psize[type]); } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Gsym[id].type); fprintf(Outfile, "\t.data\n" "\t.globl\t%s\n", Gsym[id].name); switch(typesize) { case 1: fprintf(Outfile, "%s:\t.byte\t0\n", Gsym[id].name); break; case 4: fprintf(Outfile, "%s:\t.long\t0\n", Gsym[id].name); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "moveq", "movne", "movlt", "movgt", "movle", "movge" }; // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "movne", "moveq", "movge", "movle", "movgt", "movlt" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #1\n", cmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #0\n", invcmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\tuxtb\t%s, %s\n", reglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tb\tL%d\n", l); } // List of inverted branch instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *brlist[] = { "bne", "beq", "bge", "ble", "bgt", "blt" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", brlist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[reg]); cgjump(Gsym[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. Return a new register int cgaddress(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); fprintf(Outfile, "\tmov\t%s, r3\n", reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tldrb\t%s, [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: fprintf(Outfile, "\tldr\t%s, [%s]\n", reglist[r], reglist[r]); break; case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } ================================================ FILE: 17_Scaling_Offsets/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. 
// We need a list of byte and doubleword registers, too static int freereg[4]; static char *reglist[4] = { "r8", "r9", "r10", "r11" }; static char *breglist[4] = { "r8b", "r9b", "r10b", "r11b" }; static char *dreglist[4] = { "r8d", "r9d", "r10d", "r11d" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\textern\tprintint\n", Outfile); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Gsym[id].name; fprintf(Outfile, "\tsection\t.text\n" "\tglobal\t%s\n" "%s:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n", name, name); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Gsym[id].endlabel); fputs("\tpop rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Load a value from a variable into a register. 
// Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Print out the code to initialise it switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], Gsym[id].name); break; case P_INT: fprintf(Outfile, "\txor\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tmov\t%s, dword [%s]\n", dreglist[r], Gsym[id].name); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], Gsym[id].name); break; default: fatald("Bad type in cgloadglob:", Gsym[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } // Call printint() with the given register void cgprintint(int r) { fprintf(Outfile, "\tmov\trdi, %s\n", reglist[r]); fprintf(Outfile, "\tcall\tprintint\n"); free_register(r); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { // Get a new register int outr = alloc_register(); 
fprintf(Outfile, "\tmov\trdi, %s\n", reglist[r]); fprintf(Outfile, "\tcall\t%s\n", Gsym[id].name); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[outr]); free_register(r); return (outr); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsal\t%s, %d\n", reglist[r], val); return(r); } // Store a register's value into a variable int cgstorglob(int r, int id) { switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], %s\n", Gsym[id].name, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", Gsym[id].name, dreglist[r]); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tmov\t[%s], %s\n", Gsym[id].name, reglist[r]); break; default: fatald("Bad type in cgloadglob:", Gsym[id].type); } return (r); } // Array of type sizes in P_XXX order. // 0 means no size. static int psize[] = { 0, 0, 1, 4, 8, 8, 8, 8, 8 }; // Given a P_XXX type value, return the // size of a primitive type in bytes. int cgprimsize(int type) { // Check the type is valid if (type < P_NONE || type > P_LONGPTR) fatal("Bad type in cgprimsize()"); return (psize[type]); } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Gsym[id].type); fprintf(Outfile, "\tsection\t.data\n" "\tglobal\t%s\n", Gsym[id].name); switch(typesize) { case 1: fprintf(Outfile, "%s:\tdb\t0\n", Gsym[id].name); break; case 4: fprintf(Outfile, "%s:\tdd\t0\n", Gsym[id].name); break; case 8: fprintf(Outfile, "%s:\tdq\t0\n", Gsym[id].name); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { // Generate code depending on the function's type switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzx\teax, %s\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmov\teax, %s\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", Gsym[id].type); } cgjump(Gsym[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. 
Return a new register int cgaddress(int id) { int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r], Gsym[id].name); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: fprintf(Outfile, "\tmovzx\t%s, word [%s]\n", reglist[r], reglist[r]); break; case P_LONGPTR: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } ================================================ FILE: 17_Scaling_Offsets/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Putback; // Character put back by scanner extern_ int Functionid; // Symbol id of the current function extern_ int Globs; // Position of next free global symbol slot extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ struct token Token; // Last token scanned extern_ char Text[TEXTLEN + 1]; // Last identifier scanned extern_ struct symtable Gsym[NSYMBOLS]; // Global symbol table ================================================ FILE: 17_Scaling_Offsets/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 // Parse the current token and return // a primitive type enum value. 
Also // scan in the next token int parse_type(void) { int type; switch (Token.token) { case T_VOID: type = P_VOID; break; case T_CHAR: type = P_CHAR; break; case T_INT: type = P_INT; break; case T_LONG: type = P_LONG; break; default: fatald("Illegal type, token", Token.token); } // Scan in one or more further '*' tokens // and determine the correct pointer type while (1) { scan(&Token); if (Token.token != T_STAR) break; type = pointer_to(type); } // We leave with the next token already scanned return (type); } // variable_declaration: type identifier ';' ; // // Parse the declaration of a variable. // The identifier has been scanned & we have the type void var_declaration(int type) { int id; // Text now has the identifier's name. // Add it as a known identifier // and generate its space in assembly id = addglob(Text, type, S_VARIABLE, 0); genglobsym(id); // Get the trailing semicolon semi(); } // // function_declaration: type identifier '(' ')' compound_statement ; // // Parse the declaration of a simplistic function. // The identifier has been scanned & we have the type struct ASTnode *function_declaration(int type) { struct ASTnode *tree, *finalstmt; int nameslot, endlabel; // Text now has the identifier's name. // Get a label-id for the end label, add the function // to the symbol table, and set the Functionid global // to the function's symbol-id endlabel = genlabel(); nameslot = addglob(Text, type, S_FUNCTION, endlabel); Functionid = nameslot; // Scan in the parentheses lparen(); rparen(); // Get the AST tree for the compound statement tree = compound_statement(); // If the function type isn't P_VOID .. if (type != P_VOID) { // Error if no statements in the function if (tree == NULL) fatal("No statements in function with non-void type"); // Check that the last AST operation in the // compound statement was a return statement finalstmt = (tree->op == A_GLUE) ? 
tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); } // Return an A_FUNCTION node which has the function's nameslot // and the compound statement sub-tree return (mkastunary(A_FUNCTION, type, tree, nameslot)); } // Parse one or more global declarations, either // variables or functions void global_declarations(void) { struct ASTnode *tree; int type; while (1) { // We have to read past the type and identifier // to see either a '(' for a function declaration // or a ',' or ';' for a variable declaration. // Text is filled in by the ident() call. type = parse_type(); ident(); if (Token.token == T_LPAREN) { // Parse the function declaration and // generate the assembly code for it tree = function_declaration(type); genAST(tree, NOREG, 0); } else { // Parse the global variable declaration var_declaration(type); } // Stop when we have reached EOF if (Token.token == T_EOF) break; } } ================================================ FILE: 17_Scaling_Offsets/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c void reject_token(struct token *t); int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, int intvalue); struct ASTnode *mkastleaf(int op, int type, int intvalue); struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, int intvalue); // gen.c int genlabel(void); int genAST(struct ASTnode *n, int reg, int parentASTop); void genpreamble(); void genpostamble(); void genfreeregs(); void genprintint(int reg); void genglobsym(int id); int genprimsize(int type); void genreturn(int reg, int id); // cg.c void freeall_registers(void); void cgpreamble(); void cgpostamble(); void cgfuncpreamble(int id); void cgfuncpostamble(int id); int cgloadint(int value, int type); int cgloadglob(int id); int cgadd(int 
r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdiv(int r1, int r2); int cgshlconst(int r, int val); void cgprintint(int r); int cgcall(int r, int id); int cgstorglob(int r, int id); void cgglobsym(int id); int cgcompare_and_set(int ASTop, int r1, int r2); int cgcompare_and_jump(int ASTop, int r1, int r2, int label); void cglabel(int l); void cgjump(int l); int cgwiden(int r, int oldtype, int newtype); int cgprimsize(int type); void cgreturn(int reg, int id); int cgaddress(int id); int cgderef(int r, int type); // expr.c struct ASTnode *funccall(void); struct ASTnode *binexpr(int ptp); // stmt.c struct ASTnode *compound_statement(void); // misc.c void match(int t, char *what); void semi(void); void lbrace(void); void rbrace(void); void lparen(void); void rparen(void); void ident(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c int findglob(char *s); int addglob(char *name, int type, int stype, int endlabel); // decl.c void var_declaration(int type); struct ASTnode *function_declaration(int type); void global_declarations(void); // types.c int parse_type(void); int pointer_to(int type); int value_at(int type); struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op); ================================================ FILE: 17_Scaling_Offsets/defs.h ================================================ #include #include #include #include // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 #define TEXTLEN 512 // Length of symbols in input #define NSYMBOLS 1024 // Number of symbol table entries // Token types enum { T_EOF, // Operators T_PLUS, T_MINUS, T_STAR, T_SLASH, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, // Type keywords T_VOID, T_CHAR, T_INT, T_LONG, // Structural tokens T_INTLIT, T_SEMI, T_ASSIGN, T_IDENT, T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, T_AMPER, T_LOGAND, // Other keywords T_PRINT, T_IF, T_ELSE, T_WHILE, T_FOR, T_RETURN }; // 
Token structure struct token { int token; // Token type, from the enum list above int intvalue; // For T_INTLIT, the integer value }; // AST node types. The first few line up // with the related tokens enum { A_ADD = 1, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_INTLIT, A_IDENT, A_LVIDENT, A_ASSIGN, A_PRINT, A_GLUE, A_IF, A_WHILE, A_FUNCTION, A_WIDEN, A_RETURN, A_FUNCCALL, A_DEREF, A_ADDR, A_SCALE }; // Primitive types enum { P_NONE, P_VOID, P_CHAR, P_INT, P_LONG, P_VOIDPTR, P_CHARPTR, P_INTPTR, P_LONGPTR }; // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; union { // For A_INTLIT, the integer value int intvalue; // For A_IDENT, the symbol slot number int id; // For A_FUNCTION, the symbol slot number int size; // For A_SCALE, the size to scale by } v; // For A_FUNCCALL, the symbol slot number }; #define NOREG -1 // Use NOREG when the AST generation // functions have no register to return // Structural types enum { S_VARIABLE, S_FUNCTION }; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol int stype; // Structural type for the symbol int endlabel; // For S_FUNCTIONs, the end label }; ================================================ FILE: 17_Scaling_Offsets/expr.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of expressions // Copyright (c) 2019 Warren Toomey, GPL3 // Parse a function call with a single expression // argument and return its AST struct ASTnode *funccall(void) { struct ASTnode *tree; int id; // Check that the identifier has been defined, // then make a leaf node for it. 
XXX Add structural type test if ((id = findglob(Text)) == -1) { fatals("Undeclared function", Text); } // Get the '(' lparen(); // Parse the following expression tree = binexpr(0); // Build the function call AST node. Store the // function's return type as this node's type. // Also record the function's symbol-id tree = mkastunary(A_FUNCCALL, Gsym[id].type, tree, id); // Get the ')' rparen(); return (tree); } // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; int id; switch (Token.token) { case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. // Make it a P_CHAR if it's within the P_CHAR range if ((Token.intvalue) >= 0 && (Token.intvalue < 256)) n = mkastleaf(A_INTLIT, P_CHAR, Token.intvalue); else n = mkastleaf(A_INTLIT, P_INT, Token.intvalue); break; case T_IDENT: // This could be a variable or a function call. // Scan in the next token to find out scan(&Token); // It's a '(', so a function call if (Token.token == T_LPAREN) return (funccall()); // Not a function call, so reject the new token reject_token(&Token); // Check that the variable exists. XXX Add structural type test id = findglob(Text); if (id == -1) fatals("Unknown variable", Text); // Make a leaf AST node for it n = mkastleaf(A_IDENT, Gsym[id].type, id); break; default: fatald("Syntax error, token", Token.token); } // Scan in the next token and return the leaf node scan(&Token); return (n); } // Convert a binary operator token into an AST operation. // We rely on a 1:1 mapping from token to AST operation static int arithop(int tokentype) { if (tokentype > T_EOF && tokentype < T_INTLIT) return (tokentype); fatald("Syntax error, token", tokentype); return (0); // Keep -Wall happy } // Operator precedence for each token. 
Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 10, // T_EOF, T_PLUS, T_MINUS 20, 20, // T_STAR, T_SLASH 30, 30, // T_EQ, T_NE 40, 40, 40, 40 // T_LT, T_GT, T_LE, T_GE }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec = OpPrec[tokentype]; if (prec == 0) fatald("Syntax error, token", tokentype); return (prec); } // prefix_expression: primary // | '*' prefix_expression // | '&' prefix_expression // ; // Parse a prefix expression and return // a sub-tree representing it. struct ASTnode *prefix(void) { struct ASTnode *tree; switch (Token.token) { case T_AMPER: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Ensure that it's an identifier if (tree->op != A_IDENT) fatal("& operator must be followed by an identifier"); // Now change the operator to A_ADDR and the type to // a pointer to the original type tree->op = A_ADDR; tree->type = pointer_to(tree->type); break; case T_STAR: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's either another deref or an // identifier if (tree->op != A_IDENT && tree->op != A_DEREF) fatal("* operator must be followed by an identifier or *"); // Prepend an A_DEREF operation to the tree tree = mkastunary(A_DEREF, value_at(tree->type), tree, 0); break; default: tree = primary(); } return (tree); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; struct ASTnode *ltemp, *rtemp; int ASTop; int tokentype; // Get the tree on the left. // Fetch the next token at the same time. 
left = prefix(); // If we hit a semicolon or ')', return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN) return (left); // While the precedence of this token is // more than that of the previous token precedence while (op_precedence(tokentype) > ptp) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Ensure the two types are compatible by trying // to modify each tree to match the other's type. ASTop = arithop(tokentype); ltemp = modify_type(left, right->type, ASTop); rtemp = modify_type(right, left->type, ASTop); if (ltemp == NULL && rtemp == NULL) fatal("Incompatible types in binary expression"); if (ltemp != NULL) left = ltemp; if (rtemp != NULL) right = rtemp; // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. left = mkastnode(arithop(tokentype), left->type, left, NULL, right, 0); // Update the details of the current token. // If we hit a semicolon or ')', return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN) return (left); } // Return the tree we have when the precedence // is the same or lower return (left); } ================================================ FILE: 17_Scaling_Offsets/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number int genlabel(void) { static int id = 1; return (id++); } // Generate the code for an IF statement // and an optional ELSE clause static int genIF(struct ASTnode *n) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. // When there is no ELSE clause, Lfalse _is_ // the ending label! 
Lfalse = genlabel(); if (n->right) Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. // We cheat by sending the Lfalse label as a register. genAST(n->left, Lfalse, n->op); genfreeregs(); // Generate the true compound statement genAST(n->mid, NOREG, n->op); genfreeregs(); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOREG, n->op); genfreeregs(); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement // and an optional ELSE clause static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = genlabel(); Lend = genlabel(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. // We cheat by sending the Lfalse label as a register. genAST(n->left, Lend, n->op); genfreeregs(); // Generate the compound statement for the body genAST(n->right, NOREG, n->op); genfreeregs(); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Given an AST, the register (if any) that holds // the previous rvalue, and the AST op of the parent, // generate assembly code recursively. 
// Return the register id with the tree's final value int genAST(struct ASTnode *n, int reg, int parentASTop) { int leftreg, rightreg; // We now have specific AST node handling at the top switch (n->op) { case A_IF: return (genIF(n)); case A_WHILE: return (genWHILE(n)); case A_GLUE: // Do each child statement, and free the // registers after each child genAST(n->left, NOREG, n->op); genfreeregs(); genAST(n->right, NOREG, n->op); genfreeregs(); return (NOREG); case A_FUNCTION: // Generate the function's preamble before the code cgfuncpreamble(n->v.id); genAST(n->left, NOREG, n->op); cgfuncpostamble(n->v.id); return (NOREG); } // General AST node handling below // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, NOREG, n->op); if (n->right) rightreg = genAST(n->right, leftreg, n->op); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdiv(leftreg, rightreg)); case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, compare registers // and set one to 1 or 0 based on the comparison. 
if (parentASTop == A_IF || parentASTop == A_WHILE) return (cgcompare_and_jump(n->op, leftreg, rightreg, reg)); else return (cgcompare_and_set(n->op, leftreg, rightreg)); case A_INTLIT: return (cgloadint(n->v.intvalue, n->type)); case A_IDENT: return (cgloadglob(n->v.id)); case A_LVIDENT: return (cgstorglob(reg, n->v.id)); case A_ASSIGN: // The work has already been done, return the result return (rightreg); case A_PRINT: // Print the left-child's value // and return no register genprintint(leftreg); genfreeregs(); return (NOREG); case A_WIDEN: // Widen the child's type to the parent's type return (cgwiden(leftreg, n->left->type, n->type)); case A_RETURN: cgreturn(leftreg, Functionid); return (NOREG); case A_FUNCCALL: return (cgcall(leftreg, n->v.id)); case A_ADDR: return (cgaddress(n->v.id)); case A_DEREF: return (cgderef(leftreg, n->left->type)); case A_SCALE: // Small optimisation: use shift if the // scale value is a known power of two switch (n->v.size) { case 2: return(cgshlconst(leftreg, 1)); case 4: return(cgshlconst(leftreg, 2)); case 8: return(cgshlconst(leftreg, 3)); default: // Load a register with the size and // multiply the leftreg by this size rightreg= cgloadint(n->v.size, P_INT); return (cgmul(leftreg, rightreg)); } default: fatald("Unknown AST operator", n->op); } return (NOREG); // Keep -Wall happy } void genpreamble() { cgpreamble(); } void genpostamble() { cgpostamble(); } void genfreeregs() { freeall_registers(); } void genprintint(int reg) { cgprintint(reg); } void genglobsym(int id) { cgglobsym(id); } int genprimsize(int type) { return (cgprimsize(type)); } ================================================ FILE: 17_Scaling_Offsets/lib/printint.c ================================================ #include void printint(long x) { printf("%ld\n", x); } ================================================ FILE: 17_Scaling_Offsets/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ 
#include "decl.h"
#include <errno.h>

// Compiler setup and top-level execution
// Copyright (c) 2019 Warren Toomey, GPL3

// Initialise global variables
static void init() {
  Line = 1;
  Putback = '\n';
  Globs = 0;
}

// Print out a usage if started incorrectly
static void usage(char *prog) {
  fprintf(stderr, "Usage: %s infile\n", prog);
  exit(1);
}

// Main program: check arguments and print a usage
// if we don't have an argument. Open up the input
// file, create out.s, then parse the global
// declarations, generating assembly as we go.
// Returns 0 on success; exits with status 1 on any error.
int main(int argc, char *argv[]) {

  if (argc != 2)
    usage(argv[0]);

  init();

  // Open up the input file
  if ((Infile = fopen(argv[1], "r")) == NULL) {
    fprintf(stderr, "Unable to open %s: %s\n", argv[1], strerror(errno));
    exit(1);
  }
  // Create the output file
  if ((Outfile = fopen("out.s", "w")) == NULL) {
    fprintf(stderr, "Unable to create out.s: %s\n", strerror(errno));
    exit(1);
  }
  // For now, ensure that void printint() is defined
  addglob("printint", P_CHAR, S_FUNCTION, 0);

  scan(&Token);                 // Get the first token from the input
  genpreamble();                // Output the preamble
  global_declarations();        // Parse the global declarations
  genpostamble();               // Output the postamble

  fclose(Infile);               // Finished with the input

  // Close the output file. Buffered data is flushed here, so a
  // failure means out.s is incomplete and must not be reported as OK.
  if (fclose(Outfile) == EOF) {
    fprintf(stderr, "Unable to write out.s: %s\n", strerror(errno));
    exit(1);
  }
  return (0);
}

================================================
FILE: 17_Scaling_Offsets/misc.c
================================================
#include "defs.h"
#include "data.h"
#include "decl.h"

// Miscellaneous functions
// Copyright (c) 2019 Warren Toomey, GPL3

// Ensure that the current token is t,
// and fetch the next token.
Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d\n", s, Line); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d\n", s1, s2, Line); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d\n", s, d, Line); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d\n", s, c, Line); exit(1); } ================================================ FILE: 17_Scaling_Offsets/scan.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { char *p; p = strchr(s, c); return (p ? p - s : -1); } // Get the next character from the input file. static int next(void) { int c; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return (c); } c = fgetc(Infile); // Read from input file if ('\n' == c) Line++; // Increment line count return (c); } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. 
static int skip(void) {
  int ch;

  // Keep reading until something other than
  // a whitespace character turns up
  do {
    ch = next();
  } while (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' || ch == '\f');

  return (ch);
}

// Read a decimal integer literal whose first
// character is c, and return its value.
static int scanint(int c) {
  int total = 0;
  int digit;

  // Accumulate digits for as long as c is one of 0..9
  for (digit = chrpos("0123456789", c); digit >= 0;
       digit = chrpos("0123456789", c)) {
    total = total * 10 + digit;
    c = next();
  }

  // c is the first character past the literal: give it back
  putback(c);
  return (total);
}

// Read an identifier whose first character is c into buf[],
// which has room for lim bytes including the terminating NUL.
// Return the number of characters stored.
static int scanident(int c, char *buf, int lim) {
  int len = 0;
  int maxlen = lim - 1;         // Leave room for the NUL

  // Letters, digits and underscores make up an identifier
  for (; isalnum(c) || c == '_'; c = next()) {
    // Give up as soon as the buffer is full
    if (len == maxlen)
      fatal("Identifier too long");
    if (len < maxlen)
      buf[len++] = c;
  }

  // c does not belong to the identifier: give it back,
  // then terminate the string
  putback(c);
  buf[len] = '\0';
  return (len);
}

// Given a word from the input, return the matching
// keyword token number or 0 if it's not a keyword.
// Switch on the first letter so that we don't have
// to waste time strcmp()ing against all the keywords.
static int keyword(char *s) { switch (*s) { case 'c': if (!strcmp(s, "char")) return (T_CHAR); break; case 'e': if (!strcmp(s, "else")) return (T_ELSE); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'l': if (!strcmp(s, "long")) return (T_LONG); break; case 'p': if (!strcmp(s, "print")) return (T_PRINT); break; case 'r': if (!strcmp(s, "return")) return (T_RETURN); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; case 'v': if (!strcmp(s, "void")) return (T_VOID); break; } return (0); } // A pointer to a rejected token static struct token *Rejtoken = NULL; // Reject the token that we just scanned void reject_token(struct token *t) { if (Rejtoken != NULL) fatal("Can't reject token twice"); Rejtoken = t; } // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. int scan(struct token *t) { int c, tokentype; // If we have any rejected token, return it if (Rejtoken != NULL) { t = Rejtoken; Rejtoken = NULL; return (1); } // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': t->token = T_PLUS; break; case '-': t->token = T_MINUS; break; case '*': t->token = T_STAR; break; case '/': t->token = T_SLASH; break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { fatalc("Unrecognised character", c); } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else { putback(c); t->token = T_LT; } break; case '>': if ((c = next()) == '=') { t->token = T_GE; } else { putback(c); t->token = T_GT; } break; 
case '&': if ((c = next()) == '&') { t->token = T_LOGAND; } else { putback(c); t->token = T_AMPER; } break; default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if ((tokentype = keyword(Text)) != 0) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token return (1); } ================================================ FILE: 17_Scaling_Offsets/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // Prototypes static struct ASTnode *single_statement(void); // compound_statement: // empty, i.e. no statement // | statement // | statement statements // ; // // statement: print_statement // | declaration // | assignment_statement // | function_call // | if_statement // | while_statement // | for_statement // | return_statement // ; // print_statement: 'print' expression ';' ; // static struct ASTnode *print_statement(void) { struct ASTnode *tree; // Match a 'print' as the first token match(T_PRINT, "print"); // Parse the following expression tree = binexpr(0); // Ensure the two types are compatible. 
tree = modify_type(tree, P_INT, 0); if (tree == NULL) fatal("Incompatible type to print"); // Make an print AST tree tree = mkastunary(A_PRINT, P_NONE, tree, 0); // Return the AST return (tree); } // assignment_statement: identifier '=' expression ';' ; // // Parse an assignment statement and return its AST static struct ASTnode *assignment_statement(void) { struct ASTnode *left, *right, *tree; int id; // Ensure we have an identifier ident(); // This could be a variable or a function call. // If next token is '(', it's a function call if (Token.token == T_LPAREN) return (funccall()); // Not a function call, on with an assignment then! // Check the identifier has been defined then make a leaf node for it // XXX Add structural type test if ((id = findglob(Text)) == -1) { fatals("Undeclared variable", Text); } right = mkastleaf(A_LVIDENT, Gsym[id].type, id); // Ensure we have an equals sign match(T_ASSIGN, "="); // Parse the following expression left = binexpr(0); // Ensure the two types are compatible. left = modify_type(left, right->type, 0); if (left == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree tree = mkastnode(A_ASSIGN, P_INT, left, NULL, right, 0); // Return the AST return (tree); } // if_statement: if_head // | if_head 'else' compound_statement // ; // // if_head: 'if' '(' true_false_expression ')' compound_statement ; // // Parse an IF statement including any // optional ELSE clause and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Ensure // the tree's operation is a comparison. 
condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) fatal("Bad comparison operator"); rparen(); // Get the AST for the compound statement trueAST = compound_statement(); // If we have an 'else', skip it // and get the AST for the compound statement if (Token.token == T_ELSE) { scan(&Token); falseAST = compound_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, P_NONE, condAST, trueAST, falseAST, 0)); } // while_statement: 'while' '(' true_false_expression ')' compound_statement ; // // Parse a WHILE statement and return its AST static struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Ensure // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) fatal("Bad comparison operator"); rparen(); // Get the AST for the compound statement bodyAST = compound_statement(); // Build and return the AST for this statement return (mkastnode(A_WHILE, P_NONE, condAST, NULL, bodyAST, 0)); } // for_statement: 'for' '(' preop_statement ';' // true_false_expression ';' // postop_statement ')' compound_statement ; // // preop_statement: statement (for now) // postop_statement: statement (for now) // // Parse a FOR statement and return its AST static struct ASTnode *for_statement(void) { struct ASTnode *condAST, *bodyAST; struct ASTnode *preopAST, *postopAST; struct ASTnode *tree; // Ensure we have 'for' '(' match(T_FOR, "for"); lparen(); // Get the pre_op statement and the ';' preopAST = single_statement(); semi(); // Get the condition and the ';' condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) fatal("Bad comparison operator"); semi(); // Get the post_op statement and the ')' postopAST = single_statement(); rparen(); // Get the compound statement which is the body bodyAST = compound_statement(); // For now, all four 
sub-trees have to be non-NULL. // Later on, we'll change the semantics for when some are missing // Glue the compound statement and the postop tree tree = mkastnode(A_GLUE, P_NONE, bodyAST, NULL, postopAST, 0); // Make a WHILE loop with the condition and this new body tree = mkastnode(A_WHILE, P_NONE, condAST, NULL, tree, 0); // And glue the preop tree to the A_WHILE tree return (mkastnode(A_GLUE, P_NONE, preopAST, NULL, tree, 0)); } // return_statement: 'return' '(' expression ')' ; // // Parse a return statement and return its AST static struct ASTnode *return_statement(void) { struct ASTnode *tree; // Can't return a value if function returns P_VOID if (Gsym[Functionid].type == P_VOID) fatal("Can't return from a void function"); // Ensure we have 'return' '(' match(T_RETURN, "return"); lparen(); // Parse the following expression tree = binexpr(0); // Ensure this is compatible with the function's type tree = modify_type(tree, Gsym[Functionid].type, 0); if (tree == NULL) fatal("Incompatible type to print"); // Add on the A_RETURN node tree = mkastunary(A_RETURN, P_NONE, tree, 0); // Get the ')' rparen(); return (tree); } // Parse a single statement and return its AST static struct ASTnode *single_statement(void) { int type; switch (Token.token) { case T_PRINT: return (print_statement()); case T_CHAR: case T_INT: case T_LONG: // The beginning of a variable declaration. // Parse the type and get the identifier. // Then parse the rest of the declaration. // XXX: These are globals at present. 
type = parse_type(); ident(); var_declaration(type); return (NULL); // No AST generated here case T_IDENT: return (assignment_statement()); case T_IF: return (if_statement()); case T_WHILE: return (while_statement()); case T_FOR: return (for_statement()); case T_RETURN: return (return_statement()); default: fatald("Syntax error, token", Token.token); } return (NULL); // Keep -Wall happy } // Parse a compound statement // and return its AST struct ASTnode *compound_statement(void) { struct ASTnode *left = NULL; struct ASTnode *tree; // Require a left curly bracket lbrace(); while (1) { // Parse a single statement tree = single_statement(); // Some statements must be followed by a semicolon if (tree != NULL && (tree->op == A_PRINT || tree->op == A_ASSIGN || tree->op == A_RETURN || tree->op == A_FUNCCALL)) semi(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, P_NONE, left, NULL, tree, 0); } // When we hit a right curly bracket, // skip past it and return the AST if (Token.token == T_RBRACE) { rbrace(); return (left); } } } ================================================ FILE: 17_Scaling_Offsets/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 // Determine if the symbol s is in the global symbol table. // Return its slot position or -1 if not found. int findglob(char *s) { int i; for (i = 0; i < Globs; i++) { if (*s == *Gsym[i].name && !strcmp(s, Gsym[i].name)) return (i); } return (-1); } // Get the position of a new global symbol slot, or die // if we've run out of positions. static int newglob(void) { int p; if ((p = Globs++) >= NSYMBOLS) fatal("Too many global symbols"); return (p); } // Add a global symbol to the symbol table. // Also set up its type and structural type. 
// Return the slot number in the symbol table int addglob(char *name, int type, int stype, int endlabel) { int y; // If this is already in the symbol table, return the existing slot if ((y = findglob(name)) != -1) return (y); // Otherwise get a new slot, fill it in and // return the slot number y = newglob(); Gsym[y].name = strdup(name); Gsym[y].type = type; Gsym[y].stype = stype; Gsym[y].endlabel = endlabel; return (y); } ================================================ FILE: 17_Scaling_Offsets/tests/input01.c ================================================ void main() { printint(12 * 3); printint(18 - 2 * 4); printint(1 + 2 + 9 - 5/2 + 3*5); } ================================================ FILE: 17_Scaling_Offsets/tests/input02.c ================================================ void main() { int fred; int jim; fred= 5; jim= 12; printint(fred + jim); } ================================================ FILE: 17_Scaling_Offsets/tests/input03.c ================================================ void main() { int x; x= 1; printint(x); x= x + 1; printint(x); x= x + 1; printint(x); x= x + 1; printint(x); x= x + 1; printint(x); } ================================================ FILE: 17_Scaling_Offsets/tests/input04.c ================================================ void main() { int x; x= 7 < 9; printint(x); x= 7 <= 9; printint(x); x= 7 != 9; printint(x); x= 7 == 7; printint(x); x= 7 >= 7; printint(x); x= 7 <= 7; printint(x); x= 9 > 7; printint(x); x= 9 >= 7; printint(x); x= 9 != 7; printint(x); } ================================================ FILE: 17_Scaling_Offsets/tests/input05.c ================================================ void main() { int i; int j; i=6; j=12; if (i < j) { printint(i); } else { printint(j); } } ================================================ FILE: 17_Scaling_Offsets/tests/input06.c ================================================ void main() { int i; i=1; while (i <= 10) { printint(i); i= i + 1; } } 
================================================ FILE: 17_Scaling_Offsets/tests/input07.c ================================================ void main() { int i; for (i= 1; i <= 10; i= i + 1) { printint(i); } } ================================================ FILE: 17_Scaling_Offsets/tests/input08.c ================================================ void main() { int i; for (i= 1; i <= 10; i= i + 1) { printint(i); } } ================================================ FILE: 17_Scaling_Offsets/tests/input09.c ================================================ void main() { int i; for (i= 1; i <= 10; i= i + 1) { printint(i); } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { printint(2 * b - a); } } ================================================ FILE: 17_Scaling_Offsets/tests/input10.c ================================================ void main() { int i; char j; j= 20; printint(j); i= 10; printint(i); for (i= 1; i <= 5; i= i + 1) { printint(i); } for (j= 253; j != 2; j= j + 1) { printint(j); } } ================================================ FILE: 17_Scaling_Offsets/tests/input11.c ================================================ int main() { int i; char j; long k; i= 10; printint(i); j= 20; printint(j); k= 30; printint(k); for (i= 1; i <= 5; i= i + 1) { printint(i); } for (j= 253; j != 4; j= j + 1) { printint(j); } for (k= 1; k <= 5; k= k + 1) { printint(k); } return(i); printint(12345); return(3); } ================================================ FILE: 17_Scaling_Offsets/tests/input12.c ================================================ int fred() { return(5); } void main() { int x; x= fred(2); printint(x); } ================================================ FILE: 17_Scaling_Offsets/tests/input13.c ================================================ int fred() { return(56); } void main() { int dummy; int result; dummy= printint(23); result= fred(10); dummy= printint(result); } ================================================ FILE: 
17_Scaling_Offsets/tests/input14.c ================================================ int fred() { return(20); } int main() { int result; printint(10); result= fred(15); printint(result); printint(fred(15)+10); return(0); } ================================================ FILE: 17_Scaling_Offsets/tests/input15.c ================================================ int main() { char a; char *b; char c; int d; int *e; int f; a= 18; printint(a); b= &a; c= *b; printint(c); d= 12; printint(d); e= &d; f= *e; printint(f); return(0); } ================================================ FILE: 17_Scaling_Offsets/tests/input16.c ================================================ int c; int d; int *e; int f; int main() { c= 12; d=18; printint(c); e= &c + 1; f= *e; printint(f); return(0); } ================================================ FILE: 17_Scaling_Offsets/tests/mktests ================================================ #!/bin/sh # Make the output files for each test for i in input*c do if [ ! -f "out.$i" ] then cc -o out $i ../lib/printint.c ./out > out.$i rm -f out fi done ================================================ FILE: 17_Scaling_Offsets/tests/out.input01.c ================================================ 36 10 25 ================================================ FILE: 17_Scaling_Offsets/tests/out.input02.c ================================================ 17 ================================================ FILE: 17_Scaling_Offsets/tests/out.input03.c ================================================ 1 2 3 4 5 ================================================ FILE: 17_Scaling_Offsets/tests/out.input04.c ================================================ 1 1 1 1 1 1 1 1 1 ================================================ FILE: 17_Scaling_Offsets/tests/out.input05.c ================================================ 6 ================================================ FILE: 17_Scaling_Offsets/tests/out.input06.c ================================================ 1 2 3 4 5 6 7 8 9 10 
================================================ FILE: 17_Scaling_Offsets/tests/out.input07.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 17_Scaling_Offsets/tests/out.input08.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 17_Scaling_Offsets/tests/out.input09.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 17_Scaling_Offsets/tests/out.input10.c ================================================ 20 10 1 2 3 4 5 253 254 255 0 1 ================================================ FILE: 17_Scaling_Offsets/tests/out.input11.c ================================================ 10 20 30 1 2 3 4 5 253 254 255 0 1 2 3 1 2 3 4 5 ================================================ FILE: 17_Scaling_Offsets/tests/out.input12.c ================================================ 5 ================================================ FILE: 17_Scaling_Offsets/tests/out.input13.c ================================================ 23 56 ================================================ FILE: 17_Scaling_Offsets/tests/out.input14.c ================================================ 10 20 30 ================================================ FILE: 17_Scaling_Offsets/tests/out.input15.c ================================================ 18 18 12 12 ================================================ FILE: 17_Scaling_Offsets/tests/out.input16.c ================================================ 12 18 ================================================ FILE: 17_Scaling_Offsets/tests/runtests ================================================ #!/bin/sh # Run each test and compare # against known good output if [ ! -f ../comp1 ] then echo "Need to build ../comp1 first!"; exit 1 fi for i in input* do if [ ! -f "out.$i" ] then echo "Can't run test on $i, no output file!" 
else echo -n $i ../comp1 $i cc -o out out.s ../lib/printint.c ./out > trial.$i cmp -s "out.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo else echo ": OK" fi rm -f out out.s "trial.$i" fi done ================================================ FILE: 17_Scaling_Offsets/tests/runtestsn ================================================ #!/bin/sh # Run each test and compare # against known good output if [ ! -f ../compn ] then echo "Need to build ../compn first!"; exit 1 fi for i in input* do if [ ! -f "out.$i" ] then echo "Can't run test on $i, no output file!" else echo -n $i ../compn $i nasm -f elf64 out.s cc -no-pie -Wall -o out out.o ../lib/printint.c ./out > trial.$i cmp -s "out.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo else echo ": OK" fi rm -f out out.o out.s "trial.$i" fi done ================================================ FILE: 17_Scaling_Offsets/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->type = type; n->left = left; n->mid = mid; n->right = right; n->v.intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, int intvalue) { return (mkastnode(op, type, NULL, NULL, NULL, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, int intvalue) { return (mkastnode(op, type, left, NULL, NULL, intvalue)); } ================================================ FILE: 
17_Scaling_Offsets/types.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Types and type handling // Copyright (c) 2019 Warren Toomey, GPL3 // Return true if a type is an int type // of any size, false otherwise int inttype(int type) { if (type == P_CHAR || type == P_INT || type == P_LONG) return (1); return (0); } // Return true if a type is of pointer type int ptrtype(int type) { if (type == P_VOIDPTR || type == P_CHARPTR || type == P_INTPTR || type == P_LONGPTR) return (1); return (0); } // Given a primitive type, return // the type which is a pointer to it int pointer_to(int type) { int newtype; switch (type) { case P_VOID: newtype = P_VOIDPTR; break; case P_CHAR: newtype = P_CHARPTR; break; case P_INT: newtype = P_INTPTR; break; case P_LONG: newtype = P_LONGPTR; break; default: fatald("Unrecognised in pointer_to: type", type); } return (newtype); } // Given a primitive pointer type, return // the type which it points to int value_at(int type) { int newtype; switch (type) { case P_VOIDPTR: newtype = P_VOID; break; case P_CHARPTR: newtype = P_CHAR; break; case P_INTPTR: newtype = P_INT; break; case P_LONGPTR: newtype = P_LONG; break; default: fatald("Unrecognised in value_at: type", type); } return (newtype); } // Given an AST tree and a type which we want it to become, // possibly modify the tree by widening or scaling so that // it is compatible with this type. Return the original tree // if no changes occurred, a modified tree, or NULL if the // tree is not compatible with the given type. // If this will be part of a binary operation, the AST op is not zero. 
struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op) { int ltype; int lsize, rsize; ltype = tree->type; // Compare scalar int types if (inttype(ltype) && inttype(rtype)) { // Both types same, nothing to do if (ltype == rtype) return (tree); // Get the sizes for each type lsize = genprimsize(ltype); rsize = genprimsize(rtype); // Tree's size is too big if (lsize > rsize) return (NULL); // Widen to the right if (rsize > lsize) return (mkastunary(A_WIDEN, rtype, tree, 0)); } // For pointers on the left if (ptrtype(ltype)) { // OK is same type on right and not doing a binary op if (op == 0 && ltype == rtype) return (tree); } // We can scale only on A_ADD or A_SUBTRACT operation if (op == A_ADD || op == A_SUBTRACT) { // Left is int type, right is pointer type and the size // of the original type is >1: scale the left if (inttype(ltype) && ptrtype(rtype)) { rsize = genprimsize(value_at(rtype)); if (rsize > 1) return (mkastunary(A_SCALE, rtype, tree, rsize)); } } // If we get here, the types are not compatible return (NULL); } ================================================ FILE: 18_Lvalues_Revisited/Makefile ================================================ SRCS= cg.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c \ sym.c tree.c types.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c \ sym.c tree.c types.c ARMSRCS= cg_arm.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c \ sym.c tree.c types.c comp1: $(SRCS) cc -o comp1 -g -Wall $(SRCS) compn: $(SRCN) cc -o compn -g -Wall $(SRCN) comp1arm: $(ARMSRCS) cc -o comp1arm -g -Wall $(ARMSRCS) cp comp1arm comp1 clean: rm -f comp1 comp1arm compn *.o *.s out test: comp1 tests/runtests (cd tests; chmod +x runtests; ./runtests) armtest: comp1arm tests/runtests (cd tests; chmod +x runtests; ./runtests) testn: compn tests/runtestsn (cd tests; chmod +x runtestsn; ./runtestsn) test17: comp1 tests/input17.c lib/printint.c ./comp1 tests/input17.c cc -o out out.s lib/printint.c ./out armtest17: comp1arm 
tests/input17.c lib/printint.c ./comp1 tests/input17.c cc -o out out.s lib/printint.c ./out test17n: compn tests/input17.c lib/printint.c ./compn tests/input17.c nasm -f elf64 out.s cc -no-pie -o out lib/printint.c out.o ./out ================================================ FILE: 18_Lvalues_Revisited/Readme.md ================================================ # Part 18: Lvalues and Rvalues Revisited As this is a work in progress with no design document to guide me, occasionally I need to remove code that I've already written and rewrite it to make it more general, or to fix shortcomings. That's the case for this part of the journey. We added our initial support for pointers in part 15 so that we could write code like this: ```c int x; int *y; int z; x= 12; y= &x; z= *y; ``` That's all fine and good, but I knew that we would eventually have to support the use of pointers on the left-hand side of assignment statements, e.g. ```c *y = 14; ``` To do this, we have to revisit the topic of [lvalues and rvalues](https://en.wikipedia.org/wiki/Value_(computer_science)#lrvalue). To revise, an *lvalue* is a value that is tied to a specific location, whereas an *rvalue* is a value that isn't. Lvalues are persistent in that we can retrieve their value in future instructions. Rvalues, on the other hand, are evanescent: we can discard them once their use is finished. ### Examples of Rvalues and Lvalues An example of an rvalue is an integer literal, e.g. 23. We can use it in an expression and then discard it afterwards. Examples of lvalues are locations in memory which we can *store into*, such as: ``` a Scalar variable a b[0] Element zero of array b *c The location that pointer c points to (*d)[0] Element zero of the array that d points to ``` As I mentioned before, the names *lvalue* and *rvalue* come from the two sides of an assignment statement: lvalues are on the left, rvalues are on the right.
## Extending Our Notion of Lvalues Right now, the compiler treats nearly everything as an rvalue. For variables, it retrieves the value from the variable's location. Our only nod to the concept of the lvalue is to mark identifiers on the left of an assignment as an A_LVIDENT. We manually deal with this in `genAST()`: ```c case A_IDENT: return (cgloadglob(n->v.id)); case A_LVIDENT: return (cgstorglob(reg, n->v.id)); case A_ASSIGN: // The work has already been done, return the result return (rightreg); ``` which we use for statements like `a= b;`. But now we need to mark more than just identifiers on the left-hand side of an assignment as lvalues. It's also important to make it easy to generate assembly code in the process. While I was writing this part, I tried the idea of prepending a "A_LVALUE" AST node as the parent to a tree, to tell the code generator to output the lvalue version of the code for it instead of the rvalue version. But this turned out to be too late: the sub-tree was already evaluated and rvalue code for it had already been generated. ### Yet Another AST Node Change I'm loath to keep adding more fields to the AST node, but this is what I ended up doing. We now have a field to indicate if the node should generate lvalue code or rvalue code: ```c // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates int rvalue; // True if the node is an rvalue ... }; ``` The `rvalue` field only holds one bit of information; later on, if I need to store other booleans, I will be able to use this as a bitfield. Question: why did I make the field indicate the "rvalue"ness of the node and not the "lvalue"ness? After all, most of the nodes in our AST trees will hold rvalues and not lvalues. While I was reading Nils Holm's book on SubC, I read this line: > Since an indirection cannot be reversed later, the parser assumes each partial expression to be an lvalue. 
Consider the parser working on the statement `b = a + 2`. After parsing the `b` identifier, we cannot yet tell if this is an lvalue or an rvalue. It's not until we hit the `=` token that we can conclude that it's an lvalue. Also, the C language allows assignments as expressions, so we can also write `b = c = a + 2`. Again, when we parse the `a` identifier, we can't tell if it's an lvalue or an rvalue until we parse the next token. Therefore, I chose to assume each AST node to be an lvalue by default. Once we can definitely tell if a node is an rvalue, we can then set the `rvalue` field to indicate this. ## Assignment Expressions I also mentioned that the C language allows assignments as expressions. Now that we have a clear lvalue/rvalue distinction, we can shift the parsing of assignments as statements and move the code into the expression parser. I'll cover this later. It's now time to see what was done to the compiler code base to make this all happen. As always, we start with the tokens and the scanner first. ## Token and Scanning Changes We have no new tokens or new keywords this time. But there is a change which affects the token code. The `=` is now a binary operator with an expression on each side, so we need to integrate it with the other binary operators. According to [this list of C operators](https://en.cppreference.com/w/c/language/operator_precedence), the `=` operator has much lower precedence than `+` or `-`. We therefore need to rearrange our list of operators and their precedences. In `defs.h`: ```c // Token types enum { T_EOF, // Operators T_ASSIGN, T_PLUS, T_MINUS, ... ``` In `expr.c`, we need to update the code that holds the precedences for our binary operators: ```c // Operator precedence for each token.
Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, // T_EOF, T_ASSIGN 20, 20, // T_PLUS, T_MINUS 30, 30, // T_STAR, T_SLASH 40, 40, // T_EQ, T_NE 50, 50, 50, 50 // T_LT, T_GT, T_LE, T_GE }; ``` ## Changes to the Parser Now we have to remove the parsing of assignments as statements and make them into expressions. I also took the liberty of removing the "print" statement from the language, as we can now call `printint()`. So, in `stmt.c`, I've removed both `print_statement()` and `assignment_statement()`. > I also removed the T_PRINT and 'print' keywords from the language. And now that our concept of lvalues and rvalues are different, I also removed the A_LVIDENT AST node type. For now, the statement parser in `single_statement()` in `stmt.c` assumes that what's coming up next is an expression if it doesn't recognise the first token: ```c static struct ASTnode *single_statement(void) { int type; switch (Token.token) { ... default: // For now, see if this is an expression. // This catches assignment statements. return (binexpr(0)); } } ``` This does mean that `2+3;` will be treated as a legal statement for now. We will fix this later. And in `compound_statement()` we also ensure that the expression is followed by a semicolon: ```c // Some statements must be followed by a semicolon if (tree != NULL && (tree->op == A_ASSIGN || tree->op == A_RETURN || tree->op == A_FUNCCALL)) semi(); ``` ## Expression Parsing You might think that, now that `=` is marked as a binary expression operator and we have set its precedence, that we are all done. Not so! There are two things we have to worry about: 1. We need to generate the assembly code for the right-hand rvalue before the code for the left-hand lvalue. We used to do this in the statement parser, and we'll have to do this in the expression parser. 2. Assignment expressions are *right associative*: the operator binds more tightly to the expression on the right than to the left. 
We haven't touched right associativity before. Let's look at an example. Consider the expression `2 + 3 + 4`. We can happily parse this from left to right and build the AST tree: ``` + / \ + 4 / \ 2 3 ``` For the expression `a= b= 3`, if we do the above, we end up with the tree: ``` = / \ = 3 / \ a b ``` We don't want to do `a= b` before then trying to assign the 3 to this left sub-tree. Instead, what we want to generate is this tree: ``` = / \ = a / \ 3 b ``` I've reversed the leaf nodes to be in assembly output order. We first store 3 in `b`. Then the result of this assignment, 3, is stored in `a`. ### Modifying the Pratt Parser We are using a Pratt parser to correctly parse the precedences of our binary operators. I did a search to find out how to add right-associativity to a Pratt parser, and found this information in [Wikipedia](https://en.wikipedia.org/wiki/Operator-precedence_parser): ``` while lookahead is a binary operator whose precedence is greater than op's, or a right-associative operator whose precedence is equal to op's ``` So, for right-associative operators, we test if the next operator has the same precedence as the operator we are up to. That's a simple modification to the parser's logic. I've introduced a new function in `expr.c` to determine if an operator is right-associative: ```c // Return true if a token is right-associative, // false otherwise. static int rightassoc(int tokentype) { if (tokentype == T_ASSIGN) return(1); return(0); } ``` In `binexpr()` we alter the while loop as mentioned before, and we also put in A_ASSIGN-specific code to swap the child trees around: ```c struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; struct ASTnode *ltemp, *rtemp; int ASTop; int tokentype; // Get the tree on the left. left = prefix(); ... 
// While the precedence of this token is more than that of the // previous token precedence, or it's right associative and // equal to the previous token's precedence while ((op_precedence(tokentype) > ptp) || (rightassoc(tokentype) && op_precedence(tokentype) == ptp)) { ... // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); ASTop = binastop(tokentype); if (ASTop == A_ASSIGN) { // Assignment // Make the right tree into an rvalue right->rvalue= 1; ... // Switch left and right around, so that the right expression's // code will be generated before the left expression ltemp= left; left= right; right= ltemp; } else { // We are not doing an assignment, so both trees should be rvalues left->rvalue= 1; right->rvalue= 1; } ... } ... } ``` Notice also the code to explicitly mark the right-hand side of the assignment expression as an rvalue. And, for non assignments, both sides of the expression get marked as rvalues. Scattered through `binexpr()` are a few more lines of code to explicitly set a tree to be an rvalue. These get performed when we hit a leaf node. For example the `a` identifier in `b= a;` needs to be marked as an rvalue, but we will never enter the body of the while loop to do this. ## Printing Out the Tree That is the parser changes out of the road. We now have several nodes marked as rvalues, and some not marked at all. At this point, I realised that I was having trouble visualising the AST trees that get generated. I've written a function called `dumpAST()` in `tree.c` to print out each AST tree to standard output. It's not sophisticated. The compiler now has a `-T` command line argument which sets an internal flag, `O_dumpAST`. 
And the `global_declarations()` code in `decl.c` now does: ```c // Parse a function declaration and // generate the assembly code for it tree = function_declaration(type); if (O_dumpAST) { dumpAST(tree, NOLABEL, 0); fprintf(stdout, "\n\n"); } genAST(tree, NOLABEL, 0); ``` The tree dumper code prints out each node in tree traversal order, so the output isn't tree shaped. However, the indentation of each node indicates its depth in the tree. Let's take a look at some example AST trees for assignment expressions. We'll start with `a= b= 34;`: ``` A_INTLIT 34 A_WIDEN A_IDENT b A_ASSIGN A_IDENT a A_ASSIGN ``` The 34 is small enough to be a char-sized literal, but it gets widened to match the type of `b`. `A_IDENT b` doesn't say "rvalue", so it's an lvalue. The value of 34 is stored in the `b` lvalue. This value is then stored in the `a` lvalue. Now let's try `a= b + 34;`: ``` A_IDENT rval b A_INTLIT 34 A_WIDEN A_ADD A_IDENT a A_ASSIGN ``` You can see the "rval `b`" now, so `b`'s value is loaded into a register, whereas the result of the `b+34` expression is stored in the `a` lvalue. Let's do one more, `*x= *y`: ``` A_IDENT y A_DEREF rval A_IDENT x A_DEREF A_ASSIGN ``` The identifier `y` is dereferenced and this rvalue is loaded. This is then stored in the lvalue which is `x` dereferenced. ## Converting The Above into Code Now that the lvalue and rvalue nodes are clearly identified, we can turn our attention to how we translate each into assembly code. There are many nodes like integer literals, addition etc. which are clearly rvalues. It is only the AST node types which could possibly be lvalues that the code in `genAST()` in `gen.c` needs to worry about. Here is what I have for these node types: ```c case A_IDENT: // Load our value if we are an rvalue // or we are being dereferenced if (n->rvalue || parentASTop== A_DEREF) return (cgloadglob(n->v.id)); else return (NOREG); case A_ASSIGN: // Are we assigning to an identifier or through a pointer?
switch (n->right->op) { case A_IDENT: return (cgstorglob(leftreg, n->right->v.id)); case A_DEREF: return (cgstorderef(leftreg, rightreg, n->right->type)); default: fatald("Can't A_ASSIGN in genAST(), op", n->op); } case A_DEREF: // If we are an rvalue, dereference to get the value we point at // otherwise leave it for A_ASSIGN to store through the pointer if (n->rvalue) return (cgderef(leftreg, n->left->type)); else return (leftreg); ``` ### Changes to the x86-64 Code Generator The only change to `cg.c` is a function which allows us to store a value through a pointer: ```c // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, (%s)\n", breglist[r1], reglist[r2]); break; case P_INT: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ``` which is nearly exactly the opposite of `cgderef()` which appears immediately before this new function. ## Conclusion and What's Next For this part of the journey, I think I took two or three different design directions, tried them, hit a dead end and backed out before I reached the solution described here. I know that, in SubC, Nils passes a single "lvalue" structure which holds the "lvalue"-ness of the node of the AST tree being processed at any point in time. But his tree only holds one expression; the AST tree for this compiler holds one whole function's worth of nodes. And I'm sure that, if you looked in three other compilers, you would probably find three other solutions too. There are many things that we could take on next. There are a bunch of C operators that would be relatively easy to add to the compiler. We have A_SCALE, so we could attempt structures. As yet, there are no local variables, which will need attending to at some point. 
And, we should generalise functions to have multiple arguments and the ability to access them. In the next part of our compiler writing journey, I'd like to tackle arrays. This will be a combination of dereferencing, lvalues and rvalues, and scaling the array indices by the size of the array's elements. We have all the semantic components in place, but we'll need to add tokens, parsing and the actual index functionality. It should be an interesting topic like this one was. [Next step](../19_Arrays_pt1/Readme.md) ================================================ FILE: 18_Lvalues_Revisited/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. // We need a list of byte and doubleword registers, too static int freereg[4]; static char *reglist[4] = { "%r8", "%r9", "%r10", "%r11" }; static char *breglist[4] = { "%r8b", "%r9b", "%r10b", "%r11b" }; static char *dreglist[4] = { "%r8d", "%r9d", "%r10d", "%r11d" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Gsym[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, @function\n" "%s:\n" "\tpushq\t%%rbp\n" "\tmovq\t%%rsp, %%rbp\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Gsym[id].endlabel); fputs("\tpopq %rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]); return (r); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Print out the code to initialise it switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzbq\t%s(%%rip), %s\n", Gsym[id].name, reglist[r]); break; case P_INT: fprintf(Outfile, "\tmovzbl\t%s(\%%rip), %s\n", Gsym[id].name, reglist[r]); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tmovq\t%s(\%%rip), %s\n", Gsym[id].name, reglist[r]); break; default: fatald("Bad type in cgloadglob:", Gsym[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); 
free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); free_register(r2); return (r1); } // Call printint() with the given register void cgprintint(int r) { fprintf(Outfile, "\tmovq\t%s, %%rdi\n", reglist[r]); fprintf(Outfile, "\tcall\tprintint\n"); free_register(r); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { // Get a new register int outr = alloc_register(); fprintf(Outfile, "\tmovq\t%s, %%rdi\n", reglist[r]); fprintf(Outfile, "\tcall\t%s\n", Gsym[id].name); fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]); free_register(r); return (outr); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsalq\t$%d, %s\n", val, reglist[r]); return(r); } // Store a register's value into a variable int cgstorglob(int r, int id) { switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %s(\%%rip)\n", breglist[r], Gsym[id].name); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %s(\%%rip)\n", dreglist[r], Gsym[id].name); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tmovq\t%s, %s(\%%rip)\n", reglist[r], Gsym[id].name); break; default: fatald("Bad type in cgloadglob:", Gsym[id].type); } return (r); } // Array of type sizes in P_XXX order. // 0 means no size. static int psize[] = { 0, 0, 1, 4, 8, 8, 8, 8, 8 }; // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { // Check the type is valid if (type < P_NONE || type > P_LONGPTR) fatal("Bad type in cgprimsize()"); return (psize[type]); } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Gsym[id].type); fprintf(Outfile, "\t.data\n" "\t.globl\t%s\n", Gsym[id].name); switch(typesize) { case 1: fprintf(Outfile, "%s:\t.byte\t0\n", Gsym[id].name); break; case 4: fprintf(Outfile, "%s:\t.long\t0\n", Gsym[id].name); break; case 8: fprintf(Outfile, "%s:\t.quad\t0\n", Gsym[id].name); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { // Generate code depending on the function's type switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzbl\t%s, %%eax\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %%eax\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", Gsym[id].type); } cgjump(Gsym[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. 
Return a new register int cgaddress(int id) { int r = alloc_register(); fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", Gsym[id].name, reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tmovzbq\t(%s), %s\n", reglist[r], reglist[r]); break; case P_INTPTR: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; case P_LONGPTR: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, (%s)\n", breglist[r1], reglist[r2]); break; case P_INT: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 18_Lvalues_Revisited/cg_arm.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for ARMv6 on Raspberry Pi // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. static int freereg[4]; static char *reglist[4] = { "r4", "r5", "r6", "r7" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // We have to store large integer literal values in memory. // Keep a list of them which will be output in the postamble #define MAXINTS 1024 int Intlist[MAXINTS]; static int Intslot = 0; // Determine the offset of a large integer // literal from the .L3 label. If the integer // isn't in the list, add it. static void set_int_offset(int val) { int offset = -1; // See if it is already there for (int i = 0; i < Intslot; i++) { if (Intlist[i] == val) { offset = 4 * i; break; } } // Not in the list, so add it if (offset == -1) { offset = 4 * Intslot; if (Intslot == MAXINTS) fatal("Out of int slots in set_int_offset()"); Intlist[Intslot++] = val; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L3+%d\n", offset); } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Print out the assembly postamble void cgpostamble() { // Print out the global variables fprintf(Outfile, ".L2:\n"); for (int i = 0; i < Globs; i++) { if (Gsym[i].stype == S_VARIABLE) fprintf(Outfile, "\t.word %s\n", Gsym[i].name); } // Print out the integer literals fprintf(Outfile, ".L3:\n"); for (int i = 0; i < Intslot; i++) { fprintf(Outfile, "\t.word %d\n", Intlist[i]); } } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Gsym[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, \%%function\n" "%s:\n" "\tpush\t{fp, lr}\n" "\tadd\tfp, sp, #4\n" "\tsub\tsp, sp, #8\n" "\tstr\tr0, [fp, #-8]\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Gsym[id].endlabel); fputs("\tsub\tsp, fp, #4\n" "\tpop\t{fp, pc}\n" "\t.align\t2\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. 
int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); // If the literal value is small, do it with one instruction if (value <= 1000) fprintf(Outfile, "\tmov\t%s, #%d\n", reglist[r], value); else { set_int_offset(value); fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); } return (r); } // Determine the offset of a variable from the .L2 // label. Yes, this is inefficient code. static void set_var_offset(int id) { int offset = 0; // Walk the symbol table up to id. // Find S_VARIABLEs and add on 4 until // we get to our variable for (int i = 0; i < id; i++) { if (Gsym[i].stype == S_VARIABLE) offset += 4; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L2+%d\n", offset); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tldrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgloadglob:", Gsym[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s, %s\n", reglist[r1], reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\tmul\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // 
return the number of the register with the result int cgdiv(int r1, int r2) { // To do a divide: r0 holds the dividend, r1 holds the divisor. // The quotient is in r0. fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r1]); fprintf(Outfile, "\tmov\tr1, %s\n", reglist[r2]); fprintf(Outfile, "\tbl\t__aeabi_idiv\n"); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r1]); free_register(r2); return (r1); } // Call printint() with the given register void cgprintint(int r) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]); fprintf(Outfile, "\tbl\tprintint\n"); fprintf(Outfile, "\tnop\n"); free_register(r); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]); fprintf(Outfile, "\tbl\t%s\n", Gsym[id].name); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r]); return (r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tlsl\t%s, %s, #%d\n", reglist[r], reglist[r], val); return(r); } // Store a register's value into a variable int cgstorglob(int r, int id) { // Get the offset to the variable set_var_offset(id); switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tstr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgstorglob:", Gsym[id].type); } return (r); } // Array of type sizes in P_XXX order. // 0 means no size. static int psize[] = { 0, 0, 1, 4, 4, 4, 4, 4 }; // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { // Check the type is valid if (type < P_NONE || type > P_LONGPTR) fatal("Bad type in cgprimsize()"); return (psize[type]); } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Gsym[id].type); fprintf(Outfile, "\t.data\n" "\t.globl\t%s\n", Gsym[id].name); switch(typesize) { case 1: fprintf(Outfile, "%s:\t.byte\t0\n", Gsym[id].name); break; case 4: fprintf(Outfile, "%s:\t.long\t0\n", Gsym[id].name); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "moveq", "movne", "movlt", "movgt", "movle", "movge" }; // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "movne", "moveq", "movge", "movle", "movgt", "movlt" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #1\n", cmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #0\n", invcmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\tuxtb\t%s, %s\n", reglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tb\tL%d\n", l); } // List of inverted branch instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *brlist[] = { "bne", "beq", "bge", "ble", "bgt", "blt" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", brlist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[reg]); cgjump(Gsym[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. Return a new register int cgaddress(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); fprintf(Outfile, "\tmov\t%s, r3\n", reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tldrb\t%s, [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [%s]\n", reglist[r1], reglist[r2]); break; case P_INT: case P_LONG: fprintf(Outfile, "\tstr\t%s, [%s]\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 18_Lvalues_Revisited/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. 
// We need a list of byte and doubleword registers, too static int freereg[4]; static char *reglist[4] = { "r8", "r9", "r10", "r11" }; static char *breglist[4] = { "r8b", "r9b", "r10b", "r11b" }; static char *dreglist[4] = { "r8d", "r9d", "r10d", "r11d" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\textern\tprintint\n", Outfile); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Gsym[id].name; fprintf(Outfile, "\tsection\t.text\n" "\tglobal\t%s\n" "%s:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n", name, name); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Gsym[id].endlabel); fputs("\tpop rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Load a value from a variable into a register. 
// Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Print out the code to initialise it switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], Gsym[id].name); break; case P_INT: fprintf(Outfile, "\txor\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tmov\t%s, dword [%s]\n", dreglist[r], Gsym[id].name); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], Gsym[id].name); break; default: fatald("Bad type in cgloadglob:", Gsym[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } // Call printint() with the given register void cgprintint(int r) { fprintf(Outfile, "\tmov\trdi, %s\n", reglist[r]); fprintf(Outfile, "\tcall\tprintint\n"); free_register(r); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { // Get a new register int outr = alloc_register(); 
fprintf(Outfile, "\tmov\trdi, %s\n", reglist[r]); fprintf(Outfile, "\tcall\t%s\n", Gsym[id].name); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[outr]); free_register(r); return (outr); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsal\t%s, %d\n", reglist[r], val); return(r); } // Store a register's value into a variable int cgstorglob(int r, int id) { switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], %s\n", Gsym[id].name, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", Gsym[id].name, dreglist[r]); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tmov\t[%s], %s\n", Gsym[id].name, reglist[r]); break; default: fatald("Bad type in cgloadglob:", Gsym[id].type); } return (r); } // Array of type sizes in P_XXX order. // 0 means no size. static int psize[] = { 0, 0, 1, 4, 8, 8, 8, 8, 8 }; // Given a P_XXX type value, return the // size of a primitive type in bytes. int cgprimsize(int type) { // Check the type is valid if (type < P_NONE || type > P_LONGPTR) fatal("Bad type in cgprimsize()"); return (psize[type]); } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Gsym[id].type); fprintf(Outfile, "\tsection\t.data\n" "\tglobal\t%s\n", Gsym[id].name); switch(typesize) { case 1: fprintf(Outfile, "%s:\tdb\t0\n", Gsym[id].name); break; case 4: fprintf(Outfile, "%s:\tdd\t0\n", Gsym[id].name); break; case 8: fprintf(Outfile, "%s:\tdq\t0\n", Gsym[id].name); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { // Generate code depending on the function's type switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzx\teax, %s\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmov\teax, %s\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", Gsym[id].type); } cgjump(Gsym[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. 
Return a new register int cgaddress(int id) { int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r], Gsym[id].name); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: fprintf(Outfile, "\tmovzx\t%s, word [%s]\n", reglist[r], reglist[r]); break; case P_LONGPTR: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], byte %s\n", reglist[r2], breglist[r1]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; case P_LONG: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 18_Lvalues_Revisited/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Putback; // Character put back by scanner extern_ int Functionid; // Symbol id of the current function extern_ int Globs; // Position of next free global symbol slot extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ struct token Token; // Last token scanned extern_ char Text[TEXTLEN + 1]; // Last identifier scanned extern_ struct symtable Gsym[NSYMBOLS]; // Global symbol table extern_ int O_dumpAST; ================================================ FILE: 18_Lvalues_Revisited/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 // Parse the current 
token and return // a primitive type enum value. Also // scan in the next token int parse_type(void) { int type; switch (Token.token) { case T_VOID: type = P_VOID; break; case T_CHAR: type = P_CHAR; break; case T_INT: type = P_INT; break; case T_LONG: type = P_LONG; break; default: fatald("Illegal type, token", Token.token); } // Scan in one or more further '*' tokens // and determine the correct pointer type while (1) { scan(&Token); if (Token.token != T_STAR) break; type = pointer_to(type); } // We leave with the next token already scanned return (type); } // variable_declaration: type identifier ';' ; // // Parse the declaration of a variable. // The identifier has been scanned & we have the type void var_declaration(int type) { int id; // Text now has the identifier's name. // Add it as a known identifier // and generate its space in assembly id = addglob(Text, type, S_VARIABLE, 0); genglobsym(id); // Get the trailing semicolon semi(); } // // function_declaration: type identifier '(' ')' compound_statement ; // // Parse the declaration of a simplistic function. // The identifier has been scanned & we have the type struct ASTnode *function_declaration(int type) { struct ASTnode *tree, *finalstmt; int nameslot, endlabel; // Text now has the identifier's name. // Get a label-id for the end label, add the function // to the symbol table, and set the Functionid global // to the function's symbol-id endlabel = genlabel(); nameslot = addglob(Text, type, S_FUNCTION, endlabel); Functionid = nameslot; // Scan in the parentheses lparen(); rparen(); // Get the AST tree for the compound statement tree = compound_statement(); // If the function type isn't P_VOID .. if (type != P_VOID) { // Error if no statements in the function if (tree == NULL) fatal("No statements in function with non-void type"); // Check that the last AST operation in the // compound statement was a return statement finalstmt = (tree->op == A_GLUE) ? 
tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); } // Return an A_FUNCTION node which has the function's nameslot // and the compound statement sub-tree return (mkastunary(A_FUNCTION, type, tree, nameslot)); } // Parse one or more global declarations, either // variables or functions void global_declarations(void) { struct ASTnode *tree; int type; while (1) { // We have to read past the type and identifier // to see either a '(' for a function declaration // or a ',' or ';' for a variable declaration. // Text is filled in by the ident() call. type = parse_type(); ident(); if (Token.token == T_LPAREN) { // Parse the function declaration and // generate the assembly code for it tree = function_declaration(type); if (O_dumpAST) { dumpAST(tree, NOLABEL, 0); fprintf(stdout, "\n\n"); } genAST(tree, NOLABEL, 0); } else { // Parse the global variable declaration var_declaration(type); } // Stop when we have reached EOF if (Token.token == T_EOF) break; } } ================================================ FILE: 18_Lvalues_Revisited/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c void reject_token(struct token *t); int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, int intvalue); struct ASTnode *mkastleaf(int op, int type, int intvalue); struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, int intvalue); void dumpAST(struct ASTnode *n, int label, int parentASTop); // gen.c int genlabel(void); int genAST(struct ASTnode *n, int reg, int parentASTop); void genpreamble(); void genpostamble(); void genfreeregs(); void genprintint(int reg); void genglobsym(int id); int genprimsize(int type); void genreturn(int reg, int id); // cg.c void freeall_registers(void); void cgpreamble(); void 
cgpostamble(); void cgfuncpreamble(int id); void cgfuncpostamble(int id); int cgloadint(int value, int type); int cgloadglob(int id); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdiv(int r1, int r2); int cgshlconst(int r, int val); void cgprintint(int r); int cgcall(int r, int id); int cgstorglob(int r, int id); void cgglobsym(int id); int cgcompare_and_set(int ASTop, int r1, int r2); int cgcompare_and_jump(int ASTop, int r1, int r2, int label); void cglabel(int l); void cgjump(int l); int cgwiden(int r, int oldtype, int newtype); int cgprimsize(int type); void cgreturn(int reg, int id); int cgaddress(int id); int cgderef(int r, int type); int cgstorderef(int r1, int r2, int type); // expr.c struct ASTnode *funccall(void); struct ASTnode *binexpr(int ptp); // stmt.c struct ASTnode *compound_statement(void); // misc.c void match(int t, char *what); void semi(void); void lbrace(void); void rbrace(void); void lparen(void); void rparen(void); void ident(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c int findglob(char *s); int addglob(char *name, int type, int stype, int endlabel); // decl.c void var_declaration(int type); struct ASTnode *function_declaration(int type); void global_declarations(void); // types.c int parse_type(void); int pointer_to(int type); int value_at(int type); struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op); ================================================ FILE: 18_Lvalues_Revisited/defs.h ================================================ #include #include #include #include // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 #define TEXTLEN 512 // Length of symbols in input #define NSYMBOLS 1024 // Number of symbol table entries // Token types enum { T_EOF, // Operators T_ASSIGN, T_PLUS, T_MINUS, T_STAR, T_SLASH, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, // Type keywords T_VOID, T_CHAR, 
T_INT, T_LONG, // Structural tokens T_INTLIT, T_SEMI, T_IDENT, T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, T_AMPER, T_LOGAND, // Other keywords T_IF, T_ELSE, T_WHILE, T_FOR, T_RETURN }; // Token structure struct token { int token; // Token type, from the enum list above int intvalue; // For T_INTLIT, the integer value }; // AST node types. The first few line up // with the related tokens enum { A_ASSIGN= 1, A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_INTLIT, A_IDENT, A_GLUE, A_IF, A_WHILE, A_FUNCTION, A_WIDEN, A_RETURN, A_FUNCCALL, A_DEREF, A_ADDR, A_SCALE }; // Primitive types enum { P_NONE, P_VOID, P_CHAR, P_INT, P_LONG, P_VOIDPTR, P_CHARPTR, P_INTPTR, P_LONGPTR }; // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates int rvalue; // True if the node is an rvalue struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; union { // For A_INTLIT, the integer value int intvalue; // For A_IDENT, the symbol slot number int id; // For A_FUNCTION, the symbol slot number int size; // For A_SCALE, the size to scale by } v; // For A_FUNCCALL, the symbol slot number }; #define NOREG -1 // Use NOREG when the AST generation // functions have no register to return #define NOLABEL 0 // Use NOLABEL when we have no label to // pass to genAST() // Structural types enum { S_VARIABLE, S_FUNCTION }; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol int stype; // Structural type for the symbol int endlabel; // For S_FUNCTIONs, the end label }; ================================================ FILE: 18_Lvalues_Revisited/expr.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of expressions // Copyright (c) 2019 Warren Toomey, GPL3 // Parse a function call with a single 
expression // argument and return its AST struct ASTnode *funccall(void) { struct ASTnode *tree; int id; // Check that the identifier has been defined, // then make a leaf node for it. XXX Add structural type test if ((id = findglob(Text)) == -1) { fatals("Undeclared function", Text); } // Get the '(' lparen(); // Parse the following expression tree = binexpr(0); // Build the function call AST node. Store the // function's return type as this node's type. // Also record the function's symbol-id tree = mkastunary(A_FUNCCALL, Gsym[id].type, tree, id); // Get the ')' rparen(); return (tree); } // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; int id; switch (Token.token) { case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. // Make it a P_CHAR if it's within the P_CHAR range if ((Token.intvalue) >= 0 && (Token.intvalue < 256)) n = mkastleaf(A_INTLIT, P_CHAR, Token.intvalue); else n = mkastleaf(A_INTLIT, P_INT, Token.intvalue); break; case T_IDENT: // This could be a variable or a function call. // Scan in the next token to find out scan(&Token); // It's a '(', so a function call if (Token.token == T_LPAREN) return (funccall()); // Not a function call, so reject the new token reject_token(&Token); // Check that the variable exists. XXX Add structural type test id = findglob(Text); if (id == -1) fatals("Unknown variable", Text); // Make a leaf AST node for it n = mkastleaf(A_IDENT, Gsym[id].type, id); break; default: fatald("Syntax error, token", Token.token); } // Scan in the next token and return the leaf node scan(&Token); return (n); } // Convert a binary operator token into a binary AST operation. 
// We rely on a 1:1 mapping from token to AST operation static int binastop(int tokentype) { if (tokentype > T_EOF && tokentype < T_INTLIT) return (tokentype); fatald("Syntax error, token", tokentype); return (0); // Keep -Wall happy } // Return true if a token is right-associative, // false otherwise. static int rightassoc(int tokentype) { if (tokentype == T_ASSIGN) return(1); return(0); } // Operator precedence for each token. Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, // T_EOF, T_ASSIGN 20, 20, // T_PLUS, T_MINUS 30, 30, // T_STAR, T_SLASH 40, 40, // T_EQ, T_NE 50, 50, 50, 50 // T_LT, T_GT, T_LE, T_GE }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec = OpPrec[tokentype]; if (prec == 0) fatald("Syntax error, token", tokentype); return (prec); } // prefix_expression: primary // | '*' prefix_expression // | '&' prefix_expression // ; // Parse a prefix expression and return // a sub-tree representing it. struct ASTnode *prefix(void) { struct ASTnode *tree; switch (Token.token) { case T_AMPER: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Ensure that it's an identifier if (tree->op != A_IDENT) fatal("& operator must be followed by an identifier"); // Now change the operator to A_ADDR and the type to // a pointer to the original type tree->op = A_ADDR; tree->type = pointer_to(tree->type); break; case T_STAR: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's either another deref or an // identifier if (tree->op != A_IDENT && tree->op != A_DEREF) fatal("* operator must be followed by an identifier or *"); // Prepend an A_DEREF operation to the tree tree = mkastunary(A_DEREF, value_at(tree->type), tree, 0); break; default: tree = primary(); } return (tree); } // Return an AST tree whose root is a binary operator. 
// Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; struct ASTnode *ltemp, *rtemp; int ASTop; int tokentype; // Get the tree on the left. // Fetch the next token at the same time. left = prefix(); // If we hit a semicolon or ')', return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN) { left->rvalue= 1; return(left); } // While the precedence of this token is more than that of the // previous token precedence, or it's right associative and // equal to the previous token's precedence while ((op_precedence(tokentype) > ptp) || (rightassoc(tokentype) && op_precedence(tokentype) == ptp)) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Determine the operation to be performed on the sub-trees ASTop = binastop(tokentype); if (ASTop == A_ASSIGN) { // Assignment // Make the right tree into an rvalue right->rvalue= 1; // Ensure the right's type matches the left right = modify_type(right, left->type, 0); if (left == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree. However, switch // left and right around, so that the right expression's // code will be generated before the left expression ltemp= left; left= right; right= ltemp; } else { // We are not doing an assignment, so both trees should be rvalues // Convert both trees into rvalue if they are lvalue trees left->rvalue= 1; right->rvalue= 1; // Ensure the two types are compatible by trying // to modify each tree to match the other's type. ltemp = modify_type(left, right->type, ASTop); rtemp = modify_type(right, left->type, ASTop); if (ltemp == NULL && rtemp == NULL) fatal("Incompatible types in binary expression"); if (ltemp != NULL) left = ltemp; if (rtemp != NULL) right = rtemp; } // Join that sub-tree with ours. 
Convert the token // into an AST operation at the same time. left = mkastnode(binastop(tokentype), left->type, left, NULL, right, 0); // Update the details of the current token. // If we hit a semicolon or ')', return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN) { left->rvalue= 1; return(left); } } // Return the tree we have when the precedence // is the same or lower left->rvalue= 1; return(left); } ================================================ FILE: 18_Lvalues_Revisited/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number int genlabel(void) { static int id = 1; return (id++); } // Generate the code for an IF statement // and an optional ELSE clause static int genIF(struct ASTnode *n) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. // When there is no ELSE clause, Lfalse _is_ // the ending label! Lfalse = genlabel(); if (n->right) Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. 
genAST(n->left, Lfalse, n->op); genfreeregs(); // Generate the true compound statement genAST(n->mid, NOLABEL, n->op); genfreeregs(); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOLABEL, n->op); genfreeregs(); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = genlabel(); Lend = genlabel(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. genAST(n->left, Lend, n->op); genfreeregs(); // Generate the compound statement for the body genAST(n->right, NOLABEL, n->op); genfreeregs(); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Given an AST, an optional label, and the AST op // of the parent, generate assembly code recursively. // Return the register id with the tree's final value. 
int genAST(struct ASTnode *n, int label, int parentASTop) { int leftreg, rightreg; // We have some specific AST node handling at the top // so that we don't evaluate the child sub-trees immediately switch (n->op) { case A_IF: return (genIF(n)); case A_WHILE: return (genWHILE(n)); case A_GLUE: // Do each child statement, and free the // registers after each child genAST(n->left, NOLABEL, n->op); genfreeregs(); genAST(n->right, NOLABEL, n->op); genfreeregs(); return (NOREG); case A_FUNCTION: // Generate the function's preamble before the code // in the child sub-tree cgfuncpreamble(n->v.id); genAST(n->left, NOLABEL, n->op); cgfuncpostamble(n->v.id); return (NOREG); } // General AST node handling below // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, NOLABEL, n->op); if (n->right) rightreg = genAST(n->right, NOLABEL, n->op); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdiv(leftreg, rightreg)); case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, compare registers // and set one to 1 or 0 based on the comparison. if (parentASTop == A_IF || parentASTop == A_WHILE) return (cgcompare_and_jump(n->op, leftreg, rightreg, label)); else return (cgcompare_and_set(n->op, leftreg, rightreg)); case A_INTLIT: return (cgloadint(n->v.intvalue, n->type)); case A_IDENT: // Load our value if we are an rvalue // or we are being dereferenced if (n->rvalue || parentASTop== A_DEREF) return (cgloadglob(n->v.id)); else return (NOREG); case A_ASSIGN: // Are we assigning to an identifier or through a pointer? 
switch (n->right->op) { case A_IDENT: return (cgstorglob(leftreg, n->right->v.id)); case A_DEREF: return (cgstorderef(leftreg, rightreg, n->right->type)); default: fatald("Can't A_ASSIGN in genAST(), op", n->op); } case A_WIDEN: // Widen the child's type to the parent's type return (cgwiden(leftreg, n->left->type, n->type)); case A_RETURN: cgreturn(leftreg, Functionid); return (NOREG); case A_FUNCCALL: return (cgcall(leftreg, n->v.id)); case A_ADDR: return (cgaddress(n->v.id)); case A_DEREF: // If we are an rvalue, dereference to get the value we point at, // otherwise leave it for A_ASSIGN to store through the pointer if (n->rvalue) return (cgderef(leftreg, n->left->type)); else return (leftreg); case A_SCALE: // Small optimisation: use shift if the // scale value is a known power of two switch (n->v.size) { case 2: return(cgshlconst(leftreg, 1)); case 4: return(cgshlconst(leftreg, 2)); case 8: return(cgshlconst(leftreg, 3)); default: // Load a register with the size and // multiply the leftreg by this size rightreg= cgloadint(n->v.size, P_INT); return (cgmul(leftreg, rightreg)); } default: fatald("Unknown AST operator", n->op); } return (NOREG); // Keep -Wall happy } void genpreamble() { cgpreamble(); } void genpostamble() { cgpostamble(); } void genfreeregs() { freeall_registers(); } void genprintint(int reg) { cgprintint(reg); } void genglobsym(int id) { cgglobsym(id); } int genprimsize(int type) { return (cgprimsize(type)); } ================================================ FILE: 18_Lvalues_Revisited/lib/printint.c ================================================ #include void printint(long x) { printf("%ld\n", x); } ================================================ FILE: 18_Lvalues_Revisited/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Initialise global variables static 
void init() { Line = 1; Putback = '\n'; Globs = 0; O_dumpAST = 0; } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s [-T] infile\n", prog); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. int main(int argc, char *argv[]) { int i; // Initialise the globals init(); // Scan for command-line options for (i=1; i= argc) usage(argv[0]); // Open up the input file if ((Infile = fopen(argv[i], "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", argv[i], strerror(errno)); exit(1); } // Create the output file if ((Outfile = fopen("out.s", "w")) == NULL) { fprintf(stderr, "Unable to create out.s: %s\n", strerror(errno)); exit(1); } // For now, ensure that void printint() is defined addglob("printint", P_CHAR, S_FUNCTION, 0); scan(&Token); // Get the first token from the input genpreamble(); // Output the preamble global_declarations(); // Parse the global declarations genpostamble(); // Output the postamble fclose(Outfile); // Close the output file and exit return (0); } ================================================ FILE: 18_Lvalues_Revisited/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current token is t, // and fetch the next token. 
Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d\n", s, Line); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d\n", s1, s2, Line); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d\n", s, d, Line); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d\n", s, c, Line); exit(1); } ================================================ FILE: 18_Lvalues_Revisited/scan.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { char *p; p = strchr(s, c); return (p ? p - s : -1); } // Get the next character from the input file. static int next(void) { int c; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return (c); } c = fgetc(Infile); // Read from input file if ('\n' == c) Line++; // Increment line count return (c); } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. 
Return the first // character we do need to deal with. static int skip(void) { int c; c = next(); while (' ' == c || '\t' == c || '\n' == c || '\r' == c || '\f' == c) { c = next(); } return (c); } // Scan and return an integer literal // value from the input file. static int scanint(int c) { int k, val = 0; // Convert each character into an int value while ((k = chrpos("0123456789", c)) >= 0) { val = val * 10 + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan an identifier from the input file and // store it in buf[]. Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { fatal("Identifier too long"); } else if (i < lim - 1) { buf[i++] = c; } c = next(); } // We hit a non-valid character, put it back. // NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. 
static int keyword(char *s) { switch (*s) { case 'c': if (!strcmp(s, "char")) return (T_CHAR); break; case 'e': if (!strcmp(s, "else")) return (T_ELSE); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'l': if (!strcmp(s, "long")) return (T_LONG); break; case 'r': if (!strcmp(s, "return")) return (T_RETURN); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; case 'v': if (!strcmp(s, "void")) return (T_VOID); break; } return (0); } // A pointer to a rejected token static struct token *Rejtoken = NULL; // Reject the token that we just scanned void reject_token(struct token *t) { if (Rejtoken != NULL) fatal("Can't reject token twice"); Rejtoken = t; } // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. int scan(struct token *t) { int c, tokentype; // If we have any rejected token, return it if (Rejtoken != NULL) { t = Rejtoken; Rejtoken = NULL; return (1); } // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': t->token = T_PLUS; break; case '-': t->token = T_MINUS; break; case '*': t->token = T_STAR; break; case '/': t->token = T_SLASH; break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { fatalc("Unrecognised character", c); } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else { putback(c); t->token = T_LT; } break; case '>': if ((c = next()) == '=') { t->token = T_GE; } else { putback(c); t->token = T_GT; } break; case '&': if ((c = next()) == '&') { t->token = T_LOGAND; } 
else { putback(c); t->token = T_AMPER; } break; default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if ((tokentype = keyword(Text)) != 0) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token return (1); } ================================================ FILE: 18_Lvalues_Revisited/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // Prototypes static struct ASTnode *single_statement(void); // compound_statement: // empty, i.e. no statement // | statement // | statement statements // ; // // statement: declaration // | expression_statement // | function_call // | if_statement // | while_statement // | for_statement // | return_statement // ; // if_statement: if_head // | if_head 'else' compound_statement // ; // // if_head: 'if' '(' true_false_expression ')' compound_statement ; // // Parse an IF statement including any // optional ELSE clause and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Ensure // the tree's operation is a comparison. 
condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) fatal("Bad comparison operator"); rparen(); // Get the AST for the compound statement trueAST = compound_statement(); // If we have an 'else', skip it // and get the AST for the compound statement if (Token.token == T_ELSE) { scan(&Token); falseAST = compound_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, P_NONE, condAST, trueAST, falseAST, 0)); } // while_statement: 'while' '(' true_false_expression ')' compound_statement ; // // Parse a WHILE statement and return its AST static struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Ensure // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) fatal("Bad comparison operator"); rparen(); // Get the AST for the compound statement bodyAST = compound_statement(); // Build and return the AST for this statement return (mkastnode(A_WHILE, P_NONE, condAST, NULL, bodyAST, 0)); } // for_statement: 'for' '(' preop_statement ';' // true_false_expression ';' // postop_statement ')' compound_statement ; // // preop_statement: statement (for now) // postop_statement: statement (for now) // // Parse a FOR statement and return its AST static struct ASTnode *for_statement(void) { struct ASTnode *condAST, *bodyAST; struct ASTnode *preopAST, *postopAST; struct ASTnode *tree; // Ensure we have 'for' '(' match(T_FOR, "for"); lparen(); // Get the pre_op statement and the ';' preopAST = single_statement(); semi(); // Get the condition and the ';' condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) fatal("Bad comparison operator"); semi(); // Get the post_op statement and the ')' postopAST = single_statement(); rparen(); // Get the compound statement which is the body bodyAST = compound_statement(); // For now, all four 
sub-trees have to be non-NULL. // Later on, we'll change the semantics for when some are missing // Glue the compound statement and the postop tree tree = mkastnode(A_GLUE, P_NONE, bodyAST, NULL, postopAST, 0); // Make a WHILE loop with the condition and this new body tree = mkastnode(A_WHILE, P_NONE, condAST, NULL, tree, 0); // And glue the preop tree to the A_WHILE tree return (mkastnode(A_GLUE, P_NONE, preopAST, NULL, tree, 0)); } // return_statement: 'return' '(' expression ')' ; // // Parse a return statement and return its AST static struct ASTnode *return_statement(void) { struct ASTnode *tree; // Can't return a value if function returns P_VOID if (Gsym[Functionid].type == P_VOID) fatal("Can't return from a void function"); // Ensure we have 'return' '(' match(T_RETURN, "return"); lparen(); // Parse the following expression tree = binexpr(0); // Ensure this is compatible with the function's type tree = modify_type(tree, Gsym[Functionid].type, 0); if (tree == NULL) fatal("Incompatible type to return"); // Add on the A_RETURN node tree = mkastunary(A_RETURN, P_NONE, tree, 0); // Get the ')' rparen(); return (tree); } // Parse a single statement and return its AST static struct ASTnode *single_statement(void) { int type; switch (Token.token) { case T_CHAR: case T_INT: case T_LONG: // The beginning of a variable declaration. // Parse the type and get the identifier. // Then parse the rest of the declaration. // XXX: These are globals at present. type = parse_type(); ident(); var_declaration(type); return (NULL); // No AST generated here case T_IF: return (if_statement()); case T_WHILE: return (while_statement()); case T_FOR: return (for_statement()); case T_RETURN: return (return_statement()); default: // For now, see if this is an expression. // This catches assignment statements. 
return (binexpr(0)); } return (NULL); // Keep -Wall happy } // Parse a compound statement // and return its AST struct ASTnode *compound_statement(void) { struct ASTnode *left = NULL; struct ASTnode *tree; // Require a left curly bracket lbrace(); while (1) { // Parse a single statement tree = single_statement(); // Some statements must be followed by a semicolon if (tree != NULL && (tree->op == A_ASSIGN || tree->op == A_RETURN || tree->op == A_FUNCCALL)) semi(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, P_NONE, left, NULL, tree, 0); } // When we hit a right curly bracket, // skip past it and return the AST if (Token.token == T_RBRACE) { rbrace(); return (left); } } } ================================================ FILE: 18_Lvalues_Revisited/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 // Determine if the symbol s is in the global symbol table. // Return its slot position or -1 if not found. int findglob(char *s) { int i; for (i = 0; i < Globs; i++) { if (*s == *Gsym[i].name && !strcmp(s, Gsym[i].name)) return (i); } return (-1); } // Get the position of a new global symbol slot, or die // if we've run out of positions. static int newglob(void) { int p; if ((p = Globs++) >= NSYMBOLS) fatal("Too many global symbols"); return (p); } // Add a global symbol to the symbol table. // Also set up its type and structural type. 
// Return the slot number in the symbol table int addglob(char *name, int type, int stype, int endlabel) { int y; // If this is already in the symbol table, return the existing slot if ((y = findglob(name)) != -1) return (y); // Otherwise get a new slot, fill it in and // return the slot number y = newglob(); Gsym[y].name = strdup(name); Gsym[y].type = type; Gsym[y].stype = stype; Gsym[y].endlabel = endlabel; return (y); } ================================================ FILE: 18_Lvalues_Revisited/tests/input01.c ================================================ void main() { printint(12 * 3); printint(18 - 2 * 4); printint(1 + 2 + 9 - 5/2 + 3*5); } ================================================ FILE: 18_Lvalues_Revisited/tests/input02.c ================================================ void main() { int fred; int jim; fred= 5; jim= 12; printint(fred + jim); } ================================================ FILE: 18_Lvalues_Revisited/tests/input03.c ================================================ void main() { int x; x= 1; printint(x); x= x + 1; printint(x); x= x + 1; printint(x); x= x + 1; printint(x); x= x + 1; printint(x); } ================================================ FILE: 18_Lvalues_Revisited/tests/input04.c ================================================ void main() { int x; x= 7 < 9; printint(x); x= 7 <= 9; printint(x); x= 7 != 9; printint(x); x= 7 == 7; printint(x); x= 7 >= 7; printint(x); x= 7 <= 7; printint(x); x= 9 > 7; printint(x); x= 9 >= 7; printint(x); x= 9 != 7; printint(x); } ================================================ FILE: 18_Lvalues_Revisited/tests/input05.c ================================================ void main() { int i; int j; i=6; j=12; if (i < j) { printint(i); } else { printint(j); } } ================================================ FILE: 18_Lvalues_Revisited/tests/input06.c ================================================ void main() { int i; i=1; while (i <= 10) { printint(i); i= i + 1; } } 
================================================ FILE: 18_Lvalues_Revisited/tests/input07.c ================================================ void main() { int i; for (i= 1; i <= 10; i= i + 1) { printint(i); } } ================================================ FILE: 18_Lvalues_Revisited/tests/input08.c ================================================ void main() { int i; for (i= 1; i <= 10; i= i + 1) { printint(i); } } ================================================ FILE: 18_Lvalues_Revisited/tests/input09.c ================================================ void main() { int i; for (i= 1; i <= 10; i= i + 1) { printint(i); } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { printint(2 * b - a); } } ================================================ FILE: 18_Lvalues_Revisited/tests/input10.c ================================================ void main() { int i; char j; j= 20; printint(j); i= 10; printint(i); for (i= 1; i <= 5; i= i + 1) { printint(i); } for (j= 253; j != 2; j= j + 1) { printint(j); } } ================================================ FILE: 18_Lvalues_Revisited/tests/input11.c ================================================ int main() { int i; char j; long k; i= 10; printint(i); j= 20; printint(j); k= 30; printint(k); for (i= 1; i <= 5; i= i + 1) { printint(i); } for (j= 253; j != 4; j= j + 1) { printint(j); } for (k= 1; k <= 5; k= k + 1) { printint(k); } return(i); printint(12345); return(3); } ================================================ FILE: 18_Lvalues_Revisited/tests/input12.c ================================================ int fred() { return(5); } void main() { int x; x= fred(2); printint(x); } ================================================ FILE: 18_Lvalues_Revisited/tests/input13.c ================================================ int fred() { return(56); } void main() { int dummy; int result; dummy= printint(23); result= fred(10); dummy= printint(result); } ================================================ FILE: 
18_Lvalues_Revisited/tests/input14.c ================================================ int fred() { return(20); } int main() { int result; printint(10); result= fred(15); printint(result); printint(fred(15)+10); return(0); } ================================================ FILE: 18_Lvalues_Revisited/tests/input15.c ================================================ int main() { char a; char *b; char c; int d; int *e; int f; a= 18; printint(a); b= &a; c= *b; printint(c); d= 12; printint(d); e= &d; f= *e; printint(f); return(0); } ================================================ FILE: 18_Lvalues_Revisited/tests/input16.c ================================================ int c; int d; int *e; int f; int main() { c= 12; d=18; printint(c); e= &c + 1; f= *e; printint(f); return(0); } ================================================ FILE: 18_Lvalues_Revisited/tests/input17.c ================================================ int main() { char a; char *b; int d; int *e; b= &a; *b= 19; printint(a); e= &d; *e= 12; printint(d); return(0); } ================================================ FILE: 18_Lvalues_Revisited/tests/input18.c ================================================ int main() { int a; int b; a= b= 34; printint(a); printint(b); return(0); } ================================================ FILE: 18_Lvalues_Revisited/tests/input18a.c ================================================ int a; int *b; char c; char *d; int main() { b= &a; *b= 15; printint(a); d= &c; *d= 16; printint(c); return(0); } ================================================ FILE: 18_Lvalues_Revisited/tests/mktests ================================================ #!/bin/sh # Make the output files for each test for i in input*c do if [ ! 
-f "out.$i" ] then cc -o out $i ../lib/printint.c ./out > out.$i rm -f out fi done ================================================ FILE: 18_Lvalues_Revisited/tests/out.input01.c ================================================ 36 10 25 ================================================ FILE: 18_Lvalues_Revisited/tests/out.input02.c ================================================ 17 ================================================ FILE: 18_Lvalues_Revisited/tests/out.input03.c ================================================ 1 2 3 4 5 ================================================ FILE: 18_Lvalues_Revisited/tests/out.input04.c ================================================ 1 1 1 1 1 1 1 1 1 ================================================ FILE: 18_Lvalues_Revisited/tests/out.input05.c ================================================ 6 ================================================ FILE: 18_Lvalues_Revisited/tests/out.input06.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 18_Lvalues_Revisited/tests/out.input07.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 18_Lvalues_Revisited/tests/out.input08.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 18_Lvalues_Revisited/tests/out.input09.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 18_Lvalues_Revisited/tests/out.input10.c ================================================ 20 10 1 2 3 4 5 253 254 255 0 1 ================================================ FILE: 18_Lvalues_Revisited/tests/out.input11.c ================================================ 10 20 30 1 2 3 4 5 253 254 255 0 1 2 3 1 2 3 4 5 ================================================ FILE: 18_Lvalues_Revisited/tests/out.input12.c 
================================================ 5 ================================================ FILE: 18_Lvalues_Revisited/tests/out.input13.c ================================================ 23 56 ================================================ FILE: 18_Lvalues_Revisited/tests/out.input14.c ================================================ 10 20 30 ================================================ FILE: 18_Lvalues_Revisited/tests/out.input15.c ================================================ 18 18 12 12 ================================================ FILE: 18_Lvalues_Revisited/tests/out.input16.c ================================================ 12 18 ================================================ FILE: 18_Lvalues_Revisited/tests/out.input17.c ================================================ 19 12 ================================================ FILE: 18_Lvalues_Revisited/tests/out.input18.c ================================================ 34 34 ================================================ FILE: 18_Lvalues_Revisited/tests/out.input18a.c ================================================ 15 16 ================================================ FILE: 18_Lvalues_Revisited/tests/runtests ================================================ #!/bin/sh # Run each test and compare # against known good output if [ ! -f ../comp1 ] then echo "Need to build ../comp1 first!"; exit 1 fi for i in input* do if [ ! -f "out.$i" ] then echo "Can't run test on $i, no output file!" else echo -n $i ../comp1 $i cc -o out out.s ../lib/printint.c ./out > trial.$i cmp -s "out.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo else echo ": OK" fi rm -f out out.s "trial.$i" fi done ================================================ FILE: 18_Lvalues_Revisited/tests/runtestsn ================================================ #!/bin/sh # Run each test and compare # against known good output if [ ! 
-f ../compn ] then echo "Need to build ../compn first!"; exit 1 fi for i in input* do if [ ! -f "out.$i" ] then echo "Can't run test on $i, no output file!" else echo -n $i ../compn $i nasm -f elf64 out.s cc -no-pie -Wall -o out out.o ../lib/printint.c ./out > trial.$i cmp -s "out.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo else echo ": OK" fi rm -f out out.o out.s "trial.$i" fi done ================================================ FILE: 18_Lvalues_Revisited/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->type = type; n->left = left; n->mid = mid; n->right = right; n->v.intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, int intvalue) { return (mkastnode(op, type, NULL, NULL, NULL, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, int intvalue) { return (mkastnode(op, type, left, NULL, NULL, intvalue)); } // Generate and return a new label number // just for AST dumping purposes static int gendumplabel(void) { static int id = 1; return (id++); } // Given an AST tree, print it out and follow the // traversal of the tree that genAST() follows void dumpAST(struct ASTnode *n, int label, int level) { int Lfalse, Lstart, Lend; switch (n->op) { case A_IF: Lfalse = gendumplabel(); for (int i=0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_IF"); if (n->right) { Lend = gendumplabel(); fprintf(stdout, ", 
end L%d", Lend); } fprintf(stdout, "\n"); dumpAST(n->left, Lfalse, level+2); dumpAST(n->mid, NOLABEL, level+2); if (n->right) dumpAST(n->right, NOLABEL, level+2); return; case A_WHILE: Lstart = gendumplabel(); for (int i=0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_WHILE, start L%d\n", Lstart); Lend = gendumplabel(); dumpAST(n->left, Lend, level+2); dumpAST(n->right, NOLABEL, level+2); return; } // Reset level to -2 for A_GLUE if (n->op==A_GLUE) level= -2; // General AST node handling if (n->left) dumpAST(n->left, NOLABEL, level+2); if (n->right) dumpAST(n->right, NOLABEL, level+2); for (int i=0; i < level; i++) fprintf(stdout, " "); switch (n->op) { case A_GLUE: fprintf(stdout, "\n\n"); return; case A_FUNCTION: fprintf(stdout, "A_FUNCTION %s\n", Gsym[n->v.id].name); return; case A_ADD: fprintf(stdout, "A_ADD\n"); return; case A_SUBTRACT: fprintf(stdout, "A_SUBTRACT\n"); return; case A_MULTIPLY: fprintf(stdout, "A_MULTIPLY\n"); return; case A_DIVIDE: fprintf(stdout, "A_DIVIDE\n"); return; case A_EQ: fprintf(stdout, "A_EQ\n"); return; case A_NE: fprintf(stdout, "A_NE\n"); return; case A_LT: fprintf(stdout, "A_LE\n"); return; case A_GT: fprintf(stdout, "A_GT\n"); return; case A_LE: fprintf(stdout, "A_LE\n"); return; case A_GE: fprintf(stdout, "A_GE\n"); return; case A_INTLIT: fprintf(stdout, "A_INTLIT %d\n", n->v.intvalue); return; case A_IDENT: if (n->rvalue) fprintf(stdout, "A_IDENT rval %s\n", Gsym[n->v.id].name); else fprintf(stdout, "A_IDENT %s\n", Gsym[n->v.id].name); return; case A_ASSIGN: fprintf(stdout, "A_ASSIGN\n"); return; case A_WIDEN: fprintf(stdout, "A_WIDEN\n"); return; case A_RETURN: fprintf(stdout, "A_RETURN\n"); return; case A_FUNCCALL: fprintf(stdout, "A_FUNCCALL %s\n", Gsym[n->v.id].name); return; case A_ADDR: fprintf(stdout, "A_ADDR %s\n", Gsym[n->v.id].name); return; case A_DEREF: if (n->rvalue) fprintf(stdout, "A_DEREF rval\n"); else fprintf(stdout, "A_DEREF\n"); return; case A_SCALE: fprintf(stdout, "A_SCALE %d\n", n->v.size); 
return; default: fatald("Unknown dumpAST operator", n->op); } } ================================================ FILE: 18_Lvalues_Revisited/types.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Types and type handling // Copyright (c) 2019 Warren Toomey, GPL3 // Return true if a type is an int type // of any size, false otherwise int inttype(int type) { if (type == P_CHAR || type == P_INT || type == P_LONG) return (1); return (0); } // Return true if a type is of pointer type int ptrtype(int type) { if (type == P_VOIDPTR || type == P_CHARPTR || type == P_INTPTR || type == P_LONGPTR) return (1); return (0); } // Given a primitive type, return // the type which is a pointer to it int pointer_to(int type) { int newtype; switch (type) { case P_VOID: newtype = P_VOIDPTR; break; case P_CHAR: newtype = P_CHARPTR; break; case P_INT: newtype = P_INTPTR; break; case P_LONG: newtype = P_LONGPTR; break; default: fatald("Unrecognised in pointer_to: type", type); } return (newtype); } // Given a primitive pointer type, return // the type which it points to int value_at(int type) { int newtype; switch (type) { case P_VOIDPTR: newtype = P_VOID; break; case P_CHARPTR: newtype = P_CHAR; break; case P_INTPTR: newtype = P_INT; break; case P_LONGPTR: newtype = P_LONG; break; default: fatald("Unrecognised in value_at: type", type); } return (newtype); } // Given an AST tree and a type which we want it to become, // possibly modify the tree by widening or scaling so that // it is compatible with this type. Return the original tree // if no changes occurred, a modified tree, or NULL if the // tree is not compatible with the given type. // If this will be part of a binary operation, the AST op is not zero. 
struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op) { int ltype; int lsize, rsize; ltype = tree->type; // Compare scalar int types if (inttype(ltype) && inttype(rtype)) { // Both types same, nothing to do if (ltype == rtype) return (tree); // Get the sizes for each type lsize = genprimsize(ltype); rsize = genprimsize(rtype); // Tree's size is too big if (lsize > rsize) return (NULL); // Widen to the right if (rsize > lsize) return (mkastunary(A_WIDEN, rtype, tree, 0)); } // For pointers on the left if (ptrtype(ltype)) { // OK is same type on right and not doing a binary op if (op == 0 && ltype == rtype) return (tree); } // We can scale only on A_ADD or A_SUBTRACT operation if (op == A_ADD || op == A_SUBTRACT) { // Left is int type, right is pointer type and the size // of the original type is >1: scale the left if (inttype(ltype) && ptrtype(rtype)) { rsize = genprimsize(value_at(rtype)); if (rsize > 1) return (mkastunary(A_SCALE, rtype, tree, rsize)); } } // If we get here, the types are not compatible return (NULL); } ================================================ FILE: 19_Arrays_pt1/Makefile ================================================ SRCS= cg.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c \ sym.c tree.c types.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c \ sym.c tree.c types.c ARMSRCS= cg_arm.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c \ sym.c tree.c types.c comp1: $(SRCS) cc -o comp1 -g -Wall $(SRCS) compn: $(SRCN) cc -o compn -g -Wall $(SRCN) comp1arm: $(ARMSRCS) cc -o comp1arm -g -Wall $(ARMSRCS) cp comp1arm comp1 clean: rm -f comp1 comp1arm compn *.o *.s out test: comp1 tests/runtests (cd tests; chmod +x runtests; ./runtests) armtest: comp1arm tests/runtests (cd tests; chmod +x runtests; ./runtests) testn: compn tests/runtestsn (cd tests; chmod +x runtestsn; ./runtestsn) test20: comp1 tests/input20.c lib/printint.c ./comp1 tests/input20.c cc -o out out.s lib/printint.c ./out armtest20: comp1arm tests/input20.c 
lib/printint.c ./comp1 tests/input20.c cc -o out out.s lib/printint.c ./out test20n: compn tests/input20.c lib/printint.c ./compn tests/input20.c nasm -f elf64 out.s cc -no-pie -o out lib/printint.c out.o ./out ================================================ FILE: 19_Arrays_pt1/Notes ================================================ int ary[5]; int *ptr; ary[3]= 63; // Set ary[3] (lvalue) to 63 ptr = ary; // Point ptr to base of ary // ary= ptr; // error: assignment to expression with array type ptr = &ary[0]; // Also point ptr to base of ary, ary[0] is lvalue ptr[4]= 72; // Use ptr like an array, ptr[4] is an lvalue .globl z z: .quad 1 .quad 2 .quad 3 .quad 4 .globl zc zc: .byte 1 .byte 2 .byte 3 .byte 4 .byte 0 .byte 0 .byte 0 .byte 0 ================================================ FILE: 19_Arrays_pt1/Readme.md ================================================ # Part 19: Arrays, part 1 > *My lecturer for the first year of university was a Scotsman with a very heavy accent. Around the third or fourth week of first term, he began saying "Hurray!" a lot in class. It took me about twenty minutes to work out he was saying "array".* So, we begin the work to add arrays to the compiler in this part of the journey. I sat down and wrote a small C program to see what sort of functionality I should try to implement: ```c int ary[5]; // Array of five int elements int *ptr; // Pointer to an int ary[3]= 63; // Set ary[3] (lvalue) to 63 ptr = ary; // Point ptr to base of ary // ary= ptr; // error: assignment to expression with array type ptr = &ary[0]; // Also point ptr to base of ary, ary[0] is lvalue ptr[4]= 72; // Use ptr like an array, ptr[4] is an lvalue ``` Arrays are *like* pointers in that we can dereference both a pointer and an array with the "[ ]" syntax to get access to a specific element. We can use the array's name as a "pointer" and save the array's base into a pointer. We can get the address of an element in the array. 
But one thing we can't do is "overwrite" the base of an array with a pointer: the elements of the array are mutable, but the base address of the array is not mutable. In this part of the journey, I'll add in: + declarations of arrays with a fixed size but no initialisation list + array indexes as rvalues in an expression + array indexes as an lvalue in an assignment I also won't implement more than one dimension in each array. ## Parentheses in Expressions At some point I want to try this out: `*(ptr + 2)` which should end up being the same as `ptr[2]`. But we haven't allowed parentheses in expressions yet, so now it's time to add them. ### C Grammar in BNF On the web there is a page with the [BNF Grammar for C](https://www.lysator.liu.se/c/ANSI-C-grammar-y.html) written by Jeff Lee in 1985. I like to reference it to give me ideas and to confirm that I'm not making too many mistakes. One thing to note is that, instead of implementing the priority of the binary expression operators in C, the grammar uses recursive definitions to make the priorities explicit. Thus: ``` additive_expression : multiplicative_expression | additive_expression '+' multiplicative_expression | additive_expression '-' multiplicative_expression ; ``` shows that we descend into "multiplicative_expression" while we are parsing an "additive_expression", thus giving the '*' and '/' operators a higher precedence than the '+' and '-' operators. Right at the top of the expression precedence hierarchy is: ``` primary_expression : IDENTIFIER | CONSTANT | STRING_LITERAL | '(' expression ')' ; ``` We already have a `primary()` function which is called to find T_INTLIT and T_IDENT tokens, and this conforms to Jeff Lee's C grammar. It's thus the perfect place to add the parsing of parentheses in expressions. We already have T_LPAREN and T_RPAREN as tokens in our language, so there is no work to be done in the lexical scanner. 
Instead, we simply modify `primary()` to do the extra parsing: ```c static struct ASTnode *primary(void) { struct ASTnode *n; ... switch (Token.token) { case T_INTLIT: ... case T_IDENT: ... case T_LPAREN: // Beginning of a parenthesised expression, skip the '('. // Scan in the expression and the right parenthesis scan(&Token); n = binexpr(0); rparen(); return (n); default: fatald("Expecting a primary expression, got token", Token.token); } // Scan in the next token and return the leaf node scan(&Token); return (n); } ``` And that's it! Just a few extra lines to add parentheses in expressions. You'll notice that I explicitly call `rparen()` in the new code and return instead of breaking out of the switch statement. If the code had left the switch statement, the `scan(&Token);` before the final return would not strictly enforce the requirement for a ')' token to match the opening '(' token. The `tests/input19.c` test checks that parentheses are working: ```c a= 2; b= 4; c= 3; d= 2; e= (a+b) * (c+d); printint(e); ``` and it should print out 30, i.e. `6 * 5`. ## Symbol Table Changes We have scalar variables (with only one value) and functions in our symbol table. It's time to add arrays. Later on, we'll want to get the number of elements in each array with the `sizeof()` operator. Here are the changes in `defs.h`: ```c // Structural types enum { S_VARIABLE, S_FUNCTION, S_ARRAY }; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol int stype; // Structural type for the symbol int endlabel; // For S_FUNCTIONs, the end label int size; // Number of elements in the symbol }; ``` For now, we will treat arrays as pointers, and so the type for an array is "pointer to" something, e.g. "pointer to int" if the elements in the array are `int`s. We also need to add one more argument to `addglob()` in `sym.c`: ```c int addglob(char *name, int type, int stype, int endlabel, int size) { ... 
} ``` ## Parsing Array Declarations For now, I'm only going to allow declarations of arrays with a size. The BNF grammar for variable declarations is now: ``` variable_declaration: type identifier ';' | type identifier '[' T_INTLIT ']' ';' ; ``` So we need to see what token is next in `var_declaration()` in `decl.c` and process either a scalar variable declaration or an array declaration: ```c // Parse the declaration of a scalar variable or an array // with a given size. // The identifier has been scanned & we have the type void var_declaration(int type) { int id; // Text now has the identifier's name. // If the next token is a '[' if (Token.token == T_LBRACKET) { // Skip past the '[' scan(&Token); // Check we have an array size if (Token.token == T_INTLIT) { // Add this as a known array and generate its space in assembly. // We treat the array as a pointer to its elements' type id = addglob(Text, pointer_to(type), S_ARRAY, 0, Token.intvalue); genglobsym(id); } // Ensure we have a following ']' scan(&Token); match(T_RBRACKET, "]"); } else { ... // Previous code } // Get the trailing semicolon semi(); } ``` I think that's pretty straight-forward code. Later on, we'll add initialisation lists to the declaration of arrays. 
## Generating the Array Storage Now that we know the size of the array, we can modify `cgglobsym()` to allocate this space in the assembler: ```c void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Gsym[id].type); // Generate the global identity and the label fprintf(Outfile, "\t.data\n" "\t.globl\t%s\n", Gsym[id].name); fprintf(Outfile, "%s:", Gsym[id].name); // Generate the space for (int i=0; i < Gsym[id].size; i++) { switch(typesize) { case 1: fprintf(Outfile, "\t.byte\t0\n"); break; case 4: fprintf(Outfile, "\t.long\t0\n"); break; case 8: fprintf(Outfile, "\t.quad\t0\n"); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } } ``` With this in place, we can now declare arrays such as: ```c char a[10]; int b[25]; long c[100]; ``` ## Parsing Array Indexes In this part I don't want to get too adventurous. I only want to get basic array indexes as rvalues and lvalues to work. The `tests/input20.c` program has the functionality that I want to achieve: ```c int a; int b[25]; int main() { b[3]= 12; a= b[3]; printint(a); return(0); } ``` Back in the BNF grammar for C, we can see that array indexes have *slightly* lower priority than parentheses: ``` primary_expression : IDENTIFIER | CONSTANT | STRING_LITERAL | '(' expression ')' ; postfix_expression : primary_expression | postfix_expression '[' expression ']' ... ``` But for now, I'll parse array indexes also in the `primary()` function. The code to do the semantic analysis ended up being big enough to warrant a new function: ```c static struct ASTnode *primary(void) { struct ASTnode *n; int id; switch (Token.token) { case T_IDENT: // This could be a variable, array index or a // function call. 
Scan in the next token to find out scan(&Token); // It's a '(', so a function call if (Token.token == T_LPAREN) return (funccall()); // It's a '[', so an array reference if (Token.token == T_LBRACKET) return (array_access()); ``` And here is the `array_access()` function: ```c // Parse the index into an array and // return an AST tree for it static struct ASTnode *array_access(void) { struct ASTnode *left, *right; int id; // Check that the identifier has been defined as an array // then make a leaf node for it that points at the base if ((id = findglob(Text)) == -1 || Gsym[id].stype != S_ARRAY) { fatals("Undeclared array", Text); } left = mkastleaf(A_ADDR, Gsym[id].type, id); // Get the '[' scan(&Token); // Parse the following expression right = binexpr(0); // Get the ']' match(T_RBRACKET, "]"); // Ensure that this is of int type if (!inttype(right->type)) fatal("Array index is not of integer type"); // Scale the index by the size of the element's type right = modify_type(right, left->type, A_ADD); // Return an AST tree where the array's base has the offset // added to it, and dereference the element. Still an lvalue // at this point. left = mkastnode(A_ADD, Gsym[id].type, left, NULL, right, 0); left = mkastunary(A_DEREF, value_at(left->type), left, 0); return (left); } ``` For the array `int x[20];` and the array index `x[6]`, we need to scale the index (6) by the size of `int`s (4), and add this to the address of the array base. Then this element has to be dereferenced. We leave it marked as an lvalue, because we could be trying to do: ```c x[6] = 100; ``` If it does become an rvalue, then `binexpr()` will set the `rvalue` flag in the A_DEREF AST node. 
### The Generated AST Trees Going back to our test program `tests/input20.c`, the statements that will produce AST trees with array indexes are: ```c b[3]= 12; a= b[3]; ``` Running `comp1 -T tests/input20.c`, we get: ``` A_INTLIT 12 A_WIDEN A_ADDR b A_INTLIT 3 # 3 is scaled by 4 A_SCALE 4 A_ADD # and then added to b's address A_DEREF # and dereferenced. Note, still an lvalue A_ASSIGN A_ADDR b A_INTLIT 3 # As above A_SCALE 4 A_ADD A_DEREF rval # but the dereferenced address will be an rvalue A_IDENT a A_ASSIGN ``` ### Other Minor Parse Changes There are a couple of minor changes to the parser in `expr.c` which took me a while to debug. I needed to be more stringent with the input to the operator precedence lookup function: ```c // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec; if (tokentype >= T_VOID) fatald("Token with no precedence in op_precedence:", tokentype); ... } ``` Until I got the parsing right, I was sending a token not in the precedence table, and `op_precedence()` was reading past the end of the table. Oops! Don't you just love C?! The other change is that, now that we can use expressions as array indexes (e.g. `x[ a+2 ]`), we have to expect the ']' token can end an expression. So, at the end of `binexpr()`: ```c // Update the details of the current token. // If we hit a semicolon, ')' or ']', return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET) { left->rvalue = 1; return (left); } } ``` ## Changes to the Code Generator There are none. We had all the necessary components in our compiler already: scaling integer values, obtaining the address of a variable etc. For our test code: ```c b[3]= 12; a= b[3]; ``` we generate the x86-64 assembly code: ``` movq $12, %r8 leaq b(%rip), %r9 # Get b's address movq $3, %r10 salq $2, %r10 # Shift 3 by 2, i.e. 
3 * 4 addq %r9, %r10 # Add to b's address movq %r8, (%r10) # Save 12 into b[3] leaq b(%rip), %r8 # Get b's address movq $3, %r9 salq $2, %r9 # Shift 3 by 2, i.e. 3 * 4 addq %r8, %r9 # Add to b's address movq (%r9), %r9 # Load b[3] into %r9 movl %r9d, a(%rip) # and store in a ``` ## Conclusion and What's Next The parsing changes to add basic array declarations and array expressions (in terms of dealing with the syntax) were quite easy to do. What I found difficult was getting the AST tree nodes correct to scale, add to the base address, and set as lvalue/rvalue. Once this was right, the existing code generator produces the right assembly output. In the next part of our compiler writing journey, we'll add character and string literals to our language and find a way to print them out. [Next step](../20_Char_Str_Literals/Readme.md) ================================================ FILE: 19_Arrays_pt1/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. // We need a list of byte and doubleword registers, too static int freereg[4]; static char *reglist[4] = { "%r8", "%r9", "%r10", "%r11" }; static char *breglist[4] = { "%r8b", "%r9b", "%r10b", "%r11b" }; static char *dreglist[4] = { "%r8d", "%r9d", "%r10d", "%r11d" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Gsym[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, @function\n" "%s:\n" "\tpushq\t%%rbp\n" "\tmovq\t%%rsp, %%rbp\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Gsym[id].endlabel); fputs("\tpopq %rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]); return (r); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Print out the code to initialise it switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzbq\t%s(%%rip), %s\n", Gsym[id].name, reglist[r]); break; case P_INT: fprintf(Outfile, "\tmovzbl\t%s(\%%rip), %s\n", Gsym[id].name, reglist[r]); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tmovq\t%s(\%%rip), %s\n", Gsym[id].name, reglist[r]); break; default: fatald("Bad type in cgloadglob:", Gsym[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); 
free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); free_register(r2); return (r1); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { // Get a new register int outr = alloc_register(); fprintf(Outfile, "\tmovq\t%s, %%rdi\n", reglist[r]); fprintf(Outfile, "\tcall\t%s\n", Gsym[id].name); fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]); free_register(r); return (outr); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsalq\t$%d, %s\n", val, reglist[r]); return(r); } // Store a register's value into a variable int cgstorglob(int r, int id) { switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %s(\%%rip)\n", breglist[r], Gsym[id].name); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %s(\%%rip)\n", dreglist[r], Gsym[id].name); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tmovq\t%s, %s(\%%rip)\n", reglist[r], Gsym[id].name); break; default: fatald("Bad type in cgloadglob:", Gsym[id].type); } return (r); } // Array of type sizes in P_XXX order. // 0 means no size. static int psize[] = { 0, 0, 1, 4, 8, 8, 8, 8, 8 }; // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { // Check the type is valid if (type < P_NONE || type > P_LONGPTR) fatal("Bad type in cgprimsize()"); return (psize[type]); } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Gsym[id].type); // Generate the global identity and the label fprintf(Outfile, "\t.data\n" "\t.globl\t%s\n", Gsym[id].name); fprintf(Outfile, "%s:", Gsym[id].name); // Generate the space for (int i=0; i < Gsym[id].size; i++) { switch(typesize) { case 1: fprintf(Outfile, "\t.byte\t0\n"); break; case 4: fprintf(Outfile, "\t.long\t0\n"); break; case 8: fprintf(Outfile, "\t.quad\t0\n"); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { // Generate code depending on the function's type switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzbl\t%s, %%eax\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %%eax\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", Gsym[id].type); } cgjump(Gsym[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. 
Return a new register int cgaddress(int id) { int r = alloc_register(); fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", Gsym[id].name, reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tmovzbq\t(%s), %s\n", reglist[r], reglist[r]); break; case P_INTPTR: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; case P_LONGPTR: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, (%s)\n", breglist[r1], reglist[r2]); break; case P_INT: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 19_Arrays_pt1/cg_arm.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for ARMv6 on Raspberry Pi // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. static int freereg[4]; static char *reglist[4] = { "r4", "r5", "r6", "r7" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // We have to store large integer literal values in memory. // Keep a list of them which will be output in the postamble #define MAXINTS 1024 int Intlist[MAXINTS]; static int Intslot = 0; // Determine the offset of a large integer // literal from the .L3 label. If the integer // isn't in the list, add it. static void set_int_offset(int val) { int offset = -1; // See if it is already there for (int i = 0; i < Intslot; i++) { if (Intlist[i] == val) { offset = 4 * i; break; } } // Not in the list, so add it if (offset == -1) { offset = 4 * Intslot; if (Intslot == MAXINTS) fatal("Out of int slots in set_int_offset()"); Intlist[Intslot++] = val; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L3+%d\n", offset); } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Print out the assembly postamble void cgpostamble() { // Print out the global variables fprintf(Outfile, ".L2:\n"); for (int i = 0; i < Globs; i++) { if (Gsym[i].stype == S_VARIABLE) fprintf(Outfile, "\t.word %s\n", Gsym[i].name); } // Print out the integer literals fprintf(Outfile, ".L3:\n"); for (int i = 0; i < Intslot; i++) { fprintf(Outfile, "\t.word %d\n", Intlist[i]); } } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Gsym[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, \%%function\n" "%s:\n" "\tpush\t{fp, lr}\n" "\tadd\tfp, sp, #4\n" "\tsub\tsp, sp, #8\n" "\tstr\tr0, [fp, #-8]\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Gsym[id].endlabel); fputs("\tsub\tsp, fp, #4\n" "\tpop\t{fp, pc}\n" "\t.align\t2\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. 
int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); // If the literal value is small, do it with one instruction if (value <= 1000) fprintf(Outfile, "\tmov\t%s, #%d\n", reglist[r], value); else { set_int_offset(value); fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); } return (r); } // Determine the offset of a variable from the .L2 // label. Yes, this is inefficient code. static void set_var_offset(int id) { int offset = 0; // Walk the symbol table up to id. // Find S_VARIABLEs and add on 4 until // we get to our variable for (int i = 0; i < id; i++) { if (Gsym[i].stype == S_VARIABLE) offset += 4; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L2+%d\n", offset); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tldrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgloadglob:", Gsym[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s, %s\n", reglist[r1], reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\tmul\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // 
return the number of the register with the result int cgdiv(int r1, int r2) { // To do a divide: r0 holds the dividend, r1 holds the divisor. // The quotient is in r0. fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r1]); fprintf(Outfile, "\tmov\tr1, %s\n", reglist[r2]); fprintf(Outfile, "\tbl\t__aeabi_idiv\n"); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r1]); free_register(r2); return (r1); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]); fprintf(Outfile, "\tbl\t%s\n", Gsym[id].name); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r]); return (r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tlsl\t%s, %s, #%d\n", reglist[r], reglist[r], val); return(r); } // Store a register's value into a variable int cgstorglob(int r, int id) { // Get the offset to the variable set_var_offset(id); switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tstr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgstorglob:", Gsym[id].type); } return (r); } // Array of type sizes in P_XXX order. // 0 means no size. static int psize[] = { 0, 0, 1, 4, 4, 4, 4, 4 }; // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { // Check the type is valid if (type < P_NONE || type > P_LONGPTR) fatal("Bad type in cgprimsize()"); return (psize[type]); } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Gsym[id].type); fprintf(Outfile, "\t.data\n" "\t.globl\t%s\n", Gsym[id].name); switch(typesize) { case 1: fprintf(Outfile, "%s:\t.byte\t0\n", Gsym[id].name); break; case 4: fprintf(Outfile, "%s:\t.long\t0\n", Gsym[id].name); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "moveq", "movne", "movlt", "movgt", "movle", "movge" }; // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "movne", "moveq", "movge", "movle", "movgt", "movlt" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #1\n", cmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #0\n", invcmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\tuxtb\t%s, %s\n", reglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tb\tL%d\n", l); } // List of inverted branch instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *brlist[] = { "bne", "beq", "bge", "ble", "bgt", "blt" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", brlist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[reg]); cgjump(Gsym[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. Return a new register int cgaddress(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); fprintf(Outfile, "\tmov\t%s, r3\n", reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tldrb\t%s, [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [%s]\n", reglist[r1], reglist[r2]); break; case P_INT: case P_LONG: fprintf(Outfile, "\tstr\t%s, [%s]\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 19_Arrays_pt1/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. 
// We need a list of byte and doubleword registers, too static int freereg[4]; static char *reglist[4] = { "r8", "r9", "r10", "r11" }; static char *breglist[4] = { "r8b", "r9b", "r10b", "r11b" }; static char *dreglist[4] = { "r8d", "r9d", "r10d", "r11d" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\textern\tprintint\n", Outfile); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Gsym[id].name; fprintf(Outfile, "\tsection\t.text\n" "\tglobal\t%s\n" "%s:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n", name, name); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Gsym[id].endlabel); fputs("\tpop rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Load a value from a variable into a register. 
// Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Print out the code to initialise it switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], Gsym[id].name); break; case P_INT: fprintf(Outfile, "\txor\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tmov\t%s, dword [%s]\n", dreglist[r], Gsym[id].name); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], Gsym[id].name); break; default: fatald("Bad type in cgloadglob:", Gsym[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { // Get a new register int outr = alloc_register(); fprintf(Outfile, "\tmov\trdi, %s\n", reglist[r]); fprintf(Outfile, "\tcall\t%s\n", Gsym[id].name); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[outr]); free_register(r); return (outr); } // 
Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsal\t%s, %d\n", reglist[r], val); return(r); } // Store a register's value into a variable int cgstorglob(int r, int id) { switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], %s\n", Gsym[id].name, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", Gsym[id].name, dreglist[r]); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tmov\t[%s], %s\n", Gsym[id].name, reglist[r]); break; default: fatald("Bad type in cgloadglob:", Gsym[id].type); } return (r); } // Array of type sizes in P_XXX order. // 0 means no size. static int psize[] = { 0, 0, 1, 4, 8, 8, 8, 8, 8 }; // Given a P_XXX type value, return the // size of a primitive type in bytes. int cgprimsize(int type) { // Check the type is valid if (type < P_NONE || type > P_LONGPTR) fatal("Bad type in cgprimsize()"); return (psize[type]); } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Gsym[id].type); // Generate the global identity and the label fprintf(Outfile, "\tsection\t.data\n" "\tglobal\t%s\n", Gsym[id].name); fprintf(Outfile, "%s:", Gsym[id].name); // Generate the space // original version for (int i=0; i < Gsym[id].size; i++) { switch(typesize) { case 1: fprintf(Outfile, "\tdb\t0\n"); break; case 4: fprintf(Outfile, "\tdd\t0\n"); break; case 8: fprintf(Outfile, "\tdq\t0\n"); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } /* compact version using times instead of loop switch(typesize) { case 1: fprintf(Outfile, "\ttimes\t%d\tdb\t0\n", Gsym[id].size); break; case 4: fprintf(Outfile, "\ttimes\t%d\tdd\t0\n", Gsym[id].size); break; case 8: fprintf(Outfile, "\ttimes\t%d\tdq\t0\n", Gsym[id].size); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } */ } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE 
static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { // Generate code depending on the function's type switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzx\teax, %s\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmov\teax, %s\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", Gsym[id].type); } cgjump(Gsym[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. 
Return a new register int cgaddress(int id) { int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r], Gsym[id].name); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: fprintf(Outfile, "\tmovzx\t%s, word [%s]\n", reglist[r], reglist[r]); break; case P_LONGPTR: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], byte %s\n", reglist[r2], breglist[r1]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; case P_LONG: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 19_Arrays_pt1/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Putback; // Character put back by scanner extern_ int Functionid; // Symbol id of the current function extern_ int Globs; // Position of next free global symbol slot extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ struct token Token; // Last token scanned extern_ char Text[TEXTLEN + 1]; // Last identifier scanned extern_ struct symtable Gsym[NSYMBOLS]; // Global symbol table extern_ int O_dumpAST; ================================================ FILE: 19_Arrays_pt1/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 // Parse the current token and return 
// a primitive type enum value. Also // scan in the next token int parse_type(void) { int type; switch (Token.token) { case T_VOID: type = P_VOID; break; case T_CHAR: type = P_CHAR; break; case T_INT: type = P_INT; break; case T_LONG: type = P_LONG; break; default: fatald("Illegal type, token", Token.token); } // Scan in one or more further '*' tokens // and determine the correct pointer type while (1) { scan(&Token); if (Token.token != T_STAR) break; type = pointer_to(type); } // We leave with the next token already scanned return (type); } // variable_declaration: type identifier ';' // | type identifier '[' INTLIT ']' ';' // ; // // Parse the declaration of a scalar variable or an array // with a given size. // The identifier has been scanned & we have the type void var_declaration(int type) { int id; // Text now has the identifier's name. // If the next token is a '[' if (Token.token == T_LBRACKET) { // Skip past the '[' scan(&Token); // Check we have an array size if (Token.token == T_INTLIT) { // Add this as a known array and generate its space in assembly. // We treat the array as a pointer to its elements' type id = addglob(Text, pointer_to(type), S_ARRAY, 0, Token.intvalue); genglobsym(id); } // Ensure we have a following ']' scan(&Token); match(T_RBRACKET, "]"); } else { // Add this as a known scalar // and generate its space in assembly id = addglob(Text, type, S_VARIABLE, 0, 1); genglobsym(id); } // Get the trailing semicolon semi(); } // // function_declaration: type identifier '(' ')' compound_statement ; // // Parse the declaration of a simplistic function. // The identifier has been scanned & we have the type struct ASTnode *function_declaration(int type) { struct ASTnode *tree, *finalstmt; int nameslot, endlabel; // Text now has the identifier's name. 
// Get a label-id for the end label, add the function // to the symbol table, and set the Functionid global // to the function's symbol-id endlabel = genlabel(); nameslot = addglob(Text, type, S_FUNCTION, endlabel, 0); Functionid = nameslot; // Scan in the parentheses lparen(); rparen(); // Get the AST tree for the compound statement tree = compound_statement(); // If the function type isn't P_VOID .. if (type != P_VOID) { // Error if no statements in the function if (tree == NULL) fatal("No statements in function with non-void type"); // Check that the last AST operation in the // compound statement was a return statement finalstmt = (tree->op == A_GLUE) ? tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); } // Return an A_FUNCTION node which has the function's nameslot // and the compound statement sub-tree return (mkastunary(A_FUNCTION, type, tree, nameslot)); } // Parse one or more global declarations, either // variables or functions void global_declarations(void) { struct ASTnode *tree; int type; while (1) { // We have to read past the type and identifier // to see either a '(' for a function declaration // or a ',' or ';' for a variable declaration. // Text is filled in by the ident() call. 
type = parse_type(); ident(); if (Token.token == T_LPAREN) { // Parse the function declaration and // generate the assembly code for it tree = function_declaration(type); if (O_dumpAST) { dumpAST(tree, NOLABEL, 0); fprintf(stdout, "\n\n"); } genAST(tree, NOLABEL, 0); } else { // Parse the global variable declaration var_declaration(type); } // Stop when we have reached EOF if (Token.token == T_EOF) break; } } ================================================ FILE: 19_Arrays_pt1/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c void reject_token(struct token *t); int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, int intvalue); struct ASTnode *mkastleaf(int op, int type, int intvalue); struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, int intvalue); void dumpAST(struct ASTnode *n, int label, int parentASTop); // gen.c int genlabel(void); int genAST(struct ASTnode *n, int reg, int parentASTop); void genpreamble(); void genpostamble(); void genfreeregs(); void genglobsym(int id); int genprimsize(int type); void genreturn(int reg, int id); // cg.c void freeall_registers(void); void cgpreamble(); void cgpostamble(); void cgfuncpreamble(int id); void cgfuncpostamble(int id); int cgloadint(int value, int type); int cgloadglob(int id); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdiv(int r1, int r2); int cgshlconst(int r, int val); int cgcall(int r, int id); int cgstorglob(int r, int id); void cgglobsym(int id); int cgcompare_and_set(int ASTop, int r1, int r2); int cgcompare_and_jump(int ASTop, int r1, int r2, int label); void cglabel(int l); void cgjump(int l); int cgwiden(int r, int oldtype, int newtype); int cgprimsize(int type); void cgreturn(int reg, int id); int cgaddress(int id); int cgderef(int r, int type); int 
cgstorderef(int r1, int r2, int type); // expr.c struct ASTnode *binexpr(int ptp); // stmt.c struct ASTnode *compound_statement(void); // misc.c void match(int t, char *what); void semi(void); void lbrace(void); void rbrace(void); void lparen(void); void rparen(void); void ident(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c int findglob(char *s); int addglob(char *name, int type, int stype, int endlabel, int size); // decl.c void var_declaration(int type); struct ASTnode *function_declaration(int type); void global_declarations(void); // types.c int inttype(int type); int parse_type(void); int pointer_to(int type); int value_at(int type); struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op); ================================================ FILE: 19_Arrays_pt1/defs.h ================================================ #include #include #include #include // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 #define TEXTLEN 512 // Length of symbols in input #define NSYMBOLS 1024 // Number of symbol table entries // Token types enum { T_EOF, // Operators T_ASSIGN, T_PLUS, T_MINUS, T_STAR, T_SLASH, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, // Type keywords T_VOID, T_CHAR, T_INT, T_LONG, // Structural tokens T_INTLIT, T_SEMI, T_IDENT, T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, T_LBRACKET, T_RBRACKET, T_AMPER, T_LOGAND, // Other keywords T_IF, T_ELSE, T_WHILE, T_FOR, T_RETURN }; // Token structure struct token { int token; // Token type, from the enum list above int intvalue; // For T_INTLIT, the integer value }; // AST node types. 
The first few line up // with the related tokens enum { A_ASSIGN= 1, A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_INTLIT, A_IDENT, A_GLUE, A_IF, A_WHILE, A_FUNCTION, A_WIDEN, A_RETURN, A_FUNCCALL, A_DEREF, A_ADDR, A_SCALE }; // Primitive types enum { P_NONE, P_VOID, P_CHAR, P_INT, P_LONG, P_VOIDPTR, P_CHARPTR, P_INTPTR, P_LONGPTR }; // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates int rvalue; // True if the node is an rvalue struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; union { // For A_INTLIT, the integer value int intvalue; // For A_IDENT, the symbol slot number int id; // For A_FUNCTION, the symbol slot number int size; // For A_SCALE, the size to scale by } v; // For A_FUNCCALL, the symbol slot number }; #define NOREG -1 // Use NOREG when the AST generation // functions have no register to return #define NOLABEL 0 // Use NOLABEL when we have no label to // pass to genAST() // Structural types enum { S_VARIABLE, S_FUNCTION, S_ARRAY }; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol int stype; // Structural type for the symbol int endlabel; // For S_FUNCTIONs, the end label int size; // Number of elements in the symbol }; ================================================ FILE: 19_Arrays_pt1/expr.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of expressions // Copyright (c) 2019 Warren Toomey, GPL3 // Parse a function call with a single expression // argument and return its AST static struct ASTnode *funccall(void) { struct ASTnode *tree; int id; // Check that the identifier has been defined as a function, // then make a leaf node for it. 
if ((id = findglob(Text)) == -1 || Gsym[id].stype != S_FUNCTION) { fatals("Undeclared function", Text); } // Get the '(' lparen(); // Parse the following expression tree = binexpr(0); // Build the function call AST node. Store the // function's return type as this node's type. // Also record the function's symbol-id tree = mkastunary(A_FUNCCALL, Gsym[id].type, tree, id); // Get the ')' rparen(); return (tree); } // Parse the index into an array and // return an AST tree for it static struct ASTnode *array_access(void) { struct ASTnode *left, *right; int id; // Check that the identifier has been defined as an array // then make a leaf node for it that points at the base if ((id = findglob(Text)) == -1 || Gsym[id].stype != S_ARRAY) { fatals("Undeclared array", Text); } left = mkastleaf(A_ADDR, Gsym[id].type, id); // Get the '[' scan(&Token); // Parse the following expression right = binexpr(0); // Get the ']' match(T_RBRACKET, "]"); // Ensure that this is of int type if (!inttype(right->type)) fatal("Array index is not of integer type"); // Scale the index by the size of the element's type right = modify_type(right, left->type, A_ADD); // Return an AST tree where the array's base has the offset // added to it, and dereference the element. Still an lvalue // at this point. left = mkastnode(A_ADD, Gsym[id].type, left, NULL, right, 0); left = mkastunary(A_DEREF, value_at(left->type), left, 0); return (left); } // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; int id; switch (Token.token) { case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. // Make it a P_CHAR if it's within the P_CHAR range if ((Token.intvalue) >= 0 && (Token.intvalue < 256)) n = mkastleaf(A_INTLIT, P_CHAR, Token.intvalue); else n = mkastleaf(A_INTLIT, P_INT, Token.intvalue); break; case T_IDENT: // This could be a variable, array index or a // function call. 
Scan in the next token to find out scan(&Token); // It's a '(', so a function call if (Token.token == T_LPAREN) return (funccall()); // It's a '[', so an array reference if (Token.token == T_LBRACKET) { return (array_access()); } // Not a function call, so reject the new token reject_token(&Token); // Check that the variable exists. id = findglob(Text); if (id == -1 || Gsym[id].stype != S_VARIABLE) fatals("Unknown variable", Text); // Make a leaf AST node for it n = mkastleaf(A_IDENT, Gsym[id].type, id); break; case T_LPAREN: // Beginning of a parenthesised expression, skip the '('. // Scan in the expression and the right parenthesis scan(&Token); n = binexpr(0); rparen(); return (n); default: fatald("Expecting a primary expression, got token", Token.token); } // Scan in the next token and return the leaf node scan(&Token); return (n); } // Convert a binary operator token into a binary AST operation. // We rely on a 1:1 mapping from token to AST operation static int binastop(int tokentype) { if (tokentype > T_EOF && tokentype < T_INTLIT) return (tokentype); fatald("Syntax error, token", tokentype); return (0); // Keep -Wall happy } // Return true if a token is right-associative, // false otherwise. static int rightassoc(int tokentype) { if (tokentype == T_ASSIGN) return (1); return (0); } // Operator precedence for each token. Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, // T_EOF, T_ASSIGN 20, 20, // T_PLUS, T_MINUS 30, 30, // T_STAR, T_SLASH 40, 40, // T_EQ, T_NE 50, 50, 50, 50 // T_LT, T_GT, T_LE, T_GE }; // Check that we have a binary operator and // return its precedence. 
static int op_precedence(int tokentype) { int prec; if (tokentype >= T_VOID) fatald("Token with no precedence in op_precedence:", tokentype); prec = OpPrec[tokentype]; if (prec == 0) fatald("Syntax error, token", tokentype); return (prec); } // prefix_expression: primary // | '*' prefix_expression // | '&' prefix_expression // ; // Parse a prefix expression and return // a sub-tree representing it. struct ASTnode *prefix(void) { struct ASTnode *tree; switch (Token.token) { case T_AMPER: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Ensure that it's an identifier if (tree->op != A_IDENT) fatal("& operator must be followed by an identifier"); // Now change the operator to A_ADDR and the type to // a pointer to the original type tree->op = A_ADDR; tree->type = pointer_to(tree->type); break; case T_STAR: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's either another deref or an // identifier if (tree->op != A_IDENT && tree->op != A_DEREF) fatal("* operator must be followed by an identifier or *"); // Prepend an A_DEREF operation to the tree tree = mkastunary(A_DEREF, value_at(tree->type), tree, 0); break; default: tree = primary(); } return (tree); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; struct ASTnode *ltemp, *rtemp; int ASTop; int tokentype; // Get the tree on the left. // Fetch the next token at the same time. 
left = prefix(); // If we hit a semicolon or ')', return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET) { left->rvalue = 1; return (left); } // While the precedence of this token is more than that of the // previous token precedence, or it's right associative and // equal to the previous token's precedence while ((op_precedence(tokentype) > ptp) || (rightassoc(tokentype) && op_precedence(tokentype) == ptp)) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Determine the operation to be performed on the sub-trees ASTop = binastop(tokentype); if (ASTop == A_ASSIGN) { // Assignment // Make the right tree into an rvalue right->rvalue = 1; // Ensure the right's type matches the left right = modify_type(right, left->type, 0); if (left == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree. However, switch // left and right around, so that the right expression's // code will be generated before the left expression ltemp = left; left = right; right = ltemp; } else { // We are not doing an assignment, so both trees should be rvalues // Convert both trees into rvalue if they are lvalue trees left->rvalue = 1; right->rvalue = 1; // Ensure the two types are compatible by trying // to modify each tree to match the other's type. ltemp = modify_type(left, right->type, ASTop); rtemp = modify_type(right, left->type, ASTop); if (ltemp == NULL && rtemp == NULL) fatal("Incompatible types in binary expression"); if (ltemp != NULL) left = ltemp; if (rtemp != NULL) right = rtemp; } // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. left = mkastnode(binastop(tokentype), left->type, left, NULL, right, 0); // Update the details of the current token. 
// If we hit a semicolon, ')' or ']', return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET) { left->rvalue = 1; return (left); } } // Return the tree we have when the precedence // is the same or lower left->rvalue = 1; return (left); } ================================================ FILE: 19_Arrays_pt1/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number int genlabel(void) { static int id = 1; return (id++); } // Generate the code for an IF statement // and an optional ELSE clause static int genIF(struct ASTnode *n) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. // When there is no ELSE clause, Lfalse _is_ // the ending label! Lfalse = genlabel(); if (n->right) Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. genAST(n->left, Lfalse, n->op); genfreeregs(); // Generate the true compound statement genAST(n->mid, NOLABEL, n->op); genfreeregs(); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOLABEL, n->op); genfreeregs(); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = genlabel(); Lend = genlabel(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. 
genAST(n->left, Lend, n->op); genfreeregs(); // Generate the compound statement for the body genAST(n->right, NOLABEL, n->op); genfreeregs(); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Given an AST, an optional label, and the AST op // of the parent, generate assembly code recursively. // Return the register id with the tree's final value. int genAST(struct ASTnode *n, int label, int parentASTop) { int leftreg, rightreg; // We have some specific AST node handling at the top // so that we don't evaluate the child sub-trees immediately switch (n->op) { case A_IF: return (genIF(n)); case A_WHILE: return (genWHILE(n)); case A_GLUE: // Do each child statement, and free the // registers after each child genAST(n->left, NOLABEL, n->op); genfreeregs(); genAST(n->right, NOLABEL, n->op); genfreeregs(); return (NOREG); case A_FUNCTION: // Generate the function's preamble before the code // in the child sub-tree cgfuncpreamble(n->v.id); genAST(n->left, NOLABEL, n->op); cgfuncpostamble(n->v.id); return (NOREG); } // General AST node handling below // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, NOLABEL, n->op); if (n->right) rightreg = genAST(n->right, NOLABEL, n->op); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdiv(leftreg, rightreg)); case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, compare registers // and set one to 1 or 0 based on the comparison. 
if (parentASTop == A_IF || parentASTop == A_WHILE) return (cgcompare_and_jump(n->op, leftreg, rightreg, label)); else return (cgcompare_and_set(n->op, leftreg, rightreg)); case A_INTLIT: return (cgloadint(n->v.intvalue, n->type)); case A_IDENT: // Load our value if we are an rvalue // or we are being dereferenced if (n->rvalue || parentASTop== A_DEREF) return (cgloadglob(n->v.id)); else return (NOREG); case A_ASSIGN: // Are we assigning to an identifier or through a pointer? switch (n->right->op) { case A_IDENT: return (cgstorglob(leftreg, n->right->v.id)); case A_DEREF: return (cgstorderef(leftreg, rightreg, n->right->type)); default: fatald("Can't A_ASSIGN in genAST(), op", n->op); } case A_WIDEN: // Widen the child's type to the parent's type return (cgwiden(leftreg, n->left->type, n->type)); case A_RETURN: cgreturn(leftreg, Functionid); return (NOREG); case A_FUNCCALL: return (cgcall(leftreg, n->v.id)); case A_ADDR: return (cgaddress(n->v.id)); case A_DEREF: // If we are an rvalue, dereference to get the value we point at, // otherwise leave it for A_ASSIGN to store through the pointer if (n->rvalue) return (cgderef(leftreg, n->left->type)); else return (leftreg); case A_SCALE: // Small optimisation: use shift if the // scale value is a known power of two switch (n->v.size) { case 2: return(cgshlconst(leftreg, 1)); case 4: return(cgshlconst(leftreg, 2)); case 8: return(cgshlconst(leftreg, 3)); default: // Load a register with the size and // multiply the leftreg by this size rightreg= cgloadint(n->v.size, P_INT); return (cgmul(leftreg, rightreg)); } default: fatald("Unknown AST operator", n->op); } return (NOREG); // Keep -Wall happy } void genpreamble() { cgpreamble(); } void genpostamble() { cgpostamble(); } void genfreeregs() { freeall_registers(); } void genglobsym(int id) { cgglobsym(id); } int genprimsize(int type) { return (cgprimsize(type)); } ================================================ FILE: 19_Arrays_pt1/lib/printint.c 
================================================ #include void printint(long x) { printf("%ld\n", x); } ================================================ FILE: 19_Arrays_pt1/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Initialise global variables static void init() { Line = 1; Putback = '\n'; Globs = 0; O_dumpAST = 0; } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s [-T] infile\n", prog); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. int main(int argc, char *argv[]) { int i; // Initialise the globals init(); // Scan for command-line options for (i=1; i= argc) usage(argv[0]); // Open up the input file if ((Infile = fopen(argv[i], "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", argv[i], strerror(errno)); exit(1); } // Create the output file if ((Outfile = fopen("out.s", "w")) == NULL) { fprintf(stderr, "Unable to create out.s: %s\n", strerror(errno)); exit(1); } // For now, ensure that void printint() is defined addglob("printint", P_CHAR, S_FUNCTION, 0, 0); scan(&Token); // Get the first token from the input genpreamble(); // Output the preamble global_declarations(); // Parse the global declarations genpostamble(); // Output the postamble fclose(Outfile); // Close the output file and exit return (0); } ================================================ FILE: 19_Arrays_pt1/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current token is t, // and fetch the next token. 
Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d\n", s, Line); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d\n", s1, s2, Line); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d\n", s, d, Line); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d\n", s, c, Line); exit(1); } ================================================ FILE: 19_Arrays_pt1/scan.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { char *p; p = strchr(s, c); return (p ? p - s : -1); } // Get the next character from the input file. static int next(void) { int c; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return (c); } c = fgetc(Infile); // Read from input file if ('\n' == c) Line++; // Increment line count return (c); } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. 
static int skip(void) {
  int ch;

  // Keep reading until something other than
  // space, tab, newline, CR or form feed turns up
  do {
    ch = next();
  } while (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' || ch == '\f');

  return (ch);
}

// Read a decimal integer literal whose first character is c.
// The first character that is not a digit is put back.
// Returns the accumulated value.
static int scanint(int c) {
  int value = 0;
  int digit;

  // chrpos() yields the digit's numeric value, or a
  // negative number once c is no longer a digit
  for (digit = chrpos("0123456789", c); digit >= 0;
       digit = chrpos("0123456789", c)) {
    value = value * 10 + digit;
    c = next();
  }

  // Leave the terminating character for the next read
  putback(c);
  return (value);
}

// Read an identifier whose first character is c into buf[],
// which has room for lim bytes including the terminating NUL.
// Identifiers are made of letters, digits and underscores.
// Returns the number of characters stored.
static int scanident(int c, char *buf, int lim) {
  int len = 0;

  while (isalnum(c) || c == '_') {
    // Refuse an identifier that would fill the
    // buffer, otherwise append the character
    if (len == lim - 1)
      fatal("Identifier too long");
    if (len < lim - 1)
      buf[len++] = c;
    c = next();
  }

  // The terminating character belongs to the next
  // token: put it back, then NUL-terminate buf[]
  putback(c);
  buf[len] = '\0';
  return (len);
}

// Given a word from the input, return the matching
// keyword token number or 0 if it's not a keyword.
// Switch on the first letter so that we don't have
// to waste time strcmp()ing against all the keywords.
static int keyword(char *s) { switch (*s) { case 'c': if (!strcmp(s, "char")) return (T_CHAR); break; case 'e': if (!strcmp(s, "else")) return (T_ELSE); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'l': if (!strcmp(s, "long")) return (T_LONG); break; case 'r': if (!strcmp(s, "return")) return (T_RETURN); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; case 'v': if (!strcmp(s, "void")) return (T_VOID); break; } return (0); } // A pointer to a rejected token static struct token *Rejtoken = NULL; // Reject the token that we just scanned void reject_token(struct token *t) { if (Rejtoken != NULL) fatal("Can't reject token twice"); Rejtoken = t; } // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. int scan(struct token *t) { int c, tokentype; // If we have any rejected token, return it if (Rejtoken != NULL) { t = Rejtoken; Rejtoken = NULL; return (1); } // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': t->token = T_PLUS; break; case '-': t->token = T_MINUS; break; case '*': t->token = T_STAR; break; case '/': t->token = T_SLASH; break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case '[': t->token = T_LBRACKET; break; case ']': t->token = T_RBRACKET; break; case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { fatalc("Unrecognised character", c); } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else { putback(c); t->token = T_LT; } break; case '>': if ((c = next()) == '=') { t->token = T_GE; } else { putback(c); t->token 
= T_GT; } break; case '&': if ((c = next()) == '&') { t->token = T_LOGAND; } else { putback(c); t->token = T_AMPER; } break; default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if ((tokentype = keyword(Text)) != 0) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token return (1); } ================================================ FILE: 19_Arrays_pt1/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // Prototypes static struct ASTnode *single_statement(void); // compound_statement: // empty, i.e. no statement // | statement // | statement statements // ; // // statement: declaration // | expression_statement // | function_call // | if_statement // | while_statement // | for_statement // | return_statement // ; // if_statement: if_head // | if_head 'else' compound_statement // ; // // if_head: 'if' '(' true_false_expression ')' compound_statement ; // // Parse an IF statement including any // optional ELSE clause and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Ensure // the tree's operation is a comparison. 
condAST = binexpr(0);
  // The comparison ops are tested as one contiguous range, A_EQ to A_GE
  if (condAST->op < A_EQ || condAST->op > A_GE)
    fatal("Bad comparison operator");
  rparen();

  // Get the AST for the compound statement
  trueAST = compound_statement();

  // If we have an 'else', skip it
  // and get the AST for the compound statement
  if (Token.token == T_ELSE) {
    scan(&Token);
    falseAST = compound_statement();
  }
  // Build and return the AST for this statement:
  // condition on the left, true body in the middle and
  // the else body (NULL when absent) on the right
  return (mkastnode(A_IF, P_NONE, condAST, trueAST, falseAST, 0));
}


// while_statement: 'while' '(' true_false_expression ')' compound_statement  ;
//
// Parse a WHILE statement and return its AST
static struct ASTnode *while_statement(void) {
  struct ASTnode *condAST, *bodyAST;

  // Ensure we have 'while' '('
  match(T_WHILE, "while");
  lparen();

  // Parse the following expression
  // and the ')' following. Ensure
  // the tree's operation is a comparison.
  condAST = binexpr(0);
  if (condAST->op < A_EQ || condAST->op > A_GE)
    fatal("Bad comparison operator");
  rparen();

  // Get the AST for the compound statement
  bodyAST = compound_statement();

  // Build and return the AST for this statement:
  // condition on the left, body on the right, no middle child
  return (mkastnode(A_WHILE, P_NONE, condAST, NULL, bodyAST, 0));
}

// for_statement: 'for' '(' preop_statement ';'
//                          true_false_expression ';'
//                          postop_statement ')' compound_statement  ;
//
// preop_statement:  statement          (for now)
// postop_statement: statement          (for now)
//
// Parse a FOR statement and return its AST.
// The loop is rewritten in terms of existing node types:
//
//          A_GLUE
//         /      \
//     preop      A_WHILE
//               /       \
//           cond        A_GLUE
//                      /      \
//                   body      postop
static struct ASTnode *for_statement(void) {
  struct ASTnode *condAST, *bodyAST;
  struct ASTnode *preopAST, *postopAST;
  struct ASTnode *tree;

  // Ensure we have 'for' '('
  match(T_FOR, "for");
  lparen();

  // Get the pre_op statement and the ';'
  preopAST = single_statement();
  semi();

  // Get the condition and the ';'
  condAST = binexpr(0);
  if (condAST->op < A_EQ || condAST->op > A_GE)
    fatal("Bad comparison operator");
  semi();

  // Get the post_op statement and the ')'
  postopAST = single_statement();
  rparen();

  // Get the compound statement which is the body
  bodyAST = compound_statement();

  // For now, all four sub-trees have to be non-NULL.
  // Later on, we'll change the semantics for when some are missing

  // Glue the compound statement and the postop tree
  tree = mkastnode(A_GLUE, P_NONE, bodyAST, NULL, postopAST, 0);

  // Make a WHILE loop with the condition and this new body
  tree = mkastnode(A_WHILE, P_NONE, condAST, NULL, tree, 0);

  // And glue the preop tree to the A_WHILE tree
  return (mkastnode(A_GLUE, P_NONE, preopAST, NULL, tree, 0));
}

// return_statement: 'return' '(' expression ')'  ;
//
// Parse a return statement and return its AST.
// The enclosing function is looked up through the global Functionid.
static struct ASTnode *return_statement(void) {
  struct ASTnode *tree;

  // Can't return a value if function returns P_VOID
  if (Gsym[Functionid].type == P_VOID)
    fatal("Can't return from a void function");

  // Ensure we have 'return' '('
  match(T_RETURN, "return");
  lparen();

  // Parse the following expression
  tree = binexpr(0);

  // Ensure this is compatible with the function's type
  tree = modify_type(tree, Gsym[Functionid].type, 0);
  if (tree == NULL)
    fatal("Incompatible type to return");

  // Add on the A_RETURN node
  tree = mkastunary(A_RETURN, P_NONE, tree, 0);

  // Get the ')'
  rparen();
  return (tree);
}

// Parse a single statement and return its AST.
// NULL is returned for a variable declaration,
// which produces no AST.
static struct ASTnode *single_statement(void) {
  int type;

  switch (Token.token) {
    case T_CHAR:
    case T_INT:
    case T_LONG:

      // The beginning of a variable declaration.
      // Parse the type and get the identifier.
      // Then parse the rest of the declaration.
      // XXX: These are globals at present.
      type = parse_type();
      ident();
      var_declaration(type);
      return (NULL);            // No AST generated here
    case T_IF:
      return (if_statement());
    case T_WHILE:
      return (while_statement());
    case T_FOR:
      return (for_statement());
    case T_RETURN:
      return (return_statement());
    default:
      // For now, see if this is an expression.
      // This catches assignment statements.
return (binexpr(0)); } return (NULL); // Keep -Wall happy } // Parse a compound statement // and return its AST struct ASTnode *compound_statement(void) { struct ASTnode *left = NULL; struct ASTnode *tree; // Require a left curly bracket lbrace(); while (1) { // Parse a single statement tree = single_statement(); // Some statements must be followed by a semicolon if (tree != NULL && (tree->op == A_ASSIGN || tree->op == A_RETURN || tree->op == A_FUNCCALL)) semi(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, P_NONE, left, NULL, tree, 0); } // When we hit a right curly bracket, // skip past it and return the AST if (Token.token == T_RBRACE) { rbrace(); return (left); } } } ================================================ FILE: 19_Arrays_pt1/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 // Determine if the symbol s is in the global symbol table. // Return its slot position or -1 if not found. int findglob(char *s) { int i; for (i = 0; i < Globs; i++) { if (*s == *Gsym[i].name && !strcmp(s, Gsym[i].name)) return (i); } return (-1); } // Get the position of a new global symbol slot, or die // if we've run out of positions. static int newglob(void) { int p; if ((p = Globs++) >= NSYMBOLS) fatal("Too many global symbols"); return (p); } // Add a global symbol to the symbol table. Set up its: // + type: char, int etc. // + structural type: var, function, array etc. 
// + size: number of elements // + endlabel: if this is a function // Return the slot number in the symbol table int addglob(char *name, int type, int stype, int endlabel, int size) { int y; // If this is already in the symbol table, return the existing slot if ((y = findglob(name)) != -1) return (y); // Otherwise get a new slot, fill it in and // return the slot number y = newglob(); Gsym[y].name = strdup(name); Gsym[y].type = type; Gsym[y].stype = stype; Gsym[y].endlabel = endlabel; Gsym[y].size = size; return (y); } ================================================ FILE: 19_Arrays_pt1/tests/input01.c ================================================ void main() { printint(12 * 3); printint(18 - 2 * 4); printint(1 + 2 + 9 - 5/2 + 3*5); } ================================================ FILE: 19_Arrays_pt1/tests/input02.c ================================================ void main() { int fred; int jim; fred= 5; jim= 12; printint(fred + jim); } ================================================ FILE: 19_Arrays_pt1/tests/input03.c ================================================ void main() { int x; x= 1; printint(x); x= x + 1; printint(x); x= x + 1; printint(x); x= x + 1; printint(x); x= x + 1; printint(x); } ================================================ FILE: 19_Arrays_pt1/tests/input04.c ================================================ void main() { int x; x= 7 < 9; printint(x); x= 7 <= 9; printint(x); x= 7 != 9; printint(x); x= 7 == 7; printint(x); x= 7 >= 7; printint(x); x= 7 <= 7; printint(x); x= 9 > 7; printint(x); x= 9 >= 7; printint(x); x= 9 != 7; printint(x); } ================================================ FILE: 19_Arrays_pt1/tests/input05.c ================================================ void main() { int i; int j; i=6; j=12; if (i < j) { printint(i); } else { printint(j); } } ================================================ FILE: 19_Arrays_pt1/tests/input06.c ================================================ void main() { int i; i=1; while (i <= 10) { 
printint(i); i= i + 1; } } ================================================ FILE: 19_Arrays_pt1/tests/input07.c ================================================ void main() { int i; for (i= 1; i <= 10; i= i + 1) { printint(i); } } ================================================ FILE: 19_Arrays_pt1/tests/input08.c ================================================ void main() { int i; for (i= 1; i <= 10; i= i + 1) { printint(i); } } ================================================ FILE: 19_Arrays_pt1/tests/input09.c ================================================ void main() { int i; for (i= 1; i <= 10; i= i + 1) { printint(i); } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { printint(2 * b - a); } } ================================================ FILE: 19_Arrays_pt1/tests/input10.c ================================================ void main() { int i; char j; j= 20; printint(j); i= 10; printint(i); for (i= 1; i <= 5; i= i + 1) { printint(i); } for (j= 253; j != 2; j= j + 1) { printint(j); } } ================================================ FILE: 19_Arrays_pt1/tests/input11.c ================================================ int main() { int i; char j; long k; i= 10; printint(i); j= 20; printint(j); k= 30; printint(k); for (i= 1; i <= 5; i= i + 1) { printint(i); } for (j= 253; j != 4; j= j + 1) { printint(j); } for (k= 1; k <= 5; k= k + 1) { printint(k); } return(i); printint(12345); return(3); } ================================================ FILE: 19_Arrays_pt1/tests/input12.c ================================================ int fred() { return(5); } void main() { int x; x= fred(2); printint(x); } ================================================ FILE: 19_Arrays_pt1/tests/input13.c ================================================ int fred() { return(56); } void main() { int dummy; int result; dummy= printint(23); result= fred(10); dummy= printint(result); } ================================================ FILE: 19_Arrays_pt1/tests/input14.c 
================================================ int fred() { return(20); } int main() { int result; printint(10); result= fred(15); printint(result); printint(fred(15)+10); return(0); } ================================================ FILE: 19_Arrays_pt1/tests/input15.c ================================================ int main() { char a; char *b; char c; int d; int *e; int f; a= 18; printint(a); b= &a; c= *b; printint(c); d= 12; printint(d); e= &d; f= *e; printint(f); return(0); } ================================================ FILE: 19_Arrays_pt1/tests/input16.c ================================================ int c; int d; int *e; int f; int main() { c= 12; d=18; printint(c); e= &c + 1; f= *e; printint(f); return(0); } ================================================ FILE: 19_Arrays_pt1/tests/input17.c ================================================ int main() { char a; char *b; int d; int *e; b= &a; *b= 19; printint(a); e= &d; *e= 12; printint(d); return(0); } ================================================ FILE: 19_Arrays_pt1/tests/input18.c ================================================ int main() { int a; int b; a= b= 34; printint(a); printint(b); return(0); } ================================================ FILE: 19_Arrays_pt1/tests/input18a.c ================================================ int a; int *b; char c; char *d; int main() { b= &a; *b= 15; printint(a); d= &c; *d= 16; printint(c); return(0); } ================================================ FILE: 19_Arrays_pt1/tests/input19.c ================================================ int a; int b; int c; int d; int e; int main() { a= 2; b= 4; c= 3; d= 2; e= (a+b) * (c+d); printint(e); return(0); } ================================================ FILE: 19_Arrays_pt1/tests/input20.c ================================================ int a; int b[25]; int main() { b[3]= 12; a= b[3]; printint(a); return(0); } ================================================ FILE: 19_Arrays_pt1/tests/mktests 
================================================ #!/bin/sh # Make the output files for each test for i in input*c do if [ ! -f "out.$i" ] then cc -o out $i ../lib/printint.c ./out > out.$i rm -f out fi done ================================================ FILE: 19_Arrays_pt1/tests/out.input01.c ================================================ 36 10 25 ================================================ FILE: 19_Arrays_pt1/tests/out.input02.c ================================================ 17 ================================================ FILE: 19_Arrays_pt1/tests/out.input03.c ================================================ 1 2 3 4 5 ================================================ FILE: 19_Arrays_pt1/tests/out.input04.c ================================================ 1 1 1 1 1 1 1 1 1 ================================================ FILE: 19_Arrays_pt1/tests/out.input05.c ================================================ 6 ================================================ FILE: 19_Arrays_pt1/tests/out.input06.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 19_Arrays_pt1/tests/out.input07.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 19_Arrays_pt1/tests/out.input08.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 19_Arrays_pt1/tests/out.input09.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 19_Arrays_pt1/tests/out.input10.c ================================================ 20 10 1 2 3 4 5 253 254 255 0 1 ================================================ FILE: 19_Arrays_pt1/tests/out.input11.c ================================================ 10 20 30 1 2 3 4 5 253 254 255 0 1 2 3 1 2 3 4 5 ================================================ FILE: 
19_Arrays_pt1/tests/out.input12.c ================================================ 5 ================================================ FILE: 19_Arrays_pt1/tests/out.input13.c ================================================ 23 56 ================================================ FILE: 19_Arrays_pt1/tests/out.input14.c ================================================ 10 20 30 ================================================ FILE: 19_Arrays_pt1/tests/out.input15.c ================================================ 18 18 12 12 ================================================ FILE: 19_Arrays_pt1/tests/out.input16.c ================================================ 12 18 ================================================ FILE: 19_Arrays_pt1/tests/out.input17.c ================================================ 19 12 ================================================ FILE: 19_Arrays_pt1/tests/out.input18.c ================================================ 34 34 ================================================ FILE: 19_Arrays_pt1/tests/out.input18a.c ================================================ 15 16 ================================================ FILE: 19_Arrays_pt1/tests/out.input19.c ================================================ 30 ================================================ FILE: 19_Arrays_pt1/tests/out.input20.c ================================================ 12 ================================================ FILE: 19_Arrays_pt1/tests/runtests ================================================ #!/bin/sh # Run each test and compare # against known good output if [ ! -f ../comp1 ] then echo "Need to build ../comp1 first!"; exit 1 fi for i in input* do if [ ! -f "out.$i" ] then echo "Can't run test on $i, no output file!" else echo -n $i ../comp1 $i cc -o out out.s ../lib/printint.c ./out > trial.$i cmp -s "out.$i" "trial.$i" if [ "$?" 
-eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo else echo ": OK" fi rm -f out out.s "trial.$i" fi done ================================================ FILE: 19_Arrays_pt1/tests/runtestsn ================================================ #!/bin/sh # Run each test and compare # against known good output if [ ! -f ../compn ] then echo "Need to build ../compn first!"; exit 1 fi for i in input* do if [ ! -f "out.$i" ] then echo "Can't run test on $i, no output file!" else echo -n $i ../compn $i nasm -f elf64 out.s cc -no-pie -Wall -o out out.o ../lib/printint.c ./out > trial.$i cmp -s "out.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo else echo ": OK" fi rm -f out out.o out.s "trial.$i" fi done ================================================ FILE: 19_Arrays_pt1/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->type = type; n->left = left; n->mid = mid; n->right = right; n->v.intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, int intvalue) { return (mkastnode(op, type, NULL, NULL, NULL, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, int intvalue) { return (mkastnode(op, type, left, NULL, NULL, intvalue)); } // Generate and return a new label number // just for AST dumping purposes static int gendumplabel(void) { static int id = 1; return (id++); } // Given an AST tree, print it out and follow 
the // traversal of the tree that genAST() follows void dumpAST(struct ASTnode *n, int label, int level) { int Lfalse, Lstart, Lend; switch (n->op) { case A_IF: Lfalse = gendumplabel(); for (int i=0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_IF"); if (n->right) { Lend = gendumplabel(); fprintf(stdout, ", end L%d", Lend); } fprintf(stdout, "\n"); dumpAST(n->left, Lfalse, level+2); dumpAST(n->mid, NOLABEL, level+2); if (n->right) dumpAST(n->right, NOLABEL, level+2); return; case A_WHILE: Lstart = gendumplabel(); for (int i=0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_WHILE, start L%d\n", Lstart); Lend = gendumplabel(); dumpAST(n->left, Lend, level+2); dumpAST(n->right, NOLABEL, level+2); return; } // Reset level to -2 for A_GLUE if (n->op==A_GLUE) level= -2; // General AST node handling if (n->left) dumpAST(n->left, NOLABEL, level+2); if (n->right) dumpAST(n->right, NOLABEL, level+2); for (int i=0; i < level; i++) fprintf(stdout, " "); switch (n->op) { case A_GLUE: fprintf(stdout, "\n\n"); return; case A_FUNCTION: fprintf(stdout, "A_FUNCTION %s\n", Gsym[n->v.id].name); return; case A_ADD: fprintf(stdout, "A_ADD\n"); return; case A_SUBTRACT: fprintf(stdout, "A_SUBTRACT\n"); return; case A_MULTIPLY: fprintf(stdout, "A_MULTIPLY\n"); return; case A_DIVIDE: fprintf(stdout, "A_DIVIDE\n"); return; case A_EQ: fprintf(stdout, "A_EQ\n"); return; case A_NE: fprintf(stdout, "A_NE\n"); return; case A_LT: fprintf(stdout, "A_LE\n"); return; case A_GT: fprintf(stdout, "A_GT\n"); return; case A_LE: fprintf(stdout, "A_LE\n"); return; case A_GE: fprintf(stdout, "A_GE\n"); return; case A_INTLIT: fprintf(stdout, "A_INTLIT %d\n", n->v.intvalue); return; case A_IDENT: if (n->rvalue) fprintf(stdout, "A_IDENT rval %s\n", Gsym[n->v.id].name); else fprintf(stdout, "A_IDENT %s\n", Gsym[n->v.id].name); return; case A_ASSIGN: fprintf(stdout, "A_ASSIGN\n"); return; case A_WIDEN: fprintf(stdout, "A_WIDEN\n"); return; case A_RETURN: fprintf(stdout, "A_RETURN\n"); 
return; case A_FUNCCALL: fprintf(stdout, "A_FUNCCALL %s\n", Gsym[n->v.id].name); return; case A_ADDR: fprintf(stdout, "A_ADDR %s\n", Gsym[n->v.id].name); return; case A_DEREF: if (n->rvalue) fprintf(stdout, "A_DEREF rval\n"); else fprintf(stdout, "A_DEREF\n"); return; case A_SCALE: fprintf(stdout, "A_SCALE %d\n", n->v.size); return; default: fatald("Unknown dumpAST operator", n->op); } } ================================================ FILE: 19_Arrays_pt1/types.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Types and type handling // Copyright (c) 2019 Warren Toomey, GPL3 // Return true if a type is an int type // of any size, false otherwise int inttype(int type) { if (type == P_CHAR || type == P_INT || type == P_LONG) return (1); return (0); } // Return true if a type is of pointer type int ptrtype(int type) { if (type == P_VOIDPTR || type == P_CHARPTR || type == P_INTPTR || type == P_LONGPTR) return (1); return (0); } // Given a primitive type, return // the type which is a pointer to it int pointer_to(int type) { int newtype; switch (type) { case P_VOID: newtype = P_VOIDPTR; break; case P_CHAR: newtype = P_CHARPTR; break; case P_INT: newtype = P_INTPTR; break; case P_LONG: newtype = P_LONGPTR; break; default: fatald("Unrecognised in pointer_to: type", type); } return (newtype); } // Given a primitive pointer type, return // the type which it points to int value_at(int type) { int newtype; switch (type) { case P_VOIDPTR: newtype = P_VOID; break; case P_CHARPTR: newtype = P_CHAR; break; case P_INTPTR: newtype = P_INT; break; case P_LONGPTR: newtype = P_LONG; break; default: fatald("Unrecognised in value_at: type", type); } return (newtype); } // Given an AST tree and a type which we want it to become, // possibly modify the tree by widening or scaling so that // it is compatible with this type. 
Return the original tree // if no changes occurred, a modified tree, or NULL if the // tree is not compatible with the given type. // If this will be part of a binary operation, the AST op is not zero. struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op) { int ltype; int lsize, rsize; ltype = tree->type; // Compare scalar int types if (inttype(ltype) && inttype(rtype)) { // Both types same, nothing to do if (ltype == rtype) return (tree); // Get the sizes for each type lsize = genprimsize(ltype); rsize = genprimsize(rtype); // Tree's size is too big if (lsize > rsize) return (NULL); // Widen to the right if (rsize > lsize) return (mkastunary(A_WIDEN, rtype, tree, 0)); } // For pointers on the left if (ptrtype(ltype)) { // OK is same type on right and not doing a binary op if (op == 0 && ltype == rtype) return (tree); } // We can scale only on A_ADD or A_SUBTRACT operation if (op == A_ADD || op == A_SUBTRACT) { // Left is int type, right is pointer type and the size // of the original type is >1: scale the left if (inttype(ltype) && ptrtype(rtype)) { rsize = genprimsize(value_at(rtype)); if (rsize > 1) return (mkastunary(A_SCALE, rtype, tree, rsize)); } } // If we get here, the types are not compatible return (NULL); } ================================================ FILE: 20_Char_Str_Literals/Makefile ================================================ SRCS= cg.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c \ sym.c tree.c types.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c \ sym.c tree.c types.c ARMSRCS= cg_arm.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c \ sym.c tree.c types.c comp1: $(SRCS) cc -o comp1 -g -Wall $(SRCS) compn: $(SRCN) cc -o compn -g -Wall $(SRCN) comp1arm: $(ARMSRCS) cc -o comp1arm -g -Wall $(ARMSRCS) cp comp1arm comp1 clean: rm -f comp1 comp1arm compn *.o *.s out test: comp1 tests/runtests (cd tests; chmod +x runtests; ./runtests) armtest: comp1arm tests/runtests (cd tests; chmod +x runtests; ./runtests) 
testn: compn tests/runtestsn (cd tests; chmod +x runtestsn; ./runtestsn) test21: comp1 tests/input21.c lib/printint.c ./comp1 tests/input21.c cc -o out out.s lib/printint.c ./out armtest21: comp1arm tests/input21.c lib/printint.c ./comp1 tests/input21.c cc -o out out.s lib/printint.c ./out test21n: compn tests/input21.c lib/printint.c ./compn tests/input21.c nasm -f elf64 out.s cc -no-pie -o out lib/printint.c out.o ./out ================================================ FILE: 20_Char_Str_Literals/Readme.md ================================================ # Part 20: Character and String Literals I've been wanting to print out "Hello world" with our compiler for quite a while so, now that we have pointers and arrays, it's time in this part of the journey to add character and string literals. These are, of course, literal values (i.e. immediately visible). Character literals have the definition of a single character surrounded by single quotes. String literals have a sequence of characters surrounded by double quotes. Now, seriously, character and string literals in C are just completely crazy. I'm only going to implement the most obvious backslashed-escaped characters. I'm also going to borrow the character and string literal scanning code from SubC to make my life easier. This part of the journey is going to be short, but it will end with "Hello world". ## A New Token We need a single new token for our language: T_STRLIT. This is very similar to T_IDENT in that the text associated with the token is stored in the global `Text` and not in the token structure itself. ## Scanning Character Literals A character literal starts with a single quote, is followed by the definition of a single character and ends with another single quote. 
The code to interpret that single character is complicated, so let's modify `scan()` in `scan.c` to call it: ```c case '\'': // If it's a quote, scan in the // literal character value and // the trailing quote t->intvalue = scanch(); t->token = T_INTLIT; if (next() != '\'') fatal("Expected '\\'' at end of char literal"); break; ``` We can treat a character literal as an integer literal of type `char`; that is, assuming that we limit ourselves to ASCII and don't try to deal with Unicode. That's what I'm doing here. ### The Code for `scanch()` The code for the `scanch()` function comes from SubC with a few simplifications: ```c // Return the next character from a character // or string literal static int scanch(void) { int c; // Get the next input character and interpret // metacharacters that start with a backslash c = next(); if (c == '\\') { switch (c = next()) { case 'a': return '\a'; case 'b': return '\b'; case 'f': return '\f'; case 'n': return '\n'; case 'r': return '\r'; case 't': return '\t'; case 'v': return '\v'; case '\\': return '\\'; case '"': return '"' ; case '\'': return '\''; default: fatalc("unknown escape sequence", c); } } return (c); // Just an ordinary old character! } ``` The code recognises most of the escaped character sequences, but it doesn't try to recognise octal character codings or other difficult sequences. ## Scanning String Literals A string literal starts with a double quote, is followed by zero or more characters and ends with another double quote. As with character literals, we need to call a separate function in `scan()`: ```c case '"': // Scan in a literal string scanstr(Text); t->token= T_STRLIT; break; ``` We create one of the new T_STRLIT tokens and scan the string into the `Text` buffer. Here is the code for `scanstr()`: ```c // Scan in a string literal from the input file, // and store it in buf[]. Return the length of // the string.
static int scanstr(char *buf) { int i, c; // Loop while we have enough buffer space for (i=0; i<TEXTLEN-1; i++) { // Get the next char and append to buf // Return when we hit the ending double quote if ((c = scanch()) == '"') { buf[i] = 0; return(i); } buf[i] = c; } // Ran out of buf[] space fatal("String literal too long"); return(0); } ``` In the generic code generator, a string literal's AST leaf node is turned into a load of the string's base address: ```c case A_STRLIT: return (cgloadglobstr(n->v.id)); ``` ## Generating the x86-64 Assembly Output We finally get to the actual new assembly output functions. There are two: one to generate the string's storage and the other to load the base address of the string. ```c // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr= strvalue; *cptr; cptr++) { fprintf(Outfile, "\t.byte\t%d\n", *cptr); } fprintf(Outfile, "\t.byte\t0\n"); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int id) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tleaq\tL%d(\%%rip), %s\n", id, reglist[r]); return (r); } ``` Going back to our example: ```c char *s; s= "Hello world"; ``` The assembly output for this is: ``` L2: .byte 72 # Anonymous string .byte 101 .byte 108 .byte 108 .byte 111 .byte 32 .byte 119 .byte 111 .byte 114 .byte 108 .byte 100 .byte 0 ... leaq L2(%rip), %r8 # Load L2's address movq %r8, s(%rip) # and store in s ``` ## Miscellaneous Changes When writing the test program for this part of the journey, I uncovered another bug in the existing code. When scaling an integer value to match the type size that a pointer points to, I forgot to do nothing when the scale was 1. The code in `modify_type()` in `types.c` is now: ```c // Left is int type, right is pointer type and the size // of the original type is >1: scale the left if (inttype(ltype) && ptrtype(rtype)) { rsize = genprimsize(value_at(rtype)); if (rsize > 1) return (mkastunary(A_SCALE, rtype, tree, rsize)); else return (tree); // Size 1, no need to scale } ``` I'd left the `return (tree)` out, thus returning a NULL tree when trying to scale `char *` pointers.
## Conclusion and What's Next I'm so glad that we can now output text: ``` $ make test ./comp1 tests/input21.c cc -o out out.s lib/printint.c ./out 10 Hello world ``` Most of the work this time was extending our lexical scanner to deal with the character and string literal delimiters and the escaping of characters inside them. But there was some work done on the code generator, too. In the next part of our compiler writing journey, we'll add some more binary operators to the language that the compiler recognises. [Next step](../21_More_Operators/Readme.md) ================================================ FILE: 20_Char_Str_Literals/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. // We need a list of byte and doubleword registers, too static int freereg[4]; static char *reglist[4] = { "%r8", "%r9", "%r10", "%r11" }; static char *breglist[4] = { "%r8b", "%r9b", "%r10b", "%r11b" }; static char *dreglist[4] = { "%r8d", "%r9d", "%r10d", "%r11d" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Gsym[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, @function\n" "%s:\n" "\tpushq\t%%rbp\n" "\tmovq\t%%rsp, %%rbp\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Gsym[id].endlabel); fputs("\tpopq %rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]); return (r); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Print out the code to initialise it switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzbq\t%s(%%rip), %s\n", Gsym[id].name, reglist[r]); break; case P_INT: fprintf(Outfile, "\tmovzbl\t%s(\%%rip), %s\n", Gsym[id].name, reglist[r]); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tmovq\t%s(\%%rip), %s\n", Gsym[id].name, reglist[r]); break; default: fatald("Bad type in cgloadglob:", Gsym[id].type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int id) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tleaq\tL%d(\%%rip), %s\n", id, reglist[r]); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r1], reglist[r2]); 
free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); free_register(r2); return (r1); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { // Get a new register int outr = alloc_register(); fprintf(Outfile, "\tmovq\t%s, %%rdi\n", reglist[r]); fprintf(Outfile, "\tcall\t%s\n", Gsym[id].name); fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]); free_register(r); return (outr); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsalq\t$%d, %s\n", val, reglist[r]); return(r); } // Store a register's value into a variable int cgstorglob(int r, int id) { switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %s(\%%rip)\n", breglist[r], Gsym[id].name); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %s(\%%rip)\n", dreglist[r], Gsym[id].name); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tmovq\t%s, %s(\%%rip)\n", reglist[r], Gsym[id].name); break; default: fatald("Bad type in cgloadglob:", Gsym[id].type); } return (r); } // Array of type sizes in P_XXX order. // 0 means no size. 
static int psize[] = { 0, 0, 1, 4, 8, 8, 8, 8, 8 }; // Given a P_XXX type value, return the // size of a primitive type in bytes. int cgprimsize(int type) { // Check the type is valid if (type < P_NONE || type > P_LONGPTR) fatal("Bad type in cgprimsize()"); return (psize[type]); } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Gsym[id].type); // Generate the global identity and the label fprintf(Outfile, "\t.data\n" "\t.globl\t%s\n", Gsym[id].name); fprintf(Outfile, "%s:", Gsym[id].name); // Generate the space for (int i=0; i < Gsym[id].size; i++) { switch(typesize) { case 1: fprintf(Outfile, "\t.byte\t0\n"); break; case 4: fprintf(Outfile, "\t.long\t0\n"); break; case 8: fprintf(Outfile, "\t.quad\t0\n"); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr= strvalue; *cptr; cptr++) { fprintf(Outfile, "\t.byte\t%d\n", *cptr); } fprintf(Outfile, "\t.byte\t0\n"); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { // Generate code depending on the function's type switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzbl\t%s, %%eax\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %%eax\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", Gsym[id].type); } cgjump(Gsym[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. 
Return a new register int cgaddress(int id) { int r = alloc_register(); fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", Gsym[id].name, reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tmovzbq\t(%s), %s\n", reglist[r], reglist[r]); break; case P_INTPTR: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; case P_LONGPTR: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, (%s)\n", breglist[r1], reglist[r2]); break; case P_INT: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 20_Char_Str_Literals/cg_arm.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for ARMv6 on Raspberry Pi // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. static int freereg[4]; static char *reglist[4] = { "r4", "r5", "r6", "r7" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // We have to store large integer literal values in memory. // Keep a list of them which will be output in the postamble #define MAXINTS 1024 int Intlist[MAXINTS]; static int Intslot = 0; // Determine the offset of a large integer // literal from the .L3 label. If the integer // isn't in the list, add it. static void set_int_offset(int val) { int offset = -1; // See if it is already there for (int i = 0; i < Intslot; i++) { if (Intlist[i] == val) { offset = 4 * i; break; } } // Not in the list, so add it if (offset == -1) { offset = 4 * Intslot; if (Intslot == MAXINTS) fatal("Out of int slots in set_int_offset()"); Intlist[Intslot++] = val; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L3+%d\n", offset); } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Print out the assembly postamble void cgpostamble() { // Print out the global variables fprintf(Outfile, ".L2:\n"); for (int i = 0; i < Globs; i++) { if (Gsym[i].stype == S_VARIABLE) fprintf(Outfile, "\t.word %s\n", Gsym[i].name); } // Print out the integer literals fprintf(Outfile, ".L3:\n"); for (int i = 0; i < Intslot; i++) { fprintf(Outfile, "\t.word %d\n", Intlist[i]); } } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Gsym[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, \%%function\n" "%s:\n" "\tpush\t{fp, lr}\n" "\tadd\tfp, sp, #4\n" "\tsub\tsp, sp, #8\n" "\tstr\tr0, [fp, #-8]\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Gsym[id].endlabel); fputs("\tsub\tsp, fp, #4\n" "\tpop\t{fp, pc}\n" "\t.align\t2\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. 
int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); // If the literal value is small, do it with one instruction if (value <= 1000) fprintf(Outfile, "\tmov\t%s, #%d\n", reglist[r], value); else { set_int_offset(value); fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); } return (r); } // Determine the offset of a variable from the .L2 // label. Yes, this is inefficient code. static void set_var_offset(int id) { int offset = 0; // Walk the symbol table up to id. // Find S_VARIABLEs and add on 4 until // we get to our variable for (int i = 0; i < id; i++) { if (Gsym[i].stype == S_VARIABLE) offset += 4; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L2+%d\n", offset); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tldrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgloadglob:", Gsym[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s, %s\n", reglist[r1], reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\tmul\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // 
return the number of the register with the result int cgdiv(int r1, int r2) { // To do a divide: r0 holds the dividend, r1 holds the divisor. // The quotient is in r0. fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r1]); fprintf(Outfile, "\tmov\tr1, %s\n", reglist[r2]); fprintf(Outfile, "\tbl\t__aeabi_idiv\n"); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r1]); free_register(r2); return (r1); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]); fprintf(Outfile, "\tbl\t%s\n", Gsym[id].name); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r]); return (r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tlsl\t%s, %s, #%d\n", reglist[r], reglist[r], val); return(r); } // Store a register's value into a variable int cgstorglob(int r, int id) { // Get the offset to the variable set_var_offset(id); switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tstr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgstorglob:", Gsym[id].type); } return (r); } // Array of type sizes in P_XXX order. // 0 means no size. static int psize[] = { 0, 0, 1, 4, 4, 4, 4, 4 }; // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { // Check the type is valid if (type < P_NONE || type > P_LONGPTR) fatal("Bad type in cgprimsize()"); return (psize[type]); } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Gsym[id].type); fprintf(Outfile, "\t.data\n" "\t.globl\t%s\n", Gsym[id].name); switch(typesize) { case 1: fprintf(Outfile, "%s:\t.byte\t0\n", Gsym[id].name); break; case 4: fprintf(Outfile, "%s:\t.long\t0\n", Gsym[id].name); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "moveq", "movne", "movlt", "movgt", "movle", "movge" }; // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "movne", "moveq", "movge", "movle", "movgt", "movlt" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #1\n", cmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #0\n", invcmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\tuxtb\t%s, %s\n", reglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tb\tL%d\n", l); } // List of inverted branch instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *brlist[] = { "bne", "beq", "bge", "ble", "bgt", "blt" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", brlist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[reg]); cgjump(Gsym[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. Return a new register int cgaddress(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); fprintf(Outfile, "\tmov\t%s, r3\n", reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tldrb\t%s, [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [%s]\n", reglist[r1], reglist[r2]); break; case P_INT: case P_LONG: fprintf(Outfile, "\tstr\t%s, [%s]\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 20_Char_Str_Literals/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. 
// We need a list of byte and doubleword registers, too static int freereg[4]; static char *reglist[4] = { "r8", "r9", "r10", "r11" }; static char *breglist[4] = { "r8b", "r9b", "r10b", "r11b" }; static char *dreglist[4] = { "r8d", "r9d", "r10d", "r11d" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\textern\tprintint\n", Outfile); fputs("\textern\tprintchar\n", Outfile); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Gsym[id].name; fprintf(Outfile, "\tsection\t.text\n" "\tglobal\t%s\n" "%s:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n", name, name); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Gsym[id].endlabel); fputs("\tpop rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Load a value from a variable into a register. 
// Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Print out the code to initialise it switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], Gsym[id].name); break; case P_INT: fprintf(Outfile, "\txor\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tmov\t%s, dword [%s]\n", dreglist[r], Gsym[id].name); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], Gsym[id].name); break; default: fatald("Bad type in cgloadglob:", Gsym[id].type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int id) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, L%d\n", reglist[r], id); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { // Get a new 
register int outr = alloc_register(); fprintf(Outfile, "\tmov\trdi, %s\n", reglist[r]); fprintf(Outfile, "\tcall\t%s\n", Gsym[id].name); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[outr]); free_register(r); return (outr); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsal\t%s, %d\n", reglist[r], val); return(r); } // Store a register's value into a variable int cgstorglob(int r, int id) { switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], %s\n", Gsym[id].name, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", Gsym[id].name, dreglist[r]); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tmov\t[%s], %s\n", Gsym[id].name, reglist[r]); break; default: fatald("Bad type in cgloadglob:", Gsym[id].type); } return (r); } // Array of type sizes in P_XXX order. // 0 means no size. static int psize[] = { 0, 0, 1, 4, 8, 8, 8, 8, 8 }; // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { // Check the type is valid if (type < P_NONE || type > P_LONGPTR) fatal("Bad type in cgprimsize()"); return (psize[type]); } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Gsym[id].type); // Generate the global identity and the label fprintf(Outfile, "\tsection\t.data\n" "\tglobal\t%s\n", Gsym[id].name); fprintf(Outfile, "%s:", Gsym[id].name); // Generate the space // original version for (int i=0; i < Gsym[id].size; i++) { switch(typesize) { case 1: fprintf(Outfile, "\tdb\t0\n"); break; case 4: fprintf(Outfile, "\tdd\t0\n"); break; case 8: fprintf(Outfile, "\tdq\t0\n"); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } /* compact version using times instead of loop switch(typesize) { case 1: fprintf(Outfile, "\ttimes\t%d\tdb\t0\n", Gsym[id].size); break; case 4: fprintf(Outfile, "\ttimes\t%d\tdd\t0\n", Gsym[id].size); break; case 8: fprintf(Outfile, "\ttimes\t%d\tdq\t0\n", Gsym[id].size); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } */ } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr= strvalue; *cptr; cptr++) { fprintf(Outfile, "\tdb\t%d\n", *cptr); } fprintf(Outfile, "\tdb\t0\n"); /* put string in readable format on a single line // probably went overboard with error checking int comma = 0, quote = 0, start = 1; fprintf(Outfile, "\tdb\t"); for (cptr=strvalue; *cptr; cptr++) { if ( ! 
isprint(*cptr) ) if (comma || start) { fprintf(Outfile, "%d, ", *cptr); start = 0; comma = 1; } else if (quote) { fprintf(Outfile, "\', %d, ", *cptr); comma = 1; quote = 0; } else { fprintf(Outfile, "%d, ", *cptr); comma = 1; quote = 0; } else if (start || comma) { fprintf(Outfile, "\'%c", *cptr); start = comma = 0; quote = 1; } else { fprintf(Outfile, "%c", *cptr); comma = 0; quote = 1; } } if (comma || start) fprintf(Outfile, "0\n"); else fprintf(Outfile, "\', 0\n"); */ } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { // Generate code depending on the function's type switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzx\teax, %s\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmov\teax, %s\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", Gsym[id].type); } cgjump(Gsym[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. 
Return a new register int cgaddress(int id) { int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r], Gsym[id].name); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: fprintf(Outfile, "\tmovzx\t%s, word [%s]\n", reglist[r], reglist[r]); break; case P_LONGPTR: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], byte %s\n", reglist[r2], breglist[r1]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; case P_LONG: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 20_Char_Str_Literals/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Putback; // Character put back by scanner extern_ int Functionid; // Symbol id of the current function extern_ int Globs; // Position of next free global symbol slot extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ struct token Token; // Last token scanned extern_ char Text[TEXTLEN + 1]; // Last identifier scanned extern_ struct symtable Gsym[NSYMBOLS]; // Global symbol table extern_ int O_dumpAST; ================================================ FILE: 20_Char_Str_Literals/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 // Parse the current 
token and return // a primitive type enum value. Also // scan in the next token int parse_type(void) { int type; switch (Token.token) { case T_VOID: type = P_VOID; break; case T_CHAR: type = P_CHAR; break; case T_INT: type = P_INT; break; case T_LONG: type = P_LONG; break; default: fatald("Illegal type, token", Token.token); } // Scan in one or more further '*' tokens // and determine the correct pointer type while (1) { scan(&Token); if (Token.token != T_STAR) break; type = pointer_to(type); } // We leave with the next token already scanned return (type); } // variable_declaration: type identifier ';' // | type identifier '[' INTLIT ']' ';' // ; // // Parse the declaration of a scalar variable or an array // with a given size. // The identifier has been scanned & we have the type void var_declaration(int type) { int id; // Text now has the identifier's name. // If the next token is a '[' if (Token.token == T_LBRACKET) { // Skip past the '[' scan(&Token); // Check we have an array size if (Token.token == T_INTLIT) { // Add this as a known array and generate its space in assembly. // We treat the array as a pointer to its elements' type id = addglob(Text, pointer_to(type), S_ARRAY, 0, Token.intvalue); genglobsym(id); } // Ensure we have a following ']' scan(&Token); match(T_RBRACKET, "]"); } else { // Add this as a known scalar // and generate its space in assembly id = addglob(Text, type, S_VARIABLE, 0, 1); genglobsym(id); } // Get the trailing semicolon semi(); } // // function_declaration: type identifier '(' ')' compound_statement ; // // Parse the declaration of a simplistic function. // The identifier has been scanned & we have the type struct ASTnode *function_declaration(int type) { struct ASTnode *tree, *finalstmt; int nameslot, endlabel; // Text now has the identifier's name. 
// Get a label-id for the end label, add the function // to the symbol table, and set the Functionid global // to the function's symbol-id endlabel = genlabel(); nameslot = addglob(Text, type, S_FUNCTION, endlabel, 0); Functionid = nameslot; // Scan in the parentheses lparen(); rparen(); // Get the AST tree for the compound statement tree = compound_statement(); // If the function type isn't P_VOID .. if (type != P_VOID) { // Error if no statements in the function if (tree == NULL) fatal("No statements in function with non-void type"); // Check that the last AST operation in the // compound statement was a return statement finalstmt = (tree->op == A_GLUE) ? tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); } // Return an A_FUNCTION node which has the function's nameslot // and the compound statement sub-tree return (mkastunary(A_FUNCTION, type, tree, nameslot)); } // Parse one or more global declarations, either // variables or functions void global_declarations(void) { struct ASTnode *tree; int type; while (1) { // We have to read past the type and identifier // to see either a '(' for a function declaration // or a ',' or ';' for a variable declaration. // Text is filled in by the ident() call. 
type = parse_type(); ident(); if (Token.token == T_LPAREN) { // Parse the function declaration and // generate the assembly code for it tree = function_declaration(type); if (O_dumpAST) { dumpAST(tree, NOLABEL, 0); fprintf(stdout, "\n\n"); } genAST(tree, NOLABEL, 0); } else { // Parse the global variable declaration var_declaration(type); } // Stop when we have reached EOF if (Token.token == T_EOF) break; } } ================================================ FILE: 20_Char_Str_Literals/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c void reject_token(struct token *t); int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, int intvalue); struct ASTnode *mkastleaf(int op, int type, int intvalue); struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, int intvalue); void dumpAST(struct ASTnode *n, int label, int parentASTop); // gen.c int genlabel(void); int genAST(struct ASTnode *n, int reg, int parentASTop); void genpreamble(); void genpostamble(); void genfreeregs(); void genglobsym(int id); int genglobstr(char *strvalue); int genprimsize(int type); void genreturn(int reg, int id); // cg.c void freeall_registers(void); void cgpreamble(); void cgpostamble(); void cgfuncpreamble(int id); void cgfuncpostamble(int id); int cgloadint(int value, int type); int cgloadglob(int id); int cgloadglobstr(int id); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdiv(int r1, int r2); int cgshlconst(int r, int val); int cgcall(int r, int id); int cgstorglob(int r, int id); void cgglobsym(int id); void cgglobstr(int l, char *strvalue); int cgcompare_and_set(int ASTop, int r1, int r2); int cgcompare_and_jump(int ASTop, int r1, int r2, int label); void cglabel(int l); void cgjump(int l); int cgwiden(int r, int oldtype, int newtype); int 
cgprimsize(int type); void cgreturn(int reg, int id); int cgaddress(int id); int cgderef(int r, int type); int cgstorderef(int r1, int r2, int type); // expr.c struct ASTnode *binexpr(int ptp); // stmt.c struct ASTnode *compound_statement(void); // misc.c void match(int t, char *what); void semi(void); void lbrace(void); void rbrace(void); void lparen(void); void rparen(void); void ident(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c int findglob(char *s); int addglob(char *name, int type, int stype, int endlabel, int size); // decl.c void var_declaration(int type); struct ASTnode *function_declaration(int type); void global_declarations(void); // types.c int inttype(int type); int parse_type(void); int pointer_to(int type); int value_at(int type); struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op); ================================================ FILE: 20_Char_Str_Literals/defs.h ================================================ #include #include #include #include // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 #define TEXTLEN 512 // Length of symbols in input #define NSYMBOLS 1024 // Number of symbol table entries // Token types enum { T_EOF, // Operators T_ASSIGN, T_PLUS, T_MINUS, T_STAR, T_SLASH, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, // Type keywords T_VOID, T_CHAR, T_INT, T_LONG, // Structural tokens T_INTLIT, T_STRLIT, T_SEMI, T_IDENT, T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, T_LBRACKET, T_RBRACKET, T_AMPER, T_LOGAND, // Other keywords T_IF, T_ELSE, T_WHILE, T_FOR, T_RETURN }; // Token structure struct token { int token; // Token type, from the enum list above int intvalue; // For T_INTLIT, the integer value }; // AST node types. 
The first few line up // with the related tokens enum { A_ASSIGN= 1, A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_INTLIT, A_STRLIT, A_IDENT, A_GLUE, A_IF, A_WHILE, A_FUNCTION, A_WIDEN, A_RETURN, A_FUNCCALL, A_DEREF, A_ADDR, A_SCALE }; // Primitive types enum { P_NONE, P_VOID, P_CHAR, P_INT, P_LONG, P_VOIDPTR, P_CHARPTR, P_INTPTR, P_LONGPTR }; // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates int rvalue; // True if the node is an rvalue struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; union { // For A_INTLIT, the integer value int intvalue; // For A_IDENT, the symbol slot number int id; // For A_FUNCTION, the symbol slot number int size; // For A_SCALE, the size to scale by } v; // For A_FUNCCALL, the symbol slot number }; #define NOREG -1 // Use NOREG when the AST generation // functions have no register to return #define NOLABEL 0 // Use NOLABEL when we have no label to // pass to genAST() // Structural types enum { S_VARIABLE, S_FUNCTION, S_ARRAY }; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol int stype; // Structural type for the symbol int endlabel; // For S_FUNCTIONs, the end label int size; // Number of elements in the symbol }; ================================================ FILE: 20_Char_Str_Literals/expr.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of expressions // Copyright (c) 2019 Warren Toomey, GPL3 // Parse a function call with a single expression // argument and return its AST static struct ASTnode *funccall(void) { struct ASTnode *tree; int id; // Check that the identifier has been defined as a function, // then make a leaf node for it. 
if ((id = findglob(Text)) == -1 || Gsym[id].stype != S_FUNCTION) { fatals("Undeclared function", Text); } // Get the '(' lparen(); // Parse the following expression tree = binexpr(0); // Build the function call AST node. Store the // function's return type as this node's type. // Also record the function's symbol-id tree = mkastunary(A_FUNCCALL, Gsym[id].type, tree, id); // Get the ')' rparen(); return (tree); } // Parse the index into an array and // return an AST tree for it static struct ASTnode *array_access(void) { struct ASTnode *left, *right; int id; // Check that the identifier has been defined as an array // then make a leaf node for it that points at the base if ((id = findglob(Text)) == -1 || Gsym[id].stype != S_ARRAY) { fatals("Undeclared array", Text); } left = mkastleaf(A_ADDR, Gsym[id].type, id); // Get the '[' scan(&Token); // Parse the following expression right = binexpr(0); // Get the ']' match(T_RBRACKET, "]"); // Ensure that this is of int type if (!inttype(right->type)) fatal("Array index is not of integer type"); // Scale the index by the size of the element's type right = modify_type(right, left->type, A_ADD); // Return an AST tree where the array's base has the offset // added to it, and dereference the element. Still an lvalue // at this point. left = mkastnode(A_ADD, Gsym[id].type, left, NULL, right, 0); left = mkastunary(A_DEREF, value_at(left->type), left, 0); return (left); } // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; int id; switch (Token.token) { case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. // Make it a P_CHAR if it's within the P_CHAR range if ((Token.intvalue) >= 0 && (Token.intvalue < 256)) n = mkastleaf(A_INTLIT, P_CHAR, Token.intvalue); else n = mkastleaf(A_INTLIT, P_INT, Token.intvalue); break; case T_STRLIT: // For a STRLIT token, generate the assembly for it. // Then make a leaf AST node for it. id is the string's label. 
id= genglobstr(Text); n= mkastleaf(A_STRLIT, P_CHARPTR, id); break; case T_IDENT: // This could be a variable, array index or a // function call. Scan in the next token to find out scan(&Token); // It's a '(', so a function call if (Token.token == T_LPAREN) return (funccall()); // It's a '[', so an array reference if (Token.token == T_LBRACKET) { return (array_access()); } // Not a function call, so reject the new token reject_token(&Token); // Check that the variable exists. id = findglob(Text); if (id == -1 || Gsym[id].stype != S_VARIABLE) fatals("Unknown variable", Text); // Make a leaf AST node for it n = mkastleaf(A_IDENT, Gsym[id].type, id); break; case T_LPAREN: // Beginning of a parenthesised expression, skip the '('. // Scan in the expression and the right parenthesis scan(&Token); n = binexpr(0); rparen(); return (n); default: fatald("Expecting a primary expression, got token", Token.token); } // Scan in the next token and return the leaf node scan(&Token); return (n); } // Convert a binary operator token into a binary AST operation. // We rely on a 1:1 mapping from token to AST operation static int binastop(int tokentype) { if (tokentype > T_EOF && tokentype < T_INTLIT) return (tokentype); fatald("Syntax error, token", tokentype); return (0); // Keep -Wall happy } // Return true if a token is right-associative, // false otherwise. static int rightassoc(int tokentype) { if (tokentype == T_ASSIGN) return (1); return (0); } // Operator precedence for each token. Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, // T_EOF, T_ASSIGN 20, 20, // T_PLUS, T_MINUS 30, 30, // T_STAR, T_SLASH 40, 40, // T_EQ, T_NE 50, 50, 50, 50 // T_LT, T_GT, T_LE, T_GE }; // Check that we have a binary operator and // return its precedence. 
static int op_precedence(int tokentype) { int prec; if (tokentype >= T_VOID) fatald("Token with no precedence in op_precedence:", tokentype); prec = OpPrec[tokentype]; if (prec == 0) fatald("Syntax error, token", tokentype); return (prec); } // prefix_expression: primary // | '*' prefix_expression // | '&' prefix_expression // ; // Parse a prefix expression and return // a sub-tree representing it. struct ASTnode *prefix(void) { struct ASTnode *tree; switch (Token.token) { case T_AMPER: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Ensure that it's an identifier if (tree->op != A_IDENT) fatal("& operator must be followed by an identifier"); // Now change the operator to A_ADDR and the type to // a pointer to the original type tree->op = A_ADDR; tree->type = pointer_to(tree->type); break; case T_STAR: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's either another deref or an // identifier if (tree->op != A_IDENT && tree->op != A_DEREF) fatal("* operator must be followed by an identifier or *"); // Prepend an A_DEREF operation to the tree tree = mkastunary(A_DEREF, value_at(tree->type), tree, 0); break; default: tree = primary(); } return (tree); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; struct ASTnode *ltemp, *rtemp; int ASTop; int tokentype; // Get the tree on the left. // Fetch the next token at the same time. 
left = prefix(); // If we hit a semicolon or ')', return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET) { left->rvalue = 1; return (left); } // While the precedence of this token is more than that of the // previous token precedence, or it's right associative and // equal to the previous token's precedence while ((op_precedence(tokentype) > ptp) || (rightassoc(tokentype) && op_precedence(tokentype) == ptp)) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Determine the operation to be performed on the sub-trees ASTop = binastop(tokentype); if (ASTop == A_ASSIGN) { // Assignment // Make the right tree into an rvalue right->rvalue = 1; // Ensure the right's type matches the left right = modify_type(right, left->type, 0); if (left == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree. However, switch // left and right around, so that the right expression's // code will be generated before the left expression ltemp = left; left = right; right = ltemp; } else { // We are not doing an assignment, so both trees should be rvalues // Convert both trees into rvalue if they are lvalue trees left->rvalue = 1; right->rvalue = 1; // Ensure the two types are compatible by trying // to modify each tree to match the other's type. ltemp = modify_type(left, right->type, ASTop); rtemp = modify_type(right, left->type, ASTop); if (ltemp == NULL && rtemp == NULL) fatal("Incompatible types in binary expression"); if (ltemp != NULL) left = ltemp; if (rtemp != NULL) right = rtemp; } // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. left = mkastnode(binastop(tokentype), left->type, left, NULL, right, 0); // Update the details of the current token. 
// If we hit a semicolon, ')' or ']', return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET) { left->rvalue = 1; return (left); } } // Return the tree we have when the precedence // is the same or lower left->rvalue = 1; return (left); } ================================================ FILE: 20_Char_Str_Literals/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number int genlabel(void) { static int id = 1; return (id++); } // Generate the code for an IF statement // and an optional ELSE clause static int genIF(struct ASTnode *n) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. // When there is no ELSE clause, Lfalse _is_ // the ending label! Lfalse = genlabel(); if (n->right) Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. genAST(n->left, Lfalse, n->op); genfreeregs(); // Generate the true compound statement genAST(n->mid, NOLABEL, n->op); genfreeregs(); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOLABEL, n->op); genfreeregs(); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = genlabel(); Lend = genlabel(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. 
genAST(n->left, Lend, n->op); genfreeregs(); // Generate the compound statement for the body genAST(n->right, NOLABEL, n->op); genfreeregs(); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Given an AST, an optional label, and the AST op // of the parent, generate assembly code recursively. // Return the register id with the tree's final value. int genAST(struct ASTnode *n, int label, int parentASTop) { int leftreg, rightreg; // We have some specific AST node handling at the top // so that we don't evaluate the child sub-trees immediately switch (n->op) { case A_IF: return (genIF(n)); case A_WHILE: return (genWHILE(n)); case A_GLUE: // Do each child statement, and free the // registers after each child genAST(n->left, NOLABEL, n->op); genfreeregs(); genAST(n->right, NOLABEL, n->op); genfreeregs(); return (NOREG); case A_FUNCTION: // Generate the function's preamble before the code // in the child sub-tree cgfuncpreamble(n->v.id); genAST(n->left, NOLABEL, n->op); cgfuncpostamble(n->v.id); return (NOREG); } // General AST node handling below // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, NOLABEL, n->op); if (n->right) rightreg = genAST(n->right, NOLABEL, n->op); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdiv(leftreg, rightreg)); case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, compare registers // and set one to 1 or 0 based on the comparison. 
if (parentASTop == A_IF || parentASTop == A_WHILE) return (cgcompare_and_jump(n->op, leftreg, rightreg, label)); else return (cgcompare_and_set(n->op, leftreg, rightreg)); case A_INTLIT: return (cgloadint(n->v.intvalue, n->type)); case A_STRLIT: return (cgloadglobstr(n->v.id)); case A_IDENT: // Load our value if we are an rvalue // or we are being dereferenced if (n->rvalue || parentASTop== A_DEREF) return (cgloadglob(n->v.id)); else return (NOREG); case A_ASSIGN: // Are we assigning to an identifier or through a pointer? switch (n->right->op) { case A_IDENT: return (cgstorglob(leftreg, n->right->v.id)); case A_DEREF: return (cgstorderef(leftreg, rightreg, n->right->type)); default: fatald("Can't A_ASSIGN in genAST(), op", n->op); } case A_WIDEN: // Widen the child's type to the parent's type return (cgwiden(leftreg, n->left->type, n->type)); case A_RETURN: cgreturn(leftreg, Functionid); return (NOREG); case A_FUNCCALL: return (cgcall(leftreg, n->v.id)); case A_ADDR: return (cgaddress(n->v.id)); case A_DEREF: // If we are an rvalue, dereference to get the value we point at, // otherwise leave it for A_ASSIGN to store through the pointer if (n->rvalue) return (cgderef(leftreg, n->left->type)); else return (leftreg); case A_SCALE: // Small optimisation: use shift if the // scale value is a known power of two switch (n->v.size) { case 2: return(cgshlconst(leftreg, 1)); case 4: return(cgshlconst(leftreg, 2)); case 8: return(cgshlconst(leftreg, 3)); default: // Load a register with the size and // multiply the leftreg by this size rightreg= cgloadint(n->v.size, P_INT); return (cgmul(leftreg, rightreg)); } default: fatald("Unknown AST operator", n->op); } return (NOREG); // Keep -Wall happy } void genpreamble() { cgpreamble(); } void genpostamble() { cgpostamble(); } void genfreeregs() { freeall_registers(); } void genglobsym(int id) { cgglobsym(id); } int genglobstr(char *strvalue) { int l= genlabel(); cgglobstr(l, strvalue); return(l); } int genprimsize(int type) { 
return (cgprimsize(type)); } ================================================ FILE: 20_Char_Str_Literals/lib/printint.c ================================================ #include void printint(long x) { printf("%ld\n", x); } void printchar(long x) { putc((char)(x & 0x7f), stdout); } ================================================ FILE: 20_Char_Str_Literals/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Initialise global variables static void init() { Line = 1; Putback = '\n'; Globs = 0; O_dumpAST = 0; } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s [-T] infile\n", prog); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. int main(int argc, char *argv[]) { int i; // Initialise the globals init(); // Scan for command-line options for (i = 1; i < argc; i++) { if (*argv[i] != '-') break; for (int j = 1; argv[i][j]; j++) { switch (argv[i][j]) { case 'T': O_dumpAST = 1; break; default: usage(argv[0]); } } } // Ensure we have an input file argument if (i >= argc) usage(argv[0]); // Open up the input file if ((Infile = fopen(argv[i], "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", argv[i], strerror(errno)); exit(1); } // Create the output file if ((Outfile = fopen("out.s", "w")) == NULL) { fprintf(stderr, "Unable to create out.s: %s\n", strerror(errno)); exit(1); } // For now, ensure that printint() and printchar() are defined addglob("printint", P_INT, S_FUNCTION, 0, 0); addglob("printchar", P_VOID, S_FUNCTION, 0, 0); scan(&Token); // Get the first token from the input genpreamble(); // Output the preamble global_declarations(); // Parse the global declarations genpostamble(); // Output the postamble 
fclose(Outfile); // Close the output file and exit return (0); } ================================================ FILE: 20_Char_Str_Literals/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current token is t, // and fetch the next token. Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d\n", s, Line); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d\n", s1, s2, Line); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d\n", s, d, Line); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d\n", s, c, Line); exit(1); } ================================================ FILE: 20_Char_Str_Literals/scan.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { char *p; p = strchr(s, c); return (p ? p - s : -1); } // Get the next character from the input file. 
static int next(void) { int c; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return (c); } c = fgetc(Infile); // Read from input file if ('\n' == c) Line++; // Increment line count return (c); } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. static int skip(void) { int c; c = next(); while (' ' == c || '\t' == c || '\n' == c || '\r' == c || '\f' == c) { c = next(); } return (c); } // Return the next character from a character // or string literal static int scanch(void) { int c; // Get the next input character and interpret // metacharacters that start with a backslash c = next(); if (c == '\\') { switch (c = next()) { case 'a': return '\a'; case 'b': return '\b'; case 'f': return '\f'; case 'n': return '\n'; case 'r': return '\r'; case 't': return '\t'; case 'v': return '\v'; case '\\': return '\\'; case '"': return '"'; case '\'': return '\''; default: fatalc("unknown escape sequence", c); } } return (c); // Just an ordinary old character! } // Scan and return an integer literal // value from the input file. static int scanint(int c) { int k, val = 0; // Convert each character into an int value while ((k = chrpos("0123456789", c)) >= 0) { val = val * 10 + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan in a string literal from the input file, // and store it in buf[]. Return the length of // the string. 
static int scanstr(char *buf) { int i, c; // Loop while we have enough buffer space for (i=0; itoken = T_EOF; return (0); case '+': t->token = T_PLUS; break; case '-': t->token = T_MINUS; break; case '*': t->token = T_STAR; break; case '/': t->token = T_SLASH; break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case '[': t->token = T_LBRACKET; break; case ']': t->token = T_RBRACKET; break; case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { fatalc("Unrecognised character", c); } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else { putback(c); t->token = T_LT; } break; case '>': if ((c = next()) == '=') { t->token = T_GE; } else { putback(c); t->token = T_GT; } break; case '&': if ((c = next()) == '&') { t->token = T_LOGAND; } else { putback(c); t->token = T_AMPER; } break; case '\'': // If it's a quote, scan in the // literal character value and // the trailing quote t->intvalue = scanch(); t->token = T_INTLIT; if (next() != '\'') fatal("Expected '\\'' at end of char literal"); break; case '"': // Scan in a literal string scanstr(Text); t->token= T_STRLIT; break; default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if ((tokentype = keyword(Text)) != 0) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token return (1); } ================================================ FILE: 
20_Char_Str_Literals/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // Prototypes static struct ASTnode *single_statement(void); // compound_statement: // empty, i.e. no statement // | statement // | statement statements // ; // // statement: declaration // | expression_statement // | function_call // | if_statement // | while_statement // | for_statement // | return_statement // ; // if_statement: if_head // | if_head 'else' compound_statement // ; // // if_head: 'if' '(' true_false_expression ')' compound_statement ; // // Parse an IF statement including any // optional ELSE clause and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Ensure // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) fatal("Bad comparison operator"); rparen(); // Get the AST for the compound statement trueAST = compound_statement(); // If we have an 'else', skip it // and get the AST for the compound statement if (Token.token == T_ELSE) { scan(&Token); falseAST = compound_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, P_NONE, condAST, trueAST, falseAST, 0)); } // while_statement: 'while' '(' true_false_expression ')' compound_statement ; // // Parse a WHILE statement and return its AST static struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Ensure // the tree's operation is a comparison. 
condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) fatal("Bad comparison operator"); rparen(); // Get the AST for the compound statement bodyAST = compound_statement(); // Build and return the AST for this statement return (mkastnode(A_WHILE, P_NONE, condAST, NULL, bodyAST, 0)); } // for_statement: 'for' '(' preop_statement ';' // true_false_expression ';' // postop_statement ')' compound_statement ; // // preop_statement: statement (for now) // postop_statement: statement (for now) // // Parse a FOR statement and return its AST static struct ASTnode *for_statement(void) { struct ASTnode *condAST, *bodyAST; struct ASTnode *preopAST, *postopAST; struct ASTnode *tree; // Ensure we have 'for' '(' match(T_FOR, "for"); lparen(); // Get the pre_op statement and the ';' preopAST = single_statement(); semi(); // Get the condition and the ';' condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) fatal("Bad comparison operator"); semi(); // Get the post_op statement and the ')' postopAST = single_statement(); rparen(); // Get the compound statement which is the body bodyAST = compound_statement(); // For now, all four sub-trees have to be non-NULL. 
// Later on, we'll change the semantics for when some are missing // Glue the compound statement and the postop tree tree = mkastnode(A_GLUE, P_NONE, bodyAST, NULL, postopAST, 0); // Make a WHILE loop with the condition and this new body tree = mkastnode(A_WHILE, P_NONE, condAST, NULL, tree, 0); // And glue the preop tree to the A_WHILE tree return (mkastnode(A_GLUE, P_NONE, preopAST, NULL, tree, 0)); } // return_statement: 'return' '(' expression ')' ; // // Parse a return statement and return its AST static struct ASTnode *return_statement(void) { struct ASTnode *tree; // Can't return a value if function returns P_VOID if (Gsym[Functionid].type == P_VOID) fatal("Can't return from a void function"); // Ensure we have 'return' '(' match(T_RETURN, "return"); lparen(); // Parse the following expression tree = binexpr(0); // Ensure this is compatible with the function's type tree = modify_type(tree, Gsym[Functionid].type, 0); if (tree == NULL) fatal("Incompatible type to return"); // Add on the A_RETURN node tree = mkastunary(A_RETURN, P_NONE, tree, 0); // Get the ')' rparen(); return (tree); } // Parse a single statement and return its AST static struct ASTnode *single_statement(void) { int type; switch (Token.token) { case T_CHAR: case T_INT: case T_LONG: // The beginning of a variable declaration. // Parse the type and get the identifier. // Then parse the rest of the declaration. // XXX: These are globals at present. type = parse_type(); ident(); var_declaration(type); return (NULL); // No AST generated here case T_IF: return (if_statement()); case T_WHILE: return (while_statement()); case T_FOR: return (for_statement()); case T_RETURN: return (return_statement()); default: // For now, see if this is an expression. // This catches assignment statements. 
return (binexpr(0)); } return (NULL); // Keep -Wall happy } // Parse a compound statement // and return its AST struct ASTnode *compound_statement(void) { struct ASTnode *left = NULL; struct ASTnode *tree; // Require a left curly bracket lbrace(); while (1) { // Parse a single statement tree = single_statement(); // Some statements must be followed by a semicolon if (tree != NULL && (tree->op == A_ASSIGN || tree->op == A_RETURN || tree->op == A_FUNCCALL)) semi(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, P_NONE, left, NULL, tree, 0); } // When we hit a right curly bracket, // skip past it and return the AST if (Token.token == T_RBRACE) { rbrace(); return (left); } } } ================================================ FILE: 20_Char_Str_Literals/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 // Determine if the symbol s is in the global symbol table. // Return its slot position or -1 if not found. int findglob(char *s) { int i; for (i = 0; i < Globs; i++) { if (*s == *Gsym[i].name && !strcmp(s, Gsym[i].name)) return (i); } return (-1); } // Get the position of a new global symbol slot, or die // if we've run out of positions. static int newglob(void) { int p; if ((p = Globs++) >= NSYMBOLS) fatal("Too many global symbols"); return (p); } // Add a global symbol to the symbol table. Set up its: // + type: char, int etc. // + structural type: var, function, array etc. 
// + size: number of elements // + endlabel: if this is a function // Return the slot number in the symbol table int addglob(char *name, int type, int stype, int endlabel, int size) { int y; // If this is already in the symbol table, return the existing slot if ((y = findglob(name)) != -1) return (y); // Otherwise get a new slot, fill it in and // return the slot number y = newglob(); Gsym[y].name = strdup(name); Gsym[y].type = type; Gsym[y].stype = stype; Gsym[y].endlabel = endlabel; Gsym[y].size = size; return (y); } ================================================ FILE: 20_Char_Str_Literals/tests/input01.c ================================================ void main() { printint(12 * 3); printint(18 - 2 * 4); printint(1 + 2 + 9 - 5/2 + 3*5); } ================================================ FILE: 20_Char_Str_Literals/tests/input02.c ================================================ void main() { int fred; int jim; fred= 5; jim= 12; printint(fred + jim); } ================================================ FILE: 20_Char_Str_Literals/tests/input03.c ================================================ void main() { int x; x= 1; printint(x); x= x + 1; printint(x); x= x + 1; printint(x); x= x + 1; printint(x); x= x + 1; printint(x); } ================================================ FILE: 20_Char_Str_Literals/tests/input04.c ================================================ void main() { int x; x= 7 < 9; printint(x); x= 7 <= 9; printint(x); x= 7 != 9; printint(x); x= 7 == 7; printint(x); x= 7 >= 7; printint(x); x= 7 <= 7; printint(x); x= 9 > 7; printint(x); x= 9 >= 7; printint(x); x= 9 != 7; printint(x); } ================================================ FILE: 20_Char_Str_Literals/tests/input05.c ================================================ void main() { int i; int j; i=6; j=12; if (i < j) { printint(i); } else { printint(j); } } ================================================ FILE: 20_Char_Str_Literals/tests/input06.c ================================================ 
void main() { int i; i=1; while (i <= 10) { printint(i); i= i + 1; } } ================================================ FILE: 20_Char_Str_Literals/tests/input07.c ================================================ void main() { int i; for (i= 1; i <= 10; i= i + 1) { printint(i); } } ================================================ FILE: 20_Char_Str_Literals/tests/input08.c ================================================ void main() { int i; for (i= 1; i <= 10; i= i + 1) { printint(i); } } ================================================ FILE: 20_Char_Str_Literals/tests/input09.c ================================================ void main() { int i; for (i= 1; i <= 10; i= i + 1) { printint(i); } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { printint(2 * b - a); } } ================================================ FILE: 20_Char_Str_Literals/tests/input10.c ================================================ void main() { int i; char j; j= 20; printint(j); i= 10; printint(i); for (i= 1; i <= 5; i= i + 1) { printint(i); } for (j= 253; j != 2; j= j + 1) { printint(j); } } ================================================ FILE: 20_Char_Str_Literals/tests/input11.c ================================================ int main() { int i; char j; long k; i= 10; printint(i); j= 20; printint(j); k= 30; printint(k); for (i= 1; i <= 5; i= i + 1) { printint(i); } for (j= 253; j != 4; j= j + 1) { printint(j); } for (k= 1; k <= 5; k= k + 1) { printint(k); } return(i); printint(12345); return(3); } ================================================ FILE: 20_Char_Str_Literals/tests/input12.c ================================================ int fred() { return(5); } void main() { int x; x= fred(2); printint(x); } ================================================ FILE: 20_Char_Str_Literals/tests/input13.c ================================================ int fred() { return(56); } void main() { int dummy; int result; dummy= printint(23); result= fred(10); dummy= printint(result); } 
================================================ FILE: 20_Char_Str_Literals/tests/input14.c ================================================ int fred() { return(20); } int main() { int result; printint(10); result= fred(15); printint(result); printint(fred(15)+10); return(0); } ================================================ FILE: 20_Char_Str_Literals/tests/input15.c ================================================ int main() { char a; char *b; char c; int d; int *e; int f; a= 18; printint(a); b= &a; c= *b; printint(c); d= 12; printint(d); e= &d; f= *e; printint(f); return(0); } ================================================ FILE: 20_Char_Str_Literals/tests/input16.c ================================================ int c; int d; int *e; int f; int main() { c= 12; d=18; printint(c); e= &c + 1; f= *e; printint(f); return(0); } ================================================ FILE: 20_Char_Str_Literals/tests/input17.c ================================================ int main() { char a; char *b; int d; int *e; b= &a; *b= 19; printint(a); e= &d; *e= 12; printint(d); return(0); } ================================================ FILE: 20_Char_Str_Literals/tests/input18.c ================================================ int main() { int a; int b; a= b= 34; printint(a); printint(b); return(0); } ================================================ FILE: 20_Char_Str_Literals/tests/input18a.c ================================================ int a; int *b; char c; char *d; int main() { b= &a; *b= 15; printint(a); d= &c; *d= 16; printint(c); return(0); } ================================================ FILE: 20_Char_Str_Literals/tests/input19.c ================================================ int a; int b; int c; int d; int e; int main() { a= 2; b= 4; c= 3; d= 2; e= (a+b) * (c+d); printint(e); return(0); } ================================================ FILE: 20_Char_Str_Literals/tests/input20.c ================================================ int a; int b[25]; int main() { 
b[3]= 12; a= b[3]; printint(a); return(0); } ================================================ FILE: 20_Char_Str_Literals/tests/input21.c ================================================ char c; char *str; int main() { c= '\n'; printint(c); for (str= "Hello world\n"; *str != 0; str= str + 1) { printchar(*str); } return(0); } ================================================ FILE: 20_Char_Str_Literals/tests/mktests ================================================ #!/bin/sh # Make the output files for each test for i in input*c do if [ ! -f "out.$i" ] then cc -o out $i ../lib/printint.c ./out > out.$i rm -f out fi done ================================================ FILE: 20_Char_Str_Literals/tests/out.input01.c ================================================ 36 10 25 ================================================ FILE: 20_Char_Str_Literals/tests/out.input02.c ================================================ 17 ================================================ FILE: 20_Char_Str_Literals/tests/out.input03.c ================================================ 1 2 3 4 5 ================================================ FILE: 20_Char_Str_Literals/tests/out.input04.c ================================================ 1 1 1 1 1 1 1 1 1 ================================================ FILE: 20_Char_Str_Literals/tests/out.input05.c ================================================ 6 ================================================ FILE: 20_Char_Str_Literals/tests/out.input06.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 20_Char_Str_Literals/tests/out.input07.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 20_Char_Str_Literals/tests/out.input08.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 20_Char_Str_Literals/tests/out.input09.c 
================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 20_Char_Str_Literals/tests/out.input10.c ================================================ 20 10 1 2 3 4 5 253 254 255 0 1 ================================================ FILE: 20_Char_Str_Literals/tests/out.input11.c ================================================ 10 20 30 1 2 3 4 5 253 254 255 0 1 2 3 1 2 3 4 5 ================================================ FILE: 20_Char_Str_Literals/tests/out.input12.c ================================================ 5 ================================================ FILE: 20_Char_Str_Literals/tests/out.input13.c ================================================ 23 56 ================================================ FILE: 20_Char_Str_Literals/tests/out.input14.c ================================================ 10 20 30 ================================================ FILE: 20_Char_Str_Literals/tests/out.input15.c ================================================ 18 18 12 12 ================================================ FILE: 20_Char_Str_Literals/tests/out.input16.c ================================================ 12 18 ================================================ FILE: 20_Char_Str_Literals/tests/out.input17.c ================================================ 19 12 ================================================ FILE: 20_Char_Str_Literals/tests/out.input18.c ================================================ 34 34 ================================================ FILE: 20_Char_Str_Literals/tests/out.input18a.c ================================================ 15 16 ================================================ FILE: 20_Char_Str_Literals/tests/out.input19.c ================================================ 30 ================================================ FILE: 20_Char_Str_Literals/tests/out.input20.c ================================================ 12 ================================================ 
FILE: 20_Char_Str_Literals/tests/out.input21.c ================================================ 10 Hello world ================================================ FILE: 20_Char_Str_Literals/tests/runtests ================================================ #!/bin/sh # Run each test and compare # against known good output if [ ! -f ../comp1 ] then echo "Need to build ../comp1 first!"; exit 1 fi for i in input* do if [ ! -f "out.$i" ] then echo "Can't run test on $i, no output file!" else echo -n $i ../comp1 $i cc -o out out.s ../lib/printint.c ./out > trial.$i cmp -s "out.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo else echo ": OK" fi rm -f out out.s "trial.$i" fi done ================================================ FILE: 20_Char_Str_Literals/tests/runtestsn ================================================ #!/bin/sh # Run each test and compare # against known good output if [ ! -f ../compn ] then echo "Need to build ../compn first!"; exit 1 fi for i in input* do if [ ! -f "out.$i" ] then echo "Can't run test on $i, no output file!" else echo -n $i ../compn $i nasm -f elf64 out.s cc -no-pie -Wall -o out out.o ../lib/printint.c ./out > trial.$i cmp -s "out.$i" "trial.$i" if [ "$?" 
-eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo else echo ": OK" fi rm -f out out.o out.s "trial.$i" fi done ================================================ FILE: 20_Char_Str_Literals/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->type = type; n->left = left; n->mid = mid; n->right = right; n->v.intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, int intvalue) { return (mkastnode(op, type, NULL, NULL, NULL, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, int intvalue) { return (mkastnode(op, type, left, NULL, NULL, intvalue)); } // Generate and return a new label number // just for AST dumping purposes static int gendumplabel(void) { static int id = 1; return (id++); } // Given an AST tree, print it out and follow the // traversal of the tree that genAST() follows void dumpAST(struct ASTnode *n, int label, int level) { int Lfalse, Lstart, Lend; switch (n->op) { case A_IF: Lfalse = gendumplabel(); for (int i=0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_IF"); if (n->right) { Lend = gendumplabel(); fprintf(stdout, ", end L%d", Lend); } fprintf(stdout, "\n"); dumpAST(n->left, Lfalse, level+2); dumpAST(n->mid, NOLABEL, level+2); if (n->right) dumpAST(n->right, NOLABEL, level+2); return; case A_WHILE: Lstart = gendumplabel(); for (int i=0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_WHILE, start L%d\n", 
Lstart); Lend = gendumplabel(); dumpAST(n->left, Lend, level+2); dumpAST(n->right, NOLABEL, level+2); return; } // Reset level to -2 for A_GLUE if (n->op==A_GLUE) level= -2; // General AST node handling if (n->left) dumpAST(n->left, NOLABEL, level+2); if (n->right) dumpAST(n->right, NOLABEL, level+2); for (int i=0; i < level; i++) fprintf(stdout, " "); switch (n->op) { case A_GLUE: fprintf(stdout, "\n\n"); return; case A_FUNCTION: fprintf(stdout, "A_FUNCTION %s\n", Gsym[n->v.id].name); return; case A_ADD: fprintf(stdout, "A_ADD\n"); return; case A_SUBTRACT: fprintf(stdout, "A_SUBTRACT\n"); return; case A_MULTIPLY: fprintf(stdout, "A_MULTIPLY\n"); return; case A_DIVIDE: fprintf(stdout, "A_DIVIDE\n"); return; case A_EQ: fprintf(stdout, "A_EQ\n"); return; case A_NE: fprintf(stdout, "A_NE\n"); return; case A_LT: fprintf(stdout, "A_LE\n"); return; case A_GT: fprintf(stdout, "A_GT\n"); return; case A_LE: fprintf(stdout, "A_LE\n"); return; case A_GE: fprintf(stdout, "A_GE\n"); return; case A_INTLIT: fprintf(stdout, "A_INTLIT %d\n", n->v.intvalue); return; case A_STRLIT: fprintf(stdout, "A_STRLIT rval label L%d\n", n->v.id); return; case A_IDENT: if (n->rvalue) fprintf(stdout, "A_IDENT rval %s\n", Gsym[n->v.id].name); else fprintf(stdout, "A_IDENT %s\n", Gsym[n->v.id].name); return; case A_ASSIGN: fprintf(stdout, "A_ASSIGN\n"); return; case A_WIDEN: fprintf(stdout, "A_WIDEN\n"); return; case A_RETURN: fprintf(stdout, "A_RETURN\n"); return; case A_FUNCCALL: fprintf(stdout, "A_FUNCCALL %s\n", Gsym[n->v.id].name); return; case A_ADDR: fprintf(stdout, "A_ADDR %s\n", Gsym[n->v.id].name); return; case A_DEREF: if (n->rvalue) fprintf(stdout, "A_DEREF rval\n"); else fprintf(stdout, "A_DEREF\n"); return; case A_SCALE: fprintf(stdout, "A_SCALE %d\n", n->v.size); return; default: fatald("Unknown dumpAST operator", n->op); } } ================================================ FILE: 20_Char_Str_Literals/types.c ================================================ #include "defs.h" #include 
"data.h" #include "decl.h" // Types and type handling // Copyright (c) 2019 Warren Toomey, GPL3 // Return true if a type is an int type // of any size, false otherwise int inttype(int type) { if (type == P_CHAR || type == P_INT || type == P_LONG) return (1); return (0); } // Return true if a type is of pointer type int ptrtype(int type) { if (type == P_VOIDPTR || type == P_CHARPTR || type == P_INTPTR || type == P_LONGPTR) return (1); return (0); } // Given a primitive type, return // the type which is a pointer to it int pointer_to(int type) { int newtype; switch (type) { case P_VOID: newtype = P_VOIDPTR; break; case P_CHAR: newtype = P_CHARPTR; break; case P_INT: newtype = P_INTPTR; break; case P_LONG: newtype = P_LONGPTR; break; default: fatald("Unrecognised in pointer_to: type", type); } return (newtype); } // Given a primitive pointer type, return // the type which it points to int value_at(int type) { int newtype; switch (type) { case P_VOIDPTR: newtype = P_VOID; break; case P_CHARPTR: newtype = P_CHAR; break; case P_INTPTR: newtype = P_INT; break; case P_LONGPTR: newtype = P_LONG; break; default: fatald("Unrecognised in value_at: type", type); } return (newtype); } // Given an AST tree and a type which we want it to become, // possibly modify the tree by widening or scaling so that // it is compatible with this type. Return the original tree // if no changes occurred, a modified tree, or NULL if the // tree is not compatible with the given type. // If this will be part of a binary operation, the AST op is not zero. 
struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op) { int ltype; int lsize, rsize; ltype = tree->type; // Compare scalar int types if (inttype(ltype) && inttype(rtype)) { // Both types same, nothing to do if (ltype == rtype) return (tree); // Get the sizes for each type lsize = genprimsize(ltype); rsize = genprimsize(rtype); // Tree's size is too big if (lsize > rsize) return (NULL); // Widen to the right if (rsize > lsize) return (mkastunary(A_WIDEN, rtype, tree, 0)); } // For pointers on the left if (ptrtype(ltype)) { // OK is same type on right and not doing a binary op if (op == 0 && ltype == rtype) return (tree); } // We can scale only on A_ADD or A_SUBTRACT operation if (op == A_ADD || op == A_SUBTRACT) { // Left is int type, right is pointer type and the size // of the original type is >1: scale the left if (inttype(ltype) && ptrtype(rtype)) { rsize = genprimsize(value_at(rtype)); if (rsize > 1) return (mkastunary(A_SCALE, rtype, tree, rsize)); else return (tree); // Size 1, no need to scale } } // If we get here, the types are not compatible return (NULL); } ================================================ FILE: 21_More_Operators/Makefile ================================================ SRCS= defs.h cg.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c \ sym.c tree.c types.c SRCN= defs.h cgn.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c \ sym.c tree.c types.c ARMSRCS= defs.h cg_arm.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c \ sym.c tree.c types.c comp1: $(SRCS) cc -o comp1 -g -Wall $(SRCS) compn: $(SRCN) cc -o compn -g -Wall $(SRCN) comp1arm: $(ARMSRCS) cc -o comp1arm -g -Wall $(ARMSRCS) cp comp1arm comp1 clean: rm -f comp1 comp1arm compn *.o *.s out test: comp1 tests/runtests (cd tests; chmod +x runtests; ./runtests) armtest: comp1arm tests/runtests (cd tests; chmod +x runtests; ./runtests) testn: compn tests/runtestsn (cd tests; chmod +x runtestsn; ./runtestsn) ================================================ FILE: 
21_More_Operators/Readme.md ================================================ # Part 21: More Operators In this part of our compiler writing journey, I decided to pick some low-hanging fruit and implement many of the expression operators which are still missing. These include: + `++` and `--`, both pre-increment/decrement and post-increment/decrement + unary `-`, `~`, and `!` + binary `^`, `&`, `|`, `<<` and `>>` I also implemented the implicit "not zero operator" which treats an expression rvalue as a boolean value for selection and loop statements, e.g. ```c for (str= "Hello"; *str; str++) ... ``` instead of writing ```c for (str= "Hello"; *str != 0; str++) ... ``` ## Tokens and Scanning As always, we start off with any new tokens in the language. There are a few this time: | Scanned Input | Token | |:-------------:|-------| | `\|\|` | T_LOGOR | | `&&` | T_LOGAND | | `\|` | T_OR | | `^` | T_XOR | | `<<` | T_LSHIFT | | `>>` | T_RSHIFT | | `++` | T_INC | | `--` | T_DEC | | `~` | T_INVERT | | `!` | T_LOGNOT | Some of these are composed of new single characters, so the scanning of these is easy. For others, we need to distinguish between single characters and pairs of different characters. An example is `<`, `<<` and `<=`. We have already seen how to do the scanning for these in `scan.c`, so I won't give the new code here. Browse through `scan.c` to see the additions. ## Adding the Binary Operators to the Parsing Now we need to parse these operators. Some of these operators are binary operators: `||`, `&&`, `|`, `^`, `<<` and `>>`. We already have a precedence framework in place for binary operators. We can simply add the new operators to the framework. When I did this, I realised that I had several of the existing operators in with the wrong precedence according to [this table of C operator precedence](https://en.cppreference.com/w/c/language/operator_precedence). We also need to align the AST node operations with the set of binary operator tokens. 
Thus, here are the definitions of the tokens, the AST node types and the operator precedence table from `defs.h` and `expr.c`: ```c // Token types enum { T_EOF, // Binary operators T_ASSIGN, T_LOGOR, T_LOGAND, T_OR, T_XOR, T_AMPER, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, T_LSHIFT, T_RSHIFT, T_PLUS, T_MINUS, T_STAR, T_SLASH, // Other operators T_INC, T_DEC, T_INVERT, T_LOGNOT, ... }; // AST node types. The first few line up // with the related tokens enum { A_ASSIGN= 1, A_LOGOR, A_LOGAND, A_OR, A_XOR, A_AND, A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_LSHIFT, A_RSHIFT, A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, ... A_PREINC, A_PREDEC, A_POSTINC, A_POSTDEC, A_NEGATE, A_INVERT, A_LOGNOT, ... }; // Operator precedence for each binary token. Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 20, 30, // T_EOF, T_ASSIGN, T_LOGOR, T_LOGAND 40, 50, 60, // T_OR, T_XOR, T_AMPER 70, 70, // T_EQ, T_NE 80, 80, 80, 80, // T_LT, T_GT, T_LE, T_GE 90, 90, // T_LSHIFT, T_RSHIFT 100, 100, // T_PLUS, T_MINUS 110, 110 // T_STAR, T_SLASH }; ``` ## New Unary Operators. Now we get to the parsing of the new unary operators, `++`, `--`, `~` and `!`. All of these are prefix operators (i.e. before an expression), but the `++` and `--` operators can also be postfix operators. Thus, we'll need to parse three prefix and two postfix operators, and perform five different semantic actions for them. To prepare for this addition of these new operators, I went back and consulted the [BNF Grammar for C](https://www.lysator.liu.se/c/ANSI-C-grammar-y.html). As these new operators can't be worked into the existing binary operator framework, we'll need to implement them with new functions in our recursive descent parser. 
Here are the *relevant* sections from the above grammar, rewritten to use our token names: ``` primary_expression : T_IDENT | T_INTLIT | T_STRLIT | '(' expression ')' ; postfix_expression : primary_expression | postfix_expression '[' expression ']' | postfix_expression '(' expression ')' | postfix_expression '++' | postfix_expression '--' ; prefix_expression : postfix_expression | '++' prefix_expression | '--' prefix_expression | prefix_operator prefix_expression ; prefix_operator : '&' | '*' | '-' | '~' | '!' ; multiplicative_expression : prefix_expression | multiplicative_expression '*' prefix_expression | multiplicative_expression '/' prefix_expression | multiplicative_expression '%' prefix_expression ; etc. ``` We implement the binary operators in `binexpr()` in `expr.c`, but this calls `prefix()`, just as `multiplicative_expression` in the above BNF grammar refers to `prefix_expression`. We already have a function called `primary()`. Now we need a function, `postfix()` to deal with the postfix expressions. ## Prefix Operators We already parse a couple of tokens in `prefix()`: T_AMPER and T_STAR. We can add in the new tokens here (T_MINUS, T_INVERT, T_LOGNOT, T_INC and T_DEC) by adding more case statements to the `switch (Token.token)` statement. I won't include the code here because all the cases have a similar structure: + Skip past the token with `scan(&Token)` + Parse the next expression with `prefix()` + Do some semantic checking + Extend the AST tree that was returned by `prefix()` However, the differences between some of the cases are important to cover. For the parsing of the `&` (T_AMPER) token, the expression needs to be treated as an lvalue: if we do `&x`, we want the address of the variable `x`, not the address of `x`'s value. 
Other cases do need to have the AST tree returned by `prefix()` forced to be an rvalue: + `-` (T_MINUS) + `~` (T_INVERT) + `!` (T_LOGNOT) And, for the pre-increment and pre-decrement operators, we actually *require* the expression to be an lvalue: we can do `++x` but not `++3`. For now, I've written the code to require a simple identifier, but I know later on we will want to parse and deal with `++b[2]` and `++ *ptr`. Also, from a design point of view, we have the option of altering the AST tree returned by `prefix()` (with no new AST nodes), or adding one or more new AST nodes to the tree: + T_AMPER modifies the existing AST tree so the root is A_ADDR + T_STAR adds an A_DEREF node to the root of the tree + T_MINUS adds an A_NEGATE node to the root of the tree after possibly widening the tree to be an `int` value. Why? Because the tree might be of type `char` which is unsigned, and you can't negate an unsigned value. + T_INVERT adds an A_INVERT node to the root of the tree + T_LOGNOT adds an A_LOGNOT node to the root of the tree + T_INC adds an A_PREINC node to the root of the tree + T_DEC adds an A_PREDEC node to the root of the tree ## Parsing the Postfix Operators If you look at the BNF grammar I hyperlinked to above, to parse a postfix expression we need to refer to the parsing of a primary expression. To implement this, we need to get the tokens of the primary expression first and then determine if there are any trailing postfix tokens. Even though the grammar shows "postfix" calling "primary", I've implemented it by scanning the tokens in `primary()` and then deciding to call `postfix()` to parse the postfix tokens. > This turned out to be a mistake -- Warren, writing from the future. The BNF grammar above seems to allow expressions like `x++ ++` because it has: ``` postfix_expression: postfix_expression '++' ; ``` but I'm not going to allow more than one postfix operator after the expression. 
So let's look at the new code: `primary()` deals with recognising primary expressions: integer literals, string literals and identifiers. It also recognises parenthesised expressions. Only the identifiers can be followed by postfix operators. ```c static struct ASTnode *primary(void) { ... switch (Token.token) { case T_INTLIT: ... case T_STRLIT: ... case T_LPAREN: ... case T_IDENT: return (postfix()); ... } ``` I've moved the parsing of function calls and array references out to `postfix()`, and this is where we parse the postfix `++` and `--` operators: ```c // Parse a postfix expression and return // an AST node representing it. The // identifier is already in Text. static struct ASTnode *postfix(void) { struct ASTnode *n; int id; // Scan in the next token to see if we have a postfix expression scan(&Token); // Function call if (Token.token == T_LPAREN) return (funccall()); // An array reference if (Token.token == T_LBRACKET) return (array_access()); // A variable. Check that the variable exists. id = findglob(Text); if (id == -1 || Gsym[id].stype != S_VARIABLE) fatals("Unknown variable", Text); switch (Token.token) { // Post-increment: skip over the token case T_INC: scan(&Token); n = mkastleaf(A_POSTINC, Gsym[id].type, id); break; // Post-decrement: skip over the token case T_DEC: scan(&Token); n = mkastleaf(A_POSTDEC, Gsym[id].type, id); break; // Just a variable reference default: n = mkastleaf(A_IDENT, Gsym[id].type, id); } return (n); } ``` Another design decision. For `++`, we could have made an A_IDENT AST node with an A_POSTINC parent, but given that we have the identifier's name in `Text`, we can build a single AST node that contains both the node type and the reference to the identifier's slot number in the symbol table. 
## Converting an Integer Expression to a Boolean Value Before we leave the parsing side of things and move to the code generation side of things, I should mention the change I made to allow integer expressions to be treated as boolean expressions, e.g. ``` x= a + b; if (x) { printf("x is not zero\n"); } ``` The BNF grammar doesn't provide any explicit syntax rules to restrict expressions to be boolean, e.g: ``` selection_statement : IF '(' expression ')' statement ``` Therefore, we'll have to do this semantically. In `stmt.c` where I parse IF, WHILE and FOR loops, I've added this code: ```c // Parse the following expression // Force a non-comparison expression to be boolean condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, 0); ``` I've introduced a new AST node type, A_TOBOOL. This will generate code to take any integer value. If this value is zero, the result is zero, otherwise the result will be one. ## Generating the Code for the New Operators Now we turn our attention to generating the code for the new operators. Actually, the new AST node types: A_LOGOR, A_LOGAND, A_OR, A_XOR, A_AND, A_LSHIFT, A_RSHIFT, A_PREINC, A_PREDEC, A_POSTINC, A_POSTDEC, A_NEGATE, A_INVERT, A_LOGNOT and A_TOBOOL. All of these are simple calls out to matching functions in the platform-specific code generator in `cg.c`. 
So the new code in `genAST()` in `gen.c` is simply: ```c case A_AND: return (cgand(leftreg, rightreg)); case A_OR: return (cgor(leftreg, rightreg)); case A_XOR: return (cgxor(leftreg, rightreg)); case A_LSHIFT: return (cgshl(leftreg, rightreg)); case A_RSHIFT: return (cgshr(leftreg, rightreg)); case A_POSTINC: // Load the variable's value into a register, // then increment it return (cgloadglob(n->v.id, n->op)); case A_POSTDEC: // Load the variable's value into a register, // then decrement it return (cgloadglob(n->v.id, n->op)); case A_PREINC: // Load and increment the variable's value into a register return (cgloadglob(n->left->v.id, n->op)); case A_PREDEC: // Load and decrement the variable's value into a register return (cgloadglob(n->left->v.id, n->op)); case A_NEGATE: return (cgnegate(leftreg)); case A_INVERT: return (cginvert(leftreg)); case A_LOGNOT: return (cglognot(leftreg)); case A_TOBOOL: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, set the register // to 0 or 1 based on its zeroness or non-zeroness return (cgboolean(leftreg, parentASTop, label)); ``` ## x86-64 Specific Code Generation Functions That means we can now look at the back-end functions to generate real x86-64 assembly code. 
For most of the bitwise operations, the x86-64 platform has assembly instructions to do them: ```c int cgand(int r1, int r2) { fprintf(Outfile, "\tandq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\torq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txorq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tnegq\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnotq\t%s\n", reglist[r]); return (r); } ``` With the shift operations, as far as I can tell the shift amount has to be loaded into the `%cl` register first. ```c int cgshl(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshlq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshrq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } ``` The operations that deal with boolean expressions (where the result must be either 0 or 1) are a bit more complicated. ```c // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); return (r); } ``` The `test` instruction essentially AND's the register with itself to set the zero and negative flags. Then we set the register to 1 if it is equal to zero (`sete`). Then we move this 8-bit result into the 64-bit register proper. And here is the code to convert an integer into a boolean value: ```c // Convert an integer value to a boolean value. 
Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); } return (r); } ``` Again, we do a `test` to get the zeroness or non-zeroness of the register. If we are doing this for a selection or loop statement, then we use `je` to jump if the result was false. Otherwise, we use `setnz` to set the register to 1 if it was non-zero originally. ## Increment and Decrement Operations I've left the `++` and `--` operations to last. The subtlety here is that we have to both get the value out of the memory location into a register, and separately increment or decrement it. And we have to choose to do this before or after we load the register. As we already have a `cgloadglob()` function to load a global variable's value, let's modify it to also alter the variable as required. The code is ugly but it does work. ```c // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. 
int cgloadglob(int id, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it switch (Gsym[id].type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%s(\%%rip)\n", Gsym[id].name); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%s(\%%rip)\n", Gsym[id].name); fprintf(Outfile, "\tmovzbq\t%s(%%rip), %s\n", Gsym[id].name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%s(\%%rip)\n", Gsym[id].name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%s(\%%rip)\n", Gsym[id].name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%s(\%%rip)\n", Gsym[id].name); if (op == A_PREDEC) fprintf(Outfile, "\tdecl\t%s(\%%rip)\n", Gsym[id].name); fprintf(Outfile, "\tmovslq\t%s(\%%rip), %s\n", Gsym[id].name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%s(\%%rip)\n", Gsym[id].name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%s(\%%rip)\n", Gsym[id].name); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: if (op == A_PREINC) fprintf(Outfile, "\tincq\t%s(\%%rip)\n", Gsym[id].name); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%s(\%%rip)\n", Gsym[id].name); fprintf(Outfile, "\tmovq\t%s(\%%rip), %s\n", Gsym[id].name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%s(\%%rip)\n", Gsym[id].name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%s(\%%rip)\n", Gsym[id].name); break; default: fatald("Bad type in cgloadglob:", Gsym[id].type); } return (r); } ``` I'm pretty sure that I'll have to rewrite this later on to perform `x= b[5]++`, but this will do for now. After all, baby steps is what I promised for each step of our journey. ## Testing the New Functionality I won't go through the new test input files in detail for this step. They are `input22.c`, `input23.c` and `input24.c` in the `tests` directory. You can browse them and confirm that the compiler can correctly compile them: ``` $ make test ... 
input22.c: OK input23.c: OK input24.c: OK ``` ## Conclusion and What's Next In terms of extending the functionality of our compiler, this part of the journey added a lot of functionality, but I hope the amount of additional conceptual complexity was minimal. We added a bunch of binary operators and this was done by updating the scanner and changing the operator precedence table. For the unary operators, we added them manually to the parser in the `prefix()` function. For the new postfix operators, we separated the old function call and array index functionality out into a new `postfix()` function, and used this to add in the postfix operators. We did have to worry a bit about lvalues and rvalues here. We also had some design decisions about what AST nodes to add, or if we should just redecorate some existing AST nodes. The code generation ended up being relatively simple because the x86-64 architecture has instructions to implement the operations we needed. However, we did have to set up some specific registers for some of the operations, or perform instruction combinations to do what we wanted. The tricky operations were the increment and decrement operations. I've put code in to get these to work for ordinary variables but we will have to revisit this later. In the next part of our compiler writing journey, I'd like to tackle local variables. Once we can get these to work, we can extend them to also include function parameters and arguments. This will take two or more steps. [Next step](../22_Design_Locals/Readme.md) ================================================ FILE: 21_More_Operators/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. 
// We need a list of byte and doubleword registers, too static int freereg[4]; static char *reglist[4] = { "%r8", "%r9", "%r10", "%r11" }; static char *breglist[4] = { "%r8b", "%r9b", "%r10b", "%r11b" }; static char *dreglist[4] = { "%r8d", "%r9d", "%r10d", "%r11d" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Gsym[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, @function\n" "%s:\n" "\tpushq\t%%rbp\n" "\tmovq\t%%rsp, %%rbp\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Gsym[id].endlabel); fputs("\tpopq %rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. 
int cgloadglob(int id, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it switch (Gsym[id].type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%s(\%%rip)\n", Gsym[id].name); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%s(\%%rip)\n", Gsym[id].name); fprintf(Outfile, "\tmovzbq\t%s(%%rip), %s\n", Gsym[id].name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%s(\%%rip)\n", Gsym[id].name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%s(\%%rip)\n", Gsym[id].name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%s(\%%rip)\n", Gsym[id].name); if (op == A_PREDEC) fprintf(Outfile, "\tdecl\t%s(\%%rip)\n", Gsym[id].name); fprintf(Outfile, "\tmovslq\t%s(\%%rip), %s\n", Gsym[id].name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%s(\%%rip)\n", Gsym[id].name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%s(\%%rip)\n", Gsym[id].name); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: if (op == A_PREINC) fprintf(Outfile, "\tincq\t%s(\%%rip)\n", Gsym[id].name); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%s(\%%rip)\n", Gsym[id].name); fprintf(Outfile, "\tmovq\t%s(\%%rip), %s\n", Gsym[id].name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%s(\%%rip)\n", Gsym[id].name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%s(\%%rip)\n", Gsym[id].name); break; default: fatald("Bad type in cgloadglob:", Gsym[id].type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int id) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tleaq\tL%d(\%%rip), %s\n", id, reglist[r]); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the 
number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tandq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\torq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txorq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshlq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshrq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tnegq\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnotq\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); return (r); } // Convert an integer value to a boolean value. 
Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); } return (r); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { // Get a new register int outr = alloc_register(); fprintf(Outfile, "\tmovq\t%s, %%rdi\n", reglist[r]); fprintf(Outfile, "\tcall\t%s\n", Gsym[id].name); fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]); free_register(r); return (outr); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsalq\t$%d, %s\n", val, reglist[r]); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %s(\%%rip)\n", breglist[r], Gsym[id].name); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %s(\%%rip)\n", dreglist[r], Gsym[id].name); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tmovq\t%s, %s(\%%rip)\n", reglist[r], Gsym[id].name); break; default: fatald("Bad type in cgloadglob:", Gsym[id].type); } return (r); } // Array of type sizes in P_XXX order. // 0 means no size. static int psize[] = { 0, 0, 1, 4, 8, 8, 8, 8, 8 }; // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { // Check the type is valid if (type < P_NONE || type > P_LONGPTR) fatal("Bad type in cgprimsize()"); return (psize[type]); } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Gsym[id].type); // Generate the global identity and the label fprintf(Outfile, "\t.data\n" "\t.globl\t%s\n", Gsym[id].name); fprintf(Outfile, "%s:", Gsym[id].name); // Generate the space for (int i = 0; i < Gsym[id].size; i++) { switch (typesize) { case 1: fprintf(Outfile, "\t.byte\t0\n"); break; case 4: fprintf(Outfile, "\t.long\t0\n"); break; case 8: fprintf(Outfile, "\t.quad\t0\n"); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\t.byte\t%d\n", *cptr); } fprintf(Outfile, "\t.byte\t0\n"); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { // Generate code depending on the function's type switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzbl\t%s, %%eax\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %%eax\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", Gsym[id].type); } cgjump(Gsym[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. 
Return a new register int cgaddress(int id) { int r = alloc_register(); fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", Gsym[id].name, reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tmovzbq\t(%s), %s\n", reglist[r], reglist[r]); break; case P_INTPTR: fprintf(Outfile, "\tmovslq\t(%s), %s\n", reglist[r], reglist[r]); break; case P_LONGPTR: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, (%s)\n", breglist[r1], reglist[r2]); break; case P_INT: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 21_More_Operators/cg_arm.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for ARMv6 on Raspberry Pi // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. static int freereg[4]; static char *reglist[4] = { "r4", "r5", "r6", "r7" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // We have to store large integer literal values in memory. // Keep a list of them which will be output in the postamble #define MAXINTS 1024 int Intlist[MAXINTS]; static int Intslot = 0; // Determine the offset of a large integer // literal from the .L3 label. If the integer // isn't in the list, add it. static void set_int_offset(int val) { int offset = -1; // See if it is already there for (int i = 0; i < Intslot; i++) { if (Intlist[i] == val) { offset = 4 * i; break; } } // Not in the list, so add it if (offset == -1) { offset = 4 * Intslot; if (Intslot == MAXINTS) fatal("Out of int slots in set_int_offset()"); Intlist[Intslot++] = val; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L3+%d\n", offset); } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Print out the assembly postamble void cgpostamble() { // Print out the global variables fprintf(Outfile, ".L2:\n"); for (int i = 0; i < Globs; i++) { if (Gsym[i].stype == S_VARIABLE) fprintf(Outfile, "\t.word %s\n", Gsym[i].name); } // Print out the integer literals fprintf(Outfile, ".L3:\n"); for (int i = 0; i < Intslot; i++) { fprintf(Outfile, "\t.word %d\n", Intlist[i]); } } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Gsym[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, \%%function\n" "%s:\n" "\tpush\t{fp, lr}\n" "\tadd\tfp, sp, #4\n" "\tsub\tsp, sp, #8\n" "\tstr\tr0, [fp, #-8]\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Gsym[id].endlabel); fputs("\tsub\tsp, fp, #4\n" "\tpop\t{fp, pc}\n" "\t.align\t2\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. 
int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); // If the literal value is small, do it with one instruction if (value <= 1000) fprintf(Outfile, "\tmov\t%s, #%d\n", reglist[r], value); else { set_int_offset(value); fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); } return (r); } // Determine the offset of a variable from the .L2 // label. Yes, this is inefficient code. static void set_var_offset(int id) { int offset = 0; // Walk the symbol table up to id. // Find S_VARIABLEs and add on 4 until // we get to our variable for (int i = 0; i < id; i++) { if (Gsym[i].stype == S_VARIABLE) offset += 4; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L2+%d\n", offset); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tldrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgloadglob:", Gsym[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s, %s\n", reglist[r1], reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\tmul\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // 
return the number of the register with the result int cgdiv(int r1, int r2) { // To do a divide: r0 holds the dividend, r1 holds the divisor. // The quotient is in r0. fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r1]); fprintf(Outfile, "\tmov\tr1, %s\n", reglist[r2]); fprintf(Outfile, "\tbl\t__aeabi_idiv\n"); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r1]); free_register(r2); return (r1); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]); fprintf(Outfile, "\tbl\t%s\n", Gsym[id].name); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r]); return (r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tlsl\t%s, %s, #%d\n", reglist[r], reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { // Get the offset to the variable set_var_offset(id); switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tstr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgstorglob:", Gsym[id].type); } return (r); } // Array of type sizes in P_XXX order. // 0 means no size. static int psize[] = { 0, 0, 1, 4, 4, 4, 4, 4 }; // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { // Check the type is valid if (type < P_NONE || type > P_LONGPTR) fatal("Bad type in cgprimsize()"); return (psize[type]); } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Gsym[id].type); fprintf(Outfile, "\t.data\n" "\t.globl\t%s\n", Gsym[id].name); switch (typesize) { case 1: fprintf(Outfile, "%s:\t.byte\t0\n", Gsym[id].name); break; case 4: fprintf(Outfile, "%s:\t.long\t0\n", Gsym[id].name); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "moveq", "movne", "movlt", "movgt", "movle", "movge" }; // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "movne", "moveq", "movge", "movle", "movgt", "movlt" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #1\n", cmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #0\n", invcmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\tuxtb\t%s, %s\n", reglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tb\tL%d\n", l); } // List of inverted branch instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *brlist[] = { "bne", "beq", "bge", "ble", "bgt", "blt" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", brlist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[reg]); cgjump(Gsym[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. Return a new register int cgaddress(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); fprintf(Outfile, "\tmov\t%s, r3\n", reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tldrb\t%s, [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [%s]\n", reglist[r1], reglist[r2]); break; case P_INT: case P_LONG: fprintf(Outfile, "\tstr\t%s, [%s]\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 21_More_Operators/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. 
// We need a list of byte and doubleword registers, too static int freereg[4]; static char *reglist[4] = { "r8", "r9", "r10", "r11" }; static char *breglist[4] = { "r8b", "r9b", "r10b", "r11b" }; static char *dreglist[4] = { "r8d", "r9d", "r10d", "r11d" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\textern\tprintint\n", Outfile); fputs("\textern\tprintchar\n", Outfile); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Gsym[id].name; fprintf(Outfile, "\tsection\t.text\n" "\tglobal\t%s\n" "%s:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n", name, name); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Gsym[id].endlabel); fputs("\tpop rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. 
int cgloadglob(int id, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it switch (Gsym[id].type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", Gsym[id].name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", Gsym[id].name); fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], Gsym[id].name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", Gsym[id].name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", Gsym[id].name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword [%s]\n", Gsym[id].name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tdword [%s]\n", Gsym[id].name); fprintf(Outfile, "\tmovsx\t%s, word [%s]\n", dreglist[r], Gsym[id].name); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword [%s]\n", Gsym[id].name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword [%s]\n", Gsym[id].name); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword [%s]\n", Gsym[id].name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", Gsym[id].name); fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], Gsym[id].name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword [%s]\n", Gsym[id].name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", Gsym[id].name); break; default: fatald("Bad type in cgloadglob:", Gsym[id].type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int id) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, L%d\n", reglist[r], id); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Subtract the second register from 
the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tand\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\tor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshl\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshr\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tneg\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnot\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r], breglist[r]); return (r); } // Convert an integer value to a boolean value. 
Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, byte %s\n", reglist[r], breglist[r]); } return (r); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { // Get a new register int outr = alloc_register(); fprintf(Outfile, "\tmov\trdi, %s\n", reglist[r]); fprintf(Outfile, "\tcall\t%s\n", Gsym[id].name); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[outr]); free_register(r); return (outr); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsal\t%s, %d\n", reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], %s\n", Gsym[id].name, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", Gsym[id].name, dreglist[r]); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tmov\t[%s], %s\n", Gsym[id].name, reglist[r]); break; default: fatald("Bad type in cgloadglob:", Gsym[id].type); } return (r); } // Array of type sizes in P_XXX order. // 0 means no size. static int psize[] = { 0, 0, 1, 4, 8, 8, 8, 8, 8 }; // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { // Check the type is valid if (type < P_NONE || type > P_LONGPTR) fatal("Bad type in cgprimsize()"); return (psize[type]); } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Gsym[id].type); // Generate the global identity and the label fprintf(Outfile, "\tsection\t.data\n" "\tglobal\t%s\n", Gsym[id].name); fprintf(Outfile, "%s:", Gsym[id].name); // Generate the space // original version for (int i = 0; i < Gsym[id].size; i++) { switch(typesize) { case 1: fprintf(Outfile, "\tdb\t0\n"); break; case 4: fprintf(Outfile, "\tdd\t0\n"); break; case 8: fprintf(Outfile, "\tdq\t0\n"); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } /* compact version using times instead of loop switch(typesize) { case 1: fprintf(Outfile, "\ttimes\t%d\tdb\t0\n", Gsym[id].size); break; case 4: fprintf(Outfile, "\ttimes\t%d\tdd\t0\n", Gsym[id].size); break; case 8: fprintf(Outfile, "\ttimes\t%d\tdq\t0\n", Gsym[id].size); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } */ } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\tdb\t%d\n", *cptr); } fprintf(Outfile, "\tdb\t0\n"); /* put string in readable format on a single line // probably went overboard with error checking int comma = 0, quote = 0, start = 1; fprintf(Outfile, "\tdb\t"); for (cptr=strvalue; *cptr; cptr++) { if ( ! 
isprint(*cptr) ) if (comma || start) { fprintf(Outfile, "%d, ", *cptr); start = 0; comma = 1; } else if (quote) { fprintf(Outfile, "\', %d, ", *cptr); comma = 1; quote = 0; } else { fprintf(Outfile, "%d, ", *cptr); comma = 1; quote = 0; } else if (start || comma) { fprintf(Outfile, "\'%c", *cptr); start = comma = 0; quote = 1; } else { fprintf(Outfile, "%c", *cptr); comma = 0; quote = 1; } } if (comma || start) fprintf(Outfile, "0\n"); else fprintf(Outfile, "\', 0\n"); */ } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { // Generate code depending on the function's type switch (Gsym[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzx\teax, %s\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmov\teax, %s\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", Gsym[id].type); } cgjump(Gsym[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. 
Return a new register int cgaddress(int id) { int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r], Gsym[id].name); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: fprintf(Outfile, "\tmovsx\t%s, word [%s]\n", reglist[r], dreglist[r]); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); break; case P_LONGPTR: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], byte %s\n", reglist[r2], breglist[r1]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; case P_LONG: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 21_More_Operators/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Putback; // Character put back by scanner extern_ int Functionid; // Symbol id of the current function extern_ int Globs; // Position of next free global symbol slot extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ struct token Token; // Last token scanned extern_ char Text[TEXTLEN + 1]; // Last identifier scanned extern_ struct symtable Gsym[NSYMBOLS]; // Global symbol table extern_ int O_dumpAST; ================================================ FILE: 21_More_Operators/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // 
Copyright (c) 2019 Warren Toomey, GPL3 // Parse the current token and return // a primitive type enum value. Also // scan in the next token int parse_type(void) { int type; switch (Token.token) { case T_VOID: type = P_VOID; break; case T_CHAR: type = P_CHAR; break; case T_INT: type = P_INT; break; case T_LONG: type = P_LONG; break; default: fatald("Illegal type, token", Token.token); } // Scan in one or more further '*' tokens // and determine the correct pointer type while (1) { scan(&Token); if (Token.token != T_STAR) break; type = pointer_to(type); } // We leave with the next token already scanned return (type); } // variable_declaration: type identifier ';' // | type identifier '[' INTLIT ']' ';' // ; // // Parse the declaration of a scalar variable or an array // with a given size. // The identifier has been scanned & we have the type void var_declaration(int type) { int id; // Text now has the identifier's name. // If the next token is a '[' if (Token.token == T_LBRACKET) { // Skip past the '[' scan(&Token); // Check we have an array size if (Token.token == T_INTLIT) { // Add this as a known array and generate its space in assembly. // We treat the array as a pointer to its elements' type id = addglob(Text, pointer_to(type), S_ARRAY, 0, Token.intvalue); genglobsym(id); } // Ensure we have a following ']' scan(&Token); match(T_RBRACKET, "]"); } else { // Add this as a known scalar // and generate its space in assembly id = addglob(Text, type, S_VARIABLE, 0, 1); genglobsym(id); } // Get the trailing semicolon semi(); } // // function_declaration: type identifier '(' ')' compound_statement ; // // Parse the declaration of a simplistic function. // The identifier has been scanned & we have the type struct ASTnode *function_declaration(int type) { struct ASTnode *tree, *finalstmt; int nameslot, endlabel; // Text now has the identifier's name. 
// Get a label-id for the end label, add the function // to the symbol table, and set the Functionid global // to the function's symbol-id endlabel = genlabel(); nameslot = addglob(Text, type, S_FUNCTION, endlabel, 0); Functionid = nameslot; // Scan in the parentheses lparen(); rparen(); // Get the AST tree for the compound statement tree = compound_statement(); // If the function type isn't P_VOID .. if (type != P_VOID) { // Error if no statements in the function if (tree == NULL) fatal("No statements in function with non-void type"); // Check that the last AST operation in the // compound statement was a return statement finalstmt = (tree->op == A_GLUE) ? tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); } // Return an A_FUNCTION node which has the function's nameslot // and the compound statement sub-tree return (mkastunary(A_FUNCTION, type, tree, nameslot)); } // Parse one or more global declarations, either // variables or functions void global_declarations(void) { struct ASTnode *tree; int type; while (1) { // We have to read past the type and identifier // to see either a '(' for a function declaration // or a ',' or ';' for a variable declaration. // Text is filled in by the ident() call. 
type = parse_type(); ident(); if (Token.token == T_LPAREN) { // Parse the function declaration and // generate the assembly code for it tree = function_declaration(type); if (O_dumpAST) { dumpAST(tree, NOLABEL, 0); fprintf(stdout, "\n\n"); } genAST(tree, NOLABEL, 0); } else { // Parse the global variable declaration var_declaration(type); } // Stop when we have reached EOF if (Token.token == T_EOF) break; } } ================================================ FILE: 21_More_Operators/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c void reject_token(struct token *t); int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, int intvalue); struct ASTnode *mkastleaf(int op, int type, int intvalue); struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, int intvalue); void dumpAST(struct ASTnode *n, int label, int parentASTop); // gen.c int genlabel(void); int genAST(struct ASTnode *n, int reg, int parentASTop); void genpreamble(); void genpostamble(); void genfreeregs(); void genglobsym(int id); int genglobstr(char *strvalue); int genprimsize(int type); void genreturn(int reg, int id); // cg.c void freeall_registers(void); void cgpreamble(); void cgpostamble(); void cgfuncpreamble(int id); void cgfuncpostamble(int id); int cgloadint(int value, int type); int cgloadglob(int id, int op); int cgloadglobstr(int id); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdiv(int r1, int r2); int cgshlconst(int r, int val); int cgcall(int r, int id); int cgstorglob(int r, int id); void cgglobsym(int id); void cgglobstr(int l, char *strvalue); int cgcompare_and_set(int ASTop, int r1, int r2); int cgcompare_and_jump(int ASTop, int r1, int r2, int label); void cglabel(int l); void cgjump(int l); int cgwiden(int r, int oldtype, int newtype); int 
cgprimsize(int type); void cgreturn(int reg, int id); int cgaddress(int id); int cgderef(int r, int type); int cgstorderef(int r1, int r2, int type); int cgnegate(int r); int cginvert(int r); int cglognot(int r); int cgboolean(int r, int op, int label); int cgand(int r1, int r2); int cgor(int r1, int r2); int cgxor(int r1, int r2); int cgshl(int r1, int r2); int cgshr(int r1, int r2); // expr.c struct ASTnode *binexpr(int ptp); // stmt.c struct ASTnode *compound_statement(void); // misc.c void match(int t, char *what); void semi(void); void lbrace(void); void rbrace(void); void lparen(void); void rparen(void); void ident(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c int findglob(char *s); int addglob(char *name, int type, int stype, int endlabel, int size); // decl.c void var_declaration(int type); struct ASTnode *function_declaration(int type); void global_declarations(void); // types.c int inttype(int type); int parse_type(void); int pointer_to(int type); int value_at(int type); struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op); ================================================ FILE: 21_More_Operators/defs.h ================================================ #include #include #include #include // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 #define TEXTLEN 512 // Length of symbols in input #define NSYMBOLS 1024 // Number of symbol table entries // Token types enum { T_EOF, // Binary operators T_ASSIGN, T_LOGOR, T_LOGAND, T_OR, T_XOR, T_AMPER, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, T_LSHIFT, T_RSHIFT, T_PLUS, T_MINUS, T_STAR, T_SLASH, // Other operators T_INC, T_DEC, T_INVERT, T_LOGNOT, // Type keywords T_VOID, T_CHAR, T_INT, T_LONG, // Other keywords T_IF, T_ELSE, T_WHILE, T_FOR, T_RETURN, // Structural tokens T_INTLIT, T_STRLIT, T_SEMI, T_IDENT, T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, T_LBRACKET, T_RBRACKET }; // Token structure struct token 
{ int token; // Token type, from the enum list above int intvalue; // For T_INTLIT, the integer value }; // AST node types. The first few line up // with the related tokens enum { A_ASSIGN= 1, A_LOGOR, A_LOGAND, A_OR, A_XOR, A_AND, A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_LSHIFT, A_RSHIFT, A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_INTLIT, A_STRLIT, A_IDENT, A_GLUE, A_IF, A_WHILE, A_FUNCTION, A_WIDEN, A_RETURN, A_FUNCCALL, A_DEREF, A_ADDR, A_SCALE, A_PREINC, A_PREDEC, A_POSTINC, A_POSTDEC, A_NEGATE, A_INVERT, A_LOGNOT, A_TOBOOL }; // Primitive types enum { P_NONE, P_VOID, P_CHAR, P_INT, P_LONG, P_VOIDPTR, P_CHARPTR, P_INTPTR, P_LONGPTR }; // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates int rvalue; // True if the node is an rvalue struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; union { // For A_INTLIT, the integer value int intvalue; // For A_IDENT, the symbol slot number int id; // For A_FUNCTION, the symbol slot number int size; // For A_SCALE, the size to scale by } v; // For A_FUNCCALL, the symbol slot number }; #define NOREG -1 // Use NOREG when the AST generation // functions have no register to return #define NOLABEL 0 // Use NOLABEL when we have no label to // pass to genAST() // Structural types enum { S_VARIABLE, S_FUNCTION, S_ARRAY }; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol int stype; // Structural type for the symbol int endlabel; // For S_FUNCTIONs, the end label int size; // Number of elements in the symbol }; ================================================ FILE: 21_More_Operators/expr.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of expressions // Copyright (c) 2019 Warren Toomey, GPL3 // Parse a function call with a single expression // 
argument and return its AST static struct ASTnode *funccall(void) { struct ASTnode *tree; int id; // Check that the identifier has been defined as a function, // then make a leaf node for it. if ((id = findglob(Text)) == -1 || Gsym[id].stype != S_FUNCTION) { fatals("Undeclared function", Text); } // Get the '(' lparen(); // Parse the following expression tree = binexpr(0); // Build the function call AST node. Store the // function's return type as this node's type. // Also record the function's symbol-id tree = mkastunary(A_FUNCCALL, Gsym[id].type, tree, id); // Get the ')' rparen(); return (tree); } // Parse the index into an array and // return an AST tree for it static struct ASTnode *array_access(void) { struct ASTnode *left, *right; int id; // Check that the identifier has been defined as an array // then make a leaf node for it that points at the base if ((id = findglob(Text)) == -1 || Gsym[id].stype != S_ARRAY) { fatals("Undeclared array", Text); } left = mkastleaf(A_ADDR, Gsym[id].type, id); // Get the '[' scan(&Token); // Parse the following expression right = binexpr(0); // Get the ']' match(T_RBRACKET, "]"); // Ensure that this is of int type if (!inttype(right->type)) fatal("Array index is not of integer type"); // Scale the index by the size of the element's type right = modify_type(right, left->type, A_ADD); // Return an AST tree where the array's base has the offset // added to it, and dereference the element. Still an lvalue // at this point. left = mkastnode(A_ADD, Gsym[id].type, left, NULL, right, 0); left = mkastunary(A_DEREF, value_at(left->type), left, 0); return (left); } // Parse a postfix expression and return // an AST node representing it. The // identifier is already in Text. 
static struct ASTnode *postfix(void) { struct ASTnode *n; int id; // Scan in the next token to see if we have a postfix expression scan(&Token); // Function call if (Token.token == T_LPAREN) return (funccall()); // An array reference if (Token.token == T_LBRACKET) return (array_access()); // A variable. Check that the variable exists. id = findglob(Text); if (id == -1 || Gsym[id].stype != S_VARIABLE) fatals("Unknown variable", Text); switch (Token.token) { // Post-increment: skip over the token case T_INC: scan(&Token); n = mkastleaf(A_POSTINC, Gsym[id].type, id); break; // Post-decrement: skip over the token case T_DEC: scan(&Token); n = mkastleaf(A_POSTDEC, Gsym[id].type, id); break; // Just a variable reference default: n = mkastleaf(A_IDENT, Gsym[id].type, id); } return (n); } // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; int id; switch (Token.token) { case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. // Make it a P_CHAR if it's within the P_CHAR range if ((Token.intvalue) >= 0 && (Token.intvalue < 256)) n = mkastleaf(A_INTLIT, P_CHAR, Token.intvalue); else n = mkastleaf(A_INTLIT, P_INT, Token.intvalue); break; case T_STRLIT: // For a STRLIT token, generate the assembly for it. // Then make a leaf AST node for it. id is the string's label. id = genglobstr(Text); n = mkastleaf(A_STRLIT, P_CHARPTR, id); break; case T_IDENT: return (postfix()); case T_LPAREN: // Beginning of a parenthesised expression, skip the '('. // Scan in the expression and the right parenthesis scan(&Token); n = binexpr(0); rparen(); return (n); default: fatald("Expecting a primary expression, got token", Token.token); } // Scan in the next token and return the leaf node scan(&Token); return (n); } // Convert a binary operator token into a binary AST operation. 
// We rely on a 1:1 mapping from token to AST operation static int binastop(int tokentype) { if (tokentype > T_EOF && tokentype <= T_SLASH) return (tokentype); fatald("Syntax error, token", tokentype); return (0); // Keep -Wall happy } // Return true if a token is right-associative, // false otherwise. static int rightassoc(int tokentype) { if (tokentype == T_ASSIGN) return (1); return (0); } // Operator precedence for each token. Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 20, 30, // T_EOF, T_ASSIGN, T_LOGOR, T_LOGAND 40, 50, 60, // T_OR, T_XOR, T_AMPER 70, 70, // T_EQ, T_NE 80, 80, 80, 80, // T_LT, T_GT, T_LE, T_GE 90, 90, // T_LSHIFT, T_RSHIFT 100, 100, // T_PLUS, T_MINUS 110, 110 // T_STAR, T_SLASH }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec; if (tokentype > T_SLASH) fatald("Token with no precedence in op_precedence:", tokentype); prec = OpPrec[tokentype]; if (prec == 0) fatald("Syntax error, token", tokentype); return (prec); } // prefix_expression: primary // | '*' prefix_expression // | '&' prefix_expression // | '-' prefix_expression // | '++' prefix_expression // | '--' prefix_expression // ; // Parse a prefix expression and return // a sub-tree representing it. 
struct ASTnode *prefix(void) { struct ASTnode *tree; switch (Token.token) { case T_AMPER: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Ensure that it's an identifier if (tree->op != A_IDENT) fatal("& operator must be followed by an identifier"); // Now change the operator to A_ADDR and the type to // a pointer to the original type tree->op = A_ADDR; tree->type = pointer_to(tree->type); break; case T_STAR: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's either another deref or an // identifier if (tree->op != A_IDENT && tree->op != A_DEREF) fatal("* operator must be followed by an identifier or *"); // Prepend an A_DEREF operation to the tree tree = mkastunary(A_DEREF, value_at(tree->type), tree, 0); break; case T_MINUS: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_NEGATE operation to the tree and // make the child an rvalue. Because chars are unsigned, // also widen this to int so that it's signed tree->rvalue = 1; tree = modify_type(tree, P_INT, 0); tree = mkastunary(A_NEGATE, tree->type, tree, 0); break; case T_INVERT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_INVERT operation to the tree and // make the child an rvalue. tree->rvalue = 1; tree = mkastunary(A_INVERT, tree->type, tree, 0); break; case T_LOGNOT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_LOGNOT operation to the tree and // make the child an rvalue. 
tree->rvalue = 1; tree = mkastunary(A_LOGNOT, tree->type, tree, 0); break; case T_INC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("++ operator must be followed by an identifier"); // Prepend an A_PREINC operation to the tree tree = mkastunary(A_PREINC, tree->type, tree, 0); break; case T_DEC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("-- operator must be followed by an identifier"); // Prepend an A_PREDEC operation to the tree tree = mkastunary(A_PREDEC, tree->type, tree, 0); break; default: tree = primary(); } return (tree); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; struct ASTnode *ltemp, *rtemp; int ASTop; int tokentype; // Get the tree on the left. // Fetch the next token at the same time. 
left = prefix(); // If we hit a semicolon or ')', return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET) { left->rvalue = 1; return (left); } // While the precedence of this token is more than that of the // previous token precedence, or it's right associative and // equal to the previous token's precedence while ((op_precedence(tokentype) > ptp) || (rightassoc(tokentype) && op_precedence(tokentype) == ptp)) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Determine the operation to be performed on the sub-trees ASTop = binastop(tokentype); if (ASTop == A_ASSIGN) { // Assignment // Make the right tree into an rvalue right->rvalue = 1; // Ensure the right's type matches the left right = modify_type(right, left->type, 0); if (left == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree. However, switch // left and right around, so that the right expression's // code will be generated before the left expression ltemp = left; left = right; right = ltemp; } else { // We are not doing an assignment, so both trees should be rvalues // Convert both trees into rvalue if they are lvalue trees left->rvalue = 1; right->rvalue = 1; // Ensure the two types are compatible by trying // to modify each tree to match the other's type. ltemp = modify_type(left, right->type, ASTop); rtemp = modify_type(right, left->type, ASTop); if (ltemp == NULL && rtemp == NULL) fatal("Incompatible types in binary expression"); if (ltemp != NULL) left = ltemp; if (rtemp != NULL) right = rtemp; } // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. left = mkastnode(binastop(tokentype), left->type, left, NULL, right, 0); // Update the details of the current token. 
// If we hit a semicolon, ')' or ']', return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET) { left->rvalue = 1; return (left); } } // Return the tree we have when the precedence // is the same or lower left->rvalue = 1; return (left); } ================================================ FILE: 21_More_Operators/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number int genlabel(void) { static int id = 1; return (id++); } // Generate the code for an IF statement // and an optional ELSE clause static int genIF(struct ASTnode *n) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. // When there is no ELSE clause, Lfalse _is_ // the ending label! Lfalse = genlabel(); if (n->right) Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. genAST(n->left, Lfalse, n->op); genfreeregs(); // Generate the true compound statement genAST(n->mid, NOLABEL, n->op); genfreeregs(); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOLABEL, n->op); genfreeregs(); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = genlabel(); Lend = genlabel(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. 
genAST(n->left, Lend, n->op); genfreeregs(); // Generate the compound statement for the body genAST(n->right, NOLABEL, n->op); genfreeregs(); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Given an AST, an optional label, and the AST op // of the parent, generate assembly code recursively. // Return the register id with the tree's final value. int genAST(struct ASTnode *n, int label, int parentASTop) { int leftreg, rightreg; // We have some specific AST node handling at the top // so that we don't evaluate the child sub-trees immediately switch (n->op) { case A_IF: return (genIF(n)); case A_WHILE: return (genWHILE(n)); case A_GLUE: // Do each child statement, and free the // registers after each child genAST(n->left, NOLABEL, n->op); genfreeregs(); genAST(n->right, NOLABEL, n->op); genfreeregs(); return (NOREG); case A_FUNCTION: // Generate the function's preamble before the code // in the child sub-tree cgfuncpreamble(n->v.id); genAST(n->left, NOLABEL, n->op); cgfuncpostamble(n->v.id); return (NOREG); } // General AST node handling below // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, NOLABEL, n->op); if (n->right) rightreg = genAST(n->right, NOLABEL, n->op); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdiv(leftreg, rightreg)); case A_AND: return (cgand(leftreg, rightreg)); case A_OR: return (cgor(leftreg, rightreg)); case A_XOR: return (cgxor(leftreg, rightreg)); case A_LSHIFT: return (cgshl(leftreg, rightreg)); case A_RSHIFT: return (cgshr(leftreg, rightreg)); case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, compare registers // and set one to 1 or 0 based on the comparison. 
if (parentASTop == A_IF || parentASTop == A_WHILE) return (cgcompare_and_jump(n->op, leftreg, rightreg, label)); else return (cgcompare_and_set(n->op, leftreg, rightreg)); case A_INTLIT: return (cgloadint(n->v.intvalue, n->type)); case A_STRLIT: return (cgloadglobstr(n->v.id)); case A_IDENT: // Load our value if we are an rvalue // or we are being dereferenced if (n->rvalue || parentASTop == A_DEREF) return (cgloadglob(n->v.id, n->op)); else return (NOREG); case A_ASSIGN: // Are we assigning to an identifier or through a pointer? switch (n->right->op) { case A_IDENT: return (cgstorglob(leftreg, n->right->v.id)); case A_DEREF: return (cgstorderef(leftreg, rightreg, n->right->type)); default: fatald("Can't A_ASSIGN in genAST(), op", n->op); } case A_WIDEN: // Widen the child's type to the parent's type return (cgwiden(leftreg, n->left->type, n->type)); case A_RETURN: cgreturn(leftreg, Functionid); return (NOREG); case A_FUNCCALL: return (cgcall(leftreg, n->v.id)); case A_ADDR: return (cgaddress(n->v.id)); case A_DEREF: // If we are an rvalue, dereference to get the value we point at, // otherwise leave it for A_ASSIGN to store through the pointer if (n->rvalue) return (cgderef(leftreg, n->left->type)); else return (leftreg); case A_SCALE: // Small optimisation: use shift if the // scale value is a known power of two switch (n->v.size) { case 2: return (cgshlconst(leftreg, 1)); case 4: return (cgshlconst(leftreg, 2)); case 8: return (cgshlconst(leftreg, 3)); default: // Load a register with the size and // multiply the leftreg by this size rightreg = cgloadint(n->v.size, P_INT); return (cgmul(leftreg, rightreg)); } case A_POSTINC: // Load the variable's value into a register, // then increment it return (cgloadglob(n->v.id, n->op)); case A_POSTDEC: // Load the variable's value into a register, // then decrement it return (cgloadglob(n->v.id, n->op)); case A_PREINC: // Load and increment the variable's value into a register return (cgloadglob(n->left->v.id, n->op)); 
case A_PREDEC: // Load and decrement the variable's value into a register return (cgloadglob(n->left->v.id, n->op)); case A_NEGATE: return (cgnegate(leftreg)); case A_INVERT: return (cginvert(leftreg)); case A_LOGNOT: return (cglognot(leftreg)); case A_TOBOOL: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, set the register // to 0 or 1 based on it's zeroeness or non-zeroeness return (cgboolean(leftreg, parentASTop, label)); default: fatald("Unknown AST operator", n->op); } return (NOREG); // Keep -Wall happy } void genpreamble() { cgpreamble(); } void genpostamble() { cgpostamble(); } void genfreeregs() { freeall_registers(); } void genglobsym(int id) { cgglobsym(id); } int genglobstr(char *strvalue) { int l = genlabel(); cgglobstr(l, strvalue); return (l); } int genprimsize(int type) { return (cgprimsize(type)); } ================================================ FILE: 21_More_Operators/lib/printint.c ================================================ #include void printint(long x) { printf("%ld\n", x); } void printchar(long x) { putc((char)(x & 0x7f), stdout); } ================================================ FILE: 21_More_Operators/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Initialise global variables static void init() { Line = 1; Putback = '\n'; Globs = 0; O_dumpAST = 0; } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s [-T] infile\n", prog); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. 
int main(int argc, char *argv[]) { int i; // Initialise the globals init(); // Scan for command-line options for (i = 1; i < argc; i++) { if (*argv[i] != '-') break; for (int j = 1; argv[i][j]; j++) { switch (argv[i][j]) { case 'T': O_dumpAST = 1; break; default: usage(argv[0]); } } } // Ensure we have an input file argument if (i >= argc) usage(argv[0]); // Open up the input file if ((Infile = fopen(argv[i], "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", argv[i], strerror(errno)); exit(1); } // Create the output file if ((Outfile = fopen("out.s", "w")) == NULL) { fprintf(stderr, "Unable to create out.s: %s\n", strerror(errno)); exit(1); } // For now, ensure that printint() and printchar() are defined addglob("printint", P_INT, S_FUNCTION, 0, 0); addglob("printchar", P_VOID, S_FUNCTION, 0, 0); scan(&Token); // Get the first token from the input genpreamble(); // Output the preamble global_declarations(); // Parse the global declarations genpostamble(); // Output the postamble fclose(Outfile); // Close the output file and exit return (0); } ================================================ FILE: 21_More_Operators/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current token is t, // and fetch the next token. 
Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d\n", s, Line); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d\n", s1, s2, Line); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d\n", s, d, Line); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d\n", s, c, Line); exit(1); } ================================================ FILE: 21_More_Operators/scan.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { char *p; p = strchr(s, c); return (p ? p - s : -1); } // Get the next character from the input file. static int next(void) { int c; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return (c); } c = fgetc(Infile); // Read from input file if ('\n' == c) Line++; // Increment line count return (c); } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. 
static int skip(void) { int c; c = next(); while (' ' == c || '\t' == c || '\n' == c || '\r' == c || '\f' == c) { c = next(); } return (c); } // Return the next character from a character // or string literal static int scanch(void) { int c; // Get the next input character and interpret // metacharacters that start with a backslash c = next(); if (c == '\\') { switch (c = next()) { case 'a': return '\a'; case 'b': return '\b'; case 'f': return '\f'; case 'n': return '\n'; case 'r': return '\r'; case 't': return '\t'; case 'v': return '\v'; case '\\': return '\\'; case '"': return '"'; case '\'': return '\''; default: fatalc("unknown escape sequence", c); } } return (c); // Just an ordinary old character! } // Scan and return an integer literal // value from the input file. static int scanint(int c) { int k, val = 0; // Convert each character into an int value while ((k = chrpos("0123456789", c)) >= 0) { val = val * 10 + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan in a string literal from the input file, // and store it in buf[]. Return the length of // the string. static int scanstr(char *buf) { int i, c; // Loop while we have enough buffer space for (i = 0; i < TEXTLEN - 1; i++) { // Get the next char and append to buf // Return when we hit the ending double quote if ((c = scanch()) == '"') { buf[i] = 0; return (i); } buf[i] = c; } // Ran out of buf[] space fatal("String literal too long"); return (0); } // Scan an identifier from the input file and // store it in buf[]. Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { fatal("Identifier too long"); } else if (i < lim - 1) { buf[i++] = c; } c = next(); } // We hit a non-valid character, put it back. 
// NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. static int keyword(char *s) { switch (*s) { case 'c': if (!strcmp(s, "char")) return (T_CHAR); break; case 'e': if (!strcmp(s, "else")) return (T_ELSE); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'l': if (!strcmp(s, "long")) return (T_LONG); break; case 'r': if (!strcmp(s, "return")) return (T_RETURN); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; case 'v': if (!strcmp(s, "void")) return (T_VOID); break; } return (0); } // A pointer to a rejected token static struct token *Rejtoken = NULL; // Reject the token that we just scanned void reject_token(struct token *t) { if (Rejtoken != NULL) fatal("Can't reject token twice"); Rejtoken = t; } // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. 
int scan(struct token *t) { int c, tokentype; // If we have any rejected token, return it if (Rejtoken != NULL) { t = Rejtoken; Rejtoken = NULL; return (1); } // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': if ((c = next()) == '+') { t->token = T_INC; } else { putback(c); t->token = T_PLUS; } break; case '-': if ((c = next()) == '-') { t->token = T_DEC; } else { putback(c); t->token = T_MINUS; } break; case '*': t->token = T_STAR; break; case '/': t->token = T_SLASH; break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case '[': t->token = T_LBRACKET; break; case ']': t->token = T_RBRACKET; break; case '~': t->token = T_INVERT; break; case '^': t->token = T_XOR; break; case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { putback(c); t->token = T_LOGNOT; } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else if (c == '<') { t->token = T_LSHIFT; } else { putback(c); t->token = T_LT; } break; case '>': if ((c = next()) == '=') { t->token = T_GE; } else if (c == '>') { t->token = T_RSHIFT; } else { putback(c); t->token = T_GT; } break; case '&': if ((c = next()) == '&') { t->token = T_LOGAND; } else { putback(c); t->token = T_AMPER; } break; case '|': if ((c = next()) == '|') { t->token = T_LOGOR; } else { putback(c); t->token = T_OR; } break; case '\'': // If it's a quote, scan in the // literal character value and // the trailing quote t->intvalue = scanch(); t->token = T_INTLIT; if (next() != '\'') fatal("Expected '\\'' at end of char literal"); break; case '"': // Scan in a literal string scanstr(Text); t->token = T_STRLIT; break; default: // If it's a digit, scan the // literal integer value in if 
(isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if ((tokentype = keyword(Text)) != 0) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token return (1); } ================================================ FILE: 21_More_Operators/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // Prototypes static struct ASTnode *single_statement(void); // compound_statement: // empty, i.e. no statement // | statement // | statement statements // ; // // statement: declaration // | expression_statement // | function_call // | if_statement // | while_statement // | for_statement // | return_statement // ; // if_statement: if_head // | if_head 'else' compound_statement // ; // // if_head: 'if' '(' true_false_expression ')' compound_statement ; // // Parse an IF statement including any // optional ELSE clause and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. 
condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, 0); rparen(); // Get the AST for the compound statement trueAST = compound_statement(); // If we have an 'else', skip it // and get the AST for the compound statement if (Token.token == T_ELSE) { scan(&Token); falseAST = compound_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, P_NONE, condAST, trueAST, falseAST, 0)); } // while_statement: 'while' '(' true_false_expression ')' compound_statement ; // // Parse a WHILE statement and return its AST static struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, 0); rparen(); // Get the AST for the compound statement bodyAST = compound_statement(); // Build and return the AST for this statement return (mkastnode(A_WHILE, P_NONE, condAST, NULL, bodyAST, 0)); } // for_statement: 'for' '(' preop_statement ';' // true_false_expression ';' // postop_statement ')' compound_statement ; // // preop_statement: statement (for now) // postop_statement: statement (for now) // // Parse a FOR statement and return its AST static struct ASTnode *for_statement(void) { struct ASTnode *condAST, *bodyAST; struct ASTnode *preopAST, *postopAST; struct ASTnode *tree; // Ensure we have 'for' '(' match(T_FOR, "for"); lparen(); // Get the pre_op statement and the ';' preopAST = single_statement(); semi(); // Get the condition and the ';'. // Force a non-comparison to be boolean // the tree's operation is a comparison. 
condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, 0); semi(); // Get the post_op statement and the ')' postopAST = single_statement(); rparen(); // Get the compound statement which is the body bodyAST = compound_statement(); // For now, all four sub-trees have to be non-NULL. // Later on, we'll change the semantics for when some are missing // Glue the compound statement and the postop tree tree = mkastnode(A_GLUE, P_NONE, bodyAST, NULL, postopAST, 0); // Make a WHILE loop with the condition and this new body tree = mkastnode(A_WHILE, P_NONE, condAST, NULL, tree, 0); // And glue the preop tree to the A_WHILE tree return (mkastnode(A_GLUE, P_NONE, preopAST, NULL, tree, 0)); } // return_statement: 'return' '(' expression ')' ; // // Parse a return statement and return its AST static struct ASTnode *return_statement(void) { struct ASTnode *tree; // Can't return a value if function returns P_VOID if (Gsym[Functionid].type == P_VOID) fatal("Can't return from a void function"); // Ensure we have 'return' '(' match(T_RETURN, "return"); lparen(); // Parse the following expression tree = binexpr(0); // Ensure this is compatible with the function's type tree = modify_type(tree, Gsym[Functionid].type, 0); if (tree == NULL) fatal("Incompatible type to return"); // Add on the A_RETURN node tree = mkastunary(A_RETURN, P_NONE, tree, 0); // Get the ')' rparen(); return (tree); } // Parse a single statement and return its AST static struct ASTnode *single_statement(void) { int type; switch (Token.token) { case T_CHAR: case T_INT: case T_LONG: // The beginning of a variable declaration. // Parse the type and get the identifier. // Then parse the rest of the declaration. // XXX: These are globals at present. 
type = parse_type(); ident(); var_declaration(type); return (NULL); // No AST generated here case T_IF: return (if_statement()); case T_WHILE: return (while_statement()); case T_FOR: return (for_statement()); case T_RETURN: return (return_statement()); default: // For now, see if this is an expression. // This catches assignment statements. return (binexpr(0)); } return (NULL); // Keep -Wall happy } // Parse a compound statement // and return its AST struct ASTnode *compound_statement(void) { struct ASTnode *left = NULL; struct ASTnode *tree; // Require a left curly bracket lbrace(); while (1) { // Parse a single statement tree = single_statement(); // Some statements must be followed by a semicolon if (tree != NULL && (tree->op == A_ASSIGN || tree->op == A_RETURN || tree->op == A_FUNCCALL)) semi(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, P_NONE, left, NULL, tree, 0); } // When we hit a right curly bracket, // skip past it and return the AST if (Token.token == T_RBRACE) { rbrace(); return (left); } } } ================================================ FILE: 21_More_Operators/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 // Determine if the symbol s is in the global symbol table. // Return its slot position or -1 if not found. int findglob(char *s) { int i; for (i = 0; i < Globs; i++) { if (*s == *Gsym[i].name && !strcmp(s, Gsym[i].name)) return (i); } return (-1); } // Get the position of a new global symbol slot, or die // if we've run out of positions. static int newglob(void) { int p; if ((p = Globs++) >= NSYMBOLS) fatal("Too many global symbols"); return (p); } // Add a global symbol to the symbol table. Set up its: // + type: char, int etc. 
// + structural type: var, function, array etc. // + size: number of elements // + endlabel: if this is a function // Return the slot number in the symbol table int addglob(char *name, int type, int stype, int endlabel, int size) { int y; // If this is already in the symbol table, return the existing slot if ((y = findglob(name)) != -1) return (y); // Otherwise get a new slot, fill it in and // return the slot number y = newglob(); Gsym[y].name = strdup(name); Gsym[y].type = type; Gsym[y].stype = stype; Gsym[y].endlabel = endlabel; Gsym[y].size = size; return (y); } ================================================ FILE: 21_More_Operators/tests/input01.c ================================================ void main() { printint(12 * 3); printint(18 - 2 * 4); printint(1 + 2 + 9 - 5/2 + 3*5); } ================================================ FILE: 21_More_Operators/tests/input02.c ================================================ void main() { int fred; int jim; fred= 5; jim= 12; printint(fred + jim); } ================================================ FILE: 21_More_Operators/tests/input03.c ================================================ void main() { int x; x= 1; printint(x); x= x + 1; printint(x); x= x + 1; printint(x); x= x + 1; printint(x); x= x + 1; printint(x); } ================================================ FILE: 21_More_Operators/tests/input04.c ================================================ void main() { int x; x= 7 < 9; printint(x); x= 7 <= 9; printint(x); x= 7 != 9; printint(x); x= 7 == 7; printint(x); x= 7 >= 7; printint(x); x= 7 <= 7; printint(x); x= 9 > 7; printint(x); x= 9 >= 7; printint(x); x= 9 != 7; printint(x); } ================================================ FILE: 21_More_Operators/tests/input05.c ================================================ void main() { int i; int j; i=6; j=12; if (i < j) { printint(i); } else { printint(j); } } ================================================ FILE: 21_More_Operators/tests/input06.c 
================================================ void main() { int i; i=1; while (i <= 10) { printint(i); i= i + 1; } } ================================================ FILE: 21_More_Operators/tests/input07.c ================================================ void main() { int i; for (i= 1; i <= 10; i= i + 1) { printint(i); } } ================================================ FILE: 21_More_Operators/tests/input08.c ================================================ void main() { int i; for (i= 1; i <= 10; i= i + 1) { printint(i); } } ================================================ FILE: 21_More_Operators/tests/input09.c ================================================ void main() { int i; for (i= 1; i <= 10; i= i + 1) { printint(i); } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { printint(2 * b - a); } } ================================================ FILE: 21_More_Operators/tests/input10.c ================================================ void main() { int i; char j; j= 20; printint(j); i= 10; printint(i); for (i= 1; i <= 5; i= i + 1) { printint(i); } for (j= 253; j != 2; j= j + 1) { printint(j); } } ================================================ FILE: 21_More_Operators/tests/input11.c ================================================ int main() { int i; char j; long k; i= 10; printint(i); j= 20; printint(j); k= 30; printint(k); for (i= 1; i <= 5; i= i + 1) { printint(i); } for (j= 253; j != 4; j= j + 1) { printint(j); } for (k= 1; k <= 5; k= k + 1) { printint(k); } return(i); printint(12345); return(3); } ================================================ FILE: 21_More_Operators/tests/input12.c ================================================ int fred() { return(5); } void main() { int x; x= fred(2); printint(x); } ================================================ FILE: 21_More_Operators/tests/input13.c ================================================ int fred() { return(56); } void main() { int dummy; int result; dummy= printint(23); result= fred(10); 
dummy= printint(result); } ================================================ FILE: 21_More_Operators/tests/input14.c ================================================ int fred() { return(20); } int main() { int result; printint(10); result= fred(15); printint(result); printint(fred(15)+10); return(0); } ================================================ FILE: 21_More_Operators/tests/input15.c ================================================ int main() { char a; char *b; char c; int d; int *e; int f; a= 18; printint(a); b= &a; c= *b; printint(c); d= 12; printint(d); e= &d; f= *e; printint(f); return(0); } ================================================ FILE: 21_More_Operators/tests/input16.c ================================================ int c; int d; int *e; int f; int main() { c= 12; d=18; printint(c); e= &c + 1; f= *e; printint(f); return(0); } ================================================ FILE: 21_More_Operators/tests/input17.c ================================================ int main() { char a; char *b; int d; int *e; b= &a; *b= 19; printint(a); e= &d; *e= 12; printint(d); return(0); } ================================================ FILE: 21_More_Operators/tests/input18.c ================================================ int main() { int a; int b; a= b= 34; printint(a); printint(b); return(0); } ================================================ FILE: 21_More_Operators/tests/input18a.c ================================================ int a; int *b; char c; char *d; int main() { b= &a; *b= 15; printint(a); d= &c; *d= 16; printint(c); return(0); } ================================================ FILE: 21_More_Operators/tests/input19.c ================================================ int a; int b; int c; int d; int e; int main() { a= 2; b= 4; c= 3; d= 2; e= (a+b) * (c+d); printint(e); return(0); } ================================================ FILE: 21_More_Operators/tests/input20.c ================================================ int a; int b[25]; int main() { 
b[3]= 12; a= b[3]; printint(a); return(0); } ================================================ FILE: 21_More_Operators/tests/input21.c ================================================ char c; char *str; int main() { c= '\n'; printint(c); for (str= "Hello world\n"; *str != 0; str= str + 1) { printchar(*str); } return(0); } ================================================ FILE: 21_More_Operators/tests/input22.c ================================================ char a; char b; char c; int d; int e; int f; long g; long h; long i; int main() { b= 5; c= 7; a= b + c++; printint(a); e= 5; f= 7; d= e + f++; printint(d); h= 5; i= 7; g= h + i++; printint(g); a= b-- + c; printint(a); d= e-- + f; printint(d); g= h-- + i; printint(g); a= ++b + c; printint(a); d= ++e + f; printint(d); g= ++h + i; printint(g); a= b * --c; printint(a); d= e * --f; printint(d); g= h * --i; printint(g); return(0); } ================================================ FILE: 21_More_Operators/tests/input23.c ================================================ char *str; int x; int main() { x= -23; printint(x); printint(-10 * -10); x= 1; x= ~x; printint(x); x= 2 > 5; printint(x); x= !x; printint(x); x= !x; printint(x); x= 13; if (x) { printint(13); } x= 0; if (!x) { printint(14); } for (str= "Hello world\n"; *str; str++) { printchar(*str); } return(0); } ================================================ FILE: 21_More_Operators/tests/input24.c ================================================ int a; int b; int c; int main() { a= 42; b= 19; printint(a & b); printint(a | b); printint(a ^ b); printint(1 << 3); printint(63 >> 3); return(0); } ================================================ FILE: 21_More_Operators/tests/mktests ================================================ #!/bin/sh # Make the output files for each test for i in input*c do if [ ! 
-f "out.$i" ] then cc -o out $i ../lib/printint.c ./out > out.$i rm -f out fi done ================================================ FILE: 21_More_Operators/tests/out.input01.c ================================================ 36 10 25 ================================================ FILE: 21_More_Operators/tests/out.input02.c ================================================ 17 ================================================ FILE: 21_More_Operators/tests/out.input03.c ================================================ 1 2 3 4 5 ================================================ FILE: 21_More_Operators/tests/out.input04.c ================================================ 1 1 1 1 1 1 1 1 1 ================================================ FILE: 21_More_Operators/tests/out.input05.c ================================================ 6 ================================================ FILE: 21_More_Operators/tests/out.input06.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 21_More_Operators/tests/out.input07.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 21_More_Operators/tests/out.input08.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 21_More_Operators/tests/out.input09.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 21_More_Operators/tests/out.input10.c ================================================ 20 10 1 2 3 4 5 253 254 255 0 1 ================================================ FILE: 21_More_Operators/tests/out.input11.c ================================================ 10 20 30 1 2 3 4 5 253 254 255 0 1 2 3 1 2 3 4 5 ================================================ FILE: 21_More_Operators/tests/out.input12.c ================================================ 5 
================================================ FILE: 21_More_Operators/tests/out.input13.c ================================================ 23 56 ================================================ FILE: 21_More_Operators/tests/out.input14.c ================================================ 10 20 30 ================================================ FILE: 21_More_Operators/tests/out.input15.c ================================================ 18 18 12 12 ================================================ FILE: 21_More_Operators/tests/out.input16.c ================================================ 12 18 ================================================ FILE: 21_More_Operators/tests/out.input17.c ================================================ 19 12 ================================================ FILE: 21_More_Operators/tests/out.input18.c ================================================ 34 34 ================================================ FILE: 21_More_Operators/tests/out.input18a.c ================================================ 15 16 ================================================ FILE: 21_More_Operators/tests/out.input19.c ================================================ 30 ================================================ FILE: 21_More_Operators/tests/out.input20.c ================================================ 12 ================================================ FILE: 21_More_Operators/tests/out.input21.c ================================================ 10 Hello world ================================================ FILE: 21_More_Operators/tests/out.input22.c ================================================ 12 12 12 13 13 13 13 13 13 35 35 35 ================================================ FILE: 21_More_Operators/tests/out.input23.c ================================================ -23 100 -2 0 1 0 13 14 Hello world ================================================ FILE: 21_More_Operators/tests/out.input24.c ================================================ 2 59 57 8 
7 ================================================ FILE: 21_More_Operators/tests/runtests ================================================ #!/bin/sh # Run each test and compare # against known good output if [ ! -f ../comp1 ] then echo "Need to build ../comp1 first!"; exit 1 fi for i in input* do if [ ! -f "out.$i" ] then echo "Can't run test on $i, no output file!" else echo -n $i ../comp1 $i cc -o out out.s ../lib/printint.c ./out > trial.$i cmp -s "out.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo else echo ": OK" fi rm -f out out.s "trial.$i" fi done ================================================ FILE: 21_More_Operators/tests/runtestsn ================================================ #!/bin/sh # Run each test and compare # against known good output if [ ! -f ../compn ] then echo "Need to build ../compn first!"; exit 1 fi for i in input* do if [ ! -f "out.$i" ] then echo "Can't run test on $i, no output file!" else echo -n $i ../compn $i nasm -f elf64 out.s cc -no-pie -Wall -o out out.o ../lib/printint.c ./out > trial.$i cmp -s "out.$i" "trial.$i" if [ "$?" 
-eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo else echo ": OK" fi rm -f out out.o out.s "trial.$i" fi done ================================================ FILE: 21_More_Operators/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->type = type; n->left = left; n->mid = mid; n->right = right; n->v.intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, int intvalue) { return (mkastnode(op, type, NULL, NULL, NULL, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, int intvalue) { return (mkastnode(op, type, left, NULL, NULL, intvalue)); } // Generate and return a new label number // just for AST dumping purposes static int gendumplabel(void) { static int id = 1; return (id++); } // Given an AST tree, print it out and follow the // traversal of the tree that genAST() follows void dumpAST(struct ASTnode *n, int label, int level) { int Lfalse, Lstart, Lend; switch (n->op) { case A_IF: Lfalse = gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_IF"); if (n->right) { Lend = gendumplabel(); fprintf(stdout, ", end L%d", Lend); } fprintf(stdout, "\n"); dumpAST(n->left, Lfalse, level + 2); dumpAST(n->mid, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); return; case A_WHILE: Lstart = gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_WHILE, start 
L%d\n", Lstart); Lend = gendumplabel(); dumpAST(n->left, Lend, level + 2); dumpAST(n->right, NOLABEL, level + 2); return; } // Reset level to -2 for A_GLUE if (n->op == A_GLUE) level = -2; // General AST node handling if (n->left) dumpAST(n->left, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); for (int i = 0; i < level; i++) fprintf(stdout, " "); switch (n->op) { case A_GLUE: fprintf(stdout, "\n\n"); return; case A_FUNCTION: fprintf(stdout, "A_FUNCTION %s\n", Gsym[n->v.id].name); return; case A_ADD: fprintf(stdout, "A_ADD\n"); return; case A_SUBTRACT: fprintf(stdout, "A_SUBTRACT\n"); return; case A_MULTIPLY: fprintf(stdout, "A_MULTIPLY\n"); return; case A_DIVIDE: fprintf(stdout, "A_DIVIDE\n"); return; case A_EQ: fprintf(stdout, "A_EQ\n"); return; case A_NE: fprintf(stdout, "A_NE\n"); return; case A_LT: fprintf(stdout, "A_LE\n"); return; case A_GT: fprintf(stdout, "A_GT\n"); return; case A_LE: fprintf(stdout, "A_LE\n"); return; case A_GE: fprintf(stdout, "A_GE\n"); return; case A_INTLIT: fprintf(stdout, "A_INTLIT %d\n", n->v.intvalue); return; case A_STRLIT: fprintf(stdout, "A_STRLIT rval label L%d\n", n->v.id); return; case A_IDENT: if (n->rvalue) fprintf(stdout, "A_IDENT rval %s\n", Gsym[n->v.id].name); else fprintf(stdout, "A_IDENT %s\n", Gsym[n->v.id].name); return; case A_ASSIGN: fprintf(stdout, "A_ASSIGN\n"); return; case A_WIDEN: fprintf(stdout, "A_WIDEN\n"); return; case A_RETURN: fprintf(stdout, "A_RETURN\n"); return; case A_FUNCCALL: fprintf(stdout, "A_FUNCCALL %s\n", Gsym[n->v.id].name); return; case A_ADDR: fprintf(stdout, "A_ADDR %s\n", Gsym[n->v.id].name); return; case A_DEREF: if (n->rvalue) fprintf(stdout, "A_DEREF rval\n"); else fprintf(stdout, "A_DEREF\n"); return; case A_SCALE: fprintf(stdout, "A_SCALE %d\n", n->v.size); return; case A_PREINC: fprintf(stdout, "A_PREINC %s\n", Gsym[n->v.id].name); return; case A_PREDEC: fprintf(stdout, "A_PREDEC %s\n", Gsym[n->v.id].name); return; case A_POSTINC: fprintf(stdout, 
"A_POSTINC\n"); return; case A_POSTDEC: fprintf(stdout, "A_POSTDEC\n"); return; case A_NEGATE: fprintf(stdout, "A_NEGATE\n"); return; default: fatald("Unknown dumpAST operator", n->op); } } ================================================ FILE: 21_More_Operators/types.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Types and type handling // Copyright (c) 2019 Warren Toomey, GPL3 // Return true if a type is an int type // of any size, false otherwise int inttype(int type) { if (type == P_CHAR || type == P_INT || type == P_LONG) return (1); return (0); } // Return true if a type is of pointer type int ptrtype(int type) { if (type == P_VOIDPTR || type == P_CHARPTR || type == P_INTPTR || type == P_LONGPTR) return (1); return (0); } // Given a primitive type, return // the type which is a pointer to it int pointer_to(int type) { int newtype; switch (type) { case P_VOID: newtype = P_VOIDPTR; break; case P_CHAR: newtype = P_CHARPTR; break; case P_INT: newtype = P_INTPTR; break; case P_LONG: newtype = P_LONGPTR; break; default: fatald("Unrecognised in pointer_to: type", type); } return (newtype); } // Given a primitive pointer type, return // the type which it points to int value_at(int type) { int newtype; switch (type) { case P_VOIDPTR: newtype = P_VOID; break; case P_CHARPTR: newtype = P_CHAR; break; case P_INTPTR: newtype = P_INT; break; case P_LONGPTR: newtype = P_LONG; break; default: fatald("Unrecognised in value_at: type", type); } return (newtype); } // Given an AST tree and a type which we want it to become, // possibly modify the tree by widening or scaling so that // it is compatible with this type. Return the original tree // if no changes occurred, a modified tree, or NULL if the // tree is not compatible with the given type. // If this will be part of a binary operation, the AST op is not zero. 
struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op) { int ltype; int lsize, rsize; ltype = tree->type; // Compare scalar int types if (inttype(ltype) && inttype(rtype)) { // Both types same, nothing to do if (ltype == rtype) return (tree); // Get the sizes for each type lsize = genprimsize(ltype); rsize = genprimsize(rtype); // Tree's size is too big if (lsize > rsize) return (NULL); // Widen to the right if (rsize > lsize) return (mkastunary(A_WIDEN, rtype, tree, 0)); } // For pointers on the left if (ptrtype(ltype)) { // OK is same type on right and not doing a binary op if (op == 0 && ltype == rtype) return (tree); } // We can scale only on A_ADD or A_SUBTRACT operation if (op == A_ADD || op == A_SUBTRACT) { // Left is int type, right is pointer type and the size // of the original type is >1: scale the left if (inttype(ltype) && ptrtype(rtype)) { rsize = genprimsize(value_at(rtype)); if (rsize > 1) return (mkastunary(A_SCALE, rtype, tree, rsize)); else return (tree); // Size 1, no need to scale } } // If we get here, the types are not compatible return (NULL); } ================================================ FILE: 22_Design_Locals/Readme.md ================================================ # Part 22: Design Ideas for Local Variables and Function Calls This is going to be first first part of our compiler writing journey where I don't introduce any new code. This time, I need to step back from the coder's keyboard and take a big-picture view. This will give me a chance to think about how I'm going to implement local variables (in one part) and then function arguments & parameters (in the next part). Both of these steps are going to involve some significant additions and changes to our existing compiler. We also have to deal with new concepts like *stack frames* and *register spills*, which so far I've omitted. Let's start by identifying what new functionality we want to add to the compiler. 
## What Functionality Do We Want ### Local and Global Variable Scopes Right now, all our variables are globally visible to all functions. We want to add a [local scope](https://en.wikipedia.org/wiki/Scope_(computer_science)) for variables, so that each function has its own variables that cannot be seen by other functions. Moreover, in the case of recursive functions, each instance of the same function gets its own local variables. However, I only want to add two scopes: *local* and *global*. C actually creates a new scope for every compound statement. In the following example, there are three different `a` variables in three different scopes: ```c #include <stdio.h> int a = 2; // Global scope int main() { int a= 5; // Local scope if (a > 2) { int a= 17; // Third scope printf("%d\n", a); // Print 17 } printf("%d\n", a); // Print 5 return(0); } ``` I'm not going to support the third, inner, scope. Two will be enough! ### Function Parameters as Local Variables We also need to support the declaration of zero or more *parameters* to a function, and these need to be treated as variables local to the instance of that function. C functions are "[call by value](https://en.wikipedia.org/wiki/Evaluation_strategy#Call_by_value)": the argument values in the caller of a function are copied into the function's parameters so that the called function can use and modify them. ### Introducing the Stack To create a local scope for multiple instances of the same function, and to provide a place to store the function's parameters, we need a *stack*. At this point, if you don't know much about stacks, you should do a bit of background reading on them. I'd start with this [Wikipedia article on call stacks](https://en.wikipedia.org/wiki/Call_stack). Given that one of the hardware architectures that we support is the Intel x86-64 architecture running Linux, we are going to have to implement the function call mechanism on this architecture.
I found this great article by Eli Bendersky on the [stack frame layout on x86-64](https://eli.thegreenplace.net/2011/09/06/stack-frame-layout-on-x86-64/). This is a document that you will definitely need to read before continuing on with this document! As Eli's article is in the public domain, I'm reproducing his picture of the stack frame and the parameters in registers below for the function ```c long myfunc(long a, long b, long c, long d, long e, long f, long g, long h) { long xx, yy, zz; ... } ``` ![](Figs/x64_frame_nonleaf.png) Essentially, on the x86-64 architecture, the values of some parameters will be passed in registers, and some parameter values will be pushed onto the stack. All our local variables will be on the stack but below the stack base pointer. At the same time, we want our compiler to be portable to different architectures. So, we will need to support a general function parameter framework for different architectures which use only the stack, only registers or a combination of both. ### Spilling Registers Something that I have ignored so far and not implemented yet is [register spilling](https://en.wikipedia.org/wiki/Register_allocation#Spilling). We need to spill some or all the registers that we have allocated for several reasons: + We have run out of registers to allocate as there is only a fixed number of registers. We can spill a register onto the stack so that it is free to allocate. + We need to spill all our allocated registers, and all registers with parameters, onto the stack before a function call. This frees them up so they can be used by the called function. On a function call return, we will need to unspill the registers to get the values that we need back. Similarly, if we've spilled a register to make it free, then we need to unspill its old value and reallocate it when it becomes free again.
### Static Variables While not on the list of things to implement immediately, at some point I'll need to allocate [static variables](https://en.wikipedia.org/wiki/Static_variable). There will be some naming issues here for local static variables, but I'll try to keep this in the back of my mind as I implement all of the immediate ideas. ### Initialising Variables We should allow variables to be initialised when they are declared. For global variables, we can definitely initialise them to a constant value, e.g. `int x= 7;` but not to an expression as we don't have a [function context](https://en.wikipedia.org/wiki/Scope_(computer_science)#Function_scope) to run the initialisation code in. However, we should be able to do local variable initialisation, e.g. `int a= 2, b= a+5;` as we can insert the initialisation code for the variable at the start of the function code. ## Ideas and Implementation OK, so these are the ideas and issues that are bubbling around in my designer's mind at this time. Here's how I think I'm going to implement some of them. ### Local Symbols Let's start with the differentiation between local and global variables. The globals have to be visible to all functions, but the locals are only visible to one function. SubC uses the one symbol table to store information about both local and global variables. The global variables are allocated at one end and the local variables are stored at the other. There is code to ensure there is no collision between the two ends in the middle. I like this idea, as we then have a single set of unique symbol slot numbers for every symbol, regardless of its scope. In terms of prioritising local symbols over global symbols, we can search the local end of the symbol table first and, if we don't find a symbol, we can then search through the global end. And, once we finish parsing a function, we can simply wipe the local end of the symbol table. 
### Storage Classes C has the concept of [storage classes](https://en.wikipedia.org/wiki/C_syntax#Storage_class_specifiers), and we'll have to implement at least some of these classes. SubC implements several of the storage classes: ```c /* storage classes */ enum { CPUBLIC = 1, // publicly visible symbol CEXTERN, // extern symbol CSTATIC, // static symbols in global context CLSTATC, // static symbols in local context CAUTO, // non-static local identifiers CSPROTO, // function prototype CMEMBER, // field of a struct/union CSTCDEF // unused }; ``` for each symbol in the symbol table. I think I can modify and use this. But I'll probably support fewer storage class types. ### Function Prototypes Every function has a *prototype*: the number and type of each parameter that it has. We need these to ensure the arguments to a function call matches the types and number of function parameters. Somewhere I will need to record the parameter list and types for each function. We can also support the declaration of a function's prototype before the actual declaration of the function itself. Now, where are we going to store this? I could create a separate data structure for function prototypes. I don't want to support two-dimensional arrays in our language, but we will need a list of primitive types for each function. So, my idea is this. We already have S_FUNCTION as the type for our existing symbol table elements. We can have a "number of parameters" field in each symbol table entry to store the number of parameters that the function has. We can then immediately follow this symbol with the symbol table entries for each function parameter. When we are parsing the function's parameter list, we can add the parameters in the global symbol section to record the function's prototype. At the same time, we can also add the parameters as entries in the local symbol section, as they will be used as local variables by the function itself. 
When we need to determine if the list of arguments to a function call matches the function's prototype, we can find the function's global symbol table entry and then compare the following entries in the symbol table to the argument list. Finally, when doing a search for a global symbol, we can easily skip past the parameter entries for a function by loading the function's "number of parameters" field and skip this many symbol table entries. ### Keeping Parameters in Registers: Not Possible I'm actually writing this section after trying to implement the above, so I've come back to revisit the design a bit. I thought that we would be able to keep the parameters passed as registers in their registers: this would make access to them faster and keep the stack frame smaller. But this isn't always possible for this reason. Consider this code: ```c void myfunction(int a) { // a is a parameter in a register int b; // b is a local variable on the stack // Call a function to update a and b b= function2(&a); } ``` If the `a` parameter is in a register, we won't be able to get its address with the `&` operator. Therefore, we'll have to copy it into memory somewhere. And, given that parameters are variables local to the function, we will need to copy it to the stack. For a while I had ideas of walking the AST looking for which parameters in the tree needed to have real addresses, but then I remembered that I'm following the KISS principle: keep it simple, stupid! So I will copy all parameters out of registers and onto the stack. ### Location of Local Variables How are we going to determine where a parameter or local variable is on the stack, once they have been copied or placed there? To do this, I will add a `posn` field into each local symbol table entry. This will indicate the offset of the variable below the frame base pointer. Looking at the [BNF Grammar for C](https://www.lysator.liu.se/c/ANSI-C-grammar-y.html), the function declaration list (i.e. 
the list of function parameters) comes before the declaration list for the local variables, and this comes before the statement list. This means that, as we parse the parameters and then the local variables, we can determine at what position they will be on the stack before we get to parse the statement list. ## Conclusion and What's Next I think that's about all I want to do in terms of design before I start on the next parts of our compiler writing journey. I'll tackle local variables by themselves in the next part, and try to add in function calls and parameters in the following part. But it might take three or more steps to get all of the new proposed features implemented. We'll see. [Next step](../23_Local_Variables/Readme.md) ================================================ FILE: 23_Local_Variables/Makefile ================================================ HSRCS= data.h decl.h defs.h SRCS= cg.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c \ sym.c tree.c types.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c \ sym.c tree.c types.c ARMSRCS= cg_arm.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c \ sym.c tree.c types.c comp1: $(SRCS) $(HSRCS) cc -o comp1 -g -Wall $(SRCS) compn: $(SRCN) $(HSRCS) cc -o compn -g -Wall $(SRCN) comp1arm: $(ARMSRCS) $(HSRCS) cc -o comp1arm -g -Wall $(ARMSRCS) cp comp1arm comp1 clean: rm -f comp1 comp1arm compn *.o *.s out test: comp1 tests/runtests (cd tests; chmod +x runtests; ./runtests) armtest: comp1arm tests/runtests (cd tests; chmod +x runtests; ./runtests) testn: compn tests/runtestsn (cd tests; chmod +x runtestsn; ./runtestsn) ================================================ FILE: 23_Local_Variables/Readme.md ================================================ # Part 23: Local Variables I've just implemented local variables on the stack following the design ideas I described in the previous part of our compiler writing journey, and it all went fine. Below, I will outline the actual code changes. 
## Symbol Table Changes We start with the changes to the symbol table as these are central to having two variable scopes: global and local. The structure of the symbol table entries is now (in `defs.h`): ```c // Storage classes enum { C_GLOBAL = 1, // Globally visible symbol C_LOCAL // Locally visible symbol }; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol int stype; // Structural type for the symbol int class; // Storage class for the symbol int endlabel; // For functions, the end label int size; // Number of elements in the symbol int posn; // For locals,the negative offset // from the stack base pointer }; ``` with the `class` and `posn` fields added. As described in the last part, the `posn` is negative and holds an offset from the stack base pointer, i.e. the local variable is stored on the stack. In this part, I've only implemented local variables, not parameters. Also note that we now have symbols marked C_GLOBAL or C_LOCAL. The symbol table's name has also changed, along with the indexes into it (in `data.h`): ```c extern_ struct symtable Symtable[NSYMBOLS]; // Global symbol table extern_ int Globs; // Position of next free global symbol slot extern_ int Locls; // Position of next free local symbol slot ``` Visually, the global symbols are stored in the left-hand side of the symbol table and the local symbols in the right-hand side, with `Globs` pointing at the next free global symbol slot and `Locls` pointing at the next free local symbol slot. ``` 0xxxx......................................xxxxxxxxxxxxNSYMBOLS-1 ^ ^ | | Globs Locls ``` In `sym.c` as well as the existing `findglob()` and `newglob()` functions to find or allocate a global symbol, we now have `findlocl()` and `newlocl()`. They have code to detect a collision between `Globs` and `Locls`: ```c // Get the position of a new global symbol slot, or die // if we've run out of positions. 
static int newglob(void) { int p; if ((p = Globs++) >= Locls) fatal("Too many global symbols"); return (p); } // Get the position of a new local symbol slot, or die // if we've run out of positions. static int newlocl(void) { int p; if ((p = Locls--) <= Globs) fatal("Too many local symbols"); return (p); } ``` There is now a generic function `updatesym()` to set all the fields in a symbol table entry. I won't give the code because it simply sets each field one at a time. The `updatesym()` function is called by `addglob()` and `addlocl()`. These first try to find an existing symbol, allocate a new one if not found, and call `updatesym()` to set the values for this symbol. Finally, there is a new function, `findsymbol()`, that searches for a symbol in both local and global sections of the symbol table: ```c // Determine if the symbol s is in the symbol table. // Return its slot position or -1 if not found. int findsymbol(char *s) { int slot; slot = findlocl(s); if (slot == -1) slot = findglob(s); return (slot); } ``` Throughout the rest of the code, the old calls to `findglob()` have been replaced with calls to `findsymbol()`. ## Changes to Declaration Parsing We need to be able to parse both global and local variable declarations. The code to parse them is (for now) the same, so I added a flag to the function: ```c void var_declaration(int type, int islocal) { ... // Add this as a known array if (islocal) { addlocl(Text, pointer_to(type), S_ARRAY, 0, Token.intvalue); } else { addglob(Text, pointer_to(type), S_ARRAY, 0, Token.intvalue); } ... // Add this as a known scalar if (islocal) { addlocl(Text, type, S_VARIABLE, 0, 1); } else { addglob(Text, type, S_VARIABLE, 0, 1); } ... } ``` There are two calls to `var_declaration()` in our compiler at present. This one in `global_declarations()` in `decl.c` parses global variable declarations: ```c void global_declarations(void) { ... // Parse the global variable declaration var_declaration(type, 0); ... 
} ``` This one in `single_statement()` in `stmt.c` parses local variable declarations: ```c static struct ASTnode *single_statement(void) { int type; switch (Token.token) { case T_CHAR: case T_INT: case T_LONG: // The beginning of a variable declaration. // Parse the type and get the identifier. // Then parse the rest of the declaration. type = parse_type(); ident(); var_declaration(type, 1); ... } ... } ``` ## Changes to the x86-64 Code Generator As always, many of the `cgXX()` functions in the platform-specific code in `cg.c` are exposed to the rest of the compiler as `genXX()` functions in `gen.c`. That's going to be the case here. So while I only mention the `cgXX()` functions, don't forget that there are often matching `genXX()` functions. For each local variable, we need to allocate a position for it and record this in the symbol table's `posn` field. Here is how we do it. In `cg.c` we have a new static variable and two functions to manipulate it: ```c // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Reset the position of new local variables when parsing a new function void cgresetlocals(void) { localOffset = 0; } // Get the position of the next local variable. // Use the isparam flag to allocate a parameter (not yet XXX). int cggetlocaloffset(int type, int isparam) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } ``` For now, we allocate all local variables on the stack. They are aligned with a minimum of 4 bytes between each one. For 64-bit integers and pointers, that's 8-bytes for each variable, though. > I know, in the past, that multi-byte data items had to be properly aligned in memory or the CPU would fault. 
It seems that, at least for x86-64, there is [no need to align data items](https://lemire.me/blog/2012/05/31/data-alignment-for-speed-myth-or-reality/). > However, the stack pointer on the x86-64 *does* have to be properly aligned before a function call. In "[Optimizing Subroutines in Assembly Language](https://www.agner.org/optimize/optimizing_assembly.pdf)" by Agner Fog, page 30, he notes that "The stack pointer must be aligned by 16 before any CALL instruction, so that the value of RSP is 8 modulo 16 at the entry of a function." > This means that, as part of the function preamble, we need to set `%rsp` to a correctly aligned value. `cgresetlocals()` is called in `function_declaration()` once we have added the function's name to the symbol table but before we start parsing the local variable declarations. This sets `localOffset` back to zero. We saw that `addlocl()` is called when a new local scalar or local array is parsed. `addlocl()` calls `cggetlocaloffset()` with the type of the new variable. This decrements the offset from the stack base pointer by an appropriate amount, and this offset is stored in the `posn` field for the symbol. Now that we have the symbol's offset from the stack base pointer, we now need to modify the code generator so that, when we are accessing a local variable instead of a global variable, we output an offset to `%rbp` instead of naming a global location. Thus, we now have a `cgloadlocal()` function which is nearly identical to `cgloadglob()` except that all `%s(%%rip)` format strings to print `Symtable[id].name` are replaced with `%d(%%rbp)` format strings to print `Symtable[id].posn`. In fact, if you search for `Symtable[id].posn` in `cg.c`, you will spot all of these new local variable references. ### Updating the Stack Pointer Now that we are using locations on the stack, we had better move the stack pointer down below the area which holds our local variables. 
Thus, we need to modify the stack pointer in our function preamble and postamble: ```c // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; cgtextseg(); // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset= (localOffset+15) & ~15; fprintf(Outfile, "\t.globl\t%s\n" "\t.type\t%s, @function\n" "%s:\n" "\tpushq\t%%rbp\n" "\tmovq\t%%rsp, %%rbp\n" "\taddq\t$%d,%%rsp\n", name, name, name, -stackOffset); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Symtable[id].endlabel); fprintf(Outfile, "\taddq\t$%d,%%rsp\n", stackOffset); fputs("\tpopq %rbp\n" "\tret\n", Outfile); } ``` Remember that `localOffset` (and therefore `stackOffset`) is stored as a positive value. So we add its negation in the function preamble to lower the stack pointer, and add the positive value back in the function postamble to raise it again. ## Testing the Changes I think that is the bulk of the changes to add local variables to our compiler. The test program `tests/input25.c` demonstrates the storage of local variables on the stack: ```c int a; int b; int c; int main() { char z; int y; int x; x= 10; y= 20; z= 30; a= 5; b= 15; c= 25; } ``` Here is the annotated assembly output: ``` .data .globl a a: .long 0 # Three global variables .globl b b: .long 0 .globl c c: .long 0 .text .globl main .type main, @function main: pushq %rbp movq %rsp, %rbp addq $-16,%rsp # Lower stack pointer by 16 movq $10, %r8 movl %r8d, -12(%rbp) # x is at offset -12 movq $20, %r8 movl %r8d, -8(%rbp) # y is at offset -8 movq $30, %r8 movb %r8b, -4(%rbp) # z is at offset -4 movq $5, %r8 movl %r8d, a(%rip) # a has a global label movq $15, %r8 movl %r8d, b(%rip) # b has a global label movq $25, %r8 movl %r8d, c(%rip) # c has a global label jmp L1 L1: addq $16,%rsp # Raise stack pointer by 16 popq %rbp ret ``` Finally, a `$ make test` demonstrates that the compiler passes all previous tests. 
## Conclusion and What's Next I thought implementing local variables was going to be tricky, but after doing some thinking about the design of a solution, it turned out to be easier than I expected. Somehow I suspect the next step will be the tricky one. In the next part of our compiler writing journey, I will attempt to add function arguments and parameters to our compiler. Wish me luck! [Next step](../24_Function_Params/Readme.md) ================================================ FILE: 23_Local_Variables/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\t.text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\t.data\n", Outfile); currSeg = data_seg; } } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Reset the position of new local variables when parsing a new function void cgresetlocals(void) { localOffset = 0; } // Get the position of the next local variable. // Use the isparam flag to allocate a parameter (not yet XXX). int cggetlocaloffset(int type, int isparam) { // For now just decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; // printf("Returning offset %d for type %d\n", localOffset, type); return (-localOffset); } // List of available registers and their names. 
// We need a list of byte and doubleword registers, too #define NUMFREEREGS 4 static int freereg[NUMFREEREGS]; static char *reglist[] = { "%r8", "%r9", "%r10", "%r11" }; static char *breglist[] = { "%r8b", "%r9b", "%r10b", "%r11b" }; static char *dreglist[] = { "%r8d", "%r9d", "%r10d", "%r11d" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; cgtextseg(); // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset= (localOffset+15) & ~15; // printf("preamble local %d stack %d\n", localOffset, stackOffset); fprintf(Outfile, "\t.globl\t%s\n" "\t.type\t%s, @function\n" "%s:\n" "\tpushq\t%%rbp\n" "\tmovq\t%%rsp, %%rbp\n" "\taddq\t$%d,%%rsp\n", name, name, name, -stackOffset); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Symtable[id].endlabel); fprintf(Outfile, "\taddq\t$%d,%%rsp\n", stackOffset); fputs("\tpopq %rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. 
int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadglob(int id, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it switch (Symtable[id].type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", Symtable[id].name); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", Symtable[id].name); fprintf(Outfile, "\tmovzbq\t%s(%%rip), %s\n", Symtable[id].name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", Symtable[id].name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", Symtable[id].name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", Symtable[id].name); if (op == A_PREDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", Symtable[id].name); fprintf(Outfile, "\tmovslq\t%s(%%rip), %s\n", Symtable[id].name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", Symtable[id].name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", Symtable[id].name); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: if (op == A_PREINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", Symtable[id].name); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", Symtable[id].name); fprintf(Outfile, "\tmovq\t%s(%%rip), %s\n", Symtable[id].name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", Symtable[id].name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", Symtable[id].name); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. 
If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadlocal(int id, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it switch (Symtable[id].type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", Symtable[id].posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", Symtable[id].posn); fprintf(Outfile, "\tmovzbq\t%d(%%rbp), %s\n", Symtable[id].posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", Symtable[id].posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", Symtable[id].posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", Symtable[id].posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", Symtable[id].posn); fprintf(Outfile, "\tmovslq\t%d(%%rbp), %s\n", Symtable[id].posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", Symtable[id].posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", Symtable[id].posn); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: if (op == A_PREINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", Symtable[id].posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", Symtable[id].posn); fprintf(Outfile, "\tmovq\t%d(%%rbp), %s\n", Symtable[id].posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", Symtable[id].posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", Symtable[id].posn); break; default: fatald("Bad type in cgloadlocal:", Symtable[id].type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int id) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tleaq\tL%d(%%rip), %s\n", id, reglist[r]); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, 
"\taddq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tandq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\torq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txorq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshlq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshrq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tnegq\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnotq\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, 
"\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); return (r); } // Convert an integer value to a boolean value. Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); } return (r); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { // Get a new register int outr = alloc_register(); fprintf(Outfile, "\tmovq\t%s, %%rdi\n", reglist[r]); fprintf(Outfile, "\tcall\t%s\n", Symtable[id].name); fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]); free_register(r); return (outr); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsalq\t$%d, %s\n", val, reglist[r]); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %s(%%rip)\n", breglist[r], Symtable[id].name); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %s(%%rip)\n", dreglist[r], Symtable[id].name); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tmovq\t%s, %s(%%rip)\n", reglist[r], Symtable[id].name); break; default: fatald("Bad type in cgstorglob:", Symtable[id].type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, int id) { switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %d(%%rbp)\n", breglist[r], Symtable[id].posn); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %d(%%rbp)\n", dreglist[r], Symtable[id].posn); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tmovq\t%s, %d(%%rbp)\n", reglist[r], Symtable[id].posn); break; default: fatald("Bad type in cgstorlocal:", 
Symtable[id].type); } return (r); } // Array of type sizes in P_XXX order. // 0 means no size. static int psize[] = { 0, 0, 1, 4, 8, 8, 8, 8, 8 }; // Given a P_XXX type value, return the // size of a primitive type in bytes. int cgprimsize(int type) { // Check the type is valid if (type < P_NONE || type > P_LONGPTR) fatal("Bad type in cgprimsize()"); return (psize[type]); } // Generate a global symbol but not functions void cgglobsym(int id) { int typesize; if (Symtable[id].stype == S_FUNCTION) return; // Get the size of the type typesize = cgprimsize(Symtable[id].type); // Generate the global identity and the label cgdataseg(); fprintf(Outfile, "\t.globl\t%s\n", Symtable[id].name); fprintf(Outfile, "%s:", Symtable[id].name); // Generate the space for (int i = 0; i < Symtable[id].size; i++) { switch (typesize) { case 1: fprintf(Outfile, "\t.byte\t0\n"); break; case 4: fprintf(Outfile, "\t.long\t0\n"); break; case 8: fprintf(Outfile, "\t.quad\t0\n"); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\t.byte\t%d\n", *cptr); } fprintf(Outfile, "\t.byte\t0\n"); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { // Generate code depending on the function's type switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzbl\t%s, %%eax\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %%eax\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", Symtable[id].type); } cgjump(Symtable[id].endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(int id) { int r = alloc_register(); if (Symtable[id].class == C_LOCAL) fprintf(Outfile, "\tleaq\t%d(%%rbp), %s\n", Symtable[id].posn, reglist[r]); else fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", Symtable[id].name, reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tmovzbq\t(%s), %s\n", reglist[r], reglist[r]); break; case P_INTPTR: fprintf(Outfile, "\tmovslq\t(%s), %s\n", reglist[r], reglist[r]); break; case P_LONGPTR: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, (%s)\n", breglist[r1], reglist[r2]); break; case P_INT: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 23_Local_Variables/cg_arm.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for ARMv6 on Raspberry Pi // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. static int freereg[4]; static char *reglist[4] = { "r4", "r5", "r6", "r7" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. 
static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // We have to store large integer literal values in memory. // Keep a list of them which will be output in the postamble #define MAXINTS 1024 int Intlist[MAXINTS]; static int Intslot = 0; // Determine the offset of a large integer // literal from the .L3 label. If the integer // isn't in the list, add it. static void set_int_offset(int val) { int offset = -1; // See if it is already there for (int i = 0; i < Intslot; i++) { if (Intlist[i] == val) { offset = 4 * i; break; } } // Not in the list, so add it if (offset == -1) { offset = 4 * Intslot; if (Intslot == MAXINTS) fatal("Out of int slots in set_int_offset()"); Intlist[Intslot++] = val; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L3+%d\n", offset); } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Print out the assembly postamble void cgpostamble() { // Print out the global variables fprintf(Outfile, ".L2:\n"); for (int i = 0; i < Globs; i++) { if (Symtable[i].stype == S_VARIABLE) fprintf(Outfile, "\t.word %s\n", Symtable[i].name); } // Print out the integer literals fprintf(Outfile, ".L3:\n"); for (int i = 0; i < Intslot; i++) { fprintf(Outfile, "\t.word %d\n", Intlist[i]); } } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, \%%function\n" "%s:\n" "\tpush\t{fp, lr}\n" "\tadd\tfp, sp, #4\n" "\tsub\tsp, sp, #8\n" "\tstr\tr0, [fp, #-8]\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { 
cglabel(Symtable[id].endlabel); fputs("\tsub\tsp, fp, #4\n" "\tpop\t{fp, pc}\n" "\t.align\t2\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); // If the literal value is small, do it with one instruction if (value <= 1000) fprintf(Outfile, "\tmov\t%s, #%d\n", reglist[r], value); else { set_int_offset(value); fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); } return (r); } // Determine the offset of a variable from the .L2 // label. Yes, this is inefficient code. static void set_var_offset(int id) { int offset = 0; // Walk the symbol table up to id. // Find S_VARIABLEs and add on 4 until // we get to our variable for (int i = 0; i < id; i++) { if (Symtable[i].stype == S_VARIABLE) offset += 4; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L2+%d\n", offset); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tldrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s, %s\n", reglist[r1], reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register 
with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\tmul\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { // To do a divide: r0 holds the dividend, r1 holds the divisor. // The quotient is in r0. fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r1]); fprintf(Outfile, "\tmov\tr1, %s\n", reglist[r2]); fprintf(Outfile, "\tbl\t__aeabi_idiv\n"); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r1]); free_register(r2); return (r1); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]); fprintf(Outfile, "\tbl\t%s\n", Symtable[id].name); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r]); return (r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tlsl\t%s, %s, #%d\n", reglist[r], reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tstr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgstorglob:", Symtable[id].type); } return (r); } // Array of type sizes in P_XXX order. // 0 means no size. static int psize[] = { 0, 0, 1, 4, 4, 4, 4, 4 }; // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { // Check the type is valid if (type < P_NONE || type > P_LONGPTR) fatal("Bad type in cgprimsize()"); return (psize[type]); } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Symtable[id].type); fprintf(Outfile, "\t.data\n" "\t.globl\t%s\n", Symtable[id].name); switch (typesize) { case 1: fprintf(Outfile, "%s:\t.byte\t0\n", Symtable[id].name); break; case 4: fprintf(Outfile, "%s:\t.long\t0\n", Symtable[id].name); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "moveq", "movne", "movlt", "movgt", "movle", "movge" }; // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "movne", "moveq", "movge", "movle", "movgt", "movlt" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #1\n", cmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #0\n", invcmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\tuxtb\t%s, %s\n", reglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tb\tL%d\n", l); } // List of inverted branch instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *brlist[] = { "bne", "beq", "bge", "ble", "bgt", "blt" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", brlist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[reg]); cgjump(Symtable[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. Return a new register int cgaddress(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); fprintf(Outfile, "\tmov\t%s, r3\n", reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tldrb\t%s, [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [%s]\n", reglist[r1], reglist[r2]); break; case P_INT: case P_LONG: fprintf(Outfile, "\tstr\t%s, [%s]\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 23_Local_Variables/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { 
no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\tsection .text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\tsection .data\n", Outfile); currSeg = data_seg; } } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Reset the position of new local variables when parsing a new function void cgresetlocals(void) { localOffset = 0; } // Get the position of the next local variable. // Use the isparam flag to allocate a parameter (not yet XXX). int cggetlocaloffset(int type, int isparam) { // For now just decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; // printf("Returning offset %d for type %d\n", localOffset, type); return (-localOffset); } // List of available registers and their names. // We need a list of byte and doubleword registers, too #define NUMFREEREGS 4 static int freereg[NUMFREEREGS]; static char *reglist[] = { "r8", "r9", "r10", "r11" }; static char *breglist[] = { "r8b", "r9b", "r10b", "r11b" }; static char *dreglist[] = { "r8d", "r9d", "r10d", "r11d" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\textern\tprintint\n", Outfile); fputs("\textern\tprintchar\n", Outfile); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; cgtextseg(); // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset= (localOffset+15) & ~15; // printf("preamble local %d stack %d\n", localOffset, stackOffset); fprintf(Outfile, "\tglobal\t%s\n" "%s:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n" "\tadd\trsp, %d\n", name, name, -stackOffset); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Symtable[id].endlabel); fprintf(Outfile, "\tadd\trsp, %d\n", stackOffset); fputs("\tpop rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. 
int cgloadglob(int id, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it switch (Symtable[id].type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", Symtable[id].name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", Symtable[id].name); fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], Symtable[id].name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", Symtable[id].name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", Symtable[id].name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword [%s]\n", Symtable[id].name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tdword [%s]\n", Symtable[id].name); fprintf(Outfile, "\tmovsx\t%s, word [%s]\n", dreglist[r], Symtable[id].name); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword [%s]\n", Symtable[id].name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword [%s]\n", Symtable[id].name); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword [%s]\n", Symtable[id].name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", Symtable[id].name); fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], Symtable[id].name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword [%s]\n", Symtable[id].name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", Symtable[id].name); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. 
int cgloadlocal(int id, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it switch (Symtable[id].type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", Symtable[id].posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", Symtable[id].posn); fprintf(Outfile, "\tmovzx\t%s, byte [rbp+%d]\n", reglist[r], Symtable[id].posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", Symtable[id].posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", Symtable[id].posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", Symtable[id].posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", Symtable[id].posn); fprintf(Outfile, "\tmovsx\t%s, word [rbp+%d]\n", reglist[r], Symtable[id].posn); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", Symtable[id].posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", Symtable[id].posn); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", Symtable[id].posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", Symtable[id].posn); fprintf(Outfile, "\tmov\t%s, [rbp+%d]\n", reglist[r], Symtable[id].posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", Symtable[id].posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", Symtable[id].posn); break; default: fatald("Bad type in cgloadlocal:", Symtable[id].type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int id) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, L%d\n", reglist[r], id); return (r); } // Add two registers together and return // the number of the register with the result 
int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tand\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\tor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshl\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshr\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tneg\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnot\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", 
breglist[r]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r], breglist[r]); return (r); } // Convert an integer value to a boolean value. Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, byte %s\n", reglist[r], breglist[r]); } return (r); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { // Get a new register int outr = alloc_register(); fprintf(Outfile, "\tmov\trdi, %s\n", reglist[r]); fprintf(Outfile, "\tcall\t%s\n", Symtable[id].name); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[outr]); free_register(r); return (outr); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsal\t%s, %d\n", reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], %s\n", Symtable[id].name, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", Symtable[id].name, dreglist[r]); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tmov\t[%s], %s\n", Symtable[id].name, reglist[r]); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, int id) { switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tmov\tbyte\t[rbp+%d], %s\n", Symtable[id].posn, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\tdword\t[rbp+%d], %s\n", Symtable[id].posn, dreglist[r]); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tmov\tqword\t[rbp+%d], %s\n", Symtable[id].posn, reglist[r]); break; default: fatald("Bad type 
in cgstorlocal:", Symtable[id].type); } return (r); } // Array of type sizes in P_XXX order. // 0 means no size. static int psize[] = { 0, 0, 1, 4, 8, 8, 8, 8, 8 }; // Given a P_XXX type value, return the // size of a primitive type in bytes. int cgprimsize(int type) { // Check the type is valid if (type < P_NONE || type > P_LONGPTR) fatal("Bad type in cgprimsize()"); return (psize[type]); } // Generate a global symbol but not functions void cgglobsym(int id) { int typesize; if (Symtable[id].stype == S_FUNCTION) return; // Get the size of the type typesize = cgprimsize(Symtable[id].type); // Generate the global identity and the label cgdataseg(); fprintf(Outfile, "\tsection\t.data\n" "\tglobal\t%s\n", Symtable[id].name); fprintf(Outfile, "%s:", Symtable[id].name); // Generate the space // original version for (int i = 0; i < Symtable[id].size; i++) { switch(typesize) { case 1: fprintf(Outfile, "\tdb\t0\n"); break; case 4: fprintf(Outfile, "\tdd\t0\n"); break; case 8: fprintf(Outfile, "\tdq\t0\n"); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } /* compact version using times instead of loop switch(typesize) { case 1: fprintf(Outfile, "\ttimes\t%d\tdb\t0\n", Symtable[id].size); break; case 4: fprintf(Outfile, "\ttimes\t%d\tdd\t0\n", Symtable[id].size); break; case 8: fprintf(Outfile, "\ttimes\t%d\tdq\t0\n", Symtable[id].size); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } */ } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\tdb\t%d\n", *cptr); } fprintf(Outfile, "\tdb\t0\n"); /* put string in readable format on a single line // probably went overboard with error checking int comma = 0, quote = 0, start = 1; fprintf(Outfile, "\tdb\t"); for (cptr=strvalue; *cptr; cptr++) { if ( ! 
isprint(*cptr) ) if (comma || start) { fprintf(Outfile, "%d, ", *cptr); start = 0; comma = 1; } else if (quote) { fprintf(Outfile, "\', %d, ", *cptr); comma = 1; quote = 0; } else { fprintf(Outfile, "%d, ", *cptr); comma = 1; quote = 0; } else if (start || comma) { fprintf(Outfile, "\'%c", *cptr); start = comma = 0; quote = 1; } else { fprintf(Outfile, "%c", *cptr); comma = 0; quote = 1; } } if (comma || start) fprintf(Outfile, "0\n"); else fprintf(Outfile, "\', 0\n"); */ } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { // Generate code depending on the function's type switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzx\teax, %s\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmov\teax, %s\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", Symtable[id].type); } cgjump(Symtable[id].endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(int id) { int r = alloc_register(); if (Symtable[id].class == C_LOCAL) fprintf(Outfile, "\tlea\t%s, [rbp+%d]\n", reglist[r], Symtable[id].posn); else fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r], Symtable[id].name); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: fprintf(Outfile, "\tmovsx\t%s, dword [%s]\n", reglist[r], reglist[r]); break; case P_LONGPTR: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], byte %s\n", reglist[r2], breglist[r1]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; case P_LONG: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 23_Local_Variables/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Putback; // Character put back by scanner extern_ int Functionid; // Symbol id of the current function extern_ int Globs; // Position of next free global symbol slot extern_ int Locls; // Position of next free local symbol slot extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ struct token Token; // Last token scanned extern_ char Text[TEXTLEN + 1]; // Last identifier scanned extern_ struct symtable Symtable[NSYMBOLS]; // Global symbol table extern_ int O_dumpAST; ================================================ FILE: 23_Local_Variables/decl.c 
================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 // Parse the current token and return // a primitive type enum value. Also // scan in the next token int parse_type(void) { int type; switch (Token.token) { case T_VOID: type = P_VOID; break; case T_CHAR: type = P_CHAR; break; case T_INT: type = P_INT; break; case T_LONG: type = P_LONG; break; default: fatald("Illegal type, token", Token.token); } // Scan in one or more further '*' tokens // and determine the correct pointer type while (1) { scan(&Token); if (Token.token != T_STAR) break; type = pointer_to(type); } // We leave with the next token already scanned return (type); } // variable_declaration: type identifier ';' // | type identifier '[' INTLIT ']' ';' // ; // // Parse the declaration of a scalar variable or an array // with a given size. // The identifier has been scanned & we have the type void var_declaration(int type, int islocal) { // Text now has the identifier's name. // If the next token is a '[' if (Token.token == T_LBRACKET) { // Skip past the '[' scan(&Token); // Check we have an array size if (Token.token == T_INTLIT) { // Add this as a known array and generate its space in assembly. // We treat the array as a pointer to its elements' type if (islocal) { addlocl(Text, pointer_to(type), S_ARRAY, 0, Token.intvalue); } else { addglob(Text, pointer_to(type), S_ARRAY, 0, Token.intvalue); } } // Ensure we have a following ']' scan(&Token); match(T_RBRACKET, "]"); } else { // Add this as a known scalar // and generate its space in assembly if (islocal) { addlocl(Text, type, S_VARIABLE, 0, 1); } else { addglob(Text, type, S_VARIABLE, 0, 1); } } // Get the trailing semicolon semi(); } // // function_declaration: type identifier '(' ')' compound_statement ; // // Parse the declaration of a simplistic function. 
// The identifier has been scanned & we have the type struct ASTnode *function_declaration(int type) { struct ASTnode *tree, *finalstmt; int nameslot, endlabel; // Text now has the identifier's name. // Get a label-id for the end label, add the function // to the symbol table, and set the Functionid global // to the function's symbol-id endlabel = genlabel(); nameslot = addglob(Text, type, S_FUNCTION, endlabel, 0); Functionid = nameslot; genresetlocals(); // Reset position of new locals // Scan in the parentheses lparen(); rparen(); // Get the AST tree for the compound statement tree = compound_statement(); // If the function type isn't P_VOID .. if (type != P_VOID) { // Error if no statements in the function if (tree == NULL) fatal("No statements in function with non-void type"); // Check that the last AST operation in the // compound statement was a return statement finalstmt = (tree->op == A_GLUE) ? tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); } // Return an A_FUNCTION node which has the function's nameslot // and the compound statement sub-tree return (mkastunary(A_FUNCTION, type, tree, nameslot)); } // Parse one or more global declarations, either // variables or functions void global_declarations(void) { struct ASTnode *tree; int type; while (1) { // We have to read past the type and identifier // to see either a '(' for a function declaration // or a ',' or ';' for a variable declaration. // Text is filled in by the ident() call. 
type = parse_type(); ident(); if (Token.token == T_LPAREN) { // Parse the function declaration and // generate the assembly code for it tree = function_declaration(type); if (O_dumpAST) { dumpAST(tree, NOLABEL, 0); fprintf(stdout, "\n\n"); } genAST(tree, NOLABEL, 0); } else { // Parse the global variable declaration var_declaration(type, 0); } // Stop when we have reached EOF if (Token.token == T_EOF) break; } } ================================================ FILE: 23_Local_Variables/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c void reject_token(struct token *t); int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, int intvalue); struct ASTnode *mkastleaf(int op, int type, int intvalue); struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, int intvalue); void dumpAST(struct ASTnode *n, int label, int parentASTop); // gen.c int genlabel(void); int genAST(struct ASTnode *n, int reg, int parentASTop); void genpreamble(); void genpostamble(); void genfreeregs(); void genglobsym(int id); int genglobstr(char *strvalue); int genprimsize(int type); void genreturn(int reg, int id); void genresetlocals(void); int gengetlocaloffset(int type, int isparam); // cg.c void cgtextseg(); void cgdataseg(); void freeall_registers(void); void cgpreamble(); void cgpostamble(); void cgfuncpreamble(int id); void cgfuncpostamble(int id); int cgloadint(int value, int type); int cgloadglob(int id, int op); int cgloadlocal(int id, int op); int cgloadglobstr(int id); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdiv(int r1, int r2); int cgshlconst(int r, int val); int cgcall(int r, int id); int cgstorglob(int r, int id); int cgstorlocal(int r, int id); void cgglobsym(int id); void cgglobstr(int l, char *strvalue); int cgcompare_and_set(int 
ASTop, int r1, int r2); int cgcompare_and_jump(int ASTop, int r1, int r2, int label); void cglabel(int l); void cgjump(int l); int cgwiden(int r, int oldtype, int newtype); int cgprimsize(int type); void cgreturn(int reg, int id); int cgaddress(int id); int cgderef(int r, int type); int cgstorderef(int r1, int r2, int type); int cgnegate(int r); int cginvert(int r); int cglognot(int r); int cgboolean(int r, int op, int label); int cgand(int r1, int r2); int cgor(int r1, int r2); int cgxor(int r1, int r2); int cgshl(int r1, int r2); int cgshr(int r1, int r2); void cgresetlocals(void); int cggetlocaloffset(int type, int isparam); // expr.c struct ASTnode *binexpr(int ptp); // stmt.c struct ASTnode *compound_statement(void); // misc.c void match(int t, char *what); void semi(void); void lbrace(void); void rbrace(void); void lparen(void); void rparen(void); void ident(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c int findglob(char *s); int findlocl(char *s); int findsymbol(char *s); int addglob(char *name, int type, int stype, int endlabel, int size); int addlocl(char *name, int type, int stype, int endlabel, int size); // decl.c void var_declaration(int type, int islocal); struct ASTnode *function_declaration(int type); void global_declarations(void); // types.c int inttype(int type); int parse_type(void); int pointer_to(int type); int value_at(int type); struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op); ================================================ FILE: 23_Local_Variables/defs.h ================================================ #include #include #include #include // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 #define TEXTLEN 512 // Length of symbols in input #define NSYMBOLS 1024 // Number of symbol table entries // Token types enum { T_EOF, // Binary operators T_ASSIGN, T_LOGOR, T_LOGAND, T_OR, T_XOR, T_AMPER, T_EQ, T_NE, T_LT, T_GT, 
T_LE, T_GE, T_LSHIFT, T_RSHIFT, T_PLUS, T_MINUS, T_STAR, T_SLASH, // Other operators T_INC, T_DEC, T_INVERT, T_LOGNOT, // Type keywords T_VOID, T_CHAR, T_INT, T_LONG, // Other keywords T_IF, T_ELSE, T_WHILE, T_FOR, T_RETURN, // Structural tokens T_INTLIT, T_STRLIT, T_SEMI, T_IDENT, T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, T_LBRACKET, T_RBRACKET }; // Token structure struct token { int token; // Token type, from the enum list above int intvalue; // For T_INTLIT, the integer value }; // AST node types. The first few line up // with the related tokens enum { A_ASSIGN= 1, A_LOGOR, A_LOGAND, A_OR, A_XOR, A_AND, A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_LSHIFT, A_RSHIFT, A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_INTLIT, A_STRLIT, A_IDENT, A_GLUE, A_IF, A_WHILE, A_FUNCTION, A_WIDEN, A_RETURN, A_FUNCCALL, A_DEREF, A_ADDR, A_SCALE, A_PREINC, A_PREDEC, A_POSTINC, A_POSTDEC, A_NEGATE, A_INVERT, A_LOGNOT, A_TOBOOL }; // Primitive types enum { P_NONE, P_VOID, P_CHAR, P_INT, P_LONG, P_VOIDPTR, P_CHARPTR, P_INTPTR, P_LONGPTR }; // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates int rvalue; // True if the node is an rvalue struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; union { // For A_INTLIT, the integer value int intvalue; // For A_IDENT, the symbol slot number int id; // For A_FUNCTION, the symbol slot number int size; // For A_SCALE, the size to scale by } v; // For A_FUNCCALL, the symbol slot number }; #define NOREG -1 // Use NOREG when the AST generation // functions have no register to return #define NOLABEL 0 // Use NOLABEL when we have no label to // pass to genAST() // Structural types enum { S_VARIABLE, S_FUNCTION, S_ARRAY }; // Storage classes enum { C_GLOBAL = 1, // Globally visible symbol C_LOCAL // Locally visible symbol }; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; 
// Primitive type for the symbol int stype; // Structural type for the symbol int class; // Storage class for the symbol int endlabel; // For functions, the end label int size; // Number of elements in the symbol int posn; // For locals,the negative offset // from the stack base pointer }; ================================================ FILE: 23_Local_Variables/expr.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of expressions // Copyright (c) 2019 Warren Toomey, GPL3 // Parse a function call with a single expression // argument and return its AST static struct ASTnode *funccall(void) { struct ASTnode *tree; int id; // Check that the identifier has been defined as a function, // then make a leaf node for it. if ((id = findsymbol(Text)) == -1 || Symtable[id].stype != S_FUNCTION) { fatals("Undeclared function", Text); } // Get the '(' lparen(); // Parse the following expression tree = binexpr(0); // Build the function call AST node. Store the // function's return type as this node's type. 
// Also record the function's symbol-id tree = mkastunary(A_FUNCCALL, Symtable[id].type, tree, id); // Get the ')' rparen(); return (tree); } // Parse the index into an array and // return an AST tree for it static struct ASTnode *array_access(void) { struct ASTnode *left, *right; int id; // Check that the identifier has been defined as an array // then make a leaf node for it that points at the base if ((id = findsymbol(Text)) == -1 || Symtable[id].stype != S_ARRAY) { fatals("Undeclared array", Text); } left = mkastleaf(A_ADDR, Symtable[id].type, id); // Get the '[' scan(&Token); // Parse the following expression right = binexpr(0); // Get the ']' match(T_RBRACKET, "]"); // Ensure that this is of int type if (!inttype(right->type)) fatal("Array index is not of integer type"); // Scale the index by the size of the element's type right = modify_type(right, left->type, A_ADD); // Return an AST tree where the array's base has the offset // added to it, and dereference the element. Still an lvalue // at this point. left = mkastnode(A_ADD, Symtable[id].type, left, NULL, right, 0); left = mkastunary(A_DEREF, value_at(left->type), left, 0); return (left); } // Parse a postfix expression and return // an AST node representing it. The // identifier is already in Text. static struct ASTnode *postfix(void) { struct ASTnode *n; int id; // Scan in the next token to see if we have a postfix expression scan(&Token); // Function call if (Token.token == T_LPAREN) return (funccall()); // An array reference if (Token.token == T_LBRACKET) return (array_access()); // A variable. Check that the variable exists. 
id = findsymbol(Text); if (id == -1 || Symtable[id].stype != S_VARIABLE) fatals("Unknown variable", Text); switch (Token.token) { // Post-increment: skip over the token case T_INC: scan(&Token); n = mkastleaf(A_POSTINC, Symtable[id].type, id); break; // Post-decrement: skip over the token case T_DEC: scan(&Token); n = mkastleaf(A_POSTDEC, Symtable[id].type, id); break; // Just a variable reference default: n = mkastleaf(A_IDENT, Symtable[id].type, id); } return (n); } // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; int id; switch (Token.token) { case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. // Make it a P_CHAR if it's within the P_CHAR range if ((Token.intvalue) >= 0 && (Token.intvalue < 256)) n = mkastleaf(A_INTLIT, P_CHAR, Token.intvalue); else n = mkastleaf(A_INTLIT, P_INT, Token.intvalue); break; case T_STRLIT: // For a STRLIT token, generate the assembly for it. // Then make a leaf AST node for it. id is the string's label. id = genglobstr(Text); n = mkastleaf(A_STRLIT, P_CHARPTR, id); break; case T_IDENT: return (postfix()); case T_LPAREN: // Beginning of a parenthesised expression, skip the '('. // Scan in the expression and the right parenthesis scan(&Token); n = binexpr(0); rparen(); return (n); default: fatald("Expecting a primary expression, got token", Token.token); } // Scan in the next token and return the leaf node scan(&Token); return (n); } // Convert a binary operator token into a binary AST operation. // We rely on a 1:1 mapping from token to AST operation static int binastop(int tokentype) { if (tokentype > T_EOF && tokentype <= T_SLASH) return (tokentype); fatald("Syntax error, token", tokentype); return (0); // Keep -Wall happy } // Return true if a token is right-associative, // false otherwise. static int rightassoc(int tokentype) { if (tokentype == T_ASSIGN) return (1); return (0); } // Operator precedence for each token. 
Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 20, 30, // T_EOF, T_ASSIGN, T_LOGOR, T_LOGAND 40, 50, 60, // T_OR, T_XOR, T_AMPER 70, 70, // T_EQ, T_NE 80, 80, 80, 80, // T_LT, T_GT, T_LE, T_GE 90, 90, // T_LSHIFT, T_RSHIFT 100, 100, // T_PLUS, T_MINUS 110, 110 // T_STAR, T_SLASH }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec; if (tokentype > T_SLASH) fatald("Token with no precedence in op_precedence:", tokentype); prec = OpPrec[tokentype]; if (prec == 0) fatald("Syntax error, token", tokentype); return (prec); } // prefix_expression: primary // | '*' prefix_expression // | '&' prefix_expression // | '-' prefix_expression // | '++' prefix_expression // | '--' prefix_expression // ; // Parse a prefix expression and return // a sub-tree representing it. struct ASTnode *prefix(void) { struct ASTnode *tree; switch (Token.token) { case T_AMPER: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Ensure that it's an identifier if (tree->op != A_IDENT) fatal("& operator must be followed by an identifier"); // Now change the operator to A_ADDR and the type to // a pointer to the original type tree->op = A_ADDR; tree->type = pointer_to(tree->type); break; case T_STAR: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's either another deref or an // identifier if (tree->op != A_IDENT && tree->op != A_DEREF) fatal("* operator must be followed by an identifier or *"); // Prepend an A_DEREF operation to the tree tree = mkastunary(A_DEREF, value_at(tree->type), tree, 0); break; case T_MINUS: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_NEGATE operation to the tree and // make the child an rvalue. 
Because chars are unsigned, // also widen this to int so that it's signed tree->rvalue = 1; tree = modify_type(tree, P_INT, 0); tree = mkastunary(A_NEGATE, tree->type, tree, 0); break; case T_INVERT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_INVERT operation to the tree and // make the child an rvalue. tree->rvalue = 1; tree = mkastunary(A_INVERT, tree->type, tree, 0); break; case T_LOGNOT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_LOGNOT operation to the tree and // make the child an rvalue. tree->rvalue = 1; tree = mkastunary(A_LOGNOT, tree->type, tree, 0); break; case T_INC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("++ operator must be followed by an identifier"); // Prepend an A_PREINC operation to the tree tree = mkastunary(A_PREINC, tree->type, tree, 0); break; case T_DEC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("-- operator must be followed by an identifier"); // Prepend an A_PREDEC operation to the tree tree = mkastunary(A_PREDEC, tree->type, tree, 0); break; default: tree = primary(); } return (tree); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; struct ASTnode *ltemp, *rtemp; int ASTop; int tokentype; // Get the tree on the left. // Fetch the next token at the same time. 
left = prefix(); // If we hit a semicolon or ')', return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET) { left->rvalue = 1; return (left); } // While the precedence of this token is more than that of the // previous token precedence, or it's right associative and // equal to the previous token's precedence while ((op_precedence(tokentype) > ptp) || (rightassoc(tokentype) && op_precedence(tokentype) == ptp)) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Determine the operation to be performed on the sub-trees ASTop = binastop(tokentype); if (ASTop == A_ASSIGN) { // Assignment // Make the right tree into an rvalue right->rvalue = 1; // Ensure the right's type matches the left right = modify_type(right, left->type, 0); if (left == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree. However, switch // left and right around, so that the right expression's // code will be generated before the left expression ltemp = left; left = right; right = ltemp; } else { // We are not doing an assignment, so both trees should be rvalues // Convert both trees into rvalue if they are lvalue trees left->rvalue = 1; right->rvalue = 1; // Ensure the two types are compatible by trying // to modify each tree to match the other's type. ltemp = modify_type(left, right->type, ASTop); rtemp = modify_type(right, left->type, ASTop); if (ltemp == NULL && rtemp == NULL) fatal("Incompatible types in binary expression"); if (ltemp != NULL) left = ltemp; if (rtemp != NULL) right = rtemp; } // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. left = mkastnode(binastop(tokentype), left->type, left, NULL, right, 0); // Update the details of the current token. 
// If we hit a semicolon, ')' or ']', return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET) { left->rvalue = 1; return (left); } } // Return the tree we have when the precedence // is the same or lower left->rvalue = 1; return (left); } ================================================ FILE: 23_Local_Variables/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number int genlabel(void) { static int id = 1; return (id++); } // Generate the code for an IF statement // and an optional ELSE clause static int genIF(struct ASTnode *n) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. // When there is no ELSE clause, Lfalse _is_ // the ending label! Lfalse = genlabel(); if (n->right) Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. genAST(n->left, Lfalse, n->op); genfreeregs(); // Generate the true compound statement genAST(n->mid, NOLABEL, n->op); genfreeregs(); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOLABEL, n->op); genfreeregs(); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = genlabel(); Lend = genlabel(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. 
genAST(n->left, Lend, n->op); genfreeregs(); // Generate the compound statement for the body genAST(n->right, NOLABEL, n->op); genfreeregs(); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Given an AST, an optional label, and the AST op // of the parent, generate assembly code recursively. // Return the register id with the tree's final value. int genAST(struct ASTnode *n, int label, int parentASTop) { int leftreg, rightreg; // We have some specific AST node handling at the top // so that we don't evaluate the child sub-trees immediately switch (n->op) { case A_IF: return (genIF(n)); case A_WHILE: return (genWHILE(n)); case A_GLUE: // Do each child statement, and free the // registers after each child genAST(n->left, NOLABEL, n->op); genfreeregs(); genAST(n->right, NOLABEL, n->op); genfreeregs(); return (NOREG); case A_FUNCTION: // Generate the function's preamble before the code // in the child sub-tree cgfuncpreamble(n->v.id); genAST(n->left, NOLABEL, n->op); cgfuncpostamble(n->v.id); return (NOREG); } // General AST node handling below // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, NOLABEL, n->op); if (n->right) rightreg = genAST(n->right, NOLABEL, n->op); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdiv(leftreg, rightreg)); case A_AND: return (cgand(leftreg, rightreg)); case A_OR: return (cgor(leftreg, rightreg)); case A_XOR: return (cgxor(leftreg, rightreg)); case A_LSHIFT: return (cgshl(leftreg, rightreg)); case A_RSHIFT: return (cgshr(leftreg, rightreg)); case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, compare registers // and set one to 1 or 0 based on the comparison. 
if (parentASTop == A_IF || parentASTop == A_WHILE) return (cgcompare_and_jump(n->op, leftreg, rightreg, label)); else return (cgcompare_and_set(n->op, leftreg, rightreg)); case A_INTLIT: return (cgloadint(n->v.intvalue, n->type)); case A_STRLIT: return (cgloadglobstr(n->v.id)); case A_IDENT: // Load our value if we are an rvalue // or we are being dereferenced if (n->rvalue || parentASTop == A_DEREF) { if (Symtable[n->v.id].class == C_LOCAL) { return (cgloadlocal(n->v.id, n->op)); } else { return (cgloadglob(n->v.id, n->op)); } } else return (NOREG); case A_ASSIGN: // Are we assigning to an identifier or through a pointer? switch (n->right->op) { case A_IDENT: if (Symtable[n->right->v.id].class == C_LOCAL) return (cgstorlocal(leftreg, n->right->v.id)); else return (cgstorglob(leftreg, n->right->v.id)); case A_DEREF: return (cgstorderef(leftreg, rightreg, n->right->type)); default: fatald("Can't A_ASSIGN in genAST(), op", n->op); } case A_WIDEN: // Widen the child's type to the parent's type return (cgwiden(leftreg, n->left->type, n->type)); case A_RETURN: cgreturn(leftreg, Functionid); return (NOREG); case A_FUNCCALL: return (cgcall(leftreg, n->v.id)); case A_ADDR: return (cgaddress(n->v.id)); case A_DEREF: // If we are an rvalue, dereference to get the value we point at, // otherwise leave it for A_ASSIGN to store through the pointer if (n->rvalue) return (cgderef(leftreg, n->left->type)); else return (leftreg); case A_SCALE: // Small optimisation: use shift if the // scale value is a known power of two switch (n->v.size) { case 2: return (cgshlconst(leftreg, 1)); case 4: return (cgshlconst(leftreg, 2)); case 8: return (cgshlconst(leftreg, 3)); default: // Load a register with the size and // multiply the leftreg by this size rightreg = cgloadint(n->v.size, P_INT); return (cgmul(leftreg, rightreg)); } case A_POSTINC: // Load the variable's value into a register, // then increment it return (cgloadglob(n->v.id, n->op)); case A_POSTDEC: // Load the variable's value 
into a register, // then decrement it return (cgloadglob(n->v.id, n->op)); case A_PREINC: // Load and increment the variable's value into a register return (cgloadglob(n->left->v.id, n->op)); case A_PREDEC: // Load and decrement the variable's value into a register return (cgloadglob(n->left->v.id, n->op)); case A_NEGATE: return (cgnegate(leftreg)); case A_INVERT: return (cginvert(leftreg)); case A_LOGNOT: return (cglognot(leftreg)); case A_TOBOOL: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, set the register // to 0 or 1 based on it's zeroeness or non-zeroeness return (cgboolean(leftreg, parentASTop, label)); default: fatald("Unknown AST operator", n->op); } return (NOREG); // Keep -Wall happy } void genpreamble() { cgpreamble(); } void genpostamble() { cgpostamble(); } void genfreeregs() { freeall_registers(); } void genglobsym(int id) { cgglobsym(id); } int genglobstr(char *strvalue) { int l = genlabel(); cgglobstr(l, strvalue); return (l); } int genprimsize(int type) { return (cgprimsize(type)); } void genresetlocals(void) { cgresetlocals(); } int gengetlocaloffset(int type, int isparam) { return (cggetlocaloffset(type, isparam)); } ================================================ FILE: 23_Local_Variables/lib/printint.c ================================================ #include void printint(long x) { printf("%ld\n", x); } void printchar(long x) { putc((char)(x & 0x7f), stdout); } ================================================ FILE: 23_Local_Variables/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Initialise global variables static void init() { Line = 1; Putback = '\n'; Globs = 0; Locls = NSYMBOLS - 1; O_dumpAST = 0; } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: 
%s [-T] infile\n", prog); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. int main(int argc, char *argv[]) { int i; // Initialise the globals init(); // Scan for command-line options for (i = 1; i < argc; i++) { if (*argv[i] != '-') break; for (int j = 1; argv[i][j]; j++) { switch (argv[i][j]) { case 'T': O_dumpAST = 1; break; default: usage(argv[0]); } } } // Ensure we have an input file argument if (i >= argc) usage(argv[0]); // Open up the input file if ((Infile = fopen(argv[i], "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", argv[i], strerror(errno)); exit(1); } // Create the output file if ((Outfile = fopen("out.s", "w")) == NULL) { fprintf(stderr, "Unable to create out.s: %s\n", strerror(errno)); exit(1); } // For now, ensure that printint() and printchar() are defined addglob("printint", P_INT, S_FUNCTION, 0, 0); addglob("printchar", P_VOID, S_FUNCTION, 0, 0); scan(&Token); // Get the first token from the input genpreamble(); // Output the preamble global_declarations(); // Parse the global declarations genpostamble(); // Output the postamble fclose(Outfile); // Close the output file and exit return (0); } ================================================ FILE: 23_Local_Variables/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current token is t, // and fetch the next token. 
Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d\n", s, Line); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d\n", s1, s2, Line); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d\n", s, d, Line); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d\n", s, c, Line); exit(1); } ================================================ FILE: 23_Local_Variables/scan.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { char *p; p = strchr(s, c); return (p ? p - s : -1); } // Get the next character from the input file. static int next(void) { int c; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return (c); } c = fgetc(Infile); // Read from input file if ('\n' == c) Line++; // Increment line count return (c); } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. 
static int skip(void) { int c; c = next(); while (' ' == c || '\t' == c || '\n' == c || '\r' == c || '\f' == c) { c = next(); } return (c); } // Return the next character from a character // or string literal static int scanch(void) { int c; // Get the next input character and interpret // metacharacters that start with a backslash c = next(); if (c == '\\') { switch (c = next()) { case 'a': return '\a'; case 'b': return '\b'; case 'f': return '\f'; case 'n': return '\n'; case 'r': return '\r'; case 't': return '\t'; case 'v': return '\v'; case '\\': return '\\'; case '"': return '"'; case '\'': return '\''; default: fatalc("unknown escape sequence", c); } } return (c); // Just an ordinary old character! } // Scan and return an integer literal // value from the input file. static int scanint(int c) { int k, val = 0; // Convert each character into an int value while ((k = chrpos("0123456789", c)) >= 0) { val = val * 10 + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan in a string literal from the input file, // and store it in buf[]. Return the length of // the string. static int scanstr(char *buf) { int i, c; // Loop while we have enough buffer space for (i = 0; i < TEXTLEN - 1; i++) { // Get the next char and append to buf // Return when we hit the ending double quote if ((c = scanch()) == '"') { buf[i] = 0; return (i); } buf[i] = c; } // Ran out of buf[] space fatal("String literal too long"); return (0); } // Scan an identifier from the input file and // store it in buf[]. Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { fatal("Identifier too long"); } else if (i < lim - 1) { buf[i++] = c; } c = next(); } // We hit a non-valid character, put it back. 
// NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. static int keyword(char *s) { switch (*s) { case 'c': if (!strcmp(s, "char")) return (T_CHAR); break; case 'e': if (!strcmp(s, "else")) return (T_ELSE); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'l': if (!strcmp(s, "long")) return (T_LONG); break; case 'r': if (!strcmp(s, "return")) return (T_RETURN); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; case 'v': if (!strcmp(s, "void")) return (T_VOID); break; } return (0); } // A pointer to a rejected token static struct token *Rejtoken = NULL; // Reject the token that we just scanned void reject_token(struct token *t) { if (Rejtoken != NULL) fatal("Can't reject token twice"); Rejtoken = t; } // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. 
int scan(struct token *t) { int c, tokentype; // If we have any rejected token, return it if (Rejtoken != NULL) { t = Rejtoken; Rejtoken = NULL; return (1); } // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': if ((c = next()) == '+') { t->token = T_INC; } else { putback(c); t->token = T_PLUS; } break; case '-': if ((c = next()) == '-') { t->token = T_DEC; } else { putback(c); t->token = T_MINUS; } break; case '*': t->token = T_STAR; break; case '/': t->token = T_SLASH; break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case '[': t->token = T_LBRACKET; break; case ']': t->token = T_RBRACKET; break; case '~': t->token = T_INVERT; break; case '^': t->token = T_XOR; break; case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { putback(c); t->token = T_LOGNOT; } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else if (c == '<') { t->token = T_LSHIFT; } else { putback(c); t->token = T_LT; } break; case '>': if ((c = next()) == '=') { t->token = T_GE; } else if (c == '>') { t->token = T_RSHIFT; } else { putback(c); t->token = T_GT; } break; case '&': if ((c = next()) == '&') { t->token = T_LOGAND; } else { putback(c); t->token = T_AMPER; } break; case '|': if ((c = next()) == '|') { t->token = T_LOGOR; } else { putback(c); t->token = T_OR; } break; case '\'': // If it's a quote, scan in the // literal character value and // the trailing quote t->intvalue = scanch(); t->token = T_INTLIT; if (next() != '\'') fatal("Expected '\\'' at end of char literal"); break; case '"': // Scan in a literal string scanstr(Text); t->token = T_STRLIT; break; default: // If it's a digit, scan the // literal integer value in if 
(isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if ((tokentype = keyword(Text)) != 0) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token return (1); } ================================================ FILE: 23_Local_Variables/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // Prototypes static struct ASTnode *single_statement(void); // compound_statement: // empty, i.e. no statement // | statement // | statement statements // ; // // statement: declaration // | expression_statement // | function_call // | if_statement // | while_statement // | for_statement // | return_statement // ; // if_statement: if_head // | if_head 'else' compound_statement // ; // // if_head: 'if' '(' true_false_expression ')' compound_statement ; // // Parse an IF statement including any // optional ELSE clause and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. 
condAST = binexpr(0);
  // Any operation outside the A_EQ..A_GE range is not a comparison:
  // wrap it in A_TOBOOL
  if (condAST->op < A_EQ || condAST->op > A_GE)
    condAST = mkastunary(A_TOBOOL, condAST->type, condAST, 0);
  rparen();

  // Get the AST for the compound statement
  trueAST = compound_statement();

  // If we have an 'else', skip it
  // and get the AST for the compound statement
  if (Token.token == T_ELSE) {
    scan(&Token);
    falseAST = compound_statement();
  }
  // Build and return the AST for this statement:
  // condition on the left, true body in the middle,
  // optional false body (or NULL) on the right
  return (mkastnode(A_IF, P_NONE, condAST, trueAST, falseAST, 0));
}


// while_statement: 'while' '(' true_false_expression ')' compound_statement  ;
//
// Parse a WHILE statement and return its AST
static struct ASTnode *while_statement(void) {
  struct ASTnode *condAST, *bodyAST;

  // Ensure we have 'while' '('
  match(T_WHILE, "while");
  lparen();

  // Parse the following expression
  // and the ')' following. Force a
  // non-comparison to be boolean, unless
  // the tree's operation is already a comparison.
  condAST = binexpr(0);
  if (condAST->op < A_EQ || condAST->op > A_GE)
    condAST = mkastunary(A_TOBOOL, condAST->type, condAST, 0);
  rparen();

  // Get the AST for the compound statement
  bodyAST = compound_statement();

  // Build and return the AST for this statement:
  // condition on the left, body on the right
  return (mkastnode(A_WHILE, P_NONE, condAST, NULL, bodyAST, 0));
}

// for_statement: 'for' '(' preop_statement ';'
//                          true_false_expression ';'
//                          postop_statement ')' compound_statement  ;
//
// preop_statement:  statement          (for now)
// postop_statement: statement          (for now)
//
// Parse a FOR statement and return its AST.
// The loop is rewritten as: preop; while (cond) { body; postop; }
static struct ASTnode *for_statement(void) {
  struct ASTnode *condAST, *bodyAST;
  struct ASTnode *preopAST, *postopAST;
  struct ASTnode *tree;

  // Ensure we have 'for' '('
  match(T_FOR, "for");
  lparen();

  // Get the pre_op statement and the ';'
  preopAST = single_statement();
  semi();

  // Get the condition and the ';'.
  // Force a non-comparison to be boolean, unless
  // the tree's operation is already a comparison.
  condAST = binexpr(0);
  if (condAST->op < A_EQ || condAST->op > A_GE)
    condAST = mkastunary(A_TOBOOL, condAST->type, condAST, 0);
  semi();

  // Get the post_op statement and the ')'
  postopAST = single_statement();
  rparen();

  // Get the compound statement which is the body
  bodyAST = compound_statement();

  // For now, all four sub-trees have to be non-NULL.
  // Later on, we'll change the semantics for when some are missing

  // Glue the compound statement and the postop tree
  tree = mkastnode(A_GLUE, P_NONE, bodyAST, NULL, postopAST, 0);

  // Make a WHILE loop with the condition and this new body
  tree = mkastnode(A_WHILE, P_NONE, condAST, NULL, tree, 0);

  // And glue the preop tree to the A_WHILE tree
  return (mkastnode(A_GLUE, P_NONE, preopAST, NULL, tree, 0));
}

// return_statement: 'return' '(' expression ')'  ;
//
// Parse a return statement and return its AST.
// The trailing ';' is not consumed here.
static struct ASTnode *return_statement(void) {
  struct ASTnode *tree;

  // Can't return a value if function returns P_VOID
  if (Symtable[Functionid].type == P_VOID)
    fatal("Can't return from a void function");

  // Ensure we have 'return' '('
  match(T_RETURN, "return");
  lparen();

  // Parse the following expression
  tree = binexpr(0);

  // Ensure this is compatible with the function's type
  tree = modify_type(tree, Symtable[Functionid].type, 0);
  if (tree == NULL)
    fatal("Incompatible type to return");

  // Add on the A_RETURN node
  tree = mkastunary(A_RETURN, P_NONE, tree, 0);

  // Get the ')'
  rparen();
  return (tree);
}

// Parse a single statement and return its AST.
// Declarations produce no AST and return NULL.
static struct ASTnode *single_statement(void) {
  int type;

  switch (Token.token) {
    case T_CHAR:
    case T_INT:
    case T_LONG:

      // The beginning of a variable declaration.
      // Parse the type and get the identifier.
      // Then parse the rest of the declaration.
type = parse_type();
      ident();
      var_declaration(type, 1);
      return (NULL);            // No AST generated here
    case T_IF:
      return (if_statement());
    case T_WHILE:
      return (while_statement());
    case T_FOR:
      return (for_statement());
    case T_RETURN:
      return (return_statement());
    default:
      // For now, see if this is an expression.
      // This catches assignment statements.
      return (binexpr(0));
  }
  return (NULL);                // Keep -Wall happy
}

// Parse a compound statement
// and return its AST.
// The statements are chained left to right with A_GLUE nodes.
// The result is NULL when no statement produced an AST
// (for example, a body holding only declarations).
struct ASTnode *compound_statement(void) {
  struct ASTnode *left = NULL;
  struct ASTnode *tree;

  // Require a left curly bracket
  lbrace();

  // NOTE(review): the closing brace is only tested after
  // single_statement() has run, so an empty `{}` body reaches
  // single_statement() with '}' as the current token.
  while (1) {
    // Parse a single statement
    tree = single_statement();

    // Some statements must be followed by a semicolon
    if (tree != NULL && (tree->op == A_ASSIGN ||
                         tree->op == A_RETURN || tree->op == A_FUNCCALL))
      semi();

    // For each new tree, either save it in left
    // if left is empty, or glue the left and the
    // new tree together
    if (tree != NULL) {
      if (left == NULL)
        left = tree;
      else
        left = mkastnode(A_GLUE, P_NONE, left, NULL, tree, 0);
    }
    // When we hit a right curly bracket,
    // skip past it and return the AST
    if (Token.token == T_RBRACE) {
      rbrace();
      return (left);
    }
  }
}

================================================
FILE: 23_Local_Variables/sym.c
================================================
#include "defs.h"
#include "data.h"
#include "decl.h"

// Symbol table functions
// Copyright (c) 2019 Warren Toomey, GPL3

// Determine if the symbol s is in the global symbol table.
// Return its slot position or -1 if not found.
// Globals occupy slots 0 .. Globs-1.
int findglob(char *s) {
  int i;

  for (i = 0; i < Globs; i++) {
    // Compare the first characters before paying for a full strcmp()
    if (*s == *Symtable[i].name && !strcmp(s, Symtable[i].name))
      return (i);
  }
  return (-1);
}

// Get the position of a new global symbol slot, or die
// if we've run out of positions.
// Global slots grow upwards from 0 until they meet Locls.
static int newglob(void) {
  int p;

  if ((p = Globs++) >= Locls)
    fatal("Too many global symbols");
  return (p);
}

// Determine if the symbol s is in the local symbol table.
// Return its slot position or -1 if not found.
int findlocl(char *s) { int i; for (i = Locls + 1; i < NSYMBOLS; i++) { if (*s == *Symtable[i].name && !strcmp(s, Symtable[i].name)) return (i); } return (-1); } // Get the position of a new local symbol slot, or die // if we've run out of positions. static int newlocl(void) { int p; if ((p = Locls--) <= Globs) fatal("Too many local symbols"); return (p); } // Update a symbol at the given slot number in the symbol table. Set up its: // + type: char, int etc. // + structural type: var, function, array etc. // + size: number of elements // + endlabel: if this is a function // + posn: Position information for local symbols static void updatesym(int slot, char *name, int type, int stype, int class, int endlabel, int size, int posn) { if (slot < 0 || slot >= NSYMBOLS) fatal("Invalid symbol slot number in updatesym()"); Symtable[slot].name = strdup(name); Symtable[slot].type = type; Symtable[slot].stype = stype; Symtable[slot].class = class; Symtable[slot].endlabel = endlabel; Symtable[slot].size = size; Symtable[slot].posn = posn; } // Add a global symbol to the symbol table. Set up its: // + type: char, int etc. // + structural type: var, function, array etc. // + size: number of elements // + endlabel: if this is a function // Return the slot number in the symbol table int addglob(char *name, int type, int stype, int endlabel, int size) { int slot; // If this is already in the symbol table, return the existing slot if ((slot = findglob(name)) != -1) return (slot); // Otherwise get a new slot, fill it in and // return the slot number slot = newglob(); updatesym(slot, name, type, stype, C_GLOBAL, endlabel, size, 0); genglobsym(slot); return (slot); } // Add a local symbol to the symbol table. Set up its: // + type: char, int etc. // + structural type: var, function, array etc. 
// + size: number of elements // + endlabel: if this is a function // Return the slot number in the symbol table int addlocl(char *name, int type, int stype, int endlabel, int size) { int slot, posn; // If this is already in the symbol table, return the existing slot if ((slot = findlocl(name)) != -1) return (slot); // Otherwise get a new symbol slot and a position for this local. // Update the symbol table entry and return the slot number slot = newlocl(); posn = gengetlocaloffset(type, 0); // XXX 0 for now updatesym(slot, name, type, stype, C_LOCAL, endlabel, size, posn); return (slot); } // Determine if the symbol s is in the symbol table. // Return its slot position or -1 if not found. int findsymbol(char *s) { int slot; slot = findlocl(s); if (slot == -1) slot = findglob(s); return (slot); } ================================================ FILE: 23_Local_Variables/tests/input01.c ================================================ void main() { printint(12 * 3); printint(18 - 2 * 4); printint(1 + 2 + 9 - 5/2 + 3*5); } ================================================ FILE: 23_Local_Variables/tests/input02.c ================================================ void main() { int fred; int jim; fred= 5; jim= 12; printint(fred + jim); } ================================================ FILE: 23_Local_Variables/tests/input03.c ================================================ void main() { int x; x= 1; printint(x); x= x + 1; printint(x); x= x + 1; printint(x); x= x + 1; printint(x); x= x + 1; printint(x); } ================================================ FILE: 23_Local_Variables/tests/input04.c ================================================ void main() { int x; x= 7 < 9; printint(x); x= 7 <= 9; printint(x); x= 7 != 9; printint(x); x= 7 == 7; printint(x); x= 7 >= 7; printint(x); x= 7 <= 7; printint(x); x= 9 > 7; printint(x); x= 9 >= 7; printint(x); x= 9 != 7; printint(x); } ================================================ FILE: 23_Local_Variables/tests/input05.c 
================================================ void main() { int i; int j; i=6; j=12; if (i < j) { printint(i); } else { printint(j); } } ================================================ FILE: 23_Local_Variables/tests/input06.c ================================================ void main() { int i; i=1; while (i <= 10) { printint(i); i= i + 1; } } ================================================ FILE: 23_Local_Variables/tests/input07.c ================================================ void main() { int i; for (i= 1; i <= 10; i= i + 1) { printint(i); } } ================================================ FILE: 23_Local_Variables/tests/input08.c ================================================ void main() { int i; for (i= 1; i <= 10; i= i + 1) { printint(i); } } ================================================ FILE: 23_Local_Variables/tests/input09.c ================================================ void main() { int i; for (i= 1; i <= 10; i= i + 1) { printint(i); } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { printint(2 * b - a); } } ================================================ FILE: 23_Local_Variables/tests/input10.c ================================================ void main() { int i; char j; j= 20; printint(j); i= 10; printint(i); for (i= 1; i <= 5; i= i + 1) { printint(i); } for (j= 253; j != 2; j= j + 1) { printint(j); } } ================================================ FILE: 23_Local_Variables/tests/input11.c ================================================ int main() { int i; char j; long k; i= 10; printint(i); j= 20; printint(j); k= 30; printint(k); for (i= 1; i <= 5; i= i + 1) { printint(i); } for (j= 253; j != 4; j= j + 1) { printint(j); } for (k= 1; k <= 5; k= k + 1) { printint(k); } return(i); printint(12345); return(3); } ================================================ FILE: 23_Local_Variables/tests/input12.c ================================================ int fred() { return(5); } void main() { int x; x= fred(2); printint(x); } 
================================================ FILE: 23_Local_Variables/tests/input13.c ================================================ int fred() { return(56); } void main() { int dummy; int result; dummy= printint(23); result= fred(10); dummy= printint(result); } ================================================ FILE: 23_Local_Variables/tests/input14.c ================================================ int fred() { return(20); } int main() { int result; printint(10); result= fred(15); printint(result); printint(fred(15)+10); return(0); } ================================================ FILE: 23_Local_Variables/tests/input15.c ================================================ int main() { char a; char *b; char c; int d; int *e; int f; a= 18; printint(a); b= &a; c= *b; printint(c); d= 12; printint(d); e= &d; f= *e; printint(f); return(0); } ================================================ FILE: 23_Local_Variables/tests/input16.c ================================================ int c; int d; int *e; int f; int main() { c= 12; d=18; printint(c); e= &c + 1; f= *e; printint(f); return(0); } ================================================ FILE: 23_Local_Variables/tests/input17.c ================================================ int main() { char a; char *b; int d; int *e; b= &a; *b= 19; printint(a); e= &d; *e= 12; printint(d); return(0); } ================================================ FILE: 23_Local_Variables/tests/input18.c ================================================ int main() { int a; int b; a= b= 34; printint(a); printint(b); return(0); } ================================================ FILE: 23_Local_Variables/tests/input18a.c ================================================ int a; int *b; char c; char *d; int main() { b= &a; *b= 15; printint(a); d= &c; *d= 16; printint(c); return(0); } ================================================ FILE: 23_Local_Variables/tests/input19.c ================================================ int a; int b; int c; int d; int e; 
int main() { a= 2; b= 4; c= 3; d= 2; e= (a+b) * (c+d); printint(e); return(0); } ================================================ FILE: 23_Local_Variables/tests/input20.c ================================================ int a; int b[25]; int main() { b[3]= 12; a= b[3]; printint(a); return(0); } ================================================ FILE: 23_Local_Variables/tests/input21.c ================================================ char c; char *str; int main() { c= '\n'; printint(c); for (str= "Hello world\n"; *str != 0; str= str + 1) { printchar(*str); } return(0); } ================================================ FILE: 23_Local_Variables/tests/input22.c ================================================ char a; char b; char c; int d; int e; int f; long g; long h; long i; int main() { b= 5; c= 7; a= b + c++; printint(a); e= 5; f= 7; d= e + f++; printint(d); h= 5; i= 7; g= h + i++; printint(g); a= b-- + c; printint(a); d= e-- + f; printint(d); g= h-- + i; printint(g); a= ++b + c; printint(a); d= ++e + f; printint(d); g= ++h + i; printint(g); a= b * --c; printint(a); d= e * --f; printint(d); g= h * --i; printint(g); return(0); } ================================================ FILE: 23_Local_Variables/tests/input23.c ================================================ char *str; int x; int main() { x= -23; printint(x); printint(-10 * -10); x= 1; x= ~x; printint(x); x= 2 > 5; printint(x); x= !x; printint(x); x= !x; printint(x); x= 13; if (x) { printint(13); } x= 0; if (!x) { printint(14); } for (str= "Hello world\n"; *str; str++) { printchar(*str); } return(0); } ================================================ FILE: 23_Local_Variables/tests/input24.c ================================================ int a; int b; int c; int main() { a= 42; b= 19; printint(a & b); printint(a | b); printint(a ^ b); printint(1 << 3); printint(63 >> 3); return(0); } ================================================ FILE: 23_Local_Variables/tests/input25.c 
================================================ int a; int b; int c; int main() { char z; int y; int x; x= 10; y= 20; z= 30; printint(x); printint(y); printint(z); a= 5; b= 15; c= 25; printint(a); printint(b); printint(c); return(0); } ================================================ FILE: 23_Local_Variables/tests/mktests ================================================ #!/bin/sh # Make the output files for each test for i in input*c do if [ ! -f "out.$i" ] then cc -o out $i ../lib/printint.c ./out > out.$i rm -f out fi done ================================================ FILE: 23_Local_Variables/tests/out.input01.c ================================================ 36 10 25 ================================================ FILE: 23_Local_Variables/tests/out.input02.c ================================================ 17 ================================================ FILE: 23_Local_Variables/tests/out.input03.c ================================================ 1 2 3 4 5 ================================================ FILE: 23_Local_Variables/tests/out.input04.c ================================================ 1 1 1 1 1 1 1 1 1 ================================================ FILE: 23_Local_Variables/tests/out.input05.c ================================================ 6 ================================================ FILE: 23_Local_Variables/tests/out.input06.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 23_Local_Variables/tests/out.input07.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 23_Local_Variables/tests/out.input08.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 23_Local_Variables/tests/out.input09.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 
23_Local_Variables/tests/out.input10.c ================================================ 20 10 1 2 3 4 5 253 254 255 0 1 ================================================ FILE: 23_Local_Variables/tests/out.input11.c ================================================ 10 20 30 1 2 3 4 5 253 254 255 0 1 2 3 1 2 3 4 5 ================================================ FILE: 23_Local_Variables/tests/out.input12.c ================================================ 5 ================================================ FILE: 23_Local_Variables/tests/out.input13.c ================================================ 23 56 ================================================ FILE: 23_Local_Variables/tests/out.input14.c ================================================ 10 20 30 ================================================ FILE: 23_Local_Variables/tests/out.input15.c ================================================ 18 18 12 12 ================================================ FILE: 23_Local_Variables/tests/out.input16.c ================================================ 12 18 ================================================ FILE: 23_Local_Variables/tests/out.input17.c ================================================ 19 12 ================================================ FILE: 23_Local_Variables/tests/out.input18.c ================================================ 34 34 ================================================ FILE: 23_Local_Variables/tests/out.input18a.c ================================================ 15 16 ================================================ FILE: 23_Local_Variables/tests/out.input19.c ================================================ 30 ================================================ FILE: 23_Local_Variables/tests/out.input20.c ================================================ 12 ================================================ FILE: 23_Local_Variables/tests/out.input21.c ================================================ 10 Hello world 
================================================ FILE: 23_Local_Variables/tests/out.input22.c ================================================ 12 12 12 13 13 13 13 13 13 35 35 35 ================================================ FILE: 23_Local_Variables/tests/out.input23.c ================================================ -23 100 -2 0 1 0 13 14 Hello world ================================================ FILE: 23_Local_Variables/tests/out.input24.c ================================================ 2 59 57 8 7 ================================================ FILE: 23_Local_Variables/tests/out.input25.c ================================================ 10 20 30 5 15 25 ================================================ FILE: 23_Local_Variables/tests/runtests ================================================ #!/bin/sh # Run each test and compare # against known good output if [ ! -f ../comp1 ] then echo "Need to build ../comp1 first!"; exit 1 fi for i in input* do if [ ! -f "out.$i" ] then echo "Can't run test on $i, no output file!" else echo -n $i ../comp1 $i cc -o out out.s ../lib/printint.c ./out > trial.$i cmp -s "out.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo else echo ": OK" fi rm -f out out.s "trial.$i" fi done ================================================ FILE: 23_Local_Variables/tests/runtestsn ================================================ #!/bin/sh # Run each test and compare # against known good output if [ ! -f ../compn ] then echo "Need to build ../compn first!"; exit 1 fi for i in input* do if [ ! -f "out.$i" ] then echo "Can't run test on $i, no output file!" else echo -n $i ../compn $i nasm -f elf64 out.s cc -no-pie -Wall -o out out.o ../lib/printint.c ./out > trial.$i cmp -s "out.$i" "trial.$i" if [ "$?" 
-eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo else echo ": OK" fi rm -f out out.o out.s "trial.$i" fi done ================================================ FILE: 23_Local_Variables/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->type = type; n->left = left; n->mid = mid; n->right = right; n->v.intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, int intvalue) { return (mkastnode(op, type, NULL, NULL, NULL, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, int intvalue) { return (mkastnode(op, type, left, NULL, NULL, intvalue)); } // Generate and return a new label number // just for AST dumping purposes static int gendumplabel(void) { static int id = 1; return (id++); } // Given an AST tree, print it out and follow the // traversal of the tree that genAST() follows void dumpAST(struct ASTnode *n, int label, int level) { int Lfalse, Lstart, Lend; switch (n->op) { case A_IF: Lfalse = gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_IF"); if (n->right) { Lend = gendumplabel(); fprintf(stdout, ", end L%d", Lend); } fprintf(stdout, "\n"); dumpAST(n->left, Lfalse, level + 2); dumpAST(n->mid, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); return; case A_WHILE: Lstart = gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_WHILE, start 
L%d\n", Lstart); Lend = gendumplabel(); dumpAST(n->left, Lend, level + 2); dumpAST(n->right, NOLABEL, level + 2); return; } // Reset level to -2 for A_GLUE if (n->op == A_GLUE) level = -2; // General AST node handling if (n->left) dumpAST(n->left, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); for (int i = 0; i < level; i++) fprintf(stdout, " "); switch (n->op) { case A_GLUE: fprintf(stdout, "\n\n"); return; case A_FUNCTION: fprintf(stdout, "A_FUNCTION %s\n", Symtable[n->v.id].name); return; case A_ADD: fprintf(stdout, "A_ADD\n"); return; case A_SUBTRACT: fprintf(stdout, "A_SUBTRACT\n"); return; case A_MULTIPLY: fprintf(stdout, "A_MULTIPLY\n"); return; case A_DIVIDE: fprintf(stdout, "A_DIVIDE\n"); return; case A_EQ: fprintf(stdout, "A_EQ\n"); return; case A_NE: fprintf(stdout, "A_NE\n"); return; case A_LT: fprintf(stdout, "A_LT\n"); return; case A_GT: fprintf(stdout, "A_GT\n"); return; case A_LE: fprintf(stdout, "A_LE\n"); return; case A_GE: fprintf(stdout, "A_GE\n"); return; case A_INTLIT: fprintf(stdout, "A_INTLIT %d\n", n->v.intvalue); return; case A_STRLIT: fprintf(stdout, "A_STRLIT rval label L%d\n", n->v.id); return; case A_IDENT: if (n->rvalue) fprintf(stdout, "A_IDENT rval %s\n", Symtable[n->v.id].name); else fprintf(stdout, "A_IDENT %s\n", Symtable[n->v.id].name); return; case A_ASSIGN: fprintf(stdout, "A_ASSIGN\n"); return; case A_WIDEN: fprintf(stdout, "A_WIDEN\n"); return; case A_RETURN: fprintf(stdout, "A_RETURN\n"); return; case A_FUNCCALL: fprintf(stdout, "A_FUNCCALL %s\n", Symtable[n->v.id].name); return; case A_ADDR: fprintf(stdout, "A_ADDR %s\n", Symtable[n->v.id].name); return; case A_DEREF: if (n->rvalue) fprintf(stdout, "A_DEREF rval\n"); else fprintf(stdout, "A_DEREF\n"); return; case A_SCALE: fprintf(stdout, "A_SCALE %d\n", n->v.size); return; case A_PREINC: fprintf(stdout, "A_PREINC %s\n", Symtable[n->v.id].name); return; case A_PREDEC: fprintf(stdout, "A_PREDEC %s\n", Symtable[n->v.id].name); return; case
A_POSTINC: fprintf(stdout, "A_POSTINC\n"); return; case A_POSTDEC: fprintf(stdout, "A_POSTDEC\n"); return; case A_NEGATE: fprintf(stdout, "A_NEGATE\n"); return; default: fatald("Unknown dumpAST operator", n->op); } } ================================================ FILE: 23_Local_Variables/types.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Types and type handling // Copyright (c) 2019 Warren Toomey, GPL3 // Return true if a type is an int type // of any size, false otherwise int inttype(int type) { if (type == P_CHAR || type == P_INT || type == P_LONG) return (1); return (0); } // Return true if a type is of pointer type int ptrtype(int type) { if (type == P_VOIDPTR || type == P_CHARPTR || type == P_INTPTR || type == P_LONGPTR) return (1); return (0); } // Given a primitive type, return // the type which is a pointer to it int pointer_to(int type) { int newtype; switch (type) { case P_VOID: newtype = P_VOIDPTR; break; case P_CHAR: newtype = P_CHARPTR; break; case P_INT: newtype = P_INTPTR; break; case P_LONG: newtype = P_LONGPTR; break; default: fatald("Unrecognised in pointer_to: type", type); } return (newtype); } // Given a primitive pointer type, return // the type which it points to int value_at(int type) { int newtype; switch (type) { case P_VOIDPTR: newtype = P_VOID; break; case P_CHARPTR: newtype = P_CHAR; break; case P_INTPTR: newtype = P_INT; break; case P_LONGPTR: newtype = P_LONG; break; default: fatald("Unrecognised in value_at: type", type); } return (newtype); } // Given an AST tree and a type which we want it to become, // possibly modify the tree by widening or scaling so that // it is compatible with this type. Return the original tree // if no changes occurred, a modified tree, or NULL if the // tree is not compatible with the given type. // If this will be part of a binary operation, the AST op is not zero. 
struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op) { int ltype; int lsize, rsize; ltype = tree->type; // Compare scalar int types if (inttype(ltype) && inttype(rtype)) { // Both types same, nothing to do if (ltype == rtype) return (tree); // Get the sizes for each type lsize = genprimsize(ltype); rsize = genprimsize(rtype); // Tree's size is too big if (lsize > rsize) return (NULL); // Widen to the right if (rsize > lsize) return (mkastunary(A_WIDEN, rtype, tree, 0)); } // For pointers on the left if (ptrtype(ltype)) { // OK is same type on right and not doing a binary op if (op == 0 && ltype == rtype) return (tree); } // We can scale only on A_ADD or A_SUBTRACT operation if (op == A_ADD || op == A_SUBTRACT) { // Left is int type, right is pointer type and the size // of the original type is >1: scale the left if (inttype(ltype) && ptrtype(rtype)) { rsize = genprimsize(value_at(rtype)); if (rsize > 1) return (mkastunary(A_SCALE, rtype, tree, rsize)); else return (tree); // Size 1, no need to scale } } // If we get here, the types are not compatible return (NULL); } ================================================ FILE: 24_Function_Params/Makefile ================================================ HSRCS= data.h decl.h defs.h SRCS= cg.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c ARMSRCS= cg_arm.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c comp1: $(SRCS) $(HSRCS) cc -o comp1 -g -Wall $(SRCS) compn: $(SRCN) $(HSRCS) cc -o compn -g -Wall $(SRCN) comp1arm: $(ARMSRCS) $(HSRCS) cc -o comp1arm -g -Wall $(ARMSRCS) cp comp1arm comp1 clean: rm -f comp1 comp1arm compn *.o *.s out test: comp1 tests/runtests (cd tests; chmod +x runtests; ./runtests) armtest: comp1arm tests/runtests (cd tests; chmod +x runtests; ./runtests) testn: compn tests/runtestsn (cd tests; chmod +x runtestsn; ./runtestsn) test27: comp1 input27a.c 
input27b.c ./comp1 input27a.c cc -o out input27b.c out.s lib/printint.c ./out test27n: compn input27a.c input27b.c ./compn input27a.c nasm -f elf64 out.s cc -o out input27b.c out.o lib/printint.c ./out ================================================ FILE: 24_Function_Params/Readme.md ================================================ # Part 24: Function Parameters I've just implemented the copying of function parameters out of the registers and onto the function's stack, but I haven't yet implemented the calling of a function with arguments. As a recap, here is the image from Eli Bendersky's article on the [stack frame layout on x86-64](https://eli.thegreenplace.net/2011/09/06/stack-frame-layout-on-x86-64/). ![](../22_Design_Locals/Figs/x64_frame_nonleaf.png) Up to six "call by value" arguments to a function are passed in via the registers '%rdi' to '%r9'. For more than six arguments, the remaining arguments are pushed on the stack. When the function is called, it pushes the previous stack base pointer onto the stack, moves the stack base pointer down to point at the same location as the stack pointer, and then moves the stack pointer to the lowest local variable (at minimum). Why "at minimum"? Well, we also have to lower the stack pointer down to be a multiple of sixteen, so that the stack base pointer is aligned correctly before we call another function. The arguments which were pushed on the stack are going to remain there, with an offset from the stack base pointer which is positive. All the register-passed arguments we will copy onto the stack, and also set up locations on the stack for our local variables. These will have offsets from the stack base pointer which are negative. This is the goal, but we have to get a few things done first. ## New Tokens and Scanning To start with, function declarations in ANSI C are a comma-separated list of types and variable names, e.g. ```c int function(int x, char y, long z) { ... 
} ``` Thus, we need a new token, T_COMMA, and a change to the lexical scanner to read it in. I'll leave you to read the changes to `scan()` in `scan.c`. ## A New Storage Class In the last part of our compiler writing journey, I described the changes to the symbol table to support both global and local variables. We store globals at one end of the table, and locals at the other end. Now, I'm going to introduce function parameters. I've added a new storage class definition in `defs.h`: ```c // Storage classes enum { C_GLOBAL = 1, // Globally visible symbol C_LOCAL, // Locally visible symbol C_PARAM // Locally visible function parameter }; ``` Where will they appear in the symbol table? Actually, the same parameter will appear in both the global and the local end of the table. In the global symbol list, we will define the function's symbol first with a C_GLOBAL, S_FUNCTION entry. Then, we will define all the parameters with consecutive entries that are marked as C_PARAM. This is the function's *prototype*. It means that, when we call the function later, we can compare the argument list to the parameter list and ensure that they match. At the same time, the same list of parameters is stored in the local symbol list, marked as C_PARAM instead of C_LOCAL. This allows us to distinguish between the variables someone else sent to us, and the variables we declared ourselves. ## Changes to the Parser In this part of the journey, I'm only dealing with function declarations. We will need to modify the parser to do this. Once we have parsed the function's type, name and the opening '(', we can look for any parameters. Each parameter is declared following the normal variable declaration syntax, but instead of ending with a semicolon, the parameter declarations are separated by commas. The old `var_declaration()` function in `decl.c` used to scan in the T_SEMI token at the end of a variable declaration.
This has now been moved out to the previous callers of `var_declaration()`. We now have a new function, `param_declaration()` whose job is to read the list of (zero or more) parameters that follow after the opening parenthesis: ```c // param_declaration: // | variable_declaration // | variable_declaration ',' param_declaration // // Parse the parameters in parentheses after the function name. // Add them as symbols to the symbol table and return the number // of parameters. static int param_declaration(void) { int type; int paramcnt=0; // Loop until the final right parentheses while (Token.token != T_RPAREN) { // Get the type and identifier // and add it to the symbol table type = parse_type(); ident(); var_declaration(type, 1, 1); paramcnt++; // Must have a ',' or ')' at this point switch (Token.token) { case T_COMMA: scan(&Token); break; case T_RPAREN: break; default: fatald("Unexpected token in parameter list", Token.token); } } // Return the count of parameters return(paramcnt); } ``` The two '1' arguments to `var_declaration()` indicate that this is a local variable and also a parameter declaration. And in `var_declaration()`, we now do: ```c // Add this as a known scalar // and generate its space in assembly if (islocal) { if (addlocl(Text, type, S_VARIABLE, isparam, 1)==-1) fatals("Duplicate local variable declaration", Text); } else { addglob(Text, type, S_VARIABLE, 0, 1); } ``` The code used to allow duplicate local variable declarations, but this is now going to cause the stack to grow more than is needed, so I've made any duplicate declaration a fatal error. ## Symbol Table Changes Earlier on, I said that a parameter would be placed in both the global and local ends of the symbol table, but the above code only shows a call to `addlocl()`. So what's going on, then? I've modified `addlocl()` to also add a parameter to the global end: ```c int addlocl(char *name, int type, int stype, int isparam, int size) { int localslot, globalslot; ...
localslot = newlocl(); if (isparam) { updatesym(localslot, name, type, stype, C_PARAM, 0, size, 0); globalslot = newglob(); updatesym(globalslot, name, type, stype, C_PARAM, 0, size, 0); } else { updatesym(localslot, name, type, stype, C_LOCAL, 0, size, 0); } ``` Not only do we get a local slot in the symbol table for a parameter, we also get a global slot for it. And both are marked as C_PARAM, not C_LOCAL. Given that the global end now contains symbols which are not C_GLOBAL, we need to modify the code to search for global symbols: ```c // Determine if the symbol s is in the global symbol table. // Return its slot position or -1 if not found. // Skip C_PARAM entries int findglob(char *s) { int i; for (i = 0; i < Globs; i++) { if (Symtable[i].class == C_PARAM) continue; if (*s == *Symtable[i].name && !strcmp(s, Symtable[i].name)) return (i); } return (-1); } ``` ## x86-64 Code Generator Changes That's about it for parsing function parameters and recording their existence in the symbol table. Now we need to generate a suitable function preamble that copies in-register arguments into positions on the stack as well as setting up the new stack base pointer and stack pointer. I realised, after I'd written the `cgresetlocals()` in the last part, that I can reset the stack offset when I call `cgfuncpreamble()`, so I've removed this function. Also, the code to calculate an offset for a new local variable only needs to be visible in `cg.c`, so I've renamed it: ```c // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. static int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ?
cgprimsize(type) : 4; return (-localOffset); } ``` I've also switched from calculating a negative offset to calculating a positive offset, as this makes the maths (in my head) easier. I still return a negative offset as shown by the return value. We have six new registers that are going to hold argument values, so we had better name them somewhere. I've extended the list of register names thus: ```c #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "%r10", "%r11", "%r12", "%r13", "%r9", "%r8", "%rcx", "%rdx", "%rsi", "%rdi" }; static char *breglist[] = { "%r10b", "%r11b", "%r12b", "%r13b", "%r9b", "%r8b", "%cl", "%dl", "%sil", "%dil" }; static char *dreglist[] = { "%r10d", "%r11d", "%r12d", "%r13d", "%r9d", "%r8d", "%ecx", "%edx", "%esi", "%edi" }; ``` FIRSTPARAMREG is actually the last entry position in each list. We will start at this end and work backwards. Now we turn our attention to the function that's going to do all the work for us, `cgfuncpreamble()`. Let's look at the code in stages: ```c // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; int i; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset= 0; // Output the function start, save the %rbp and %rsp fprintf(Outfile, "\t.globl\t%s\n" "\t.type\t%s, @function\n" "%s:\n" "\tpushq\t%%rbp\n" "\tmovq\t%%rsp, %%rbp\n", name, name, name); ``` First up, declare the function, save the old base pointer and move it down to where the current stack pointer is. We also know that any on-stack arguments will be 16 above the new base pointer, and we know which will be the register with the first parameter in it.
```c // Copy any in-register parameters to the stack // Stop after no more than six parameter registers for (i = NSYMBOLS - 1; i > Locls; i--) { if (Symtable[i].class != C_PARAM) break; if (i < NSYMBOLS - 6) break; Symtable[i].posn = newlocaloffset(Symtable[i].type); cgstorlocal(paramReg--, i); } ``` This loops up to six times, but leaves the loop once we hit something that isn't a C_PARAM, i.e. a C_LOCAL. Call `newlocaloffset()` to generate an offset from the base pointer on the stack, and copy the register argument to this location on the stack. ```c // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. for (; i > Locls; i--) { if (Symtable[i].class == C_PARAM) { Symtable[i].posn = paramOffset; paramOffset += 8; } else { Symtable[i].posn = newlocaloffset(Symtable[i].type); } } ``` For each remaining local variable: if it's a C_PARAM, then it is already on the stack, so simply record its existing position in the symbol table. If it's a C_LOCAL, create a new position on the stack and record it. We now have our new stack frame set up with all the local variables that we need. All that is left is to align the stack pointer on a multiple of sixteen: ```c // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\taddq\t$%d,%%rsp\n", -stackOffset); } ``` `stackOffset` is a static variable visible throughout `cg.c`. 
We need to remember this value as, at the function's postamble, we need to increase the stack value by the amount that we lowered it, as well as restore the old stack base pointer: ```c // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Symtable[id].endlabel); fprintf(Outfile, "\taddq\t$%d,%%rsp\n", stackOffset); fputs("\tpopq %rbp\n" "\tret\n", Outfile); } ``` ## Testing the Changes With these changes to the compiler, we can declare a function with many parameters as well as whatever local variables we need. But the compiler doesn't yet generate code to pass arguments in registers etc. So, to test this change to our compiler, we write some functions with parameters and compile them with our compiler (`input27a.c`): ```c int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printint(a); printint(b); printint(c); printint(d); printint(e); printint(f); printint(g); printint(h); return(0); } int param5(int a, int b, int c, int d, int e) { printint(a); printint(b); printint(c); printint(d); printint(e); return(0); } int param2(int a, int b) { int c; int d; int e; c= 3; d= 4; e= 5; printint(a); printint(b); printint(c); printint(d); printint(e); return(0); } int param0() { int a; int b; int c; int d; int e; a= 1; b= 2; c= 3; d= 4; e= 5; printint(a); printint(b); printint(c); printint(d); printint(e); return(0); } ``` And we write a separate file, `input27b.c`, and compile this with `gcc`: ```c #include <stdio.h> extern int param8(int a, int b, int c, int d, int e, int f, int g, int h); extern int param5(int a, int b, int c, int d, int e); extern int param2(int a, int b); extern int param0(); int main() { param8(1,2,3,4,5,6,7,8); puts("--"); param5(1,2,3,4,5); puts("--"); param2(1,2); puts("--"); param0(); return(0); } ``` Then we can link them together and see if the executable runs: ``` cc -o comp1 -g -Wall cg.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c sym.c tree.c types.c ./comp1 input27a.c cc -o out input27b.c out.s lib/printint.c
./out 1 2 3 4 5 6 7 8 -- 1 2 3 4 5 -- 1 2 3 4 5 -- 1 2 3 4 5 ``` And it works! I put an exclamation mark in because it still feels like magic sometimes when things work. Let's examine the assembly code for `param8()`: ``` param8: pushq %rbp # Save %rbp, move %rsp movq %rsp, %rbp movl %edi, -4(%rbp) # Copy six arguments into locals movl %esi, -8(%rbp) # on the stack movl %edx, -12(%rbp) movl %ecx, -16(%rbp) movl %r8d, -20(%rbp) movl %r9d, -24(%rbp) addq $-32,%rsp # Lower stack pointer by 32 movslq -4(%rbp), %r10 movq %r10, %rdi call printint # Print -4(%rbp), i.e. a movq %rax, %r11 movslq -8(%rbp), %r10 movq %r10, %rdi call printint # Print -8(%rbp), i.e. b movq %rax, %r11 movslq -12(%rbp), %r10 movq %r10, %rdi call printint # Print -12(%rbp), i.e. c movq %rax, %r11 movslq -16(%rbp), %r10 movq %r10, %rdi call printint # Print -16(%rbp), i.e. d movq %rax, %r11 movslq -20(%rbp), %r10 movq %r10, %rdi call printint # Print -20(%rbp), i.e. e movq %rax, %r11 movslq -24(%rbp), %r10 movq %r10, %rdi call printint # Print -24(%rbp), i.e. f movq %rax, %r11 movslq 16(%rbp), %r10 movq %r10, %rdi call printint # Print 16(%rbp), i.e. g movq %rax, %r11 movslq 24(%rbp), %r10 movq %r10, %rdi call printint # Print 24(%rbp), i.e. h movq %rax, %r11 movq $0, %r10 movl %r10d, %eax jmp L1 L1: addq $32,%rsp # Raise stack pointer by 32 popq %rbp # Restore %rbp and return ret ``` Some of the other functions in `input27a.c` have both parameter variables and locally declared variables, so it seems the preamble being generated is correct (OK, works well enough to pass these tests!). ## Conclusion and What's Next I took a couple of attempts to get this right. The first time I walked the local symbol list in the wrong direction and got the order of parameters incorrect. And I misread the image from Eli Bendersky's article which resulted in my preamble tromping on the old base pointer. In a way, this was good because the rewritten code is a lot cleaner than the original code. 
In the next part of our compiler writing journey, I'll modify the compiler to make function calls with an arbitrary number of arguments. Then I can move `input27a.c` and `input27b.c` into the `tests/` directory. [Next step](../25_Function_Arguments/Readme.md) ================================================ FILE: 24_Function_Params/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section we are outputting in to enum { no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\t.text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\t.data\n", Outfile); currSeg = data_seg; } } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. static int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names.
// We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "%r10", "%r11", "%r12", "%r13", "%r9", "%r8", "%rcx", "%rdx", "%rsi", "%rdi" }; static char *breglist[] = { "%r10b", "%r11b", "%r12b", "%r13b", "%r9b", "%r8b", "%cl", "%dl", "%sil", "%dil" }; static char *dreglist[] = { "%r10d", "%r11d", "%r12d", "%r13d", "%r9d", "%r8d", "%ecx", "%edx", "%esi", "%edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; int i; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset= 0; // Output the function start, save the %rsp and %rsp fprintf(Outfile, "\t.globl\t%s\n" "\t.type\t%s, @function\n" "%s:\n" "\tpushq\t%%rbp\n" "\tmovq\t%%rsp, %%rbp\n", name, name, name); // Copy any in-register parameters to the stack // Stop after no more than six parameter registers for (i = NSYMBOLS - 1; i > Locls; i--) { if (Symtable[i].class != C_PARAM) break; if (i < NSYMBOLS - 6) break; Symtable[i].posn = newlocaloffset(Symtable[i].type); cgstorlocal(paramReg--, i); } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. for (; i > Locls; i--) { if (Symtable[i].class == C_PARAM) { Symtable[i].posn = paramOffset; paramOffset += 8; } else { Symtable[i].posn = newlocaloffset(Symtable[i].type); } } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\taddq\t$%d,%%rsp\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Symtable[id].endlabel); fprintf(Outfile, "\taddq\t$%d,%%rsp\n", stackOffset); fputs("\tpopq %rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. 
int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadglob(int id, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it switch (Symtable[id].type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", Symtable[id].name); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", Symtable[id].name); fprintf(Outfile, "\tmovzbq\t%s(%%rip), %s\n", Symtable[id].name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", Symtable[id].name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", Symtable[id].name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", Symtable[id].name); if (op == A_PREDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", Symtable[id].name); fprintf(Outfile, "\tmovslq\t%s(%%rip), %s\n", Symtable[id].name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", Symtable[id].name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", Symtable[id].name); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: if (op == A_PREINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", Symtable[id].name); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", Symtable[id].name); fprintf(Outfile, "\tmovq\t%s(%%rip), %s\n", Symtable[id].name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", Symtable[id].name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", Symtable[id].name); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. 
If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadlocal(int id, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it switch (Symtable[id].type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", Symtable[id].posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", Symtable[id].posn); fprintf(Outfile, "\tmovzbq\t%d(%%rbp), %s\n", Symtable[id].posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", Symtable[id].posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", Symtable[id].posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", Symtable[id].posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", Symtable[id].posn); fprintf(Outfile, "\tmovslq\t%d(%%rbp), %s\n", Symtable[id].posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", Symtable[id].posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", Symtable[id].posn); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: if (op == A_PREINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", Symtable[id].posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", Symtable[id].posn); fprintf(Outfile, "\tmovq\t%d(%%rbp), %s\n", Symtable[id].posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", Symtable[id].posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", Symtable[id].posn); break; default: fatald("Bad type in cgloadlocal:", Symtable[id].type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int id) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tleaq\tL%d(%%rip), %s\n", id, reglist[r]); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, 
"\taddq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tandq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\torq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txorq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshlq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshrq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tnegq\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnotq\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, 
"\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); return (r); } // Convert an integer value to a boolean value. Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); } return (r); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { // Get a new register int outr = alloc_register(); fprintf(Outfile, "\tmovq\t%s, %%rdi\n", reglist[r]); fprintf(Outfile, "\tcall\t%s\n", Symtable[id].name); fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]); free_register(r); return (outr); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsalq\t$%d, %s\n", val, reglist[r]); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %s(%%rip)\n", breglist[r], Symtable[id].name); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %s(%%rip)\n", dreglist[r], Symtable[id].name); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tmovq\t%s, %s(%%rip)\n", reglist[r], Symtable[id].name); break; default: fatald("Bad type in cgstorglob:", Symtable[id].type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, int id) { switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %d(%%rbp)\n", breglist[r], Symtable[id].posn); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %d(%%rbp)\n", dreglist[r], Symtable[id].posn); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tmovq\t%s, %d(%%rbp)\n", reglist[r], Symtable[id].posn); break; default: fatald("Bad type in cgstorlocal:", 
Symtable[id].type); } return (r); } // Array of type sizes in P_XXX order. // 0 means no size. static int psize[] = { 0, 0, 1, 4, 8, 8, 8, 8, 8 }; // Given a P_XXX type value, return the // size of a primitive type in bytes. int cgprimsize(int type) { // Check the type is valid if (type < P_NONE || type > P_LONGPTR) fatal("Bad type in cgprimsize()"); return (psize[type]); } // Generate a global symbol but not functions void cgglobsym(int id) { int typesize; if (Symtable[id].stype == S_FUNCTION) return; // Get the size of the type typesize = cgprimsize(Symtable[id].type); // Generate the global identity and the label cgdataseg(); fprintf(Outfile, "\t.globl\t%s\n", Symtable[id].name); fprintf(Outfile, "%s:", Symtable[id].name); // Generate the space for (int i = 0; i < Symtable[id].size; i++) { switch (typesize) { case 1: fprintf(Outfile, "\t.byte\t0\n"); break; case 4: fprintf(Outfile, "\t.long\t0\n"); break; case 8: fprintf(Outfile, "\t.quad\t0\n"); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\t.byte\t%d\n", *cptr); } fprintf(Outfile, "\t.byte\t0\n"); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { // Generate code depending on the function's type switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzbl\t%s, %%eax\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %%eax\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", Symtable[id].type); } cgjump(Symtable[id].endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(int id) { int r = alloc_register(); if (Symtable[id].class == C_LOCAL) fprintf(Outfile, "\tleaq\t%d(%%rbp), %s\n", Symtable[id].posn, reglist[r]); else fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", Symtable[id].name, reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tmovzbq\t(%s), %s\n", reglist[r], reglist[r]); break; case P_INTPTR: fprintf(Outfile, "\tmovslq\t(%s), %s\n", reglist[r], reglist[r]); break; case P_LONGPTR: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, (%s)\n", breglist[r1], reglist[r2]); break; case P_INT: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 24_Function_Params/cg_arm.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for ARMv6 on Raspberry Pi // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. static int freereg[4]; static char *reglist[4] = { "r4", "r5", "r6", "r7" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. 
static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // We have to store large integer literal values in memory. // Keep a list of them which will be output in the postamble #define MAXINTS 1024 int Intlist[MAXINTS]; static int Intslot = 0; // Determine the offset of a large integer // literal from the .L3 label. If the integer // isn't in the list, add it. static void set_int_offset(int val) { int offset = -1; // See if it is already there for (int i = 0; i < Intslot; i++) { if (Intlist[i] == val) { offset = 4 * i; break; } } // Not in the list, so add it if (offset == -1) { offset = 4 * Intslot; if (Intslot == MAXINTS) fatal("Out of int slots in set_int_offset()"); Intlist[Intslot++] = val; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L3+%d\n", offset); } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Print out the assembly postamble void cgpostamble() { // Print out the global variables fprintf(Outfile, ".L2:\n"); for (int i = 0; i < Globs; i++) { if (Symtable[i].stype == S_VARIABLE) fprintf(Outfile, "\t.word %s\n", Symtable[i].name); } // Print out the integer literals fprintf(Outfile, ".L3:\n"); for (int i = 0; i < Intslot; i++) { fprintf(Outfile, "\t.word %d\n", Intlist[i]); } } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, \%%function\n" "%s:\n" "\tpush\t{fp, lr}\n" "\tadd\tfp, sp, #4\n" "\tsub\tsp, sp, #8\n" "\tstr\tr0, [fp, #-8]\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { 
cglabel(Symtable[id].endlabel); fputs("\tsub\tsp, fp, #4\n" "\tpop\t{fp, pc}\n" "\t.align\t2\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); // If the literal value is small, do it with one instruction if (value <= 1000) fprintf(Outfile, "\tmov\t%s, #%d\n", reglist[r], value); else { set_int_offset(value); fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); } return (r); } // Determine the offset of a variable from the .L2 // label. Yes, this is inefficient code. static void set_var_offset(int id) { int offset = 0; // Walk the symbol table up to id. // Find S_VARIABLEs and add on 4 until // we get to our variable for (int i = 0; i < id; i++) { if (Symtable[i].stype == S_VARIABLE) offset += 4; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L2+%d\n", offset); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tldrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s, %s\n", reglist[r1], reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register 
with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\tmul\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { // To do a divide: r0 holds the dividend, r1 holds the divisor. // The quotient is in r0. fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r1]); fprintf(Outfile, "\tmov\tr1, %s\n", reglist[r2]); fprintf(Outfile, "\tbl\t__aeabi_idiv\n"); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r1]); free_register(r2); return (r1); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]); fprintf(Outfile, "\tbl\t%s\n", Symtable[id].name); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r]); return (r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tlsl\t%s, %s, #%d\n", reglist[r], reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tstr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgstorglob:", Symtable[id].type); } return (r); } // Array of type sizes in P_XXX order. // 0 means no size. static int psize[] = { 0, 0, 1, 4, 4, 4, 4, 4 }; // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { // Check the type is valid if (type < P_NONE || type > P_LONGPTR) fatal("Bad type in cgprimsize()"); return (psize[type]); } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Symtable[id].type); fprintf(Outfile, "\t.data\n" "\t.globl\t%s\n", Symtable[id].name); switch (typesize) { case 1: fprintf(Outfile, "%s:\t.byte\t0\n", Symtable[id].name); break; case 4: fprintf(Outfile, "%s:\t.long\t0\n", Symtable[id].name); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "moveq", "movne", "movlt", "movgt", "movle", "movge" }; // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "movne", "moveq", "movge", "movle", "movgt", "movlt" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #1\n", cmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #0\n", invcmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\tuxtb\t%s, %s\n", reglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tb\tL%d\n", l); } // List of inverted branch instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *brlist[] = { "bne", "beq", "bge", "ble", "bgt", "blt" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", brlist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[reg]); cgjump(Symtable[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. Return a new register int cgaddress(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); fprintf(Outfile, "\tmov\t%s, r3\n", reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tldrb\t%s, [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [%s]\n", reglist[r1], reglist[r2]); break; case P_INT: case P_LONG: fprintf(Outfile, "\tstr\t%s, [%s]\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 24_Function_Params/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { 
no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\tsection .text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\tsection .data\n", Outfile); currSeg = data_seg; } } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. // We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "r10", "r11", "r12", "r13", "r9", "r8", "rcx", "rdx", "rsi", "rdi" }; static char *breglist[] = { "r10b", "r11b", "r12b", "r13b", "r9b", "r8b", "cl", "dl", "sil", "dil" }; static char *dreglist[] = { "r10d", "r11d", "r12d", "r13d", "r9d", "r8d", "ecx", "edx", "esi", "edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\textern\tprintint\n", Outfile); fputs("\textern\tprintchar\n", Outfile); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; int i; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the rsp and rbp fprintf(Outfile, "\tglobal\t%s\n" "%s:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n", name, name); // Copy any in-register parameters to the stack // Stop after no more than six parameter registers for (i = NSYMBOLS - 1; i > Locls; i--) { if (Symtable[i].class != C_PARAM) break; if (i < NSYMBOLS - 6) break; Symtable[i].posn = newlocaloffset(Symtable[i].type); cgstorlocal(paramReg--, i); } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. for (; i > Locls; i--) { if (Symtable[i].class == C_PARAM) { Symtable[i].posn = paramOffset; paramOffset += 8; } else { Symtable[i].posn = newlocaloffset(Symtable[i].type); } } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\tadd\trsp, %d\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Symtable[id].endlabel); fprintf(Outfile, "\tadd\trsp, %d\n", stackOffset); fputs("\tpop rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. 
int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadglob(int id, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it switch (Symtable[id].type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", Symtable[id].name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", Symtable[id].name); fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], Symtable[id].name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", Symtable[id].name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", Symtable[id].name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword [%s]\n", Symtable[id].name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tdword [%s]\n", Symtable[id].name); fprintf(Outfile, "\tmovsx\t%s, word [%s]\n", dreglist[r], Symtable[id].name); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword [%s]\n", Symtable[id].name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword [%s]\n", Symtable[id].name); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword [%s]\n", Symtable[id].name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", Symtable[id].name); fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], Symtable[id].name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword [%s]\n", Symtable[id].name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", Symtable[id].name); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Load a value from a local variable into a register. 
// Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadlocal(int id, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it switch (Symtable[id].type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", Symtable[id].posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", Symtable[id].posn); fprintf(Outfile, "\tmovzx\t%s, byte [rbp+%d]\n", reglist[r], Symtable[id].posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", Symtable[id].posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", Symtable[id].posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", Symtable[id].posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", Symtable[id].posn); fprintf(Outfile, "\tmovsx\t%s, word [rbp+%d]\n", reglist[r], Symtable[id].posn); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", Symtable[id].posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", Symtable[id].posn); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", Symtable[id].posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", Symtable[id].posn); fprintf(Outfile, "\tmov\t%s, [rbp+%d]\n", reglist[r], Symtable[id].posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", Symtable[id].posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", Symtable[id].posn); break; default: fatald("Bad type in cgloadlocal:", Symtable[id].type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int id) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, 
L%d\n", reglist[r], id); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tand\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\tor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshl\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshr\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tneg\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnot\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int 
cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r], breglist[r]); return (r); } // Convert an integer value to a boolean value. Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, byte %s\n", reglist[r], breglist[r]); } return (r); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { // Get a new register int outr = alloc_register(); fprintf(Outfile, "\tmov\trdi, %s\n", reglist[r]); fprintf(Outfile, "\tcall\t%s\n", Symtable[id].name); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[outr]); free_register(r); return (outr); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsal\t%s, %d\n", reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], %s\n", Symtable[id].name, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", Symtable[id].name, dreglist[r]); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tmov\t[%s], %s\n", Symtable[id].name, reglist[r]); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, int id) { switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tmov\tbyte\t[rbp+%d], %s\n", Symtable[id].posn, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\tdword\t[rbp+%d], %s\n", Symtable[id].posn, dreglist[r]); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: 
fprintf(Outfile, "\tmov\tqword\t[rbp+%d], %s\n", Symtable[id].posn, reglist[r]); break; default: fatald("Bad type in cgstorlocal:", Symtable[id].type); } return (r); } // Array of type sizes in P_XXX order. // 0 means no size. static int psize[] = { 0, 0, 1, 4, 8, 8, 8, 8, 8 }; // Given a P_XXX type value, return the // size of a primitive type in bytes. int cgprimsize(int type) { // Check the type is valid if (type < P_NONE || type > P_LONGPTR) fatal("Bad type in cgprimsize()"); return (psize[type]); } // Generate a global symbol but not functions void cgglobsym(int id) { int typesize; if (Symtable[id].stype == S_FUNCTION) return; // Get the size of the type typesize = cgprimsize(Symtable[id].type); // Generate the global identity and the label cgdataseg(); fprintf(Outfile, "\tsection\t.data\n" "\tglobal\t%s\n", Symtable[id].name); fprintf(Outfile, "%s:", Symtable[id].name); // Generate the space // original version for (int i = 0; i < Symtable[id].size; i++) { switch(typesize) { case 1: fprintf(Outfile, "\tdb\t0\n"); break; case 4: fprintf(Outfile, "\tdd\t0\n"); break; case 8: fprintf(Outfile, "\tdq\t0\n"); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } /* compact version using times instead of loop switch(typesize) { case 1: fprintf(Outfile, "\ttimes\t%d\tdb\t0\n", Symtable[id].size); break; case 4: fprintf(Outfile, "\ttimes\t%d\tdd\t0\n", Symtable[id].size); break; case 8: fprintf(Outfile, "\ttimes\t%d\tdq\t0\n", Symtable[id].size); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } */ } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\tdb\t%d\n", *cptr); } fprintf(Outfile, "\tdb\t0\n"); /* put string in readable format on a single line // probably went overboard with error checking int comma = 0, quote = 0, start = 1; fprintf(Outfile, "\tdb\t"); for (cptr=strvalue; *cptr; cptr++) { if ( ! 
isprint(*cptr) ) if (comma || start) { fprintf(Outfile, "%d, ", *cptr); start = 0; comma = 1; } else if (quote) { fprintf(Outfile, "\', %d, ", *cptr); comma = 1; quote = 0; } else { fprintf(Outfile, "%d, ", *cptr); comma = 1; quote = 0; } else if (start || comma) { fprintf(Outfile, "\'%c", *cptr); start = comma = 0; quote = 1; } else { fprintf(Outfile, "%c", *cptr); comma = 0; quote = 1; } } if (comma || start) fprintf(Outfile, "0\n"); else fprintf(Outfile, "\', 0\n"); */ } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { // Generate code depending on the function's type switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzx\teax, %s\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmov\teax, %s\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", Symtable[id].type); } cgjump(Symtable[id].endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(int id) { int r = alloc_register(); if (Symtable[id].class == C_LOCAL) fprintf(Outfile, "\tlea\t%s, [rbp+%d]\n", reglist[r], Symtable[id].posn); else fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r], Symtable[id].name); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: fprintf(Outfile, "\tmovsx\t%s, dword [%s]\n", reglist[r], reglist[r]); break; case P_LONGPTR: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], byte %s\n", reglist[r2], breglist[r1]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; case P_LONG: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 24_Function_Params/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Putback; // Character put back by scanner extern_ int Functionid; // Symbol id of the current function extern_ int Globs; // Position of next free global symbol slot extern_ int Locls; // Position of next free local symbol slot extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ struct token Token; // Last token scanned extern_ char Text[TEXTLEN + 1]; // Last identifier scanned extern_ struct symtable Symtable[NSYMBOLS]; // Global symbol table extern_ int O_dumpAST; 
================================================ FILE: 24_Function_Params/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 // Parse the current token and return // a primitive type enum value. Also // scan in the next token int parse_type(void) { int type; switch (Token.token) { case T_VOID: type = P_VOID; break; case T_CHAR: type = P_CHAR; break; case T_INT: type = P_INT; break; case T_LONG: type = P_LONG; break; default: fatald("Illegal type, token", Token.token); } // Scan in one or more further '*' tokens // and determine the correct pointer type while (1) { scan(&Token); if (Token.token != T_STAR) break; type = pointer_to(type); } // We leave with the next token already scanned return (type); } // variable_declaration: type identifier ';' // | type identifier '[' INTLIT ']' ';' // ; // // Parse the declaration of a scalar variable or an array // with a given size. // The identifier has been scanned & we have the type // islocal is set if this is a local variable // isparam is set if this local variable is a function parameter void var_declaration(int type, int islocal, int isparam) { // Text now has the identifier's name. // If the next token is a '[' if (Token.token == T_LBRACKET) { // Skip past the '[' scan(&Token); // Check we have an array size if (Token.token == T_INTLIT) { // Add this as a known array and generate its space in assembly. 
// We treat the array as a pointer to its elements' type if (islocal) { fatal("For now, declaration of local arrays is not implemented"); } else { addglob(Text, pointer_to(type), S_ARRAY, 0, Token.intvalue); } } // Ensure we have a following ']' scan(&Token); match(T_RBRACKET, "]"); } else { // Add this as a known scalar // and generate its space in assembly if (islocal) { if (addlocl(Text, type, S_VARIABLE, isparam, 1)==-1) fatals("Duplicate local variable declaration", Text); } else { addglob(Text, type, S_VARIABLE, 0, 1); } } } // param_declaration: // | variable_declaration // | variable_declaration ',' param_declaration // // Parse the parameters in parentheses after the function name. // Add them as symbols to the symbol table and return the number // of parameters. static int param_declaration(void) { int type; int paramcnt=0; // Loop until the final right parentheses while (Token.token != T_RPAREN) { // Get the type and identifier // and add it to the symbol table type = parse_type(); ident(); var_declaration(type, 1, 1); paramcnt++; // Must have a ',' or ')' at this point switch (Token.token) { case T_COMMA: scan(&Token); break; case T_RPAREN: break; default: fatald("Unexpected token in parameter list", Token.token); } } // Return the count of parameters return(paramcnt); } // // function_declaration: type identifier '(' ')' compound_statement ; // // Parse the declaration of a simplistic function. // The identifier has been scanned & we have the type struct ASTnode *function_declaration(int type) { struct ASTnode *tree, *finalstmt; int nameslot, endlabel, paramcnt; // Text now has the identifier's name. 
// Get a label-id for the end label, add the function // to the symbol table, and set the Functionid global // to the function's symbol-id endlabel = genlabel(); nameslot = addglob(Text, type, S_FUNCTION, endlabel, 0); Functionid = nameslot; // Scan in the parentheses and any parameters // Update the function symbol entry with the number of parameters lparen(); paramcnt= param_declaration(); Symtable[nameslot].nelems= paramcnt; rparen(); // Get the AST tree for the compound statement tree = compound_statement(); // If the function type isn't P_VOID .. if (type != P_VOID) { // Error if no statements in the function if (tree == NULL) fatal("No statements in function with non-void type"); // Check that the last AST operation in the // compound statement was a return statement finalstmt = (tree->op == A_GLUE) ? tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); } // Return an A_FUNCTION node which has the function's nameslot // and the compound statement sub-tree return (mkastunary(A_FUNCTION, type, tree, nameslot)); } // Parse one or more global declarations, either // variables or functions void global_declarations(void) { struct ASTnode *tree; int type; while (1) { // We have to read past the type and identifier // to see either a '(' for a function declaration // or a ',' or ';' for a variable declaration. // Text is filled in by the ident() call. 
type = parse_type(); ident(); if (Token.token == T_LPAREN) { // Parse the function declaration and // generate the assembly code for it tree = function_declaration(type); if (O_dumpAST) { dumpAST(tree, NOLABEL, 0); fprintf(stdout, "\n\n"); } genAST(tree, NOLABEL, 0); // Now free the symbols associated // with this function freeloclsyms(); } else { // Parse the global variable declaration // and skip past the trailing semicolon var_declaration(type, 0, 0); semi(); } // Stop when we have reached EOF if (Token.token == T_EOF) break; } } ================================================ FILE: 24_Function_Params/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c void reject_token(struct token *t); int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, int intvalue); struct ASTnode *mkastleaf(int op, int type, int intvalue); struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, int intvalue); void dumpAST(struct ASTnode *n, int label, int parentASTop); // gen.c int genlabel(void); int genAST(struct ASTnode *n, int reg, int parentASTop); void genpreamble(); void genpostamble(); void genfreeregs(); void genglobsym(int id); int genglobstr(char *strvalue); int genprimsize(int type); void genreturn(int reg, int id); // cg.c void cgtextseg(); void cgdataseg(); void freeall_registers(void); void cgpreamble(); void cgpostamble(); void cgfuncpreamble(int id); void cgfuncpostamble(int id); int cgloadint(int value, int type); int cgloadglob(int id, int op); int cgloadlocal(int id, int op); int cgloadglobstr(int id); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdiv(int r1, int r2); int cgshlconst(int r, int val); int cgcall(int r, int id); int cgstorglob(int r, int id); int cgstorlocal(int r, int id); void cgglobsym(int id); void 
cgglobstr(int l, char *strvalue); int cgcompare_and_set(int ASTop, int r1, int r2); int cgcompare_and_jump(int ASTop, int r1, int r2, int label); void cglabel(int l); void cgjump(int l); int cgwiden(int r, int oldtype, int newtype); int cgprimsize(int type); void cgreturn(int reg, int id); int cgaddress(int id); int cgderef(int r, int type); int cgstorderef(int r1, int r2, int type); int cgnegate(int r); int cginvert(int r); int cglognot(int r); int cgboolean(int r, int op, int label); int cgand(int r1, int r2); int cgor(int r1, int r2); int cgxor(int r1, int r2); int cgshl(int r1, int r2); int cgshr(int r1, int r2); // expr.c struct ASTnode *binexpr(int ptp); // stmt.c struct ASTnode *compound_statement(void); // misc.c void match(int t, char *what); void semi(void); void lbrace(void); void rbrace(void); void lparen(void); void rparen(void); void ident(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c int findglob(char *s); int findlocl(char *s); int findsymbol(char *s); int addglob(char *name, int type, int stype, int endlabel, int size); int addlocl(char *name, int type, int stype, int isparam, int size); void freeloclsyms(void); // decl.c void var_declaration(int type, int islocal, int isparam); struct ASTnode *function_declaration(int type); void global_declarations(void); // types.c int inttype(int type); int parse_type(void); int pointer_to(int type); int value_at(int type); struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op); ================================================ FILE: 24_Function_Params/defs.h ================================================ #include #include #include #include // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 #define TEXTLEN 512 // Length of symbols in input #define NSYMBOLS 1024 // Number of symbol table entries // Token types enum { T_EOF, // Binary operators T_ASSIGN, T_LOGOR, T_LOGAND, T_OR, T_XOR, 
T_AMPER, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, T_LSHIFT, T_RSHIFT, T_PLUS, T_MINUS, T_STAR, T_SLASH, // Other operators T_INC, T_DEC, T_INVERT, T_LOGNOT, // Type keywords T_VOID, T_CHAR, T_INT, T_LONG, // Other keywords T_IF, T_ELSE, T_WHILE, T_FOR, T_RETURN, // Structural tokens T_INTLIT, T_STRLIT, T_SEMI, T_IDENT, T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, T_LBRACKET, T_RBRACKET, T_COMMA }; // Token structure struct token { int token; // Token type, from the enum list above int intvalue; // For T_INTLIT, the integer value }; // AST node types. The first few line up // with the related tokens enum { A_ASSIGN= 1, A_LOGOR, A_LOGAND, A_OR, A_XOR, A_AND, A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_LSHIFT, A_RSHIFT, A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_INTLIT, A_STRLIT, A_IDENT, A_GLUE, A_IF, A_WHILE, A_FUNCTION, A_WIDEN, A_RETURN, A_FUNCCALL, A_DEREF, A_ADDR, A_SCALE, A_PREINC, A_PREDEC, A_POSTINC, A_POSTDEC, A_NEGATE, A_INVERT, A_LOGNOT, A_TOBOOL }; // Primitive types enum { P_NONE, P_VOID, P_CHAR, P_INT, P_LONG, P_VOIDPTR, P_CHARPTR, P_INTPTR, P_LONGPTR }; // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates int rvalue; // True if the node is an rvalue struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; union { // For A_INTLIT, the integer value int intvalue; // For A_IDENT, the symbol slot number int id; // For A_FUNCTION, the symbol slot number int size; // For A_SCALE, the size to scale by } v; // For A_FUNCCALL, the symbol slot number }; #define NOREG -1 // Use NOREG when the AST generation // functions have no register to return #define NOLABEL 0 // Use NOLABEL when we have no label to // pass to genAST() // Structural types enum { S_VARIABLE, S_FUNCTION, S_ARRAY }; // Storage classes enum { C_GLOBAL = 1, // Globally visible symbol C_LOCAL, // Locally visible symbol C_PARAM // Locally visible function parameter 
}; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol int stype; // Structural type for the symbol int class; // Storage class for the symbol int endlabel; // For S_FUNCTIONs, the end label int size; // Number of elements in the symbol int posn; // For locals, either the negative offset // from stack base pointer, or register id #define nelems posn // For functions, # of params // For structs, # of fields }; ================================================ FILE: 24_Function_Params/expr.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of expressions // Copyright (c) 2019 Warren Toomey, GPL3 // Parse a function call with a single expression // argument and return its AST static struct ASTnode *funccall(void) { struct ASTnode *tree; int id; // Check that the identifier has been defined as a function, // then make a leaf node for it. if ((id = findsymbol(Text)) == -1 || Symtable[id].stype != S_FUNCTION) { fatals("Undeclared function", Text); } // Get the '(' lparen(); // Parse the following expression tree = binexpr(0); // Build the function call AST node. Store the // function's return type as this node's type. 
// Also record the function's symbol-id tree = mkastunary(A_FUNCCALL, Symtable[id].type, tree, id); // Get the ')' rparen(); return (tree); } // Parse the index into an array and // return an AST tree for it static struct ASTnode *array_access(void) { struct ASTnode *left, *right; int id; // Check that the identifier has been defined as an array // then make a leaf node for it that points at the base if ((id = findsymbol(Text)) == -1 || Symtable[id].stype != S_ARRAY) { fatals("Undeclared array", Text); } left = mkastleaf(A_ADDR, Symtable[id].type, id); // Get the '[' scan(&Token); // Parse the following expression right = binexpr(0); // Get the ']' match(T_RBRACKET, "]"); // Ensure that this is of int type if (!inttype(right->type)) fatal("Array index is not of integer type"); // Scale the index by the size of the element's type right = modify_type(right, left->type, A_ADD); // Return an AST tree where the array's base has the offset // added to it, and dereference the element. Still an lvalue // at this point. left = mkastnode(A_ADD, Symtable[id].type, left, NULL, right, 0); left = mkastunary(A_DEREF, value_at(left->type), left, 0); return (left); } // Parse a postfix expression and return // an AST node representing it. The // identifier is already in Text. static struct ASTnode *postfix(void) { struct ASTnode *n; int id; // Scan in the next token to see if we have a postfix expression scan(&Token); // Function call if (Token.token == T_LPAREN) return (funccall()); // An array reference if (Token.token == T_LBRACKET) return (array_access()); // A variable. Check that the variable exists. 
id = findsymbol(Text); if (id == -1 || Symtable[id].stype != S_VARIABLE) fatals("Unknown variable", Text); switch (Token.token) { // Post-increment: skip over the token case T_INC: scan(&Token); n = mkastleaf(A_POSTINC, Symtable[id].type, id); break; // Post-decrement: skip over the token case T_DEC: scan(&Token); n = mkastleaf(A_POSTDEC, Symtable[id].type, id); break; // Just a variable reference default: n = mkastleaf(A_IDENT, Symtable[id].type, id); } return (n); } // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; int id; switch (Token.token) { case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. // Make it a P_CHAR if it's within the P_CHAR range if ((Token.intvalue) >= 0 && (Token.intvalue < 256)) n = mkastleaf(A_INTLIT, P_CHAR, Token.intvalue); else n = mkastleaf(A_INTLIT, P_INT, Token.intvalue); break; case T_STRLIT: // For a STRLIT token, generate the assembly for it. // Then make a leaf AST node for it. id is the string's label. id = genglobstr(Text); n = mkastleaf(A_STRLIT, P_CHARPTR, id); break; case T_IDENT: return (postfix()); case T_LPAREN: // Beginning of a parenthesised expression, skip the '('. // Scan in the expression and the right parenthesis scan(&Token); n = binexpr(0); rparen(); return (n); default: fatald("Expecting a primary expression, got token", Token.token); } // Scan in the next token and return the leaf node scan(&Token); return (n); } // Convert a binary operator token into a binary AST operation. // We rely on a 1:1 mapping from token to AST operation static int binastop(int tokentype) { if (tokentype > T_EOF && tokentype <= T_SLASH) return (tokentype); fatald("Syntax error, token", tokentype); return (0); // Keep -Wall happy } // Return true if a token is right-associative, // false otherwise. static int rightassoc(int tokentype) { if (tokentype == T_ASSIGN) return (1); return (0); } // Operator precedence for each token. 
Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 20, 30, // T_EOF, T_ASSIGN, T_LOGOR, T_LOGAND 40, 50, 60, // T_OR, T_XOR, T_AMPER 70, 70, // T_EQ, T_NE 80, 80, 80, 80, // T_LT, T_GT, T_LE, T_GE 90, 90, // T_LSHIFT, T_RSHIFT 100, 100, // T_PLUS, T_MINUS 110, 110 // T_STAR, T_SLASH }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec; if (tokentype > T_SLASH) fatald("Token with no precedence in op_precedence:", tokentype); prec = OpPrec[tokentype]; if (prec == 0) fatald("Syntax error, token", tokentype); return (prec); } // prefix_expression: primary // | '*' prefix_expression // | '&' prefix_expression // | '-' prefix_expression // | '++' prefix_expression // | '--' prefix_expression // ; // Parse a prefix expression and return // a sub-tree representing it. struct ASTnode *prefix(void) { struct ASTnode *tree; switch (Token.token) { case T_AMPER: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Ensure that it's an identifier if (tree->op != A_IDENT) fatal("& operator must be followed by an identifier"); // Now change the operator to A_ADDR and the type to // a pointer to the original type tree->op = A_ADDR; tree->type = pointer_to(tree->type); break; case T_STAR: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's either another deref or an // identifier if (tree->op != A_IDENT && tree->op != A_DEREF) fatal("* operator must be followed by an identifier or *"); // Prepend an A_DEREF operation to the tree tree = mkastunary(A_DEREF, value_at(tree->type), tree, 0); break; case T_MINUS: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_NEGATE operation to the tree and // make the child an rvalue. 
Because chars are unsigned, // also widen this to int so that it's signed tree->rvalue = 1; tree = modify_type(tree, P_INT, 0); tree = mkastunary(A_NEGATE, tree->type, tree, 0); break; case T_INVERT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_INVERT operation to the tree and // make the child an rvalue. tree->rvalue = 1; tree = mkastunary(A_INVERT, tree->type, tree, 0); break; case T_LOGNOT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_LOGNOT operation to the tree and // make the child an rvalue. tree->rvalue = 1; tree = mkastunary(A_LOGNOT, tree->type, tree, 0); break; case T_INC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("++ operator must be followed by an identifier"); // Prepend an A_PREINC operation to the tree tree = mkastunary(A_PREINC, tree->type, tree, 0); break; case T_DEC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("-- operator must be followed by an identifier"); // Prepend an A_PREDEC operation to the tree tree = mkastunary(A_PREDEC, tree->type, tree, 0); break; default: tree = primary(); } return (tree); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; struct ASTnode *ltemp, *rtemp; int ASTop; int tokentype; // Get the tree on the left. // Fetch the next token at the same time. 
left = prefix(); // If we hit a semicolon or ')', return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET) { left->rvalue = 1; return (left); } // While the precedence of this token is more than that of the // previous token precedence, or it's right associative and // equal to the previous token's precedence while ((op_precedence(tokentype) > ptp) || (rightassoc(tokentype) && op_precedence(tokentype) == ptp)) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Determine the operation to be performed on the sub-trees ASTop = binastop(tokentype); if (ASTop == A_ASSIGN) { // Assignment // Make the right tree into an rvalue right->rvalue = 1; // Ensure the right's type matches the left right = modify_type(right, left->type, 0); if (left == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree. However, switch // left and right around, so that the right expression's // code will be generated before the left expression ltemp = left; left = right; right = ltemp; } else { // We are not doing an assignment, so both trees should be rvalues // Convert both trees into rvalue if they are lvalue trees left->rvalue = 1; right->rvalue = 1; // Ensure the two types are compatible by trying // to modify each tree to match the other's type. ltemp = modify_type(left, right->type, ASTop); rtemp = modify_type(right, left->type, ASTop); if (ltemp == NULL && rtemp == NULL) fatal("Incompatible types in binary expression"); if (ltemp != NULL) left = ltemp; if (rtemp != NULL) right = rtemp; } // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. left = mkastnode(binastop(tokentype), left->type, left, NULL, right, 0); // Update the details of the current token. 
// If we hit a semicolon, ')' or ']', return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET) { left->rvalue = 1; return (left); } } // Return the tree we have when the precedence // is the same or lower left->rvalue = 1; return (left); } ================================================ FILE: 24_Function_Params/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number int genlabel(void) { static int id = 1; return (id++); } // Generate the code for an IF statement // and an optional ELSE clause static int genIF(struct ASTnode *n) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. // When there is no ELSE clause, Lfalse _is_ // the ending label! Lfalse = genlabel(); if (n->right) Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. genAST(n->left, Lfalse, n->op); genfreeregs(); // Generate the true compound statement genAST(n->mid, NOLABEL, n->op); genfreeregs(); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOLABEL, n->op); genfreeregs(); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = genlabel(); Lend = genlabel(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. 
genAST(n->left, Lend, n->op); genfreeregs(); // Generate the compound statement for the body genAST(n->right, NOLABEL, n->op); genfreeregs(); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Given an AST, an optional label, and the AST op // of the parent, generate assembly code recursively. // Return the register id with the tree's final value. int genAST(struct ASTnode *n, int label, int parentASTop) { int leftreg, rightreg; // We have some specific AST node handling at the top // so that we don't evaluate the child sub-trees immediately switch (n->op) { case A_IF: return (genIF(n)); case A_WHILE: return (genWHILE(n)); case A_GLUE: // Do each child statement, and free the // registers after each child genAST(n->left, NOLABEL, n->op); genfreeregs(); genAST(n->right, NOLABEL, n->op); genfreeregs(); return (NOREG); case A_FUNCTION: // Generate the function's preamble before the code // in the child sub-tree cgfuncpreamble(n->v.id); genAST(n->left, NOLABEL, n->op); cgfuncpostamble(n->v.id); return (NOREG); } // General AST node handling below // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, NOLABEL, n->op); if (n->right) rightreg = genAST(n->right, NOLABEL, n->op); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdiv(leftreg, rightreg)); case A_AND: return (cgand(leftreg, rightreg)); case A_OR: return (cgor(leftreg, rightreg)); case A_XOR: return (cgxor(leftreg, rightreg)); case A_LSHIFT: return (cgshl(leftreg, rightreg)); case A_RSHIFT: return (cgshr(leftreg, rightreg)); case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, compare registers // and set one to 1 or 0 based on the comparison. 
if (parentASTop == A_IF || parentASTop == A_WHILE) return (cgcompare_and_jump(n->op, leftreg, rightreg, label)); else return (cgcompare_and_set(n->op, leftreg, rightreg)); case A_INTLIT: return (cgloadint(n->v.intvalue, n->type)); case A_STRLIT: return (cgloadglobstr(n->v.id)); case A_IDENT: // Load our value if we are an rvalue // or we are being dereferenced if (n->rvalue || parentASTop == A_DEREF) { if (Symtable[n->v.id].class == C_GLOBAL) { return (cgloadglob(n->v.id, n->op)); } else { return (cgloadlocal(n->v.id, n->op)); } } else return (NOREG); case A_ASSIGN: // Are we assigning to an identifier or through a pointer? switch (n->right->op) { case A_IDENT: if (Symtable[n->right->v.id].class == C_GLOBAL) return (cgstorglob(leftreg, n->right->v.id)); else return (cgstorlocal(leftreg, n->right->v.id)); case A_DEREF: return (cgstorderef(leftreg, rightreg, n->right->type)); default: fatald("Can't A_ASSIGN in genAST(), op", n->op); } case A_WIDEN: // Widen the child's type to the parent's type return (cgwiden(leftreg, n->left->type, n->type)); case A_RETURN: cgreturn(leftreg, Functionid); return (NOREG); case A_FUNCCALL: return (cgcall(leftreg, n->v.id)); case A_ADDR: return (cgaddress(n->v.id)); case A_DEREF: // If we are an rvalue, dereference to get the value we point at, // otherwise leave it for A_ASSIGN to store through the pointer if (n->rvalue) return (cgderef(leftreg, n->left->type)); else return (leftreg); case A_SCALE: // Small optimisation: use shift if the // scale value is a known power of two switch (n->v.size) { case 2: return (cgshlconst(leftreg, 1)); case 4: return (cgshlconst(leftreg, 2)); case 8: return (cgshlconst(leftreg, 3)); default: // Load a register with the size and // multiply the leftreg by this size rightreg = cgloadint(n->v.size, P_INT); return (cgmul(leftreg, rightreg)); } case A_POSTINC: // Load the variable's value into a register, // then increment it return (cgloadglob(n->v.id, n->op)); case A_POSTDEC: // Load the variable's 
value into a register, // then decrement it return (cgloadglob(n->v.id, n->op)); case A_PREINC: // Load and increment the variable's value into a register return (cgloadglob(n->left->v.id, n->op)); case A_PREDEC: // Load and decrement the variable's value into a register return (cgloadglob(n->left->v.id, n->op)); case A_NEGATE: return (cgnegate(leftreg)); case A_INVERT: return (cginvert(leftreg)); case A_LOGNOT: return (cglognot(leftreg)); case A_TOBOOL: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, set the register // to 0 or 1 based on it's zeroeness or non-zeroeness return (cgboolean(leftreg, parentASTop, label)); default: fatald("Unknown AST operator", n->op); } return (NOREG); // Keep -Wall happy } void genpreamble() { cgpreamble(); } void genpostamble() { cgpostamble(); } void genfreeregs() { freeall_registers(); } void genglobsym(int id) { cgglobsym(id); } int genglobstr(char *strvalue) { int l = genlabel(); cgglobstr(l, strvalue); return (l); } int genprimsize(int type) { return (cgprimsize(type)); } ================================================ FILE: 24_Function_Params/input27a.c ================================================ int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printint(a); printint(b); printint(c); printint(d); printint(e); printint(f); printint(g); printint(h); return(0); } int param5(int a, int b, int c, int d, int e) { printint(a); printint(b); printint(c); printint(d); printint(e); return(0); } int param2(int a, int b) { int c; int d; int e; c= 3; d= 4; e= 5; printint(a); printint(b); printint(c); printint(d); printint(e); return(0); } int param0() { int a; int b; int c; int d; int e; a= 1; b= 2; c= 3; d= 4; e= 5; printint(a); printint(b); printint(c); printint(d); printint(e); return(0); } ================================================ FILE: 24_Function_Params/input27b.c ================================================ #include extern int param8(int a, int 
b, int c, int d, int e, int f, int g, int h); extern int param5(int a, int b, int c, int d, int e); extern int param2(int a, int b); extern int param0(); int main() { param8(1,2,3,4,5,6,7,8); puts("--"); param5(1,2,3,4,5); puts("--"); param2(1,2); puts("--"); param0(); return(0); } ================================================ FILE: 24_Function_Params/lib/printint.c ================================================ #include void printint(long x) { printf("%ld\n", x); } void printchar(long x) { putc((char)(x & 0x7f), stdout); } ================================================ FILE: 24_Function_Params/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Initialise global variables static void init() { Line = 1; Putback = '\n'; Globs = 0; Locls = NSYMBOLS - 1; O_dumpAST = 0; } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s [-T] infile\n", prog); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. 
int main(int argc, char *argv[]) { int i; // Initialise the globals init(); // Scan for command-line options for (i = 1; i < argc; i++) { if (*argv[i] != '-') break; for (int j = 1; argv[i][j]; j++) { switch (argv[i][j]) { case 'T': O_dumpAST = 1; break; default: usage(argv[0]); } } } // Ensure we have an input file argument if (i >= argc) usage(argv[0]); // Open up the input file if ((Infile = fopen(argv[i], "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", argv[i], strerror(errno)); exit(1); } // Create the output file if ((Outfile = fopen("out.s", "w")) == NULL) { fprintf(stderr, "Unable to create out.s: %s\n", strerror(errno)); exit(1); } // For now, ensure that printint() and printchar() are defined addglob("printint", P_INT, S_FUNCTION, 0, 0); addglob("printchar", P_VOID, S_FUNCTION, 0, 0); scan(&Token); // Get the first token from the input genpreamble(); // Output the preamble global_declarations(); // Parse the global declarations genpostamble(); // Output the postamble fclose(Outfile); // Close the output file and exit return (0); } ================================================ FILE: 24_Function_Params/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current token is t, // and fetch the next token. 
Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d\n", s, Line); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d\n", s1, s2, Line); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d\n", s, d, Line); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d\n", s, c, Line); exit(1); } ================================================ FILE: 24_Function_Params/scan.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { char *p; p = strchr(s, c); return (p ? p - s : -1); } // Get the next character from the input file. static int next(void) { int c; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return (c); } c = fgetc(Infile); // Read from input file if ('\n' == c) Line++; // Increment line count return (c); } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. 
static int skip(void) { int c; c = next(); while (' ' == c || '\t' == c || '\n' == c || '\r' == c || '\f' == c) { c = next(); } return (c); } // Return the next character from a character // or string literal static int scanch(void) { int c; // Get the next input character and interpret // metacharacters that start with a backslash c = next(); if (c == '\\') { switch (c = next()) { case 'a': return '\a'; case 'b': return '\b'; case 'f': return '\f'; case 'n': return '\n'; case 'r': return '\r'; case 't': return '\t'; case 'v': return '\v'; case '\\': return '\\'; case '"': return '"'; case '\'': return '\''; default: fatalc("unknown escape sequence", c); } } return (c); // Just an ordinary old character! } // Scan and return an integer literal // value from the input file. static int scanint(int c) { int k, val = 0; // Convert each character into an int value while ((k = chrpos("0123456789", c)) >= 0) { val = val * 10 + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan in a string literal from the input file, // and store it in buf[]. Return the length of // the string. static int scanstr(char *buf) { int i, c; // Loop while we have enough buffer space for (i = 0; i < TEXTLEN - 1; i++) { // Get the next char and append to buf // Return when we hit the ending double quote if ((c = scanch()) == '"') { buf[i] = 0; return (i); } buf[i] = c; } // Ran out of buf[] space fatal("String literal too long"); return (0); } // Scan an identifier from the input file and // store it in buf[]. Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { fatal("Identifier too long"); } else if (i < lim - 1) { buf[i++] = c; } c = next(); } // We hit a non-valid character, put it back. 
// NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. static int keyword(char *s) { switch (*s) { case 'c': if (!strcmp(s, "char")) return (T_CHAR); break; case 'e': if (!strcmp(s, "else")) return (T_ELSE); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'l': if (!strcmp(s, "long")) return (T_LONG); break; case 'r': if (!strcmp(s, "return")) return (T_RETURN); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; case 'v': if (!strcmp(s, "void")) return (T_VOID); break; } return (0); } // A pointer to a rejected token static struct token *Rejtoken = NULL; // Reject the token that we just scanned void reject_token(struct token *t) { if (Rejtoken != NULL) fatal("Can't reject token twice"); Rejtoken = t; } // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. 
int scan(struct token *t) { int c, tokentype; // If we have any rejected token, return it if (Rejtoken != NULL) { t = Rejtoken; Rejtoken = NULL; return (1); } // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': if ((c = next()) == '+') { t->token = T_INC; } else { putback(c); t->token = T_PLUS; } break; case '-': if ((c = next()) == '-') { t->token = T_DEC; } else { putback(c); t->token = T_MINUS; } break; case '*': t->token = T_STAR; break; case '/': t->token = T_SLASH; break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case '[': t->token = T_LBRACKET; break; case ']': t->token = T_RBRACKET; break; case '~': t->token = T_INVERT; break; case '^': t->token = T_XOR; break; case ',': t->token = T_COMMA; break; case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { putback(c); t->token = T_LOGNOT; } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else if (c == '<') { t->token = T_LSHIFT; } else { putback(c); t->token = T_LT; } break; case '>': if ((c = next()) == '=') { t->token = T_GE; } else if (c == '>') { t->token = T_RSHIFT; } else { putback(c); t->token = T_GT; } break; case '&': if ((c = next()) == '&') { t->token = T_LOGAND; } else { putback(c); t->token = T_AMPER; } break; case '|': if ((c = next()) == '|') { t->token = T_LOGOR; } else { putback(c); t->token = T_OR; } break; case '\'': // If it's a quote, scan in the // literal character value and // the trailing quote t->intvalue = scanch(); t->token = T_INTLIT; if (next() != '\'') fatal("Expected '\\'' at end of char literal"); break; case '"': // Scan in a literal string scanstr(Text); t->token = T_STRLIT; break; default: // If it's a digit, 
scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if ((tokentype = keyword(Text)) != 0) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token return (1); } ================================================ FILE: 24_Function_Params/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // Prototypes static struct ASTnode *single_statement(void); // compound_statement: // empty, i.e. no statement // | statement // | statement statements // ; // // statement: declaration // | expression_statement // | function_call // | if_statement // | while_statement // | for_statement // | return_statement // ; // if_statement: if_head // | if_head 'else' compound_statement // ; // // if_head: 'if' '(' true_false_expression ')' compound_statement ; // // Parse an IF statement including any // optional ELSE clause and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. 
condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, 0); rparen(); // Get the AST for the compound statement trueAST = compound_statement(); // If we have an 'else', skip it // and get the AST for the compound statement if (Token.token == T_ELSE) { scan(&Token); falseAST = compound_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, P_NONE, condAST, trueAST, falseAST, 0)); } // while_statement: 'while' '(' true_false_expression ')' compound_statement ; // // Parse a WHILE statement and return its AST static struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, 0); rparen(); // Get the AST for the compound statement bodyAST = compound_statement(); // Build and return the AST for this statement return (mkastnode(A_WHILE, P_NONE, condAST, NULL, bodyAST, 0)); } // for_statement: 'for' '(' preop_statement ';' // true_false_expression ';' // postop_statement ')' compound_statement ; // // preop_statement: statement (for now) // postop_statement: statement (for now) // // Parse a FOR statement and return its AST static struct ASTnode *for_statement(void) { struct ASTnode *condAST, *bodyAST; struct ASTnode *preopAST, *postopAST; struct ASTnode *tree; // Ensure we have 'for' '(' match(T_FOR, "for"); lparen(); // Get the pre_op statement and the ';' preopAST = single_statement(); semi(); // Get the condition and the ';'. // Force a non-comparison to be boolean // the tree's operation is a comparison. 
condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, 0); semi(); // Get the post_op statement and the ')' postopAST = single_statement(); rparen(); // Get the compound statement which is the body bodyAST = compound_statement(); // For now, all four sub-trees have to be non-NULL. // Later on, we'll change the semantics for when some are missing // Glue the compound statement and the postop tree tree = mkastnode(A_GLUE, P_NONE, bodyAST, NULL, postopAST, 0); // Make a WHILE loop with the condition and this new body tree = mkastnode(A_WHILE, P_NONE, condAST, NULL, tree, 0); // And glue the preop tree to the A_WHILE tree return (mkastnode(A_GLUE, P_NONE, preopAST, NULL, tree, 0)); } // return_statement: 'return' '(' expression ')' ; // // Parse a return statement and return its AST static struct ASTnode *return_statement(void) { struct ASTnode *tree; // Can't return a value if function returns P_VOID if (Symtable[Functionid].type == P_VOID) fatal("Can't return from a void function"); // Ensure we have 'return' '(' match(T_RETURN, "return"); lparen(); // Parse the following expression tree = binexpr(0); // Ensure this is compatible with the function's type tree = modify_type(tree, Symtable[Functionid].type, 0); if (tree == NULL) fatal("Incompatible type to return"); // Add on the A_RETURN node tree = mkastunary(A_RETURN, P_NONE, tree, 0); // Get the ')' rparen(); return (tree); } // Parse a single statement and return its AST static struct ASTnode *single_statement(void) { int type; switch (Token.token) { case T_CHAR: case T_INT: case T_LONG: // The beginning of a variable declaration. // Parse the type and get the identifier. 
// Then parse the rest of the declaration // and skip over the semicolon type = parse_type(); ident(); var_declaration(type, 1, 0); semi(); return (NULL); // No AST generated here case T_IF: return (if_statement()); case T_WHILE: return (while_statement()); case T_FOR: return (for_statement()); case T_RETURN: return (return_statement()); default: // For now, see if this is an expression. // This catches assignment statements. return (binexpr(0)); } return (NULL); // Keep -Wall happy } // Parse a compound statement // and return its AST struct ASTnode *compound_statement(void) { struct ASTnode *left = NULL; struct ASTnode *tree; // Require a left curly bracket lbrace(); while (1) { // Parse a single statement tree = single_statement(); // Some statements must be followed by a semicolon if (tree != NULL && (tree->op == A_ASSIGN || tree->op == A_RETURN || tree->op == A_FUNCCALL)) semi(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, P_NONE, left, NULL, tree, 0); } // When we hit a right curly bracket, // skip past it and return the AST if (Token.token == T_RBRACE) { rbrace(); return (left); } } } ================================================ FILE: 24_Function_Params/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 // Determine if the symbol s is in the global symbol table. // Return its slot position or -1 if not found. // Skip C_PARAM entries int findglob(char *s) { int i; for (i = 0; i < Globs; i++) { if (Symtable[i].class == C_PARAM) continue; if (*s == *Symtable[i].name && !strcmp(s, Symtable[i].name)) return (i); } return (-1); } // Get the position of a new global symbol slot, or die // if we've run out of positions. 
static int newglob(void) { int p; if ((p = Globs++) >= Locls) fatal("Too many global symbols"); return (p); } // Determine if the symbol s is in the local symbol table. // Return its slot position or -1 if not found. int findlocl(char *s) { int i; for (i = Locls + 1; i < NSYMBOLS; i++) { if (*s == *Symtable[i].name && !strcmp(s, Symtable[i].name)) return (i); } return (-1); } // Get the position of a new local symbol slot, or die // if we've run out of positions. static int newlocl(void) { int p; if ((p = Locls--) <= Globs) fatal("Too many local symbols"); return (p); } // Clear all the entries in the // local symbol table void freeloclsyms(void) { Locls = NSYMBOLS - 1; } // Update a symbol at the given slot number in the symbol table. Set up its: // + type: char, int etc. // + structural type: var, function, array etc. // + size: number of elements // + endlabel: if this is a function // + posn: Position information for local symbols static void updatesym(int slot, char *name, int type, int stype, int class, int endlabel, int size, int posn) { if (slot < 0 || slot >= NSYMBOLS) fatal("Invalid symbol slot number in updatesym()"); Symtable[slot].name = strdup(name); Symtable[slot].type = type; Symtable[slot].stype = stype; Symtable[slot].class = class; Symtable[slot].endlabel = endlabel; Symtable[slot].size = size; Symtable[slot].posn = posn; } // Add a global symbol to the symbol table. Set up its: // + type: char, int etc. // + structural type: var, function, array etc. 
// + size: number of elements // + endlabel: if this is a function // Return the slot number in the symbol table int addglob(char *name, int type, int stype, int endlabel, int size) { int slot; // If this is already in the symbol table, return the existing slot if ((slot = findglob(name)) != -1) return (slot); // Otherwise get a new slot, fill it in and // return the slot number slot = newglob(); updatesym(slot, name, type, stype, C_GLOBAL, endlabel, size, 0); genglobsym(slot); return (slot); } // Add a local symbol to the symbol table. Set up its: // + type: char, int etc. // + structural type: var, function, array etc. // + size: number of elements // + isparam: if true, this is a parameter to the function // Return the slot number in the symbol table, -1 if a duplicate entry int addlocl(char *name, int type, int stype, int isparam, int size) { int localslot, globalslot; // If this is already in the symbol table, return an error if ((localslot = findlocl(name)) != -1) return (-1); // Otherwise get a new symbol slot and a position for this local. // Update the local symbol table entry. If this is a parameter, // also create a global C_PARAM entry to build the function's prototype. localslot = newlocl(); if (isparam) { updatesym(localslot, name, type, stype, C_PARAM, 0, size, 0); globalslot = newglob(); updatesym(globalslot, name, type, stype, C_PARAM, 0, size, 0); } else { updatesym(localslot, name, type, stype, C_LOCAL, 0, size, 0); } // Return the local symbol's slot return (localslot); } // Determine if the symbol s is in the symbol table. // Return its slot position or -1 if not found. 
int findsymbol(char *s) {
  // Local symbols shadow globals, so consult the local
  // table first and only fall back to the global table
  // when the name is not a local
  int found = findlocl(s);

  if (found != -1)
    return (found);
  return (findglob(s));
}

================================================
FILE: 24_Function_Params/tests/input01.c
================================================
void main()
{
  printint(12 * 3);
  printint(18 - 2 * 4);
  printint(1 + 2 + 9 - 5/2 + 3*5);
}

================================================
FILE: 24_Function_Params/tests/input02.c
================================================
void main()
{
  int fred;
  int jim;
  fred= 5;
  jim= 12;
  printint(fred + jim);
}

================================================
FILE: 24_Function_Params/tests/input03.c
================================================
void main()
{
  int x;
  x= 1; printint(x);
  x= x + 1; printint(x);
  x= x + 1; printint(x);
  x= x + 1; printint(x);
  x= x + 1; printint(x);
}

================================================
FILE: 24_Function_Params/tests/input04.c
================================================
void main()
{
  int x;
  x= 7 < 9; printint(x);
  x= 7 <= 9; printint(x);
  x= 7 != 9; printint(x);
  x= 7 == 7; printint(x);
  x= 7 >= 7; printint(x);
  x= 7 <= 7; printint(x);
  x= 9 > 7; printint(x);
  x= 9 >= 7; printint(x);
  x= 9 != 7; printint(x);
}

================================================
FILE: 24_Function_Params/tests/input05.c
================================================
void main()
{
  int i; int j;
  i=6; j=12;
  if (i < j) {
    printint(i);
  } else {
    printint(j);
  }
}

================================================
FILE: 24_Function_Params/tests/input06.c
================================================
void main()
{ int i;
  i=1;
  while (i <= 10) {
    printint(i);
    i= i + 1;
  }
}

================================================
FILE: 24_Function_Params/tests/input07.c
================================================
void main()
{
  int i;
  for (i= 1; i <= 10; i= i + 1) {
    printint(i);
  }
}

================================================
FILE: 24_Function_Params/tests/input08.c
================================================
void main()
{ int i; for (i= 1; i <= 10; i= i + 1) { printint(i); } } ================================================ FILE: 24_Function_Params/tests/input09.c ================================================ void main() { int i; for (i= 1; i <= 10; i= i + 1) { printint(i); } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { printint(2 * b - a); } } ================================================ FILE: 24_Function_Params/tests/input10.c ================================================ void main() { int i; char j; j= 20; printint(j); i= 10; printint(i); for (i= 1; i <= 5; i= i + 1) { printint(i); } for (j= 253; j != 2; j= j + 1) { printint(j); } } ================================================ FILE: 24_Function_Params/tests/input11.c ================================================ int main() { int i; char j; long k; i= 10; printint(i); j= 20; printint(j); k= 30; printint(k); for (i= 1; i <= 5; i= i + 1) { printint(i); } for (j= 253; j != 4; j= j + 1) { printint(j); } for (k= 1; k <= 5; k= k + 1) { printint(k); } return(i); printint(12345); return(3); } ================================================ FILE: 24_Function_Params/tests/input12.c ================================================ int fred() { return(5); } void main() { int x; x= fred(2); printint(x); } ================================================ FILE: 24_Function_Params/tests/input13.c ================================================ int fred() { return(56); } void main() { int dummy; int result; dummy= printint(23); result= fred(10); dummy= printint(result); } ================================================ FILE: 24_Function_Params/tests/input14.c ================================================ int fred() { return(20); } int main() { int result; printint(10); result= fred(15); printint(result); printint(fred(15)+10); return(0); } ================================================ FILE: 24_Function_Params/tests/input15.c ================================================ int main() { char a; char *b; 
char c; int d; int *e; int f; a= 18; printint(a); b= &a; c= *b; printint(c); d= 12; printint(d); e= &d; f= *e; printint(f); return(0); } ================================================ FILE: 24_Function_Params/tests/input16.c ================================================ int c; int d; int *e; int f; int main() { c= 12; d=18; printint(c); e= &c + 1; f= *e; printint(f); return(0); } ================================================ FILE: 24_Function_Params/tests/input17.c ================================================ int main() { char a; char *b; int d; int *e; b= &a; *b= 19; printint(a); e= &d; *e= 12; printint(d); return(0); } ================================================ FILE: 24_Function_Params/tests/input18.c ================================================ int main() { int a; int b; a= b= 34; printint(a); printint(b); return(0); } ================================================ FILE: 24_Function_Params/tests/input18a.c ================================================ int a; int *b; char c; char *d; int main() { b= &a; *b= 15; printint(a); d= &c; *d= 16; printint(c); return(0); } ================================================ FILE: 24_Function_Params/tests/input19.c ================================================ int a; int b; int c; int d; int e; int main() { a= 2; b= 4; c= 3; d= 2; e= (a+b) * (c+d); printint(e); return(0); } ================================================ FILE: 24_Function_Params/tests/input20.c ================================================ int a; int b[25]; int main() { b[3]= 12; a= b[3]; printint(a); return(0); } ================================================ FILE: 24_Function_Params/tests/input21.c ================================================ char c; char *str; int main() { c= '\n'; printint(c); for (str= "Hello world\n"; *str != 0; str= str + 1) { printchar(*str); } return(0); } ================================================ FILE: 24_Function_Params/tests/input22.c ================================================ 
char a; char b; char c; int d; int e; int f; long g; long h; long i; int main() { b= 5; c= 7; a= b + c++; printint(a); e= 5; f= 7; d= e + f++; printint(d); h= 5; i= 7; g= h + i++; printint(g); a= b-- + c; printint(a); d= e-- + f; printint(d); g= h-- + i; printint(g); a= ++b + c; printint(a); d= ++e + f; printint(d); g= ++h + i; printint(g); a= b * --c; printint(a); d= e * --f; printint(d); g= h * --i; printint(g); return(0); } ================================================ FILE: 24_Function_Params/tests/input23.c ================================================ char *str; int x; int main() { x= -23; printint(x); printint(-10 * -10); x= 1; x= ~x; printint(x); x= 2 > 5; printint(x); x= !x; printint(x); x= !x; printint(x); x= 13; if (x) { printint(13); } x= 0; if (!x) { printint(14); } for (str= "Hello world\n"; *str; str++) { printchar(*str); } return(0); } ================================================ FILE: 24_Function_Params/tests/input24.c ================================================ int a; int b; int c; int main() { a= 42; b= 19; printint(a & b); printint(a | b); printint(a ^ b); printint(1 << 3); printint(63 >> 3); return(0); } ================================================ FILE: 24_Function_Params/tests/input25.c ================================================ int a; int b; int c; int main() { char z; int y; int x; x= 10; y= 20; z= 30; printint(x); printint(y); printint(z); a= 5; b= 15; c= 25; printint(a); printint(b); printint(c); return(0); } ================================================ FILE: 24_Function_Params/tests/input26.c ================================================ int main(int a, char b, long c, int d, int e, int f, int g, int h) { int i; int j; int k; a= 13; printint(a); b= 23; printint(b); c= 34; printint(c); d= 44; printint(d); e= 54; printint(e); f= 64; printint(f); g= 74; printint(g); h= 84; printint(h); i= 94; printint(i); j= 95; printint(j); k= 96; printint(k); return(0); } ================================================ 
FILE: 24_Function_Params/tests/mktests ================================================ #!/bin/sh # Make the output files for each test for i in input*c do if [ ! -f "out.$i" ] then cc -o out $i ../lib/printint.c ./out > out.$i rm -f out fi done ================================================ FILE: 24_Function_Params/tests/out.input01.c ================================================ 36 10 25 ================================================ FILE: 24_Function_Params/tests/out.input02.c ================================================ 17 ================================================ FILE: 24_Function_Params/tests/out.input03.c ================================================ 1 2 3 4 5 ================================================ FILE: 24_Function_Params/tests/out.input04.c ================================================ 1 1 1 1 1 1 1 1 1 ================================================ FILE: 24_Function_Params/tests/out.input05.c ================================================ 6 ================================================ FILE: 24_Function_Params/tests/out.input06.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 24_Function_Params/tests/out.input07.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 24_Function_Params/tests/out.input08.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 24_Function_Params/tests/out.input09.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 24_Function_Params/tests/out.input10.c ================================================ 20 10 1 2 3 4 5 253 254 255 0 1 ================================================ FILE: 24_Function_Params/tests/out.input11.c ================================================ 10 20 30 1 2 3 4 5 253 
254 255 0 1 2 3 1 2 3 4 5 ================================================ FILE: 24_Function_Params/tests/out.input12.c ================================================ 5 ================================================ FILE: 24_Function_Params/tests/out.input13.c ================================================ 23 56 ================================================ FILE: 24_Function_Params/tests/out.input14.c ================================================ 10 20 30 ================================================ FILE: 24_Function_Params/tests/out.input15.c ================================================ 18 18 12 12 ================================================ FILE: 24_Function_Params/tests/out.input16.c ================================================ 12 18 ================================================ FILE: 24_Function_Params/tests/out.input17.c ================================================ 19 12 ================================================ FILE: 24_Function_Params/tests/out.input18.c ================================================ 34 34 ================================================ FILE: 24_Function_Params/tests/out.input18a.c ================================================ 15 16 ================================================ FILE: 24_Function_Params/tests/out.input19.c ================================================ 30 ================================================ FILE: 24_Function_Params/tests/out.input20.c ================================================ 12 ================================================ FILE: 24_Function_Params/tests/out.input21.c ================================================ 10 Hello world ================================================ FILE: 24_Function_Params/tests/out.input22.c ================================================ 12 12 12 13 13 13 13 13 13 35 35 35 ================================================ FILE: 24_Function_Params/tests/out.input23.c ================================================ -23 
100 -2 0 1 0 13 14 Hello world ================================================ FILE: 24_Function_Params/tests/out.input24.c ================================================ 2 59 57 8 7 ================================================ FILE: 24_Function_Params/tests/out.input25.c ================================================ 10 20 30 5 15 25 ================================================ FILE: 24_Function_Params/tests/out.input26.c ================================================ 13 23 34 44 54 64 74 84 94 95 96 ================================================ FILE: 24_Function_Params/tests/runtests ================================================ #!/bin/sh # Run each test and compare # against known good output if [ ! -f ../comp1 ] then echo "Need to build ../comp1 first!"; exit 1 fi for i in input* do if [ ! -f "out.$i" ] then echo "Can't run test on $i, no output file!" else echo -n $i ../comp1 $i cc -o out out.s ../lib/printint.c ./out > trial.$i cmp -s "out.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo else echo ": OK" fi rm -f out out.s "trial.$i" fi done ================================================ FILE: 24_Function_Params/tests/runtestsn ================================================ #!/bin/sh # Run each test and compare # against known good output if [ ! -f ../compn ] then echo "Need to build ../compn first!"; exit 1 fi for i in input* do if [ ! -f "out.$i" ] then echo "Can't run test on $i, no output file!" else echo -n $i ../compn $i nasm -f elf64 out.s cc -no-pie -Wall -o out out.o ../lib/printint.c ./out > trial.$i cmp -s "out.$i" "trial.$i" if [ "$?" 
-eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo else echo ": OK" fi rm -f out out.o out.s "trial.$i" fi done ================================================ FILE: 24_Function_Params/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->type = type; n->left = left; n->mid = mid; n->right = right; n->v.intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, int intvalue) { return (mkastnode(op, type, NULL, NULL, NULL, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, int intvalue) { return (mkastnode(op, type, left, NULL, NULL, intvalue)); } // Generate and return a new label number // just for AST dumping purposes static int gendumplabel(void) { static int id = 1; return (id++); } // Given an AST tree, print it out and follow the // traversal of the tree that genAST() follows void dumpAST(struct ASTnode *n, int label, int level) { int Lfalse, Lstart, Lend; switch (n->op) { case A_IF: Lfalse = gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_IF"); if (n->right) { Lend = gendumplabel(); fprintf(stdout, ", end L%d", Lend); } fprintf(stdout, "\n"); dumpAST(n->left, Lfalse, level + 2); dumpAST(n->mid, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); return; case A_WHILE: Lstart = gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_WHILE, start 
L%d\n", Lstart); Lend = gendumplabel(); dumpAST(n->left, Lend, level + 2); dumpAST(n->right, NOLABEL, level + 2); return; } // Reset level to -2 for A_GLUE if (n->op == A_GLUE) level = -2; // General AST node handling if (n->left) dumpAST(n->left, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); for (int i = 0; i < level; i++) fprintf(stdout, " "); switch (n->op) { case A_GLUE: fprintf(stdout, "\n\n"); return; case A_FUNCTION: fprintf(stdout, "A_FUNCTION %s\n", Symtable[n->v.id].name); return; case A_ADD: fprintf(stdout, "A_ADD\n"); return; case A_SUBTRACT: fprintf(stdout, "A_SUBTRACT\n"); return; case A_MULTIPLY: fprintf(stdout, "A_MULTIPLY\n"); return; case A_DIVIDE: fprintf(stdout, "A_DIVIDE\n"); return; case A_EQ: fprintf(stdout, "A_EQ\n"); return; case A_NE: fprintf(stdout, "A_NE\n"); return; case A_LT: fprintf(stdout, "A_LE\n"); return; case A_GT: fprintf(stdout, "A_GT\n"); return; case A_LE: fprintf(stdout, "A_LE\n"); return; case A_GE: fprintf(stdout, "A_GE\n"); return; case A_INTLIT: fprintf(stdout, "A_INTLIT %d\n", n->v.intvalue); return; case A_STRLIT: fprintf(stdout, "A_STRLIT rval label L%d\n", n->v.id); return; case A_IDENT: if (n->rvalue) fprintf(stdout, "A_IDENT rval %s\n", Symtable[n->v.id].name); else fprintf(stdout, "A_IDENT %s\n", Symtable[n->v.id].name); return; case A_ASSIGN: fprintf(stdout, "A_ASSIGN\n"); return; case A_WIDEN: fprintf(stdout, "A_WIDEN\n"); return; case A_RETURN: fprintf(stdout, "A_RETURN\n"); return; case A_FUNCCALL: fprintf(stdout, "A_FUNCCALL %s\n", Symtable[n->v.id].name); return; case A_ADDR: fprintf(stdout, "A_ADDR %s\n", Symtable[n->v.id].name); return; case A_DEREF: if (n->rvalue) fprintf(stdout, "A_DEREF rval\n"); else fprintf(stdout, "A_DEREF\n"); return; case A_SCALE: fprintf(stdout, "A_SCALE %d\n", n->v.size); return; case A_PREINC: fprintf(stdout, "A_PREINC %s\n", Symtable[n->v.id].name); return; case A_PREDEC: fprintf(stdout, "A_PREDEC %s\n", Symtable[n->v.id].name); return; case 
A_POSTINC: fprintf(stdout, "A_POSTINC\n"); return; case A_POSTDEC: fprintf(stdout, "A_POSTDEC\n"); return; case A_NEGATE: fprintf(stdout, "A_NEGATE\n"); return; default: fatald("Unknown dumpAST operator", n->op); } } ================================================ FILE: 24_Function_Params/types.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Types and type handling // Copyright (c) 2019 Warren Toomey, GPL3 // Return true if a type is an int type // of any size, false otherwise int inttype(int type) { if (type == P_CHAR || type == P_INT || type == P_LONG) return (1); return (0); } // Return true if a type is of pointer type int ptrtype(int type) { if (type == P_VOIDPTR || type == P_CHARPTR || type == P_INTPTR || type == P_LONGPTR) return (1); return (0); } // Given a primitive type, return // the type which is a pointer to it int pointer_to(int type) { int newtype; switch (type) { case P_VOID: newtype = P_VOIDPTR; break; case P_CHAR: newtype = P_CHARPTR; break; case P_INT: newtype = P_INTPTR; break; case P_LONG: newtype = P_LONGPTR; break; default: fatald("Unrecognised in pointer_to: type", type); } return (newtype); } // Given a primitive pointer type, return // the type which it points to int value_at(int type) { int newtype; switch (type) { case P_VOIDPTR: newtype = P_VOID; break; case P_CHARPTR: newtype = P_CHAR; break; case P_INTPTR: newtype = P_INT; break; case P_LONGPTR: newtype = P_LONG; break; default: fatald("Unrecognised in value_at: type", type); } return (newtype); } // Given an AST tree and a type which we want it to become, // possibly modify the tree by widening or scaling so that // it is compatible with this type. Return the original tree // if no changes occurred, a modified tree, or NULL if the // tree is not compatible with the given type. // If this will be part of a binary operation, the AST op is not zero. 
struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op) { int ltype; int lsize, rsize; ltype = tree->type; // Compare scalar int types if (inttype(ltype) && inttype(rtype)) { // Both types same, nothing to do if (ltype == rtype) return (tree); // Get the sizes for each type lsize = genprimsize(ltype); rsize = genprimsize(rtype); // Tree's size is too big if (lsize > rsize) return (NULL); // Widen to the right if (rsize > lsize) return (mkastunary(A_WIDEN, rtype, tree, 0)); } // For pointers on the left if (ptrtype(ltype)) { // OK is same type on right and not doing a binary op if (op == 0 && ltype == rtype) return (tree); } // We can scale only on A_ADD or A_SUBTRACT operation if (op == A_ADD || op == A_SUBTRACT) { // Left is int type, right is pointer type and the size // of the original type is >1: scale the left if (inttype(ltype) && ptrtype(rtype)) { rsize = genprimsize(value_at(rtype)); if (rsize > 1) return (mkastunary(A_SCALE, rtype, tree, rsize)); else return (tree); // Size 1, no need to scale } } // If we get here, the types are not compatible return (NULL); } ================================================ FILE: 25_Function_Arguments/Makefile ================================================ HSRCS= data.h decl.h defs.h SRCS= cg.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c ARMSRCS= cg_arm.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c comp1: $(SRCS) $(HSRCS) cc -o comp1 -g -Wall $(SRCS) compn: $(SRCN) $(HSRCS) cc -o compn -g -Wall $(SRCN) comp1arm: $(ARMSRCS) $(HSRCS) cc -o comp1arm -g -Wall $(ARMSRCS) cp comp1arm comp1 clean: rm -f comp1 comp1arm compn *.o *.s out test: comp1 tests/runtests (cd tests; chmod +x runtests; ./runtests) armtest: comp1arm tests/runtests (cd tests; chmod +x runtests; ./runtests) testn: compn tests/runtestsn (cd tests; chmod +x runtestsn; ./runtestsn) 
================================================ FILE: 25_Function_Arguments/Readme.md ================================================ # Part 25: Function Calls and Arguments In this part of our compiler writing journey, I'm going to add the ability to call functions with an arbitrary number of arguments; the arguments' values will be copied into the function's parameters and appear as local variables. I haven't done this yet, because there is a bit of design thinking to be done before the coding can begin. Once more, let's review the image from Eli Bendersky's article on the [stack frame layout on x86-64](https://eli.thegreenplace.net/2011/09/06/stack-frame-layout-on-x86-64/). ![](../22_Design_Locals/Figs/x64_frame_nonleaf.png) Up to six "call by value" arguments to a function are passed in via the registers `%rdi` to `%r9`. For more than six arguments, the remaining arguments are pushed on the stack. Look closely at the argument values on the stack. Even though `h` is the last argument, it is pushed first on the stack (which grows downwards), and the `g` argument is pushed *after* the `h` argument. One of the *Bad Things* about C is that there is no defined order of expression evaluation. As noted [here](https://en.cppreference.com/w/c/language/eval_order): > [The] order of evaluation of the operands of any C operator, including the order of evaluation of function arguments in a function-call expression ... is unspecified ... . The compiler will evaluate them in any order ... This makes the language potentially unportable: code that behaves one way on one platform with one compiler may behave differently on a different platform or when compiled with a different compiler. For us, though, this lack of defined evaluation order is a *Good Thing*, only because we can generate our argument values in the order that makes it easier to write our compiler. I'm being flippant here: this is really not much of a good thing. 
Because the x86-64 platform expects the last argument's value to be pushed on the stack first, I'll need to write the code to process arguments from the last to the first. I should make sure that the code could be easily altered to allow processing in the other direction: perhaps a `genXXX()` query function could be written to tell our code which direction to process the arguments. I'll leave that to be written later. ### Generating an AST of Expressions We already have the A_GLUE AST node type, so it should be easy to write a function to parse the argument expressions and build an AST tree. For a function call `function(expr1, expr2, expr3, expr4)`, I've decided to build the tree like this: ``` A_FUNCCALL / A_GLUE / \ A_GLUE expr4 / \ A_GLUE expr3 / \ A_GLUE expr2 / \ NULL expr1 ``` Each expression is on the right, and previous expressions are on the left. I will have to traverse the sub-tree of expressions right to left, to ensure that I process `expr4` before `expr3` in case the former has to be pushed on the x86-64 stack before the latter. We already have a `funccall()` function to parse a simple function call with always one argument. I'll modify this to call an `expression_list()` function to parse the expression list and build the A_GLUE sub-tree. It will return a count of the number of expressions by storing this count in the top A_GLUE AST node. Then, in `funccall()`, we can check the type of all the expressions against the function's prototype which should be stored in the global symbol table. I think that's enough on the design side of things. Let's now get on to the implementation. ## Expression Parsing Changes Well, I got the code done in an hour or so and I'm pleasantly surprised. To borrow a quote that floats around on Twitter: > Weeks of programming can save you hours of planning. Conversely, a bit of time spent on design always helps with the efficiency of coding. Let's have a look at the changes. We'll start with the parsing. 
We now have to parse a comma-separated list of expressions, and build that A_GLUE AST tree with child expressions on the right, and previous expression trees on the left. Here is the code in `expr.c`: ```c // expression_list: // | expression // | expression ',' expression_list // ; // Parse a list of zero or more comma-separated expressions and // return an AST composed of A_GLUE nodes with the left-hand child // being the sub-tree of previous expressions (or NULL) and the right-hand // child being the next expression. Each A_GLUE node will have size field // set to the number of expressions in the tree at this point. If no // expressions are parsed, NULL is returned static struct ASTnode *expression_list(void) { struct ASTnode *tree = NULL; struct ASTnode *child = NULL; int exprcount = 0; // Loop until the final right parentheses while (Token.token != T_RPAREN) { // Parse the next expression and increment the expression count child = binexpr(0); exprcount++; // Build an A_GLUE AST node with the previous tree as the left child // and the new expression as the right child. Store the expression count. tree = mkastnode(A_GLUE, P_NONE, tree, NULL, child, exprcount); // Must have a ',' or ')' at this point switch (Token.token) { case T_COMMA: scan(&Token); break; case T_RPAREN: break; default: fatald("Unexpected token in expression list", Token.token); } } // Return the tree of expressions return (tree); } ``` That turned out to be much easier to code than I was expecting. Now, we need to interface this with the existing function call parser: ```c // Parse a function call and return its AST static struct ASTnode *funccall(void) { struct ASTnode *tree; int id; // Check that the identifier has been defined as a function, // then make a leaf node for it. 
if ((id = findsymbol(Text)) == -1 || Symtable[id].stype != S_FUNCTION) { fatals("Undeclared function", Text); } // Get the '(' lparen(); // Parse the argument expression list tree = expression_list(); // XXX Check type of each argument against the function's prototype // Build the function call AST node. Store the // function's return type as this node's type. // Also record the function's symbol-id tree = mkastunary(A_FUNCCALL, Symtable[id].type, tree, id); // Get the ')' rparen(); return (tree); } ``` Note the `XXX` which is my reminder that I still have work to perform. The parser does check that the function has previously been declared, but as yet it doesn't compare the argument types against the function's prototype. I'll do that soon. The AST tree that is returned now has the shape that I drew up near the beginning of this article. Now it's time to walk it and generate assembly code. ## Changes to the Generic Code Generator The way the compiler is written, the architecture-neutral code that walks the AST is in `gen.c`, and the actual platform-dependent back-end is in `cg.c`. So we start with the changes to `gen.c`. There is a non-trivial amount of code needed to walk this new AST structure, so I now have a function to deal with function calls. In `genAST()` we now have: ```c // n is the AST node being processed switch (n->op) { ... case A_FUNCCALL: return (gen_funccall(n)); } ``` The code to walk the new AST structure is here: ```c // Generate the code to copy the arguments of a // function call to its parameters, then call the // function itself. Return the register that holds // the function's return value. 
static int gen_funccall(struct ASTnode *n) { struct ASTnode *gluetree = n->left; int reg; int numargs=0; // If there is a list of arguments, walk this list // from the last argument (right-hand child) to the // first while (gluetree) { // Calculate the expression's value reg = genAST(gluetree->right, NOLABEL, gluetree->op); // Copy this into the n'th function parameter: size is 1, 2, 3, ... cgcopyarg(reg, gluetree->v.size); // Keep the first (highest) number of arguments if (numargs==0) numargs= gluetree->v.size; genfreeregs(); gluetree = gluetree->left; } // Call the function, clean up the stack (based on numargs), // and return its result return (cgcall(n->v.id, numargs)); } ``` There are a few things to note. We generate the expression code by calling `genAST()` on the right child. Also, we set `numargs` to the first `size` value, which is the number of arguments (one-based not zero-based). Then we call `cgcopyarg()` to copy this value into the function's *n'th* parameter. Once the copy is done, we can free all our registers in preparation for the next expression, and walk down the left child for the previous expression. Finally, we run `cgcall()` to generate the actual call to the function. Because we may have pushed argument values on the stack, we provide this with the number of arguments in total so it can work out how many to pop back off the stack. There is no hardware-specific code here but, as I mentioned at the top, we are walking the expression tree from the last expression to the first. Not all architectures will want this, so there is room to make the code more flexible in terms of the order of evaluation. ## Changes to `cg.c` Now we get to the functions that generate actual x86-64 assembly code output. We have created a new one, `cgcopyarg()`, and modified an existing one, `cgcall()`. 
But first, a reminder that we have these lists of registers: ```c #define FIRSTPARAMREG 9 // Position of first parameter register static char *reglist[] = { "%r10", "%r11", "%r12", "%r13", "%r9", "%r8", "%rcx", "%rdx", "%rsi", "%rdi" }; static char *breglist[] = { "%r10b", "%r11b", "%r12b", "%r13b", "%r9b", "%r8b", "%cl", "%dl", "%sil", "%dil" }; static char *dreglist[] = { "%r10d", "%r11d", "%r12d", "%r13d", "%r9d", "%r8d", "%ecx", "%edx", "%esi", "%edi" }; ``` with FIRSTPARAMREG set to the last index position: we will walk backwards down this list. Also, remember that the argument position numbers we will get are one-based (i.e 1, 2, 3, 4, ...) not zero-based (0, 1, 2, 3, ...), but the array above is zero-based. You will see a few `+1` or `-1` adjustments in the code below. Here is `cgcopyarg()`: ```c // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpushq\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmovq\t%s, %s\n", reglist[r], reglist[FIRSTPARAMREG - argposn + 1]); } } ``` Nice and simple except for the `+1`. 
Now the code for `cgcall()`: ```c // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(int id, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s\n", Symtable[id].name); // Remove any arguments pushed on the stack if (numargs>6) fprintf(Outfile, "\taddq\t$%d, %%rsp\n", 8*(numargs-6)); // and copy the return value into our register fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]); return (outr); } ``` Again, nice and simple. ## Testing the Changes In the last part of our compiler writing journey, we had two separate test programs `input27a.c` and `input27b.c`: we had to compile one of them with `gcc`. Now, we can combine them together and compile it all with our compiler. There is a second test program `input28.c` with some more examples of function calling. As always: ``` $ make test cc -o comp1 -g -Wall cg.c decl.c expr.c gen.c main.c misc.c scan.c stmt.c sym.c tree.c types.c (cd tests; chmod +x runtests; ./runtests) ... input25.c: OK input26.c: OK input27.c: OK input28.c: OK ``` ## Conclusion and What's Next Right now, I feel that our compiler has just gone from being a "toy" compiler to one which is nearly useful: we can now write multi-function programs and call between the functions. It took a few steps to get there, but I think each step was not a giant one. There is obviously still a big journey left. We need to add structs, unions, external identifiers and a pre-processor. Then we have to make the compiler more robust, provide better error detections, possibly add warnings etc. So, perhaps we are about half-way at this point. In the next part of our compiler writing journey, I think I'm going to add the ability to write function prototypes. This will allow us to link in outside functions. 
I'm thinking of those original Unix functions and system calls which are `int` and `char *` based such as `open()`, `read()`, `write()`, `strcpy()` etc. It will be nice to compile some useful programs with our compiler. [Next step](../26_Prototypes/Readme.md) ================================================ FILE: 25_Function_Arguments/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\t.text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\t.data\n", Outfile); currSeg = data_seg; } } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. static int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. 
// We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "%r10", "%r11", "%r12", "%r13", "%r9", "%r8", "%rcx", "%rdx", "%rsi", "%rdi" }; static char *breglist[] = { "%r10b", "%r11b", "%r12b", "%r13b", "%r9b", "%r8b", "%cl", "%dl", "%sil", "%dil" }; static char *dreglist[] = { "%r10d", "%r11d", "%r12d", "%r13d", "%r9d", "%r8d", "%ecx", "%edx", "%esi", "%edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; int i; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the %rsp and %rsp fprintf(Outfile, "\t.globl\t%s\n" "\t.type\t%s, @function\n" "%s:\n" "\tpushq\t%%rbp\n" "\tmovq\t%%rsp, %%rbp\n", name, name, name); // Copy any in-register parameters to the stack // Stop after no more than six parameter registers for (i = NSYMBOLS - 1; i > Locls; i--) { if (Symtable[i].class != C_PARAM) break; if (i < NSYMBOLS - 6) break; Symtable[i].posn = newlocaloffset(Symtable[i].type); cgstorlocal(paramReg--, i); } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. for (; i > Locls; i--) { if (Symtable[i].class == C_PARAM) { Symtable[i].posn = paramOffset; paramOffset += 8; } else { Symtable[i].posn = newlocaloffset(Symtable[i].type); } } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\taddq\t$%d,%%rsp\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Symtable[id].endlabel); fprintf(Outfile, "\taddq\t$%d,%%rsp\n", stackOffset); fputs("\tpopq %rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. 
int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadglob(int id, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it switch (Symtable[id].type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", Symtable[id].name); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", Symtable[id].name); fprintf(Outfile, "\tmovzbq\t%s(%%rip), %s\n", Symtable[id].name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", Symtable[id].name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", Symtable[id].name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", Symtable[id].name); if (op == A_PREDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", Symtable[id].name); fprintf(Outfile, "\tmovslq\t%s(%%rip), %s\n", Symtable[id].name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", Symtable[id].name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", Symtable[id].name); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: if (op == A_PREINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", Symtable[id].name); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", Symtable[id].name); fprintf(Outfile, "\tmovq\t%s(%%rip), %s\n", Symtable[id].name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", Symtable[id].name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", Symtable[id].name); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. 
If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadlocal(int id, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it switch (Symtable[id].type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", Symtable[id].posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", Symtable[id].posn); fprintf(Outfile, "\tmovzbq\t%d(%%rbp), %s\n", Symtable[id].posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", Symtable[id].posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", Symtable[id].posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", Symtable[id].posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", Symtable[id].posn); fprintf(Outfile, "\tmovslq\t%d(%%rbp), %s\n", Symtable[id].posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", Symtable[id].posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", Symtable[id].posn); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: if (op == A_PREINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", Symtable[id].posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", Symtable[id].posn); fprintf(Outfile, "\tmovq\t%d(%%rbp), %s\n", Symtable[id].posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", Symtable[id].posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", Symtable[id].posn); break; default: fatald("Bad type in cgloadlocal:", Symtable[id].type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int id) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tleaq\tL%d(%%rip), %s\n", id, reglist[r]); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, 
"\taddq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tandq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\torq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txorq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshlq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshrq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tnegq\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnotq\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, 
"\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); return (r); } // Convert an integer value to a boolean value. Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(int id, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s\n", Symtable[id].name); // Remove any arguments pushed on the stack if (numargs>6) fprintf(Outfile, "\taddq\t$%d, %%rsp\n", 8*(numargs-6)); // and copy the return value into our register fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpushq\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmovq\t%s, %s\n", reglist[r], reglist[FIRSTPARAMREG - argposn + 1]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsalq\t$%d, %s\n", val, reglist[r]); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %s(%%rip)\n", breglist[r], Symtable[id].name); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %s(%%rip)\n", dreglist[r], Symtable[id].name); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tmovq\t%s, %s(%%rip)\n", reglist[r], Symtable[id].name); break; default: fatald("Bad type in cgstorglob:", Symtable[id].type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, int id) { switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %d(%%rbp)\n", breglist[r], Symtable[id].posn); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %d(%%rbp)\n", dreglist[r], Symtable[id].posn); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tmovq\t%s, %d(%%rbp)\n", reglist[r], Symtable[id].posn); break; default: fatald("Bad type in cgstorlocal:", Symtable[id].type); } return (r); } // Array of type sizes in P_XXX order. // 0 means no size. static int psize[] = { 0, 0, 1, 4, 8, 8, 8, 8, 8 }; // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { // Check the type is valid if (type < P_NONE || type > P_LONGPTR) fatal("Bad type in cgprimsize()"); return (psize[type]); } // Generate a global symbol but not functions void cgglobsym(int id) { int typesize; if (Symtable[id].stype == S_FUNCTION) return; // Get the size of the type typesize = cgprimsize(Symtable[id].type); // Generate the global identity and the label cgdataseg(); fprintf(Outfile, "\t.globl\t%s\n", Symtable[id].name); fprintf(Outfile, "%s:", Symtable[id].name); // Generate the space for (int i = 0; i < Symtable[id].size; i++) { switch (typesize) { case 1: fprintf(Outfile, "\t.byte\t0\n"); break; case 4: fprintf(Outfile, "\t.long\t0\n"); break; case 8: fprintf(Outfile, "\t.quad\t0\n"); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\t.byte\t%d\n", *cptr); } fprintf(Outfile, "\t.byte\t0\n"); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { // Generate code depending on the function's type switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzbl\t%s, %%eax\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %%eax\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", Symtable[id].type); } cgjump(Symtable[id].endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(int id) { int r = alloc_register(); if (Symtable[id].class == C_LOCAL) fprintf(Outfile, "\tleaq\t%d(%%rbp), %s\n", Symtable[id].posn, reglist[r]); else fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", Symtable[id].name, reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tmovzbq\t(%s), %s\n", reglist[r], reglist[r]); break; case P_INTPTR: fprintf(Outfile, "\tmovslq\t(%s), %s\n", reglist[r], reglist[r]); break; case P_LONGPTR: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, (%s)\n", breglist[r1], reglist[r2]); break; case P_INT: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 25_Function_Arguments/cg_arm.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for ARMv6 on Raspberry Pi // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. static int freereg[4]; static char *reglist[4] = { "r4", "r5", "r6", "r7" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. 
static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // We have to store large integer literal values in memory. // Keep a list of them which will be output in the postamble #define MAXINTS 1024 int Intlist[MAXINTS]; static int Intslot = 0; // Determine the offset of a large integer // literal from the .L3 label. If the integer // isn't in the list, add it. static void set_int_offset(int val) { int offset = -1; // See if it is already there for (int i = 0; i < Intslot; i++) { if (Intlist[i] == val) { offset = 4 * i; break; } } // Not in the list, so add it if (offset == -1) { offset = 4 * Intslot; if (Intslot == MAXINTS) fatal("Out of int slots in set_int_offset()"); Intlist[Intslot++] = val; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L3+%d\n", offset); } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Print out the assembly postamble void cgpostamble() { // Print out the global variables fprintf(Outfile, ".L2:\n"); for (int i = 0; i < Globs; i++) { if (Symtable[i].stype == S_VARIABLE) fprintf(Outfile, "\t.word %s\n", Symtable[i].name); } // Print out the integer literals fprintf(Outfile, ".L3:\n"); for (int i = 0; i < Intslot; i++) { fprintf(Outfile, "\t.word %d\n", Intlist[i]); } } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, \%%function\n" "%s:\n" "\tpush\t{fp, lr}\n" "\tadd\tfp, sp, #4\n" "\tsub\tsp, sp, #8\n" "\tstr\tr0, [fp, #-8]\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { 
cglabel(Symtable[id].endlabel); fputs("\tsub\tsp, fp, #4\n" "\tpop\t{fp, pc}\n" "\t.align\t2\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); // If the literal value is small, do it with one instruction if (value <= 1000) fprintf(Outfile, "\tmov\t%s, #%d\n", reglist[r], value); else { set_int_offset(value); fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); } return (r); } // Determine the offset of a variable from the .L2 // label. Yes, this is inefficient code. static void set_var_offset(int id) { int offset = 0; // Walk the symbol table up to id. // Find S_VARIABLEs and add on 4 until // we get to our variable for (int i = 0; i < id; i++) { if (Symtable[i].stype == S_VARIABLE) offset += 4; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L2+%d\n", offset); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tldrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s, %s\n", reglist[r1], reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register 
with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\tmul\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { // To do a divide: r0 holds the dividend, r1 holds the divisor. // The quotient is in r0. fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r1]); fprintf(Outfile, "\tmov\tr1, %s\n", reglist[r2]); fprintf(Outfile, "\tbl\t__aeabi_idiv\n"); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r1]); free_register(r2); return (r1); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]); fprintf(Outfile, "\tbl\t%s\n", Symtable[id].name); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r]); return (r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tlsl\t%s, %s, #%d\n", reglist[r], reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tstr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgstorglob:", Symtable[id].type); } return (r); } // Array of type sizes in P_XXX order. // 0 means no size. static int psize[] = { 0, 0, 1, 4, 4, 4, 4, 4 }; // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { // Check the type is valid if (type < P_NONE || type > P_LONGPTR) fatal("Bad type in cgprimsize()"); return (psize[type]); } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Symtable[id].type); fprintf(Outfile, "\t.data\n" "\t.globl\t%s\n", Symtable[id].name); switch (typesize) { case 1: fprintf(Outfile, "%s:\t.byte\t0\n", Symtable[id].name); break; case 4: fprintf(Outfile, "%s:\t.long\t0\n", Symtable[id].name); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "moveq", "movne", "movlt", "movgt", "movle", "movge" }; // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "movne", "moveq", "movge", "movle", "movgt", "movlt" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #1\n", cmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #0\n", invcmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\tuxtb\t%s, %s\n", reglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tb\tL%d\n", l); } // List of inverted branch instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *brlist[] = { "bne", "beq", "bge", "ble", "bgt", "blt" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", brlist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[reg]); cgjump(Symtable[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. Return a new register int cgaddress(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); fprintf(Outfile, "\tmov\t%s, r3\n", reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tldrb\t%s, [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [%s]\n", reglist[r1], reglist[r2]); break; case P_INT: case P_LONG: fprintf(Outfile, "\tstr\t%s, [%s]\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 25_Function_Arguments/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { 
no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\tsection .text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\tsection .data\n", Outfile); currSeg = data_seg; } } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. // We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "r10", "r11", "r12", "r13", "r9", "r8", "rcx", "rdx", "rsi", "rdi" }; static char *breglist[] = { "r10b", "r11b", "r12b", "r13b", "r9b", "r8b", "cl", "dl", "sil", "dil" }; static char *dreglist[] = { "r10d", "r11d", "r12d", "r13d", "r9d", "r8d", "ecx", "edx", "esi", "edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\textern\tprintint\n", Outfile); fputs("\textern\tprintchar\n", Outfile); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; int i; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the rsp and rbp fprintf(Outfile, "\tglobal\t%s\n" "%s:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n", name, name); // Copy any in-register parameters to the stack // Stop after no more than six parameter registers for (i = NSYMBOLS - 1; i > Locls; i--) { if (Symtable[i].class != C_PARAM) break; if (i < NSYMBOLS - 6) break; Symtable[i].posn = newlocaloffset(Symtable[i].type); cgstorlocal(paramReg--, i); } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. for (; i > Locls; i--) { if (Symtable[i].class == C_PARAM) { Symtable[i].posn = paramOffset; paramOffset += 8; } else { Symtable[i].posn = newlocaloffset(Symtable[i].type); } } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\tadd\trsp, %d\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Symtable[id].endlabel); fprintf(Outfile, "\tadd\trsp, %d\n", stackOffset); fputs("\tpop rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. 
int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadglob(int id, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it switch (Symtable[id].type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", Symtable[id].name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", Symtable[id].name); fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], Symtable[id].name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", Symtable[id].name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", Symtable[id].name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword [%s]\n", Symtable[id].name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tdword [%s]\n", Symtable[id].name); fprintf(Outfile, "\tmovsx\t%s, word [%s]\n", dreglist[r], Symtable[id].name); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword [%s]\n", Symtable[id].name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword [%s]\n", Symtable[id].name); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword [%s]\n", Symtable[id].name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", Symtable[id].name); fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], Symtable[id].name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword [%s]\n", Symtable[id].name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", Symtable[id].name); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Load a value from a local variable into a register. 
// Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadlocal(int id, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it switch (Symtable[id].type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", Symtable[id].posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", Symtable[id].posn); fprintf(Outfile, "\tmovzx\t%s, byte [rbp+%d]\n", reglist[r], Symtable[id].posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", Symtable[id].posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", Symtable[id].posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", Symtable[id].posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", Symtable[id].posn); fprintf(Outfile, "\tmovsx\t%s, word [rbp+%d]\n", reglist[r], Symtable[id].posn); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", Symtable[id].posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", Symtable[id].posn); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", Symtable[id].posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", Symtable[id].posn); fprintf(Outfile, "\tmov\t%s, [rbp+%d]\n", reglist[r], Symtable[id].posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", Symtable[id].posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", Symtable[id].posn); break; default: fatald("Bad type in cgloadlocal:", Symtable[id].type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int id) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, 
L%d\n", reglist[r], id); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tand\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\tor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshl\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshr\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tneg\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnot\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int 
cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r], breglist[r]); return (r); } // Convert an integer value to a boolean value. Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, byte %s\n", reglist[r], breglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(int id, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s\n", Symtable[id].name); // Remove any arguments pushed on the stack if (numargs>6) fprintf(Outfile, "\tadd\trsp, %d\n", 8*(numargs-6)); // and copy the return value into our register fprintf(Outfile, "\tmov\t%s, rax\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpush\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmov\t%s, %s\n", reglist[FIRSTPARAMREG - argposn + 1], reglist[r]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsal\t%s, %d\n", reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], %s\n", Symtable[id].name, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", Symtable[id].name, dreglist[r]); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tmov\t[%s], %s\n", Symtable[id].name, reglist[r]); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, int id) { switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tmov\tbyte\t[rbp+%d], %s\n", Symtable[id].posn, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\tdword\t[rbp+%d], %s\n", Symtable[id].posn, dreglist[r]); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tmov\tqword\t[rbp+%d], %s\n", Symtable[id].posn, reglist[r]); break; default: fatald("Bad type in cgstorlocal:", Symtable[id].type); } return (r); } // Array of type sizes in P_XXX order. // 0 means no size. static int psize[] = { 0, 0, 1, 4, 8, 8, 8, 8, 8 }; // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { // Check the type is valid if (type < P_NONE || type > P_LONGPTR) fatal("Bad type in cgprimsize()"); return (psize[type]); } // Generate a global symbol but not functions void cgglobsym(int id) { int typesize; if (Symtable[id].stype == S_FUNCTION) return; // Get the size of the type typesize = cgprimsize(Symtable[id].type); // Generate the global identity and the label cgdataseg(); fprintf(Outfile, "\tsection\t.data\n" "\tglobal\t%s\n", Symtable[id].name); fprintf(Outfile, "%s:", Symtable[id].name); // Generate the space // original version for (int i = 0; i < Symtable[id].size; i++) { switch(typesize) { case 1: fprintf(Outfile, "\tdb\t0\n"); break; case 4: fprintf(Outfile, "\tdd\t0\n"); break; case 8: fprintf(Outfile, "\tdq\t0\n"); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } /* compact version using times instead of loop switch(typesize) { case 1: fprintf(Outfile, "\ttimes\t%d\tdb\t0\n", Symtable[id].size); break; case 4: fprintf(Outfile, "\ttimes\t%d\tdd\t0\n", Symtable[id].size); break; case 8: fprintf(Outfile, "\ttimes\t%d\tdq\t0\n", Symtable[id].size); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } */ } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\tdb\t%d\n", *cptr); } fprintf(Outfile, "\tdb\t0\n"); /* put string in readable format on a single line // probably went overboard with error checking int comma = 0, quote = 0, start = 1; fprintf(Outfile, "\tdb\t"); for (cptr=strvalue; *cptr; cptr++) { if ( ! 
isprint(*cptr) ) if (comma || start) { fprintf(Outfile, "%d, ", *cptr); start = 0; comma = 1; } else if (quote) { fprintf(Outfile, "\', %d, ", *cptr); comma = 1; quote = 0; } else { fprintf(Outfile, "%d, ", *cptr); comma = 1; quote = 0; } else if (start || comma) { fprintf(Outfile, "\'%c", *cptr); start = comma = 0; quote = 1; } else { fprintf(Outfile, "%c", *cptr); comma = 0; quote = 1; } } if (comma || start) fprintf(Outfile, "0\n"); else fprintf(Outfile, "\', 0\n"); */ } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { // Generate code depending on the function's type switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzx\teax, %s\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmov\teax, %s\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", Symtable[id].type); } cgjump(Symtable[id].endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(int id) { int r = alloc_register(); if (Symtable[id].class == C_LOCAL) fprintf(Outfile, "\tlea\t%s, [rbp+%d]\n", reglist[r], Symtable[id].posn); else fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r], Symtable[id].name); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: fprintf(Outfile, "\tmovsx\t%s, dword [%s]\n", reglist[r], reglist[r]); break; case P_LONGPTR: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], byte %s\n", reglist[r2], breglist[r1]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; case P_LONG: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 25_Function_Arguments/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Putback; // Character put back by scanner extern_ int Functionid; // Symbol id of the current function extern_ int Globs; // Position of next free global symbol slot extern_ int Locls; // Position of next free local symbol slot extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ struct token Token; // Last token scanned extern_ char Text[TEXTLEN + 1]; // Last identifier scanned extern_ struct symtable Symtable[NSYMBOLS]; // Global symbol table extern_ int O_dumpAST; 
================================================ FILE: 25_Function_Arguments/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 // Parse the current token and return // a primitive type enum value. Also // scan in the next token int parse_type(void) { int type; switch (Token.token) { case T_VOID: type = P_VOID; break; case T_CHAR: type = P_CHAR; break; case T_INT: type = P_INT; break; case T_LONG: type = P_LONG; break; default: fatald("Illegal type, token", Token.token); } // Scan in one or more further '*' tokens // and determine the correct pointer type while (1) { scan(&Token); if (Token.token != T_STAR) break; type = pointer_to(type); } // We leave with the next token already scanned return (type); } // variable_declaration: type identifier ';' // | type identifier '[' INTLIT ']' ';' // ; // // Parse the declaration of a scalar variable or an array // with a given size. // The identifier has been scanned & we have the type // islocal is set if this is a local variable // isparam is set if this local variable is a function parameter void var_declaration(int type, int islocal, int isparam) { // Text now has the identifier's name. // If the next token is a '[' if (Token.token == T_LBRACKET) { // Skip past the '[' scan(&Token); // Check we have an array size if (Token.token == T_INTLIT) { // Add this as a known array and generate its space in assembly. 
// We treat the array as a pointer to its elements' type if (islocal) { fatal("For now, declaration of local arrays is not implemented"); } else { addglob(Text, pointer_to(type), S_ARRAY, 0, Token.intvalue); } } // Ensure we have a following ']' scan(&Token); match(T_RBRACKET, "]"); } else { // Add this as a known scalar // and generate its space in assembly if (islocal) { if (addlocl(Text, type, S_VARIABLE, isparam, 1)==-1) fatals("Duplicate local variable declaration", Text); } else { addglob(Text, type, S_VARIABLE, 0, 1); } } } // param_declaration: // | variable_declaration // | variable_declaration ',' param_declaration // // Parse the parameters in parentheses after the function name. // Add them as symbols to the symbol table and return the number // of parameters. static int param_declaration(void) { int type; int paramcnt=0; // Loop until the final right parentheses while (Token.token != T_RPAREN) { // Get the type and identifier // and add it to the symbol table type = parse_type(); ident(); var_declaration(type, 1, 1); paramcnt++; // Must have a ',' or ')' at this point switch (Token.token) { case T_COMMA: scan(&Token); break; case T_RPAREN: break; default: fatald("Unexpected token in parameter list", Token.token); } } // Return the count of parameters return(paramcnt); } // // function_declaration: type identifier '(' ')' compound_statement ; // // Parse the declaration of a simplistic function. // The identifier has been scanned & we have the type struct ASTnode *function_declaration(int type) { struct ASTnode *tree, *finalstmt; int nameslot, endlabel, paramcnt; // Text now has the identifier's name. 
// Get a label-id for the end label, add the function // to the symbol table, and set the Functionid global // to the function's symbol-id endlabel = genlabel(); nameslot = addglob(Text, type, S_FUNCTION, endlabel, 0); Functionid = nameslot; // Scan in the parentheses and any parameters // Update the function symbol entry with the number of parameters lparen(); paramcnt= param_declaration(); Symtable[nameslot].nelems= paramcnt; rparen(); // Get the AST tree for the compound statement tree = compound_statement(); // If the function type isn't P_VOID .. if (type != P_VOID) { // Error if no statements in the function if (tree == NULL) fatal("No statements in function with non-void type"); // Check that the last AST operation in the // compound statement was a return statement finalstmt = (tree->op == A_GLUE) ? tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); } // Return an A_FUNCTION node which has the function's nameslot // and the compound statement sub-tree return (mkastunary(A_FUNCTION, type, tree, nameslot)); } // Parse one or more global declarations, either // variables or functions void global_declarations(void) { struct ASTnode *tree; int type; while (1) { // We have to read past the type and identifier // to see either a '(' for a function declaration // or a ',' or ';' for a variable declaration. // Text is filled in by the ident() call. 
type = parse_type(); ident(); if (Token.token == T_LPAREN) { // Parse the function declaration and // generate the assembly code for it tree = function_declaration(type); if (O_dumpAST) { dumpAST(tree, NOLABEL, 0); fprintf(stdout, "\n\n"); } genAST(tree, NOLABEL, 0); // Now free the symbols associated // with this function freeloclsyms(); } else { // Parse the global variable declaration // and skip past the trailing semicolon var_declaration(type, 0, 0); semi(); } // Stop when we have reached EOF if (Token.token == T_EOF) break; } } ================================================ FILE: 25_Function_Arguments/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c void reject_token(struct token *t); int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, int intvalue); struct ASTnode *mkastleaf(int op, int type, int intvalue); struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, int intvalue); void dumpAST(struct ASTnode *n, int label, int parentASTop); // gen.c int genlabel(void); int genAST(struct ASTnode *n, int reg, int parentASTop); void genpreamble(); void genpostamble(); void genfreeregs(); void genglobsym(int id); int genglobstr(char *strvalue); int genprimsize(int type); void genreturn(int reg, int id); // cg.c void cgtextseg(); void cgdataseg(); void freeall_registers(void); void cgpreamble(); void cgpostamble(); void cgfuncpreamble(int id); void cgfuncpostamble(int id); int cgloadint(int value, int type); int cgloadglob(int id, int op); int cgloadlocal(int id, int op); int cgloadglobstr(int id); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdiv(int r1, int r2); int cgshlconst(int r, int val); int cgcall(int id, int numargs); void cgcopyarg(int r, int argposn); int cgstorglob(int r, int id); int cgstorlocal(int r, 
int id); void cgglobsym(int id); void cgglobstr(int l, char *strvalue); int cgcompare_and_set(int ASTop, int r1, int r2); int cgcompare_and_jump(int ASTop, int r1, int r2, int label); void cglabel(int l); void cgjump(int l); int cgwiden(int r, int oldtype, int newtype); int cgprimsize(int type); void cgreturn(int reg, int id); int cgaddress(int id); int cgderef(int r, int type); int cgstorderef(int r1, int r2, int type); int cgnegate(int r); int cginvert(int r); int cglognot(int r); int cgboolean(int r, int op, int label); int cgand(int r1, int r2); int cgor(int r1, int r2); int cgxor(int r1, int r2); int cgshl(int r1, int r2); int cgshr(int r1, int r2); // expr.c struct ASTnode *binexpr(int ptp); // stmt.c struct ASTnode *compound_statement(void); // misc.c void match(int t, char *what); void semi(void); void lbrace(void); void rbrace(void); void lparen(void); void rparen(void); void ident(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c int findglob(char *s); int findlocl(char *s); int findsymbol(char *s); int addglob(char *name, int type, int stype, int endlabel, int size); int addlocl(char *name, int type, int stype, int isparam, int size); void freeloclsyms(void); // decl.c void var_declaration(int type, int islocal, int isparam); struct ASTnode *function_declaration(int type); void global_declarations(void); // types.c int inttype(int type); int parse_type(void); int pointer_to(int type); int value_at(int type); struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op); ================================================ FILE: 25_Function_Arguments/defs.h ================================================ #include #include #include #include // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 #define TEXTLEN 512 // Length of symbols in input #define NSYMBOLS 1024 // Number of symbol table entries // Token types enum { T_EOF, // Binary operators 
T_ASSIGN, T_LOGOR, T_LOGAND, T_OR, T_XOR, T_AMPER, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, T_LSHIFT, T_RSHIFT, T_PLUS, T_MINUS, T_STAR, T_SLASH, // Other operators T_INC, T_DEC, T_INVERT, T_LOGNOT, // Type keywords T_VOID, T_CHAR, T_INT, T_LONG, // Other keywords T_IF, T_ELSE, T_WHILE, T_FOR, T_RETURN, // Structural tokens T_INTLIT, T_STRLIT, T_SEMI, T_IDENT, T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, T_LBRACKET, T_RBRACKET, T_COMMA }; // Token structure struct token { int token; // Token type, from the enum list above int intvalue; // For T_INTLIT, the integer value }; // AST node types. The first few line up // with the related tokens enum { A_ASSIGN= 1, A_LOGOR, A_LOGAND, A_OR, A_XOR, A_AND, A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_LSHIFT, A_RSHIFT, A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_INTLIT, A_STRLIT, A_IDENT, A_GLUE, A_IF, A_WHILE, A_FUNCTION, A_WIDEN, A_RETURN, A_FUNCCALL, A_DEREF, A_ADDR, A_SCALE, A_PREINC, A_PREDEC, A_POSTINC, A_POSTDEC, A_NEGATE, A_INVERT, A_LOGNOT, A_TOBOOL }; // Primitive types enum { P_NONE, P_VOID, P_CHAR, P_INT, P_LONG, P_VOIDPTR, P_CHARPTR, P_INTPTR, P_LONGPTR }; // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates int rvalue; // True if the node is an rvalue struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; union { // For A_INTLIT, the integer value int intvalue; // For A_IDENT, the symbol slot number int id; // For A_FUNCTION, the symbol slot number int size; // For A_SCALE, the size to scale by } v; // For A_FUNCCALL, the symbol slot number }; #define NOREG -1 // Use NOREG when the AST generation // functions have no register to return #define NOLABEL 0 // Use NOLABEL when we have no label to // pass to genAST() // Structural types enum { S_VARIABLE, S_FUNCTION, S_ARRAY }; // Storage classes enum { C_GLOBAL = 1, // Globally visible symbol C_LOCAL, // Locally visible symbol 
C_PARAM // Locally visible function parameter }; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol int stype; // Structural type for the symbol int class; // Storage class for the symbol int endlabel; // For S_FUNCTIONs, the end label int size; // Number of elements in the symbol int posn; // For locals, either the negative offset // from stack base pointer, or register id #define nelems posn // For functions, # of params // For structs, # of fields }; ================================================ FILE: 25_Function_Arguments/expr.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of expressions // Copyright (c) 2019 Warren Toomey, GPL3 // expression_list: // | expression // | expression ',' expression_list // ; // Parse a list of zero or more comma-separated expressions and // return an AST composed of A_GLUE nodes with the left-hand child // being the sub-tree of previous expressions (or NULL) and the right-hand // child being the next expression. Each A_GLUE node will have size field // set to the number of expressions in the tree at this point. If no // expressions are parsed, NULL is returned static struct ASTnode *expression_list(void) { struct ASTnode *tree = NULL; struct ASTnode *child = NULL; int exprcount = 0; // Loop until the final right parentheses while (Token.token != T_RPAREN) { // Parse the next expression and increment the expression count child = binexpr(0); exprcount++; // Build an A_GLUE AST node with the previous tree as the left child // and the new expression as the right child. Store the expression count. 
tree = mkastnode(A_GLUE, P_NONE, tree, NULL, child, exprcount); // Must have a ',' or ')' at this point switch (Token.token) { case T_COMMA: scan(&Token); break; case T_RPAREN: break; default: fatald("Unexpected token in expression list", Token.token); } } // Return the tree of expressions return (tree); } // Parse a function call and return its AST static struct ASTnode *funccall(void) { struct ASTnode *tree; int id; // Check that the identifier has been defined as a function, // then make a leaf node for it. if ((id = findsymbol(Text)) == -1 || Symtable[id].stype != S_FUNCTION) { fatals("Undeclared function", Text); } // Get the '(' lparen(); // Parse the argument expression list tree = expression_list(); // XXX Check type of each argument against the function's prototype // Build the function call AST node. Store the // function's return type as this node's type. // Also record the function's symbol-id tree = mkastunary(A_FUNCCALL, Symtable[id].type, tree, id); // Get the ')' rparen(); return (tree); } // Parse the index into an array and return an AST tree for it static struct ASTnode *array_access(void) { struct ASTnode *left, *right; int id; // Check that the identifier has been defined as an array // then make a leaf node for it that points at the base if ((id = findsymbol(Text)) == -1 || Symtable[id].stype != S_ARRAY) { fatals("Undeclared array", Text); } left = mkastleaf(A_ADDR, Symtable[id].type, id); // Get the '[' scan(&Token); // Parse the following expression right = binexpr(0); // Get the ']' match(T_RBRACKET, "]"); // Ensure that this is of int type if (!inttype(right->type)) fatal("Array index is not of integer type"); // Scale the index by the size of the element's type right = modify_type(right, left->type, A_ADD); // Return an AST tree where the array's base has the offset // added to it, and dereference the element. Still an lvalue // at this point. 
left = mkastnode(A_ADD, Symtable[id].type, left, NULL, right, 0); left = mkastunary(A_DEREF, value_at(left->type), left, 0); return (left); } // Parse a postfix expression and return // an AST node representing it. The // identifier is already in Text. static struct ASTnode *postfix(void) { struct ASTnode *n; int id; // Scan in the next token to see if we have a postfix expression scan(&Token); // Function call if (Token.token == T_LPAREN) return (funccall()); // An array reference if (Token.token == T_LBRACKET) return (array_access()); // A variable. Check that the variable exists. id = findsymbol(Text); if (id == -1 || Symtable[id].stype != S_VARIABLE) fatals("Unknown variable", Text); switch (Token.token) { // Post-increment: skip over the token case T_INC: scan(&Token); n = mkastleaf(A_POSTINC, Symtable[id].type, id); break; // Post-decrement: skip over the token case T_DEC: scan(&Token); n = mkastleaf(A_POSTDEC, Symtable[id].type, id); break; // Just a variable reference default: n = mkastleaf(A_IDENT, Symtable[id].type, id); } return (n); } // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; int id; switch (Token.token) { case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. // Make it a P_CHAR if it's within the P_CHAR range if ((Token.intvalue) >= 0 && (Token.intvalue < 256)) n = mkastleaf(A_INTLIT, P_CHAR, Token.intvalue); else n = mkastleaf(A_INTLIT, P_INT, Token.intvalue); break; case T_STRLIT: // For a STRLIT token, generate the assembly for it. // Then make a leaf AST node for it. id is the string's label. id = genglobstr(Text); n = mkastleaf(A_STRLIT, P_CHARPTR, id); break; case T_IDENT: return (postfix()); case T_LPAREN: // Beginning of a parenthesised expression, skip the '('. 
// Scan in the expression and the right parenthesis scan(&Token); n = binexpr(0); rparen(); return (n); default: fatald("Expecting a primary expression, got token", Token.token); } // Scan in the next token and return the leaf node scan(&Token); return (n); } // Convert a binary operator token into a binary AST operation. // We rely on a 1:1 mapping from token to AST operation static int binastop(int tokentype) { if (tokentype > T_EOF && tokentype <= T_SLASH) return (tokentype); fatald("Syntax error, token", tokentype); return (0); // Keep -Wall happy } // Return true if a token is right-associative, // false otherwise. static int rightassoc(int tokentype) { if (tokentype == T_ASSIGN) return (1); return (0); } // Operator precedence for each token. Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 20, 30, // T_EOF, T_ASSIGN, T_LOGOR, T_LOGAND 40, 50, 60, // T_OR, T_XOR, T_AMPER 70, 70, // T_EQ, T_NE 80, 80, 80, 80, // T_LT, T_GT, T_LE, T_GE 90, 90, // T_LSHIFT, T_RSHIFT 100, 100, // T_PLUS, T_MINUS 110, 110 // T_STAR, T_SLASH }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec; if (tokentype > T_SLASH) fatald("Token with no precedence in op_precedence:", tokentype); prec = OpPrec[tokentype]; if (prec == 0) fatald("Syntax error, token", tokentype); return (prec); } // prefix_expression: primary // | '*' prefix_expression // | '&' prefix_expression // | '-' prefix_expression // | '++' prefix_expression // | '--' prefix_expression // ; // Parse a prefix expression and return // a sub-tree representing it. 
struct ASTnode *prefix(void) { struct ASTnode *tree; switch (Token.token) { case T_AMPER: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Ensure that it's an identifier if (tree->op != A_IDENT) fatal("& operator must be followed by an identifier"); // Now change the operator to A_ADDR and the type to // a pointer to the original type tree->op = A_ADDR; tree->type = pointer_to(tree->type); break; case T_STAR: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's either another deref or an // identifier if (tree->op != A_IDENT && tree->op != A_DEREF) fatal("* operator must be followed by an identifier or *"); // Prepend an A_DEREF operation to the tree tree = mkastunary(A_DEREF, value_at(tree->type), tree, 0); break; case T_MINUS: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_NEGATE operation to the tree and // make the child an rvalue. Because chars are unsigned, // also widen this to int so that it's signed tree->rvalue = 1; tree = modify_type(tree, P_INT, 0); tree = mkastunary(A_NEGATE, tree->type, tree, 0); break; case T_INVERT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_INVERT operation to the tree and // make the child an rvalue. tree->rvalue = 1; tree = mkastunary(A_INVERT, tree->type, tree, 0); break; case T_LOGNOT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_LOGNOT operation to the tree and // make the child an rvalue. 
tree->rvalue = 1; tree = mkastunary(A_LOGNOT, tree->type, tree, 0); break; case T_INC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("++ operator must be followed by an identifier"); // Prepend an A_PREINC operation to the tree tree = mkastunary(A_PREINC, tree->type, tree, 0); break; case T_DEC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("-- operator must be followed by an identifier"); // Prepend an A_PREDEC operation to the tree tree = mkastunary(A_PREDEC, tree->type, tree, 0); break; default: tree = primary(); } return (tree); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; struct ASTnode *ltemp, *rtemp; int ASTop; int tokentype; // Get the tree on the left. // Fetch the next token at the same time. 
left = prefix(); // If we hit one of several terminating tokens, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA) { left->rvalue = 1; return (left); } // While the precedence of this token is more than that of the // previous token precedence, or it's right associative and // equal to the previous token's precedence while ((op_precedence(tokentype) > ptp) || (rightassoc(tokentype) && op_precedence(tokentype) == ptp)) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Determine the operation to be performed on the sub-trees ASTop = binastop(tokentype); if (ASTop == A_ASSIGN) { // Assignment // Make the right tree into an rvalue right->rvalue = 1; // Ensure the right's type matches the left right = modify_type(right, left->type, 0); if (left == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree. However, switch // left and right around, so that the right expression's // code will be generated before the left expression ltemp = left; left = right; right = ltemp; } else { // We are not doing an assignment, so both trees should be rvalues // Convert both trees into rvalue if they are lvalue trees left->rvalue = 1; right->rvalue = 1; // Ensure the two types are compatible by trying // to modify each tree to match the other's type. ltemp = modify_type(left, right->type, ASTop); rtemp = modify_type(right, left->type, ASTop); if (ltemp == NULL && rtemp == NULL) fatal("Incompatible types in binary expression"); if (ltemp != NULL) left = ltemp; if (rtemp != NULL) right = rtemp; } // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. left = mkastnode(binastop(tokentype), left->type, left, NULL, right, 0); // Update the details of the current token. 
// If we hit a terminating token, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA) { left->rvalue = 1; return (left); } } // Return the tree we have when the precedence // is the same or lower left->rvalue = 1; return (left); } ================================================ FILE: 25_Function_Arguments/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number int genlabel(void) { static int id = 1; return (id++); } // Generate the code for an IF statement // and an optional ELSE clause static int genIF(struct ASTnode *n) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. // When there is no ELSE clause, Lfalse _is_ // the ending label! Lfalse = genlabel(); if (n->right) Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. genAST(n->left, Lfalse, n->op); genfreeregs(); // Generate the true compound statement genAST(n->mid, NOLABEL, n->op); genfreeregs(); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOLABEL, n->op); genfreeregs(); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = genlabel(); Lend = genlabel(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. 
genAST(n->left, Lend, n->op); genfreeregs(); // Generate the compound statement for the body genAST(n->right, NOLABEL, n->op); genfreeregs(); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Generate the code to copy the arguments of a // function call to its parameters, then call the // function itself. Return the register that holds // the function's return value. static int gen_funccall(struct ASTnode *n) { struct ASTnode *gluetree = n->left; int reg; int numargs=0; // If there is a list of arguments, walk this list // from the last argument (right-hand child) to the // first while (gluetree) { // Calculate the expression's value reg = genAST(gluetree->right, NOLABEL, gluetree->op); // Copy this into the n'th function parameter: size is 1, 2, 3, ... cgcopyarg(reg, gluetree->v.size); // Keep the first (highest) number of arguments if (numargs==0) numargs= gluetree->v.size; genfreeregs(); gluetree = gluetree->left; } // Call the function, clean up the stack (based on numargs), // and return its result return (cgcall(n->v.id, numargs)); } // Given an AST, an optional label, and the AST op // of the parent, generate assembly code recursively. // Return the register id with the tree's final value. 
int genAST(struct ASTnode *n, int label, int parentASTop) { int leftreg, rightreg; // We have some specific AST node handling at the top // so that we don't evaluate the child sub-trees immediately switch (n->op) { case A_IF: return (genIF(n)); case A_WHILE: return (genWHILE(n)); case A_FUNCCALL: return (gen_funccall(n)); case A_GLUE: // Do each child statement, and free the // registers after each child genAST(n->left, NOLABEL, n->op); genfreeregs(); genAST(n->right, NOLABEL, n->op); genfreeregs(); return (NOREG); case A_FUNCTION: // Generate the function's preamble before the code // in the child sub-tree cgfuncpreamble(n->v.id); genAST(n->left, NOLABEL, n->op); cgfuncpostamble(n->v.id); return (NOREG); } // General AST node handling below // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, NOLABEL, n->op); if (n->right) rightreg = genAST(n->right, NOLABEL, n->op); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdiv(leftreg, rightreg)); case A_AND: return (cgand(leftreg, rightreg)); case A_OR: return (cgor(leftreg, rightreg)); case A_XOR: return (cgxor(leftreg, rightreg)); case A_LSHIFT: return (cgshl(leftreg, rightreg)); case A_RSHIFT: return (cgshr(leftreg, rightreg)); case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, compare registers // and set one to 1 or 0 based on the comparison. 
if (parentASTop == A_IF || parentASTop == A_WHILE) return (cgcompare_and_jump(n->op, leftreg, rightreg, label)); else return (cgcompare_and_set(n->op, leftreg, rightreg)); case A_INTLIT: return (cgloadint(n->v.intvalue, n->type)); case A_STRLIT: return (cgloadglobstr(n->v.id)); case A_IDENT: // Load our value if we are an rvalue // or we are being dereferenced if (n->rvalue || parentASTop == A_DEREF) { if (Symtable[n->v.id].class == C_GLOBAL) { return (cgloadglob(n->v.id, n->op)); } else { return (cgloadlocal(n->v.id, n->op)); } } else return (NOREG); case A_ASSIGN: // Are we assigning to an identifier or through a pointer? switch (n->right->op) { case A_IDENT: if (Symtable[n->right->v.id].class == C_GLOBAL) return (cgstorglob(leftreg, n->right->v.id)); else return (cgstorlocal(leftreg, n->right->v.id)); case A_DEREF: return (cgstorderef(leftreg, rightreg, n->right->type)); default: fatald("Can't A_ASSIGN in genAST(), op", n->op); } case A_WIDEN: // Widen the child's type to the parent's type return (cgwiden(leftreg, n->left->type, n->type)); case A_RETURN: cgreturn(leftreg, Functionid); return (NOREG); case A_ADDR: return (cgaddress(n->v.id)); case A_DEREF: // If we are an rvalue, dereference to get the value we point at, // otherwise leave it for A_ASSIGN to store through the pointer if (n->rvalue) return (cgderef(leftreg, n->left->type)); else return (leftreg); case A_SCALE: // Small optimisation: use shift if the // scale value is a known power of two switch (n->v.size) { case 2: return (cgshlconst(leftreg, 1)); case 4: return (cgshlconst(leftreg, 2)); case 8: return (cgshlconst(leftreg, 3)); default: // Load a register with the size and // multiply the leftreg by this size rightreg = cgloadint(n->v.size, P_INT); return (cgmul(leftreg, rightreg)); } case A_POSTINC: // Load the variable's value into a register, // then increment it return (cgloadglob(n->v.id, n->op)); case A_POSTDEC: // Load the variable's value into a register, // then decrement it return 
(cgloadglob(n->v.id, n->op)); case A_PREINC: // Load and increment the variable's value into a register return (cgloadglob(n->left->v.id, n->op)); case A_PREDEC: // Load and decrement the variable's value into a register return (cgloadglob(n->left->v.id, n->op)); case A_NEGATE: return (cgnegate(leftreg)); case A_INVERT: return (cginvert(leftreg)); case A_LOGNOT: return (cglognot(leftreg)); case A_TOBOOL: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, set the register // to 0 or 1 based on it's zeroeness or non-zeroeness return (cgboolean(leftreg, parentASTop, label)); default: fatald("Unknown AST operator", n->op); } return (NOREG); // Keep -Wall happy } void genpreamble() { cgpreamble(); } void genpostamble() { cgpostamble(); } void genfreeregs() { freeall_registers(); } void genglobsym(int id) { cgglobsym(id); } int genglobstr(char *strvalue) { int l = genlabel(); cgglobstr(l, strvalue); return (l); } int genprimsize(int type) { return (cgprimsize(type)); } ================================================ FILE: 25_Function_Arguments/lib/printint.c ================================================ #include void printint(long x) { printf("%ld\n", x); } void printchar(long x) { putc((char)(x & 0x7f), stdout); } ================================================ FILE: 25_Function_Arguments/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Initialise global variables static void init() { Line = 1; Putback = '\n'; Globs = 0; Locls = NSYMBOLS - 1; O_dumpAST = 0; } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s [-T] infile\n", prog); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. 
Open up the input // file and call scanfile() to scan the tokens in it. int main(int argc, char *argv[]) { int i; // Initialise the globals init(); // Scan for command-line options for (i = 1; i < argc; i++) { if (*argv[i] != '-') break; for (int j = 1; argv[i][j]; j++) { switch (argv[i][j]) { case 'T': O_dumpAST = 1; break; default: usage(argv[0]); } } } // Ensure we have an input file argument if (i >= argc) usage(argv[0]); // Open up the input file if ((Infile = fopen(argv[i], "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", argv[i], strerror(errno)); exit(1); } // Create the output file if ((Outfile = fopen("out.s", "w")) == NULL) { fprintf(stderr, "Unable to create out.s: %s\n", strerror(errno)); exit(1); } // For now, ensure that printint() and printchar() are defined addglob("printint", P_INT, S_FUNCTION, 0, 0); addglob("printchar", P_VOID, S_FUNCTION, 0, 0); scan(&Token); // Get the first token from the input genpreamble(); // Output the preamble global_declarations(); // Parse the global declarations genpostamble(); // Output the postamble fclose(Outfile); // Close the output file and exit return (0); } ================================================ FILE: 25_Function_Arguments/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current token is t, // and fetch the next token. 
// Return the position of character c
// in string s, or -1 if c not found.
// A NUL or a negative value (such as EOF) is never treated as found:
// strchr() would otherwise match the string's terminating NUL and
// report the string's length as a valid position.
static int chrpos(char *s, int c) {
  char *p;

  if (c <= 0)
    return (-1);

  p = strchr(s, c);
  return (p ? p - s : -1);
}
Return the first // character we do need to deal with. static int skip(void) { int c; c = next(); while (' ' == c || '\t' == c || '\n' == c || '\r' == c || '\f' == c) { c = next(); } return (c); } // Return the next character from a character // or string literal static int scanch(void) { int c; // Get the next input character and interpret // metacharacters that start with a backslash c = next(); if (c == '\\') { switch (c = next()) { case 'a': return '\a'; case 'b': return '\b'; case 'f': return '\f'; case 'n': return '\n'; case 'r': return '\r'; case 't': return '\t'; case 'v': return '\v'; case '\\': return '\\'; case '"': return '"'; case '\'': return '\''; default: fatalc("unknown escape sequence", c); } } return (c); // Just an ordinary old character! } // Scan and return an integer literal // value from the input file. static int scanint(int c) { int k, val = 0; // Convert each character into an int value while ((k = chrpos("0123456789", c)) >= 0) { val = val * 10 + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan in a string literal from the input file, // and store it in buf[]. Return the length of // the string. static int scanstr(char *buf) { int i, c; // Loop while we have enough buffer space for (i = 0; i < TEXTLEN - 1; i++) { // Get the next char and append to buf // Return when we hit the ending double quote if ((c = scanch()) == '"') { buf[i] = 0; return (i); } buf[i] = c; } // Ran out of buf[] space fatal("String literal too long"); return (0); } // Scan an identifier from the input file and // store it in buf[]. 
Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { fatal("Identifier too long"); } else if (i < lim - 1) { buf[i++] = c; } c = next(); } // We hit a non-valid character, put it back. // NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. static int keyword(char *s) { switch (*s) { case 'c': if (!strcmp(s, "char")) return (T_CHAR); break; case 'e': if (!strcmp(s, "else")) return (T_ELSE); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'l': if (!strcmp(s, "long")) return (T_LONG); break; case 'r': if (!strcmp(s, "return")) return (T_RETURN); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; case 'v': if (!strcmp(s, "void")) return (T_VOID); break; } return (0); } // A pointer to a rejected token static struct token *Rejtoken = NULL; // Reject the token that we just scanned void reject_token(struct token *t) { if (Rejtoken != NULL) fatal("Can't reject token twice"); Rejtoken = t; } // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. 
int scan(struct token *t) { int c, tokentype; // If we have any rejected token, return it if (Rejtoken != NULL) { t = Rejtoken; Rejtoken = NULL; return (1); } // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': if ((c = next()) == '+') { t->token = T_INC; } else { putback(c); t->token = T_PLUS; } break; case '-': if ((c = next()) == '-') { t->token = T_DEC; } else { putback(c); t->token = T_MINUS; } break; case '*': t->token = T_STAR; break; case '/': t->token = T_SLASH; break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case '[': t->token = T_LBRACKET; break; case ']': t->token = T_RBRACKET; break; case '~': t->token = T_INVERT; break; case '^': t->token = T_XOR; break; case ',': t->token = T_COMMA; break; case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { putback(c); t->token = T_LOGNOT; } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else if (c == '<') { t->token = T_LSHIFT; } else { putback(c); t->token = T_LT; } break; case '>': if ((c = next()) == '=') { t->token = T_GE; } else if (c == '>') { t->token = T_RSHIFT; } else { putback(c); t->token = T_GT; } break; case '&': if ((c = next()) == '&') { t->token = T_LOGAND; } else { putback(c); t->token = T_AMPER; } break; case '|': if ((c = next()) == '|') { t->token = T_LOGOR; } else { putback(c); t->token = T_OR; } break; case '\'': // If it's a quote, scan in the // literal character value and // the trailing quote t->intvalue = scanch(); t->token = T_INTLIT; if (next() != '\'') fatal("Expected '\\'' at end of char literal"); break; case '"': // Scan in a literal string scanstr(Text); t->token = T_STRLIT; break; default: // If it's a digit, 
scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if ((tokentype = keyword(Text)) != 0) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token return (1); } ================================================ FILE: 25_Function_Arguments/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // Prototypes static struct ASTnode *single_statement(void); // compound_statement: // empty, i.e. no statement // | statement // | statement statements // ; // // statement: declaration // | expression_statement // | function_call // | if_statement // | while_statement // | for_statement // | return_statement // ; // if_statement: if_head // | if_head 'else' compound_statement // ; // // if_head: 'if' '(' true_false_expression ')' compound_statement ; // // Parse an IF statement including any // optional ELSE clause and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. 
condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, 0); rparen(); // Get the AST for the compound statement trueAST = compound_statement(); // If we have an 'else', skip it // and get the AST for the compound statement if (Token.token == T_ELSE) { scan(&Token); falseAST = compound_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, P_NONE, condAST, trueAST, falseAST, 0)); } // while_statement: 'while' '(' true_false_expression ')' compound_statement ; // // Parse a WHILE statement and return its AST static struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, 0); rparen(); // Get the AST for the compound statement bodyAST = compound_statement(); // Build and return the AST for this statement return (mkastnode(A_WHILE, P_NONE, condAST, NULL, bodyAST, 0)); } // for_statement: 'for' '(' preop_statement ';' // true_false_expression ';' // postop_statement ')' compound_statement ; // // preop_statement: statement (for now) // postop_statement: statement (for now) // // Parse a FOR statement and return its AST static struct ASTnode *for_statement(void) { struct ASTnode *condAST, *bodyAST; struct ASTnode *preopAST, *postopAST; struct ASTnode *tree; // Ensure we have 'for' '(' match(T_FOR, "for"); lparen(); // Get the pre_op statement and the ';' preopAST = single_statement(); semi(); // Get the condition and the ';'. // Force a non-comparison to be boolean // the tree's operation is a comparison. 
condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, 0); semi(); // Get the post_op statement and the ')' postopAST = single_statement(); rparen(); // Get the compound statement which is the body bodyAST = compound_statement(); // For now, all four sub-trees have to be non-NULL. // Later on, we'll change the semantics for when some are missing // Glue the compound statement and the postop tree tree = mkastnode(A_GLUE, P_NONE, bodyAST, NULL, postopAST, 0); // Make a WHILE loop with the condition and this new body tree = mkastnode(A_WHILE, P_NONE, condAST, NULL, tree, 0); // And glue the preop tree to the A_WHILE tree return (mkastnode(A_GLUE, P_NONE, preopAST, NULL, tree, 0)); } // return_statement: 'return' '(' expression ')' ; // // Parse a return statement and return its AST static struct ASTnode *return_statement(void) { struct ASTnode *tree; // Can't return a value if function returns P_VOID if (Symtable[Functionid].type == P_VOID) fatal("Can't return from a void function"); // Ensure we have 'return' '(' match(T_RETURN, "return"); lparen(); // Parse the following expression tree = binexpr(0); // Ensure this is compatible with the function's type tree = modify_type(tree, Symtable[Functionid].type, 0); if (tree == NULL) fatal("Incompatible type to return"); // Add on the A_RETURN node tree = mkastunary(A_RETURN, P_NONE, tree, 0); // Get the ')' rparen(); return (tree); } // Parse a single statement and return its AST static struct ASTnode *single_statement(void) { int type; switch (Token.token) { case T_CHAR: case T_INT: case T_LONG: // The beginning of a variable declaration. // Parse the type and get the identifier. 
// Then parse the rest of the declaration // and skip over the semicolon type = parse_type(); ident(); var_declaration(type, 1, 0); semi(); return (NULL); // No AST generated here case T_IF: return (if_statement()); case T_WHILE: return (while_statement()); case T_FOR: return (for_statement()); case T_RETURN: return (return_statement()); default: // For now, see if this is an expression. // This catches assignment statements. return (binexpr(0)); } return (NULL); // Keep -Wall happy } // Parse a compound statement // and return its AST struct ASTnode *compound_statement(void) { struct ASTnode *left = NULL; struct ASTnode *tree; // Require a left curly bracket lbrace(); while (1) { // Parse a single statement tree = single_statement(); // Some statements must be followed by a semicolon if (tree != NULL && (tree->op == A_ASSIGN || tree->op == A_RETURN || tree->op == A_FUNCCALL)) semi(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, P_NONE, left, NULL, tree, 0); } // When we hit a right curly bracket, // skip past it and return the AST if (Token.token == T_RBRACE) { rbrace(); return (left); } } } ================================================ FILE: 25_Function_Arguments/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 // Determine if the symbol s is in the global symbol table. // Return its slot position or -1 if not found. // Skip C_PARAM entries int findglob(char *s) { int i; for (i = 0; i < Globs; i++) { if (Symtable[i].class == C_PARAM) continue; if (*s == *Symtable[i].name && !strcmp(s, Symtable[i].name)) return (i); } return (-1); } // Get the position of a new global symbol slot, or die // if we've run out of positions. 
static int newglob(void) { int p; if ((p = Globs++) >= Locls) fatal("Too many global symbols"); return (p); } // Determine if the symbol s is in the local symbol table. // Return its slot position or -1 if not found. int findlocl(char *s) { int i; for (i = Locls + 1; i < NSYMBOLS; i++) { if (*s == *Symtable[i].name && !strcmp(s, Symtable[i].name)) return (i); } return (-1); } // Get the position of a new local symbol slot, or die // if we've run out of positions. static int newlocl(void) { int p; if ((p = Locls--) <= Globs) fatal("Too many local symbols"); return (p); } // Clear all the entries in the // local symbol table void freeloclsyms(void) { Locls = NSYMBOLS - 1; } // Update a symbol at the given slot number in the symbol table. Set up its: // + type: char, int etc. // + structural type: var, function, array etc. // + size: number of elements // + endlabel: if this is a function // + posn: Position information for local symbols static void updatesym(int slot, char *name, int type, int stype, int class, int endlabel, int size, int posn) { if (slot < 0 || slot >= NSYMBOLS) fatal("Invalid symbol slot number in updatesym()"); Symtable[slot].name = strdup(name); Symtable[slot].type = type; Symtable[slot].stype = stype; Symtable[slot].class = class; Symtable[slot].endlabel = endlabel; Symtable[slot].size = size; Symtable[slot].posn = posn; } // Add a global symbol to the symbol table. Set up its: // + type: char, int etc. // + structural type: var, function, array etc. 
// + size: number of elements // + endlabel: if this is a function // Return the slot number in the symbol table int addglob(char *name, int type, int stype, int endlabel, int size) { int slot; // If this is already in the symbol table, return the existing slot if ((slot = findglob(name)) != -1) return (slot); // Otherwise get a new slot, fill it in and // return the slot number slot = newglob(); updatesym(slot, name, type, stype, C_GLOBAL, endlabel, size, 0); genglobsym(slot); return (slot); } // Add a local symbol to the symbol table. Set up its: // + type: char, int etc. // + structural type: var, function, array etc. // + size: number of elements // + isparam: if true, this is a parameter to the function // Return the slot number in the symbol table, -1 if a duplicate entry int addlocl(char *name, int type, int stype, int isparam, int size) { int localslot, globalslot; // If this is already in the symbol table, return an error if ((localslot = findlocl(name)) != -1) return (-1); // Otherwise get a new symbol slot and a position for this local. // Update the local symbol table entry. If this is a parameter, // also create a global C_PARAM entry to build the function's prototype. localslot = newlocl(); if (isparam) { updatesym(localslot, name, type, stype, C_PARAM, 0, size, 0); globalslot = newglob(); updatesym(globalslot, name, type, stype, C_PARAM, 0, size, 0); } else { updatesym(localslot, name, type, stype, C_LOCAL, 0, size, 0); } // Return the local symbol's slot return (localslot); } // Determine if the symbol s is in the symbol table. // Return its slot position or -1 if not found. 
int findsymbol(char *s) {
  int slot;

  // Search the local table first and fall back to the global table
  // only on a miss, so a local name hides a global of the same name
  slot = findlocl(s);
  if (slot == -1)
    slot = findglob(s);
  return (slot);
}

================================================
FILE: 25_Function_Arguments/tests/input01.c
================================================
void main()
{
  printint(12 * 3);
  printint(18 - 2 * 4);
  printint(1 + 2 + 9 - 5/2 + 3*5);
}

================================================
FILE: 25_Function_Arguments/tests/input02.c
================================================
void main()
{
  int fred;
  int jim;
  fred= 5;
  jim= 12;
  printint(fred + jim);
}

================================================
FILE: 25_Function_Arguments/tests/input03.c
================================================
void main()
{
  int x;
  x= 1; printint(x);
  x= x + 1; printint(x);
  x= x + 1; printint(x);
  x= x + 1; printint(x);
  x= x + 1; printint(x);
}

================================================
FILE: 25_Function_Arguments/tests/input04.c
================================================
void main()
{
  int x;
  x= 7 < 9; printint(x);
  x= 7 <= 9; printint(x);
  x= 7 != 9; printint(x);
  x= 7 == 7; printint(x);
  x= 7 >= 7; printint(x);
  x= 7 <= 7; printint(x);
  x= 9 > 7; printint(x);
  x= 9 >= 7; printint(x);
  x= 9 != 7; printint(x);
}

================================================
FILE: 25_Function_Arguments/tests/input05.c
================================================
void main()
{
  int i; int j;
  i=6; j=12;
  if (i < j) {
    printint(i);
  } else {
    printint(j);
  }
}

================================================
FILE: 25_Function_Arguments/tests/input06.c
================================================
void main()
{ int i;
  i=1;
  while (i <= 10) {
    printint(i);
    i= i + 1;
  }
}

================================================
FILE: 25_Function_Arguments/tests/input07.c
================================================
void main()
{
  int i;
  for (i= 1; i <= 10; i= i + 1) {
    printint(i);
  }
}

================================================
FILE: 25_Function_Arguments/tests/input08.c
================================================ void main() { int i; for (i= 1; i <= 10; i= i + 1) { printint(i); } } ================================================ FILE: 25_Function_Arguments/tests/input09.c ================================================ void main() { int i; for (i= 1; i <= 10; i= i + 1) { printint(i); } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { printint(2 * b - a); } } ================================================ FILE: 25_Function_Arguments/tests/input10.c ================================================ void main() { int i; char j; j= 20; printint(j); i= 10; printint(i); for (i= 1; i <= 5; i= i + 1) { printint(i); } for (j= 253; j != 2; j= j + 1) { printint(j); } } ================================================ FILE: 25_Function_Arguments/tests/input11.c ================================================ int main() { int i; char j; long k; i= 10; printint(i); j= 20; printint(j); k= 30; printint(k); for (i= 1; i <= 5; i= i + 1) { printint(i); } for (j= 253; j != 4; j= j + 1) { printint(j); } for (k= 1; k <= 5; k= k + 1) { printint(k); } return(i); printint(12345); return(3); } ================================================ FILE: 25_Function_Arguments/tests/input12.c ================================================ int fred() { return(5); } void main() { int x; x= fred(2); printint(x); } ================================================ FILE: 25_Function_Arguments/tests/input13.c ================================================ int fred() { return(56); } void main() { int dummy; int result; dummy= printint(23); result= fred(10); dummy= printint(result); } ================================================ FILE: 25_Function_Arguments/tests/input14.c ================================================ int fred() { return(20); } int main() { int result; printint(10); result= fred(15); printint(result); printint(fred(15)+10); return(0); } ================================================ FILE: 
25_Function_Arguments/tests/input15.c ================================================ int main() { char a; char *b; char c; int d; int *e; int f; a= 18; printint(a); b= &a; c= *b; printint(c); d= 12; printint(d); e= &d; f= *e; printint(f); return(0); } ================================================ FILE: 25_Function_Arguments/tests/input16.c ================================================ int c; int d; int *e; int f; int main() { c= 12; d=18; printint(c); e= &c + 1; f= *e; printint(f); return(0); } ================================================ FILE: 25_Function_Arguments/tests/input17.c ================================================ int main() { char a; char *b; int d; int *e; b= &a; *b= 19; printint(a); e= &d; *e= 12; printint(d); return(0); } ================================================ FILE: 25_Function_Arguments/tests/input18.c ================================================ int main() { int a; int b; a= b= 34; printint(a); printint(b); return(0); } ================================================ FILE: 25_Function_Arguments/tests/input18a.c ================================================ int a; int *b; char c; char *d; int main() { b= &a; *b= 15; printint(a); d= &c; *d= 16; printint(c); return(0); } ================================================ FILE: 25_Function_Arguments/tests/input19.c ================================================ int a; int b; int c; int d; int e; int main() { a= 2; b= 4; c= 3; d= 2; e= (a+b) * (c+d); printint(e); return(0); } ================================================ FILE: 25_Function_Arguments/tests/input20.c ================================================ int a; int b[25]; int main() { b[3]= 12; a= b[3]; printint(a); return(0); } ================================================ FILE: 25_Function_Arguments/tests/input21.c ================================================ char c; char *str; int main() { c= '\n'; printint(c); for (str= "Hello world\n"; *str != 0; str= str + 1) { printchar(*str); } return(0); } 
================================================ FILE: 25_Function_Arguments/tests/input22.c ================================================ char a; char b; char c; int d; int e; int f; long g; long h; long i; int main() { b= 5; c= 7; a= b + c++; printint(a); e= 5; f= 7; d= e + f++; printint(d); h= 5; i= 7; g= h + i++; printint(g); a= b-- + c; printint(a); d= e-- + f; printint(d); g= h-- + i; printint(g); a= ++b + c; printint(a); d= ++e + f; printint(d); g= ++h + i; printint(g); a= b * --c; printint(a); d= e * --f; printint(d); g= h * --i; printint(g); return(0); } ================================================ FILE: 25_Function_Arguments/tests/input23.c ================================================ char *str; int x; int main() { x= -23; printint(x); printint(-10 * -10); x= 1; x= ~x; printint(x); x= 2 > 5; printint(x); x= !x; printint(x); x= !x; printint(x); x= 13; if (x) { printint(13); } x= 0; if (!x) { printint(14); } for (str= "Hello world\n"; *str; str++) { printchar(*str); } return(0); } ================================================ FILE: 25_Function_Arguments/tests/input24.c ================================================ int a; int b; int c; int main() { a= 42; b= 19; printint(a & b); printint(a | b); printint(a ^ b); printint(1 << 3); printint(63 >> 3); return(0); } ================================================ FILE: 25_Function_Arguments/tests/input25.c ================================================ int a; int b; int c; int main() { char z; int y; int x; x= 10; y= 20; z= 30; printint(x); printint(y); printint(z); a= 5; b= 15; c= 25; printint(a); printint(b); printint(c); return(0); } ================================================ FILE: 25_Function_Arguments/tests/input26.c ================================================ int main(int a, char b, long c, int d, int e, int f, int g, int h) { int i; int j; int k; a= 13; printint(a); b= 23; printint(b); c= 34; printint(c); d= 44; printint(d); e= 54; printint(e); f= 64; printint(f); g= 74; 
printint(g); h= 84; printint(h); i= 94; printint(i); j= 95; printint(j); k= 96; printint(k); return(0); } ================================================ FILE: 25_Function_Arguments/tests/input27.c ================================================ int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printint(a); printint(b); printint(c); printint(d); printint(e); printint(f); printint(g); printint(h); return(0); } int param5(int a, int b, int c, int d, int e) { printint(a); printint(b); printint(c); printint(d); printint(e); return(0); } int param2(int a, int b) { int c; int d; int e; c= 3; d= 4; e= 5; printint(a); printint(b); printint(c); printint(d); printint(e); return(0); } int param0() { int a; int b; int c; int d; int e; a= 1; b= 2; c= 3; d= 4; e= 5; printint(a); printint(b); printint(c); printint(d); printint(e); return(0); } int main() { param8(1,2,3,4,5,6,7,8); param5(1,2,3,4,5); param2(1,2); param0(); return(0); } ================================================ FILE: 25_Function_Arguments/tests/input28.c ================================================ int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printint(a); printint(b); printint(c); printint(d); printint(e); printint(f); printint(g); printint(h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printint(x); return(0); } ================================================ FILE: 25_Function_Arguments/tests/mktests ================================================ #!/bin/sh # Make the output files for each test for i in input*c do if [ ! 
-f "out.$i" ] then cc -o out $i ../lib/printint.c ./out > out.$i rm -f out fi done ================================================ FILE: 25_Function_Arguments/tests/out.input01.c ================================================ 36 10 25 ================================================ FILE: 25_Function_Arguments/tests/out.input02.c ================================================ 17 ================================================ FILE: 25_Function_Arguments/tests/out.input03.c ================================================ 1 2 3 4 5 ================================================ FILE: 25_Function_Arguments/tests/out.input04.c ================================================ 1 1 1 1 1 1 1 1 1 ================================================ FILE: 25_Function_Arguments/tests/out.input05.c ================================================ 6 ================================================ FILE: 25_Function_Arguments/tests/out.input06.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 25_Function_Arguments/tests/out.input07.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 25_Function_Arguments/tests/out.input08.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 25_Function_Arguments/tests/out.input09.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 25_Function_Arguments/tests/out.input10.c ================================================ 20 10 1 2 3 4 5 253 254 255 0 1 ================================================ FILE: 25_Function_Arguments/tests/out.input11.c ================================================ 10 20 30 1 2 3 4 5 253 254 255 0 1 2 3 1 2 3 4 5 ================================================ FILE: 25_Function_Arguments/tests/out.input12.c 
================================================ 5 ================================================ FILE: 25_Function_Arguments/tests/out.input13.c ================================================ 23 56 ================================================ FILE: 25_Function_Arguments/tests/out.input14.c ================================================ 10 20 30 ================================================ FILE: 25_Function_Arguments/tests/out.input15.c ================================================ 18 18 12 12 ================================================ FILE: 25_Function_Arguments/tests/out.input16.c ================================================ 12 18 ================================================ FILE: 25_Function_Arguments/tests/out.input17.c ================================================ 19 12 ================================================ FILE: 25_Function_Arguments/tests/out.input18.c ================================================ 34 34 ================================================ FILE: 25_Function_Arguments/tests/out.input18a.c ================================================ 15 16 ================================================ FILE: 25_Function_Arguments/tests/out.input19.c ================================================ 30 ================================================ FILE: 25_Function_Arguments/tests/out.input20.c ================================================ 12 ================================================ FILE: 25_Function_Arguments/tests/out.input21.c ================================================ 10 Hello world ================================================ FILE: 25_Function_Arguments/tests/out.input22.c ================================================ 12 12 12 13 13 13 13 13 13 35 35 35 ================================================ FILE: 25_Function_Arguments/tests/out.input23.c ================================================ -23 100 -2 0 1 0 13 14 Hello world ================================================ 
FILE: 25_Function_Arguments/tests/out.input24.c ================================================ 2 59 57 8 7 ================================================ FILE: 25_Function_Arguments/tests/out.input25.c ================================================ 10 20 30 5 15 25 ================================================ FILE: 25_Function_Arguments/tests/out.input26.c ================================================ 13 23 34 44 54 64 74 84 94 95 96 ================================================ FILE: 25_Function_Arguments/tests/out.input27.c ================================================ 1 2 3 4 5 6 7 8 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 ================================================ FILE: 25_Function_Arguments/tests/out.input28.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 25_Function_Arguments/tests/runtests ================================================ #!/bin/sh # Run each test and compare # against known good output if [ ! -f ../comp1 ] then echo "Need to build ../comp1 first!"; exit 1 fi for i in input* do if [ ! -f "out.$i" ] then echo "Can't run test on $i, no output file!" else echo -n $i ../comp1 $i cc -o out out.s ../lib/printint.c ./out > trial.$i cmp -s "out.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo else echo ": OK" fi rm -f out out.s "trial.$i" fi done ================================================ FILE: 25_Function_Arguments/tests/runtestsn ================================================ #!/bin/sh # Run each test and compare # against known good output if [ ! -f ../compn ] then echo "Need to build ../compn first!"; exit 1 fi for i in input* do if [ ! -f "out.$i" ] then echo "Can't run test on $i, no output file!" else echo -n $i ../compn $i nasm -f elf64 out.s cc -no-pie -Wall -o out out.o ../lib/printint.c ./out > trial.$i cmp -s "out.$i" "trial.$i" if [ "$?" 
-eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo else echo ": OK" fi rm -f out out.o out.s "trial.$i" fi done ================================================ FILE: 25_Function_Arguments/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->type = type; n->left = left; n->mid = mid; n->right = right; n->v.intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, int intvalue) { return (mkastnode(op, type, NULL, NULL, NULL, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, int intvalue) { return (mkastnode(op, type, left, NULL, NULL, intvalue)); } // Generate and return a new label number // just for AST dumping purposes static int gendumplabel(void) { static int id = 1; return (id++); } // Given an AST tree, print it out and follow the // traversal of the tree that genAST() follows void dumpAST(struct ASTnode *n, int label, int level) { int Lfalse, Lstart, Lend; switch (n->op) { case A_IF: Lfalse = gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_IF"); if (n->right) { Lend = gendumplabel(); fprintf(stdout, ", end L%d", Lend); } fprintf(stdout, "\n"); dumpAST(n->left, Lfalse, level + 2); dumpAST(n->mid, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); return; case A_WHILE: Lstart = gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_WHILE, start 
L%d\n", Lstart); Lend = gendumplabel(); dumpAST(n->left, Lend, level + 2); dumpAST(n->right, NOLABEL, level + 2); return; } // Reset level to -2 for A_GLUE if (n->op == A_GLUE) level = -2; // General AST node handling if (n->left) dumpAST(n->left, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); for (int i = 0; i < level; i++) fprintf(stdout, " "); switch (n->op) { case A_GLUE: fprintf(stdout, "\n\n"); return; case A_FUNCTION: fprintf(stdout, "A_FUNCTION %s\n", Symtable[n->v.id].name); return; case A_ADD: fprintf(stdout, "A_ADD\n"); return; case A_SUBTRACT: fprintf(stdout, "A_SUBTRACT\n"); return; case A_MULTIPLY: fprintf(stdout, "A_MULTIPLY\n"); return; case A_DIVIDE: fprintf(stdout, "A_DIVIDE\n"); return; case A_EQ: fprintf(stdout, "A_EQ\n"); return; case A_NE: fprintf(stdout, "A_NE\n"); return; case A_LT: fprintf(stdout, "A_LE\n"); return; case A_GT: fprintf(stdout, "A_GT\n"); return; case A_LE: fprintf(stdout, "A_LE\n"); return; case A_GE: fprintf(stdout, "A_GE\n"); return; case A_INTLIT: fprintf(stdout, "A_INTLIT %d\n", n->v.intvalue); return; case A_STRLIT: fprintf(stdout, "A_STRLIT rval label L%d\n", n->v.id); return; case A_IDENT: if (n->rvalue) fprintf(stdout, "A_IDENT rval %s\n", Symtable[n->v.id].name); else fprintf(stdout, "A_IDENT %s\n", Symtable[n->v.id].name); return; case A_ASSIGN: fprintf(stdout, "A_ASSIGN\n"); return; case A_WIDEN: fprintf(stdout, "A_WIDEN\n"); return; case A_RETURN: fprintf(stdout, "A_RETURN\n"); return; case A_FUNCCALL: fprintf(stdout, "A_FUNCCALL %s\n", Symtable[n->v.id].name); return; case A_ADDR: fprintf(stdout, "A_ADDR %s\n", Symtable[n->v.id].name); return; case A_DEREF: if (n->rvalue) fprintf(stdout, "A_DEREF rval\n"); else fprintf(stdout, "A_DEREF\n"); return; case A_SCALE: fprintf(stdout, "A_SCALE %d\n", n->v.size); return; case A_PREINC: fprintf(stdout, "A_PREINC %s\n", Symtable[n->v.id].name); return; case A_PREDEC: fprintf(stdout, "A_PREDEC %s\n", Symtable[n->v.id].name); return; case 
A_POSTINC: fprintf(stdout, "A_POSTINC\n"); return; case A_POSTDEC: fprintf(stdout, "A_POSTDEC\n"); return; case A_NEGATE: fprintf(stdout, "A_NEGATE\n"); return; default: fatald("Unknown dumpAST operator", n->op); } } ================================================ FILE: 25_Function_Arguments/types.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Types and type handling // Copyright (c) 2019 Warren Toomey, GPL3 // Return true if a type is an int type // of any size, false otherwise int inttype(int type) { if (type == P_CHAR || type == P_INT || type == P_LONG) return (1); return (0); } // Return true if a type is of pointer type int ptrtype(int type) { if (type == P_VOIDPTR || type == P_CHARPTR || type == P_INTPTR || type == P_LONGPTR) return (1); return (0); } // Given a primitive type, return // the type which is a pointer to it int pointer_to(int type) { int newtype; switch (type) { case P_VOID: newtype = P_VOIDPTR; break; case P_CHAR: newtype = P_CHARPTR; break; case P_INT: newtype = P_INTPTR; break; case P_LONG: newtype = P_LONGPTR; break; default: fatald("Unrecognised in pointer_to: type", type); } return (newtype); } // Given a primitive pointer type, return // the type which it points to int value_at(int type) { int newtype; switch (type) { case P_VOIDPTR: newtype = P_VOID; break; case P_CHARPTR: newtype = P_CHAR; break; case P_INTPTR: newtype = P_INT; break; case P_LONGPTR: newtype = P_LONG; break; default: fatald("Unrecognised in value_at: type", type); } return (newtype); } // Given an AST tree and a type which we want it to become, // possibly modify the tree by widening or scaling so that // it is compatible with this type. Return the original tree // if no changes occurred, a modified tree, or NULL if the // tree is not compatible with the given type. // If this will be part of a binary operation, the AST op is not zero. 
struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op) { int ltype; int lsize, rsize; ltype = tree->type; // Compare scalar int types if (inttype(ltype) && inttype(rtype)) { // Both types same, nothing to do if (ltype == rtype) return (tree); // Get the sizes for each type lsize = genprimsize(ltype); rsize = genprimsize(rtype); // Tree's size is too big if (lsize > rsize) return (NULL); // Widen to the right if (rsize > lsize) return (mkastunary(A_WIDEN, rtype, tree, 0)); } // For pointers on the left if (ptrtype(ltype)) { // OK is same type on right and not doing a binary op if (op == 0 && ltype == rtype) return (tree); } // We can scale only on A_ADD or A_SUBTRACT operation if (op == A_ADD || op == A_SUBTRACT) { // Left is int type, right is pointer type and the size // of the original type is >1: scale the left if (inttype(ltype) && ptrtype(rtype)) { rsize = genprimsize(value_at(rtype)); if (rsize > 1) return (mkastunary(A_SCALE, rtype, tree, rsize)); else return (tree); // Size 1, no need to scale } } // If we get here, the types are not compatible return (NULL); } ================================================ FILE: 26_Prototypes/Makefile ================================================ HSRCS= data.h decl.h defs.h SRCS= cg.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c ARMSRCS= cg_arm.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c comp1: $(SRCS) $(HSRCS) cc -o comp1 -g -Wall $(SRCS) compn: $(SRCN) $(HSRCS) cc -o compn -g -Wall $(SRCN) comp1arm: $(ARMSRCS) $(HSRCS) cc -o comp1arm -g -Wall $(ARMSRCS) cp comp1arm comp1 clean: rm -f comp1 comp1arm compn *.o *.s out test: comp1 tests/runtests (cd tests; chmod +x runtests; ./runtests) armtest: comp1arm tests/runtests (cd tests; chmod +x runtests; ./runtests) testn: compn tests/runtestsn (cd tests; chmod +x runtestsn; ./runtestsn) 
================================================ FILE: 26_Prototypes/Readme.md ================================================ # Part 26: Function Prototypes In this part of our compiler writing journey, I've added the ability to write function prototypes. In the process, I've had to rewrite some of the code that I'd just written in the previous parts; sorry about that. I didn't see far enough ahead! So what do we want with function prototypes: + the ability to declare a function prototype with no body + the ability to declare a full function later on + to keep the prototype in the global symbol table section, and the parameters as local variables in the local symbol table section + error checking on the number and types of parameters against a previous function prototype And here is what I'm *not* going to do, at least not yet: + `function(void)`: this will be the same as the `function()` declaration + the declaration of a function with just the types, e.g. `function(int ,char, long);` as this will make the parsing harder. We can do this later. ## What Functionality Needs to be Rewritten In a recent part of our journey I added the declaration of a function with parameters and a full function body. As we were parsing each parameter, I immediately added it to both the global symbol table (to form the prototype) and also the local symbol table (to be the function's parameters). Now that we want to implement function prototypes, it's not always true that a parameter list will become the actual function's parameters. Consider this function prototype: ```c int fred(char a, int foo, long bar); ``` We can only define `fred` as a function, and `a`, `foo` and `bar` as three parameters in the global symbol table. We have to wait until the full function declaration before we can add `a`, `foo` and `bar` to the local symbol table. I'll need to separate the definition of C_PARAM entries on the global symbol table and on the local symbol table. 
## The Design of the New Parsing Mechanism

Here is my quick design for the new function parsing mechanism which also
deals with prototypes.

```
Get the identifier and '('.
Search for the identifier in the symbol table.
If it exists, there is already a prototype: get the id position of
the function and its parameter count.

While parsing parameters:
  - if a previous prototype, compare this param's type against the existing
    one. Update the symbol's name in case this is a full function
  - if no previous prototype, add the parameter to the symbol table

Ensure # of params matches any existing prototype.
Parse the ')'. If ';' is next, done.

If '{' is next, copy the parameter list from the global symtable to the
local sym table. Copy them in a loop so that they are put in reverse order
in the local sym table.
```

I got this done in the last few hours, so here are the code changes.

## Changes to `sym.c`

I've changed the parameter list of a couple of functions in `sym.c`:

```c
int addglob(char *name, int type, int stype, int class, int endlabel, int size);
int addlocl(char *name, int type, int stype, int class, int size);
```

Previously, we had `addlocl()` also call `addglob()` to add a C_PARAM
symbol to both symbol tables. Now that we are separating this functionality,
it makes sense to pass the actual class of the symbol to both functions.

There are calls to these functions in `main.c` and `decl.c`. I'll cover the
ones in `decl.c` later. The change in `main.c` is trivial.

Once we hit the declaration of a real function, we will need to copy its
parameter list from the global to the local symbol table.
As this is really something specific to the symbol table, I've added this function to `sym.c`: ```c // Given a function's slot number, copy the global parameters // from its prototype to be local parameters void copyfuncparams(int slot) { int i, id = slot + 1; for (i = 0; i < Symtable[slot].nelems; i++, id++) { addlocl(Symtable[id].name, Symtable[id].type, Symtable[id].stype, Symtable[id].class, Symtable[id].size); } } ``` ## Changes to `decl.c` Nearly all of the changes to the compiler are confined to `decl.c`. We'll start with the small ones and work up to the big ones. ### `var_declaration()` I've changed the parameter list to `var_declaration()` in the same way that I did for the `sym.c` functions: ```c void var_declaration(int type, int class) { ... addglob(Text, pointer_to(type), S_ARRAY, class, 0, Token.intvalue); ... if (addlocl(Text, type, S_VARIABLE, class, 1) == -1) ... addglob(Text, type, S_VARIABLE, class, 0, 1); } ``` We will use the ability to pass in the class in the other `decl.c` functions. ### `param_declaration()` We have big changes here, as we might already have a parameter list in the global symbol table as an existing prototype. If we do, we need to check the number and types in the new list against the prototype. ```c // Parse the parameters in parentheses after the function name. // Add them as symbols to the symbol table and return the number // of parameters. If id is not -1, there is an existing function // prototype, and the function has this symbol slot number. 
static int param_declaration(int id) { int type, param_id; int orig_paramcnt; int paramcnt = 0; // Add 1 to id so that it's either zero (no prototype), or // it's the position of the zeroth existing parameter in // the symbol table param_id = id + 1; // Get any existing prototype parameter count if (param_id) orig_paramcnt = Symtable[id].nelems; // Loop until the final right parentheses while (Token.token != T_RPAREN) { // Get the type and identifier // and add it to the symbol table type = parse_type(); ident(); // We have an existing prototype. // Check that this type matches the prototype. if (param_id) { if (type != Symtable[id].type) fatald("Type doesn't match prototype for parameter", paramcnt + 1); param_id++; } else { // Add a new parameter to the new prototype var_declaration(type, C_PARAM); } paramcnt++; // Must have a ',' or ')' at this point switch (Token.token) { case T_COMMA: scan(&Token); break; case T_RPAREN: break; default: fatald("Unexpected token in parameter list", Token.token); } } // Check that the number of parameters in this list matches // any existing prototype if ((id != -1) && (paramcnt != orig_paramcnt)) fatals("Parameter count mismatch for function", Symtable[id].name); // Return the count of parameters return (paramcnt); } ``` Remember that the first parameter's global symbol table slot position is immediately after the slot for the function name's symbol. We get passed the slot position of an existing prototype, or -1 if there is no prototype. It's a happy coincidence that we can add one to this to get the first parameter's slot number, or have 0 to indicate that there is no existing prototype. We still loop parsing each new parameter, but now there is new code to either compare against the existing prototype, or to add the parameter to the global symbol table. Once we exit the loop, we can compare the number of parameters in this list against the number in any existing prototype. 
Right now, the code feels a bit ugly and I'm sure that if I leave it a while, I'll be able to see a way to refactor it a bit. ### `function_declaration()` Previously, this was a fairly simple function: get the type and name, add a global symbol, read in the parameters, get the function's body and generate an AST tree for the function's code. Now, we have to deal with the fact that this might only be a prototype, or it could be a full function. And we won't know until we parse either the ';' (for a prototype) or the '{' (for a full function). So let's take the exposition of the code in stages. ```c // Parse the declaration of function. // The identifier has been scanned & we have the type. struct ASTnode *function_declaration(int type) { struct ASTnode *tree, *finalstmt; int id; int nameslot, endlabel, paramcnt; // Text has the identifier's name. If this exists and is a // function, get the id. Otherwise, set id to -1 if ((id = findsymbol(Text)) != -1) if (Symtable[id].stype != S_FUNCTION) id = -1; // If this is a new function declaration, get a // label-id for the end label, and add the function // to the symbol table, if (id == -1) { endlabel = genlabel(); nameslot = addglob(Text, type, S_FUNCTION, C_GLOBAL, endlabel, 0); } // Scan in the '(', any parameters and the ')'. // Pass in any existing function prototype symbol slot number lparen(); paramcnt = param_declaration(id); rparen(); ``` This is nearly the same as the previous version of the code, except that `id` is now set to -1 when there is no previous prototype, or to the prototype's symbol slot number when there is a previous prototype. We only add the function's name to the global symbol table if it's not already there. ```c // If this is a new function declaration, update the // function symbol entry with the number of parameters if (id == -1) Symtable[nameslot].nelems = paramcnt; // Declaration ends in a semicolon, only a prototype. 
if (Token.token == T_SEMI) { scan(&Token); return (NULL); } ``` We've got the count of parameters. If no previous prototype, update this prototype with this count. Now we can peek at the token after the end of the parameter list. If it's a semicolon, this is just a prototype. We now have no AST tree to return, so skip the token and return NULL. I've had to slightly alter the code in `global_declarations()` to deal with this NULL value: no big change. If we continue on, we are now dealing with a full function declaration with a body. ```c // This is not just a prototype. // Copy the global parameters to be local parameters if (id == -1) id = nameslot; copyfuncparams(id); ``` We now need to copy the parameters from the prototype to the local symbol table. The `id = nameslot` code is there for when we have just added the global symbols ourselves and there was no previous prototype. The rest of the code in `function_declaration()` is the same as before and I'll omit it. It checks that a non-void function does return a value, and generates the AST tree with an A_FUNCTION root node. ## Testing the New Functionality One of the drawbacks of the `tests/runtests` script is that it assumes the compiler will definitely produce an assembly output file `out.s` which can be assembled and run. This prevents us from testing that the compiler detects syntax and semantic errors. A quick *grep* of `decl.c` shows these new errors are detected: ```c fatald("Type doesn't match prototype for parameter", paramcnt + 1); fatals("Parameter count mismatch for function", Symtable[id].name); ``` Thus, I'd better rewrite `tests/runtests` to verify that the compiler does detect these errors on bad input. We do have two new working test programs, `input29.c` and `input30.c`. 
The first one is the same as `input28.c` except that I've put the prototypes of all the functions at the top of the program: ```c int param8(int a, int b, int c, int d, int e, int f, int g, int h); int fred(int a, int b, int c); int main(); ``` This, and all previous test programs, still work. `input30.c`, though, is probably the first non-trivial program that our compiler has been given. It opens its own source file and prints it to standard output: ```c int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input30.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ``` We can't yet call the pre-processor, so we manually put in the prototypes for the `open()`, `read()`, `write()` and `close()` functions. We also have to use 0 instead of O_RDONLY in the `open()` call. Right now, the compiler lets us declare a `char buf[60];` but we can't use `buf` itself as a char pointer. So I chose to assign a 60-character literal string to a char pointer and we use this as the buffer. We still also have to wrap IF and WHILE bodies with '{' ... '}' to make them compound statements: I still haven't dealt with the dangling else problem. Finally, we can't accept `char *argv[]` as a parameter declaration for main yet, so I've had to hard-code the input file's name. Still, we now have a very primitive *cat(1)* program which our compiler can compile! That's progress. ## Conclusion and What's Next In the next part of our compiler writing journey, I'll follow up on a comment above and improve the testing of our compiler's functionality. 
[Next step](../27_Testing_Errors/Readme.md) ================================================ FILE: 26_Prototypes/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\t.text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\t.data\n", Outfile); currSeg = data_seg; } } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. static int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. // We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "%r10", "%r11", "%r12", "%r13", "%r9", "%r8", "%rcx", "%rdx", "%rsi", "%rdi" }; static char *breglist[] = { "%r10b", "%r11b", "%r12b", "%r13b", "%r9b", "%r8b", "%cl", "%dl", "%sil", "%dil" }; static char *dreglist[] = { "%r10d", "%r11d", "%r12d", "%r13d", "%r9d", "%r8d", "%ecx", "%edx", "%esi", "%edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. 
static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; int i; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the %rsp and %rsp fprintf(Outfile, "\t.globl\t%s\n" "\t.type\t%s, @function\n" "%s:\n" "\tpushq\t%%rbp\n" "\tmovq\t%%rsp, %%rbp\n", name, name, name); // Copy any in-register parameters to the stack // Stop after no more than six parameter registers for (i = NSYMBOLS - 1; i > Locls; i--) { if (Symtable[i].class != C_PARAM) break; if (i < NSYMBOLS - 6) break; Symtable[i].posn = newlocaloffset(Symtable[i].type); cgstorlocal(paramReg--, i); } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. 
for (; i > Locls; i--) { if (Symtable[i].class == C_PARAM) { Symtable[i].posn = paramOffset; paramOffset += 8; } else { Symtable[i].posn = newlocaloffset(Symtable[i].type); } } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\taddq\t$%d,%%rsp\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Symtable[id].endlabel); fprintf(Outfile, "\taddq\t$%d,%%rsp\n", stackOffset); fputs("\tpopq %rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. 
int cgloadglob(int id, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it switch (Symtable[id].type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", Symtable[id].name); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", Symtable[id].name); fprintf(Outfile, "\tmovzbq\t%s(%%rip), %s\n", Symtable[id].name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", Symtable[id].name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", Symtable[id].name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", Symtable[id].name); if (op == A_PREDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", Symtable[id].name); fprintf(Outfile, "\tmovslq\t%s(%%rip), %s\n", Symtable[id].name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", Symtable[id].name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", Symtable[id].name); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: if (op == A_PREINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", Symtable[id].name); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", Symtable[id].name); fprintf(Outfile, "\tmovq\t%s(%%rip), %s\n", Symtable[id].name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", Symtable[id].name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", Symtable[id].name); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. 
int cgloadlocal(int id, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it switch (Symtable[id].type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", Symtable[id].posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", Symtable[id].posn); fprintf(Outfile, "\tmovzbq\t%d(%%rbp), %s\n", Symtable[id].posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", Symtable[id].posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", Symtable[id].posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", Symtable[id].posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", Symtable[id].posn); fprintf(Outfile, "\tmovslq\t%d(%%rbp), %s\n", Symtable[id].posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", Symtable[id].posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", Symtable[id].posn); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: if (op == A_PREINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", Symtable[id].posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", Symtable[id].posn); fprintf(Outfile, "\tmovq\t%d(%%rbp), %s\n", Symtable[id].posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", Symtable[id].posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", Symtable[id].posn); break; default: fatald("Bad type in cgloadlocal:", Symtable[id].type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int id) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tleaq\tL%d(%%rip), %s\n", id, reglist[r]); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract 
the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tandq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\torq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txorq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshlq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshrq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tnegq\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnotq\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); return (r); } // Convert an integer value to a 
boolean value. Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(int id, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s@PLT\n", Symtable[id].name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\taddq\t$%d, %%rsp\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpushq\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmovq\t%s, %s\n", reglist[r], reglist[FIRSTPARAMREG - argposn + 1]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsalq\t$%d, %s\n", val, reglist[r]); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %s(%%rip)\n", breglist[r], Symtable[id].name); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %s(%%rip)\n", dreglist[r], Symtable[id].name); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tmovq\t%s, %s(%%rip)\n", reglist[r], Symtable[id].name); break; default: fatald("Bad type in cgstorglob:", Symtable[id].type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, int id) { switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %d(%%rbp)\n", breglist[r], Symtable[id].posn); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %d(%%rbp)\n", dreglist[r], Symtable[id].posn); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tmovq\t%s, %d(%%rbp)\n", reglist[r], Symtable[id].posn); break; default: fatald("Bad type in cgstorlocal:", Symtable[id].type); } return (r); } // Array of type sizes in P_XXX order. // 0 means no size. static int psize[] = { 0, 0, 1, 4, 8, 8, 8, 8, 8 }; // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { // Check the type is valid if (type < P_NONE || type > P_LONGPTR) fatal("Bad type in cgprimsize()"); return (psize[type]); } // Generate a global symbol but not functions void cgglobsym(int id) { int typesize; if (Symtable[id].stype == S_FUNCTION) return; // Get the size of the type typesize = cgprimsize(Symtable[id].type); // Generate the global identity and the label cgdataseg(); fprintf(Outfile, "\t.globl\t%s\n", Symtable[id].name); fprintf(Outfile, "%s:", Symtable[id].name); // Generate the space for (int i = 0; i < Symtable[id].size; i++) { switch (typesize) { case 1: fprintf(Outfile, "\t.byte\t0\n"); break; case 4: fprintf(Outfile, "\t.long\t0\n"); break; case 8: fprintf(Outfile, "\t.quad\t0\n"); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\t.byte\t%d\n", *cptr); } fprintf(Outfile, "\t.byte\t0\n"); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { // Generate code depending on the function's type switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzbl\t%s, %%eax\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %%eax\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", Symtable[id].type); } cgjump(Symtable[id].endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(int id) { int r = alloc_register(); if (Symtable[id].class == C_LOCAL) fprintf(Outfile, "\tleaq\t%d(%%rbp), %s\n", Symtable[id].posn, reglist[r]); else fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", Symtable[id].name, reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tmovzbq\t(%s), %s\n", reglist[r], reglist[r]); break; case P_INTPTR: fprintf(Outfile, "\tmovslq\t(%s), %s\n", reglist[r], reglist[r]); break; case P_LONGPTR: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, (%s)\n", breglist[r1], reglist[r2]); break; case P_INT: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 26_Prototypes/cg_arm.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for ARMv6 on Raspberry Pi // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. static int freereg[4]; static char *reglist[4] = { "r4", "r5", "r6", "r7" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. 
static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // We have to store large integer literal values in memory. // Keep a list of them which will be output in the postamble #define MAXINTS 1024 int Intlist[MAXINTS]; static int Intslot = 0; // Determine the offset of a large integer // literal from the .L3 label. If the integer // isn't in the list, add it. static void set_int_offset(int val) { int offset = -1; // See if it is already there for (int i = 0; i < Intslot; i++) { if (Intlist[i] == val) { offset = 4 * i; break; } } // Not in the list, so add it if (offset == -1) { offset = 4 * Intslot; if (Intslot == MAXINTS) fatal("Out of int slots in set_int_offset()"); Intlist[Intslot++] = val; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L3+%d\n", offset); } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Print out the assembly postamble void cgpostamble() { // Print out the global variables fprintf(Outfile, ".L2:\n"); for (int i = 0; i < Globs; i++) { if (Symtable[i].stype == S_VARIABLE) fprintf(Outfile, "\t.word %s\n", Symtable[i].name); } // Print out the integer literals fprintf(Outfile, ".L3:\n"); for (int i = 0; i < Intslot; i++) { fprintf(Outfile, "\t.word %d\n", Intlist[i]); } } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, \%%function\n" "%s:\n" "\tpush\t{fp, lr}\n" "\tadd\tfp, sp, #4\n" "\tsub\tsp, sp, #8\n" "\tstr\tr0, [fp, #-8]\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { 
cglabel(Symtable[id].endlabel); fputs("\tsub\tsp, fp, #4\n" "\tpop\t{fp, pc}\n" "\t.align\t2\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); // If the literal value is small, do it with one instruction if (value <= 1000) fprintf(Outfile, "\tmov\t%s, #%d\n", reglist[r], value); else { set_int_offset(value); fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); } return (r); } // Determine the offset of a variable from the .L2 // label. Yes, this is inefficient code. static void set_var_offset(int id) { int offset = 0; // Walk the symbol table up to id. // Find S_VARIABLEs and add on 4 until // we get to our variable for (int i = 0; i < id; i++) { if (Symtable[i].stype == S_VARIABLE) offset += 4; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L2+%d\n", offset); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tldrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s, %s\n", reglist[r1], reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register 
with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\tmul\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { // To do a divide: r0 holds the dividend, r1 holds the divisor. // The quotient is in r0. fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r1]); fprintf(Outfile, "\tmov\tr1, %s\n", reglist[r2]); fprintf(Outfile, "\tbl\t__aeabi_idiv\n"); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r1]); free_register(r2); return (r1); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]); fprintf(Outfile, "\tbl\t%s\n", Symtable[id].name); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r]); return (r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tlsl\t%s, %s, #%d\n", reglist[r], reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tstr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgstorglob:", Symtable[id].type); } return (r); } // Array of type sizes in P_XXX order. // 0 means no size. static int psize[] = { 0, 0, 1, 4, 4, 4, 4, 4 }; // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { // Check the type is valid if (type < P_NONE || type > P_LONGPTR) fatal("Bad type in cgprimsize()"); return (psize[type]); } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Symtable[id].type); fprintf(Outfile, "\t.data\n" "\t.globl\t%s\n", Symtable[id].name); switch (typesize) { case 1: fprintf(Outfile, "%s:\t.byte\t0\n", Symtable[id].name); break; case 4: fprintf(Outfile, "%s:\t.long\t0\n", Symtable[id].name); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "moveq", "movne", "movlt", "movgt", "movle", "movge" }; // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "movne", "moveq", "movge", "movle", "movgt", "movlt" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #1\n", cmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #0\n", invcmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\tuxtb\t%s, %s\n", reglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tb\tL%d\n", l); } // List of inverted branch instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *brlist[] = { "bne", "beq", "bge", "ble", "bgt", "blt" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", brlist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[reg]); cgjump(Symtable[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. Return a new register int cgaddress(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); fprintf(Outfile, "\tmov\t%s, r3\n", reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tldrb\t%s, [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [%s]\n", reglist[r1], reglist[r2]); break; case P_INT: case P_LONG: fprintf(Outfile, "\tstr\t%s, [%s]\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 26_Prototypes/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { no_seg, 
text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\tsection .text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\tsection .data\n", Outfile); currSeg = data_seg; } } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. // We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "r10", "r11", "r12", "r13", "r9", "r8", "rcx", "rdx", "rsi", "rdi" }; static char *breglist[] = { "r10b", "r11b", "r12b", "r13b", "r9b", "r8b", "cl", "dl", "sil", "dil" }; static char *dreglist[] = { "r10d", "r11d", "r12d", "r13d", "r9d", "r8d", "ecx", "edx", "esi", "edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\textern\tprintint\n", Outfile); fputs("\textern\tprintchar\n", Outfile); fputs("\textern\topen\n", Outfile); fputs("\textern\tclose\n", Outfile); fputs("\textern\tread\n", Outfile); fputs("\textern\twrite\n", Outfile); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; int i; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the rsp and rbp fprintf(Outfile, "\tglobal\t%s\n" "%s:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n", name, name); // Copy any in-register parameters to the stack // Stop after no more than six parameter registers for (i = NSYMBOLS - 1; i > Locls; i--) { if (Symtable[i].class != C_PARAM) break; if (i < NSYMBOLS - 6) break; Symtable[i].posn = newlocaloffset(Symtable[i].type); cgstorlocal(paramReg--, i); } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. for (; i > Locls; i--) { if (Symtable[i].class == C_PARAM) { Symtable[i].posn = paramOffset; paramOffset += 8; } else { Symtable[i].posn = newlocaloffset(Symtable[i].type); } } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\tadd\trsp, %d\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Symtable[id].endlabel); fprintf(Outfile, "\tadd\trsp, %d\n", stackOffset); fputs("\tpop rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. 
// Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadglob(int id, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it switch (Symtable[id].type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", Symtable[id].name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", Symtable[id].name); fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], Symtable[id].name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", Symtable[id].name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", Symtable[id].name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword [%s]\n", Symtable[id].name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tdword [%s]\n", Symtable[id].name); fprintf(Outfile, "\tmovsx\t%s, word [%s]\n", dreglist[r], Symtable[id].name); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword [%s]\n", Symtable[id].name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword [%s]\n", Symtable[id].name); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword [%s]\n", Symtable[id].name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", Symtable[id].name); fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], Symtable[id].name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword [%s]\n", Symtable[id].name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", Symtable[id].name); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return 
(r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadlocal(int id, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it switch (Symtable[id].type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", Symtable[id].posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", Symtable[id].posn); fprintf(Outfile, "\tmovzx\t%s, byte [rbp+%d]\n", reglist[r], Symtable[id].posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", Symtable[id].posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", Symtable[id].posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", Symtable[id].posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", Symtable[id].posn); fprintf(Outfile, "\tmovsx\t%s, word [rbp+%d]\n", reglist[r], Symtable[id].posn); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", Symtable[id].posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", Symtable[id].posn); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", Symtable[id].posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", Symtable[id].posn); fprintf(Outfile, "\tmov\t%s, [rbp+%d]\n", reglist[r], Symtable[id].posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", Symtable[id].posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", Symtable[id].posn); break; default: fatald("Bad type in cgloadlocal:", Symtable[id].type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int id) { // Get a new 
register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, L%d\n", reglist[r], id); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tand\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\tor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshl\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshr\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tneg\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnot\t%s\n", 
reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r], breglist[r]); return (r); } // Convert an integer value to a boolean value. Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, byte %s\n", reglist[r], breglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(int id, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s\n", Symtable[id].name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\tadd\trsp, %d\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmov\t%s, rax\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpush\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmov\t%s, %s\n", reglist[FIRSTPARAMREG - argposn + 1], reglist[r]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsal\t%s, %d\n", reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], %s\n", Symtable[id].name, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", Symtable[id].name, dreglist[r]); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tmov\t[%s], %s\n", Symtable[id].name, reglist[r]); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, int id) { switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tmov\tbyte\t[rbp+%d], %s\n", Symtable[id].posn, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\tdword\t[rbp+%d], %s\n", Symtable[id].posn, dreglist[r]); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tmov\tqword\t[rbp+%d], %s\n", Symtable[id].posn, reglist[r]); break; default: fatald("Bad type in cgstorlocal:", Symtable[id].type); } return (r); } // Array of type sizes in P_XXX order. // 0 means no size. static int psize[] = { 0, 0, 1, 4, 8, 8, 8, 8, 8 }; // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { // Check the type is valid if (type < P_NONE || type > P_LONGPTR) fatal("Bad type in cgprimsize()"); return (psize[type]); } // Generate a global symbol but not functions void cgglobsym(int id) { int typesize; if (Symtable[id].stype == S_FUNCTION) return; // Get the size of the type typesize = cgprimsize(Symtable[id].type); // Generate the global identity and the label cgdataseg(); fprintf(Outfile, "\tsection\t.data\n" "\tglobal\t%s\n", Symtable[id].name); fprintf(Outfile, "%s:", Symtable[id].name); // Generate the space // original version for (int i = 0; i < Symtable[id].size; i++) { switch(typesize) { case 1: fprintf(Outfile, "\tdb\t0\n"); break; case 4: fprintf(Outfile, "\tdd\t0\n"); break; case 8: fprintf(Outfile, "\tdq\t0\n"); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } /* compact version using times instead of loop switch(typesize) { case 1: fprintf(Outfile, "\ttimes\t%d\tdb\t0\n", Symtable[id].size); break; case 4: fprintf(Outfile, "\ttimes\t%d\tdd\t0\n", Symtable[id].size); break; case 8: fprintf(Outfile, "\ttimes\t%d\tdq\t0\n", Symtable[id].size); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } */ } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\tdb\t%d\n", *cptr); } fprintf(Outfile, "\tdb\t0\n"); /* put string in readable format on a single line // probably went overboard with error checking int comma = 0, quote = 0, start = 1; fprintf(Outfile, "\tdb\t"); for (cptr=strvalue; *cptr; cptr++) { if ( ! 
isprint(*cptr) ) if (comma || start) { fprintf(Outfile, "%d, ", *cptr); start = 0; comma = 1; } else if (quote) { fprintf(Outfile, "\', %d, ", *cptr); comma = 1; quote = 0; } else { fprintf(Outfile, "%d, ", *cptr); comma = 1; quote = 0; } else if (start || comma) { fprintf(Outfile, "\'%c", *cptr); start = comma = 0; quote = 1; } else { fprintf(Outfile, "%c", *cptr); comma = 0; quote = 1; } } if (comma || start) fprintf(Outfile, "0\n"); else fprintf(Outfile, "\', 0\n"); */ } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { // Generate code depending on the function's type switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzx\teax, %s\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmov\teax, %s\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", Symtable[id].type); } cgjump(Symtable[id].endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(int id) { int r = alloc_register(); if (Symtable[id].class == C_LOCAL) fprintf(Outfile, "\tlea\t%s, [rbp+%d]\n", reglist[r], Symtable[id].posn); else fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r], Symtable[id].name); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: fprintf(Outfile, "\tmovsx\t%s, dword [%s]\n", reglist[r], reglist[r]); break; case P_LONGPTR: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], byte %s\n", reglist[r2], breglist[r1]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; case P_LONG: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 26_Prototypes/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Putback; // Character put back by scanner extern_ int Functionid; // Symbol id of the current function extern_ int Globs; // Position of next free global symbol slot extern_ int Locls; // Position of next free local symbol slot extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ struct token Token; // Last token scanned extern_ char Text[TEXTLEN + 1]; // Last identifier scanned extern_ struct symtable Symtable[NSYMBOLS]; // Global symbol table extern_ int O_dumpAST; ================================================ 
FILE: 26_Prototypes/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 // Parse the current token and return // a primitive type enum value. Also // scan in the next token int parse_type(void) { int type; switch (Token.token) { case T_VOID: type = P_VOID; break; case T_CHAR: type = P_CHAR; break; case T_INT: type = P_INT; break; case T_LONG: type = P_LONG; break; default: fatald("Illegal type, token", Token.token); } // Scan in one or more further '*' tokens // and determine the correct pointer type while (1) { scan(&Token); if (Token.token != T_STAR) break; type = pointer_to(type); } // We leave with the next token already scanned return (type); } // variable_declaration: type identifier ';' // | type identifier '[' INTLIT ']' ';' // ; // // Parse the declaration of a scalar variable or an array // with a given size. // The identifier has been scanned & we have the type. // class is the variable's class void var_declaration(int type, int class) { // Text now has the identifier's name. // If the next token is a '[' if (Token.token == T_LBRACKET) { // Skip past the '[' scan(&Token); // Check we have an array size if (Token.token == T_INTLIT) { // Add this as a known array and generate its space in assembly. 
// We treat the array as a pointer to its elements' type if (class == C_LOCAL) { fatal("For now, declaration of local arrays is not implemented"); } else { addglob(Text, pointer_to(type), S_ARRAY, class, 0, Token.intvalue); } } // Ensure we have a following ']' scan(&Token); match(T_RBRACKET, "]"); } else { // Add this as a known scalar // and generate its space in assembly if (class == C_LOCAL) { if (addlocl(Text, type, S_VARIABLE, class, 1) == -1) fatals("Duplicate local variable declaration", Text); } else { addglob(Text, type, S_VARIABLE, class, 0, 1); } } } // param_declaration: // | variable_declaration // | variable_declaration ',' param_declaration // // Parse the parameters in parentheses after the function name. // Add them as symbols to the symbol table and return the number // of parameters. If id is not -1, there is an existing function // prototype, and the function has this symbol slot number. static int param_declaration(int id) { int type, param_id; int orig_paramcnt; int paramcnt = 0; // Add 1 to id so that it's either zero (no prototype), or // it's the position of the zeroth existing parameter in // the symbol table param_id = id + 1; // Get any existing prototype parameter count if (param_id) orig_paramcnt = Symtable[id].nelems; // Loop until the final right parentheses while (Token.token != T_RPAREN) { // Get the type and identifier // and add it to the symbol table type = parse_type(); ident(); // We have an existing prototype. // Check that this type matches the prototype. 
if (param_id) { if (type != Symtable[id].type) fatald("Type doesn't match prototype for parameter", paramcnt + 1); param_id++; } else { // Add a new parameter to the new prototype var_declaration(type, C_PARAM); } paramcnt++; // Must have a ',' or ')' at this point switch (Token.token) { case T_COMMA: scan(&Token); break; case T_RPAREN: break; default: fatald("Unexpected token in parameter list", Token.token); } } // Check that the number of parameters in this list matches // any existing prototype if ((id != -1) && (paramcnt != orig_paramcnt)) fatals("Parameter count mismatch for function", Symtable[id].name); // Return the count of parameters return (paramcnt); } // // function_declaration: type identifier '(' parameter_list ')' ; // | type identifier '(' parameter_list ')' compound_statement ; // // Parse the declaration of function. // The identifier has been scanned & we have the type. struct ASTnode *function_declaration(int type) { struct ASTnode *tree, *finalstmt; int id; int nameslot, endlabel, paramcnt; // Text has the identifier's name. If this exists and is a // function, get the id. Otherwise, set id to -1 if ((id = findsymbol(Text)) != -1) if (Symtable[id].stype != S_FUNCTION) id = -1; // If this is a new function declaration, get a // label-id for the end label, and add the function // to the symbol table, if (id == -1) { endlabel = genlabel(); nameslot = addglob(Text, type, S_FUNCTION, C_GLOBAL, endlabel, 0); } // Scan in the '(', any parameters and the ')'. // Pass in any existing function prototype symbol slot number lparen(); paramcnt = param_declaration(id); rparen(); // If this is a new function declaration, update the // function symbol entry with the number of parameters if (id == -1) Symtable[nameslot].nelems = paramcnt; // Declaration ends in a semicolon, only a prototype. if (Token.token == T_SEMI) { scan(&Token); return (NULL); } // This is not just a prototype. 
// Copy the global parameters to be local parameters if (id == -1) id = nameslot; copyfuncparams(id); // Set the Functionid global to the function's symbol-id Functionid = id; // Get the AST tree for the compound statement tree = compound_statement(); // If the function type isn't P_VOID .. if (type != P_VOID) { // Error if no statements in the function if (tree == NULL) fatal("No statements in function with non-void type"); // Check that the last AST operation in the // compound statement was a return statement finalstmt = (tree->op == A_GLUE) ? tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); } // Return an A_FUNCTION node which has the function's id // and the compound statement sub-tree return (mkastunary(A_FUNCTION, type, tree, id)); } // Parse one or more global declarations, either // variables or functions void global_declarations(void) { struct ASTnode *tree; int type; while (1) { // We have to read past the type and identifier // to see either a '(' for a function declaration // or a ',' or ';' for a variable declaration. // Text is filled in by the ident() call. 
type = parse_type(); ident(); if (Token.token == T_LPAREN) { // Parse the function declaration tree = function_declaration(type); // Only a function prototype, no code if (tree == NULL) continue; // A real function, generate the assembly code for it if (O_dumpAST) { dumpAST(tree, NOLABEL, 0); fprintf(stdout, "\n\n"); } genAST(tree, NOLABEL, 0); // Now free the symbols associated // with this function freeloclsyms(); } else { // Parse the global variable declaration // and skip past the trailing semicolon var_declaration(type, C_GLOBAL); semi(); } // Stop when we have reached EOF if (Token.token == T_EOF) break; } } ================================================ FILE: 26_Prototypes/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c void reject_token(struct token *t); int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, int intvalue); struct ASTnode *mkastleaf(int op, int type, int intvalue); struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, int intvalue); void dumpAST(struct ASTnode *n, int label, int parentASTop); // gen.c int genlabel(void); int genAST(struct ASTnode *n, int reg, int parentASTop); void genpreamble(); void genpostamble(); void genfreeregs(); void genglobsym(int id); int genglobstr(char *strvalue); int genprimsize(int type); void genreturn(int reg, int id); // cg.c void cgtextseg(); void cgdataseg(); void freeall_registers(void); void cgpreamble(); void cgpostamble(); void cgfuncpreamble(int id); void cgfuncpostamble(int id); int cgloadint(int value, int type); int cgloadglob(int id, int op); int cgloadlocal(int id, int op); int cgloadglobstr(int id); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdiv(int r1, int r2); int cgshlconst(int r, int val); int cgcall(int id, int numargs); void 
cgcopyarg(int r, int argposn); int cgstorglob(int r, int id); int cgstorlocal(int r, int id); void cgglobsym(int id); void cgglobstr(int l, char *strvalue); int cgcompare_and_set(int ASTop, int r1, int r2); int cgcompare_and_jump(int ASTop, int r1, int r2, int label); void cglabel(int l); void cgjump(int l); int cgwiden(int r, int oldtype, int newtype); int cgprimsize(int type); void cgreturn(int reg, int id); int cgaddress(int id); int cgderef(int r, int type); int cgstorderef(int r1, int r2, int type); int cgnegate(int r); int cginvert(int r); int cglognot(int r); int cgboolean(int r, int op, int label); int cgand(int r1, int r2); int cgor(int r1, int r2); int cgxor(int r1, int r2); int cgshl(int r1, int r2); int cgshr(int r1, int r2); // expr.c struct ASTnode *binexpr(int ptp); // stmt.c struct ASTnode *compound_statement(void); // misc.c void match(int t, char *what); void semi(void); void lbrace(void); void rbrace(void); void lparen(void); void rparen(void); void ident(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c int findglob(char *s); int findlocl(char *s); int findsymbol(char *s); int addglob(char *name, int type, int stype, int class, int endlabel, int size); int addlocl(char *name, int type, int stype, int class, int size); void copyfuncparams(int slot); void freeloclsyms(void); // decl.c void var_declaration(int type, int class); struct ASTnode *function_declaration(int type); void global_declarations(void); // types.c int inttype(int type); int parse_type(void); int pointer_to(int type); int value_at(int type); struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op); ================================================ FILE: 26_Prototypes/defs.h ================================================ #include #include #include #include // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 #define TEXTLEN 512 // Length of symbols in input #define 
NSYMBOLS 1024 // Number of symbol table entries // Token types enum { T_EOF, // Binary operators T_ASSIGN, T_LOGOR, T_LOGAND, T_OR, T_XOR, T_AMPER, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, T_LSHIFT, T_RSHIFT, T_PLUS, T_MINUS, T_STAR, T_SLASH, // Other operators T_INC, T_DEC, T_INVERT, T_LOGNOT, // Type keywords T_VOID, T_CHAR, T_INT, T_LONG, // Other keywords T_IF, T_ELSE, T_WHILE, T_FOR, T_RETURN, // Structural tokens T_INTLIT, T_STRLIT, T_SEMI, T_IDENT, T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, T_LBRACKET, T_RBRACKET, T_COMMA }; // Token structure struct token { int token; // Token type, from the enum list above int intvalue; // For T_INTLIT, the integer value }; // AST node types. The first few line up // with the related tokens enum { A_ASSIGN= 1, A_LOGOR, A_LOGAND, A_OR, A_XOR, A_AND, A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_LSHIFT, A_RSHIFT, A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_INTLIT, A_STRLIT, A_IDENT, A_GLUE, A_IF, A_WHILE, A_FUNCTION, A_WIDEN, A_RETURN, A_FUNCCALL, A_DEREF, A_ADDR, A_SCALE, A_PREINC, A_PREDEC, A_POSTINC, A_POSTDEC, A_NEGATE, A_INVERT, A_LOGNOT, A_TOBOOL }; // Primitive types enum { P_NONE, P_VOID, P_CHAR, P_INT, P_LONG, P_VOIDPTR, P_CHARPTR, P_INTPTR, P_LONGPTR }; // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates int rvalue; // True if the node is an rvalue struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; union { // For A_INTLIT, the integer value int intvalue; // For A_IDENT, the symbol slot number int id; // For A_FUNCTION, the symbol slot number int size; // For A_SCALE, the size to scale by } v; // For A_FUNCCALL, the symbol slot number }; #define NOREG -1 // Use NOREG when the AST generation // functions have no register to return #define NOLABEL 0 // Use NOLABEL when we have no label to // pass to genAST() // Structural types enum { S_VARIABLE, S_FUNCTION, S_ARRAY }; // 
Storage classes enum { C_GLOBAL = 1, // Globally visible symbol C_LOCAL, // Locally visible symbol C_PARAM // Locally visible function parameter }; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol int stype; // Structural type for the symbol int class; // Storage class for the symbol int endlabel; // For S_FUNCTIONs, the end label int size; // Number of elements in the symbol int posn; // For locals, either the negative offset // from stack base pointer, or register id #define nelems posn // For functions, # of params // For structs, # of fields }; ================================================ FILE: 26_Prototypes/expr.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of expressions // Copyright (c) 2019 Warren Toomey, GPL3 // expression_list: // | expression // | expression ',' expression_list // ; // Parse a list of zero or more comma-separated expressions and // return an AST composed of A_GLUE nodes with the left-hand child // being the sub-tree of previous expressions (or NULL) and the right-hand // child being the next expression. Each A_GLUE node will have size field // set to the number of expressions in the tree at this point. If no // expressions are parsed, NULL is returned static struct ASTnode *expression_list(void) { struct ASTnode *tree = NULL; struct ASTnode *child = NULL; int exprcount = 0; // Loop until the final right parentheses while (Token.token != T_RPAREN) { // Parse the next expression and increment the expression count child = binexpr(0); exprcount++; // Build an A_GLUE AST node with the previous tree as the left child // and the new expression as the right child. Store the expression count. 
tree = mkastnode(A_GLUE, P_NONE, tree, NULL, child, exprcount); // Must have a ',' or ')' at this point switch (Token.token) { case T_COMMA: scan(&Token); break; case T_RPAREN: break; default: fatald("Unexpected token in expression list", Token.token); } } // Return the tree of expressions return (tree); } // Parse a function call and return its AST static struct ASTnode *funccall(void) { struct ASTnode *tree; int id; // Check that the identifier has been defined as a function, // then make a leaf node for it. if ((id = findsymbol(Text)) == -1 || Symtable[id].stype != S_FUNCTION) { fatals("Undeclared function", Text); } // Get the '(' lparen(); // Parse the argument expression list tree = expression_list(); // XXX Check type of each argument against the function's prototype // Build the function call AST node. Store the // function's return type as this node's type. // Also record the function's symbol-id tree = mkastunary(A_FUNCCALL, Symtable[id].type, tree, id); // Get the ')' rparen(); return (tree); } // Parse the index into an array and return an AST tree for it static struct ASTnode *array_access(void) { struct ASTnode *left, *right; int id; // Check that the identifier has been defined as an array // then make a leaf node for it that points at the base if ((id = findsymbol(Text)) == -1 || Symtable[id].stype != S_ARRAY) { fatals("Undeclared array", Text); } left = mkastleaf(A_ADDR, Symtable[id].type, id); // Get the '[' scan(&Token); // Parse the following expression right = binexpr(0); // Get the ']' match(T_RBRACKET, "]"); // Ensure that this is of int type if (!inttype(right->type)) fatal("Array index is not of integer type"); // Scale the index by the size of the element's type right = modify_type(right, left->type, A_ADD); // Return an AST tree where the array's base has the offset // added to it, and dereference the element. Still an lvalue // at this point. 
left = mkastnode(A_ADD, Symtable[id].type, left, NULL, right, 0); left = mkastunary(A_DEREF, value_at(left->type), left, 0); return (left); } // Parse a postfix expression and return // an AST node representing it. The // identifier is already in Text. static struct ASTnode *postfix(void) { struct ASTnode *n; int id; // Scan in the next token to see if we have a postfix expression scan(&Token); // Function call if (Token.token == T_LPAREN) return (funccall()); // An array reference if (Token.token == T_LBRACKET) return (array_access()); // A variable. Check that the variable exists. id = findsymbol(Text); if (id == -1 || Symtable[id].stype != S_VARIABLE) fatals("Unknown variable", Text); switch (Token.token) { // Post-increment: skip over the token case T_INC: scan(&Token); n = mkastleaf(A_POSTINC, Symtable[id].type, id); break; // Post-decrement: skip over the token case T_DEC: scan(&Token); n = mkastleaf(A_POSTDEC, Symtable[id].type, id); break; // Just a variable reference default: n = mkastleaf(A_IDENT, Symtable[id].type, id); } return (n); } // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; int id; switch (Token.token) { case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. // Make it a P_CHAR if it's within the P_CHAR range if ((Token.intvalue) >= 0 && (Token.intvalue < 256)) n = mkastleaf(A_INTLIT, P_CHAR, Token.intvalue); else n = mkastleaf(A_INTLIT, P_INT, Token.intvalue); break; case T_STRLIT: // For a STRLIT token, generate the assembly for it. // Then make a leaf AST node for it. id is the string's label. id = genglobstr(Text); n = mkastleaf(A_STRLIT, P_CHARPTR, id); break; case T_IDENT: return (postfix()); case T_LPAREN: // Beginning of a parenthesised expression, skip the '('. 
// Scan in the expression and the right parenthesis scan(&Token); n = binexpr(0); rparen(); return (n); default: fatald("Expecting a primary expression, got token", Token.token); } // Scan in the next token and return the leaf node scan(&Token); return (n); } // Convert a binary operator token into a binary AST operation. // We rely on a 1:1 mapping from token to AST operation static int binastop(int tokentype) { if (tokentype > T_EOF && tokentype <= T_SLASH) return (tokentype); fatald("Syntax error, token", tokentype); return (0); // Keep -Wall happy } // Return true if a token is right-associative, // false otherwise. static int rightassoc(int tokentype) { if (tokentype == T_ASSIGN) return (1); return (0); } // Operator precedence for each token. Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 20, 30, // T_EOF, T_ASSIGN, T_LOGOR, T_LOGAND 40, 50, 60, // T_OR, T_XOR, T_AMPER 70, 70, // T_EQ, T_NE 80, 80, 80, 80, // T_LT, T_GT, T_LE, T_GE 90, 90, // T_LSHIFT, T_RSHIFT 100, 100, // T_PLUS, T_MINUS 110, 110 // T_STAR, T_SLASH }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec; if (tokentype > T_SLASH) fatald("Token with no precedence in op_precedence:", tokentype); prec = OpPrec[tokentype]; if (prec == 0) fatald("Syntax error, token", tokentype); return (prec); } // prefix_expression: primary // | '*' prefix_expression // | '&' prefix_expression // | '-' prefix_expression // | '++' prefix_expression // | '--' prefix_expression // ; // Parse a prefix expression and return // a sub-tree representing it. 
struct ASTnode *prefix(void) { struct ASTnode *tree; switch (Token.token) { case T_AMPER: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Ensure that it's an identifier if (tree->op != A_IDENT) fatal("& operator must be followed by an identifier"); // Now change the operator to A_ADDR and the type to // a pointer to the original type tree->op = A_ADDR; tree->type = pointer_to(tree->type); break; case T_STAR: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's either another deref or an // identifier if (tree->op != A_IDENT && tree->op != A_DEREF) fatal("* operator must be followed by an identifier or *"); // Prepend an A_DEREF operation to the tree tree = mkastunary(A_DEREF, value_at(tree->type), tree, 0); break; case T_MINUS: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_NEGATE operation to the tree and // make the child an rvalue. Because chars are unsigned, // also widen this to int so that it's signed tree->rvalue = 1; tree = modify_type(tree, P_INT, 0); tree = mkastunary(A_NEGATE, tree->type, tree, 0); break; case T_INVERT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_INVERT operation to the tree and // make the child an rvalue. tree->rvalue = 1; tree = mkastunary(A_INVERT, tree->type, tree, 0); break; case T_LOGNOT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_LOGNOT operation to the tree and // make the child an rvalue. 
tree->rvalue = 1; tree = mkastunary(A_LOGNOT, tree->type, tree, 0); break; case T_INC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("++ operator must be followed by an identifier"); // Prepend an A_PREINC operation to the tree tree = mkastunary(A_PREINC, tree->type, tree, 0); break; case T_DEC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("-- operator must be followed by an identifier"); // Prepend an A_PREDEC operation to the tree tree = mkastunary(A_PREDEC, tree->type, tree, 0); break; default: tree = primary(); } return (tree); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; struct ASTnode *ltemp, *rtemp; int ASTop; int tokentype; // Get the tree on the left. // Fetch the next token at the same time. 
left = prefix(); // If we hit one of several terminating tokens, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA) { left->rvalue = 1; return (left); } // While the precedence of this token is more than that of the // previous token precedence, or it's right associative and // equal to the previous token's precedence while ((op_precedence(tokentype) > ptp) || (rightassoc(tokentype) && op_precedence(tokentype) == ptp)) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Determine the operation to be performed on the sub-trees ASTop = binastop(tokentype); if (ASTop == A_ASSIGN) { // Assignment // Make the right tree into an rvalue right->rvalue = 1; // Ensure the right's type matches the left right = modify_type(right, left->type, 0); if (left == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree. However, switch // left and right around, so that the right expression's // code will be generated before the left expression ltemp = left; left = right; right = ltemp; } else { // We are not doing an assignment, so both trees should be rvalues // Convert both trees into rvalue if they are lvalue trees left->rvalue = 1; right->rvalue = 1; // Ensure the two types are compatible by trying // to modify each tree to match the other's type. ltemp = modify_type(left, right->type, ASTop); rtemp = modify_type(right, left->type, ASTop); if (ltemp == NULL && rtemp == NULL) fatal("Incompatible types in binary expression"); if (ltemp != NULL) left = ltemp; if (rtemp != NULL) right = rtemp; } // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. left = mkastnode(binastop(tokentype), left->type, left, NULL, right, 0); // Update the details of the current token. 
// If we hit a terminating token, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA) { left->rvalue = 1; return (left); } } // Return the tree we have when the precedence // is the same or lower left->rvalue = 1; return (left); } ================================================ FILE: 26_Prototypes/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number int genlabel(void) { static int id = 1; return (id++); } // Generate the code for an IF statement // and an optional ELSE clause static int genIF(struct ASTnode *n) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. // When there is no ELSE clause, Lfalse _is_ // the ending label! Lfalse = genlabel(); if (n->right) Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. genAST(n->left, Lfalse, n->op); genfreeregs(); // Generate the true compound statement genAST(n->mid, NOLABEL, n->op); genfreeregs(); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOLABEL, n->op); genfreeregs(); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = genlabel(); Lend = genlabel(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. 
genAST(n->left, Lend, n->op); genfreeregs(); // Generate the compound statement for the body genAST(n->right, NOLABEL, n->op); genfreeregs(); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Generate the code to copy the arguments of a // function call to its parameters, then call the // function itself. Return the register that holds // the function's return value. static int gen_funccall(struct ASTnode *n) { struct ASTnode *gluetree = n->left; int reg; int numargs = 0; // If there is a list of arguments, walk this list // from the last argument (right-hand child) to the // first while (gluetree) { // Calculate the expression's value reg = genAST(gluetree->right, NOLABEL, gluetree->op); // Copy this into the n'th function parameter: size is 1, 2, 3, ... cgcopyarg(reg, gluetree->v.size); // Keep the first (highest) number of arguments if (numargs == 0) numargs = gluetree->v.size; genfreeregs(); gluetree = gluetree->left; } // Call the function, clean up the stack (based on numargs), // and return its result return (cgcall(n->v.id, numargs)); } // Given an AST, an optional label, and the AST op // of the parent, generate assembly code recursively. // Return the register id with the tree's final value. 
int genAST(struct ASTnode *n, int label, int parentASTop) { int leftreg, rightreg; // We have some specific AST node handling at the top // so that we don't evaluate the child sub-trees immediately switch (n->op) { case A_IF: return (genIF(n)); case A_WHILE: return (genWHILE(n)); case A_FUNCCALL: return (gen_funccall(n)); case A_GLUE: // Do each child statement, and free the // registers after each child genAST(n->left, NOLABEL, n->op); genfreeregs(); genAST(n->right, NOLABEL, n->op); genfreeregs(); return (NOREG); case A_FUNCTION: // Generate the function's preamble before the code // in the child sub-tree cgfuncpreamble(n->v.id); genAST(n->left, NOLABEL, n->op); cgfuncpostamble(n->v.id); return (NOREG); } // General AST node handling below // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, NOLABEL, n->op); if (n->right) rightreg = genAST(n->right, NOLABEL, n->op); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdiv(leftreg, rightreg)); case A_AND: return (cgand(leftreg, rightreg)); case A_OR: return (cgor(leftreg, rightreg)); case A_XOR: return (cgxor(leftreg, rightreg)); case A_LSHIFT: return (cgshl(leftreg, rightreg)); case A_RSHIFT: return (cgshr(leftreg, rightreg)); case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, compare registers // and set one to 1 or 0 based on the comparison. 
if (parentASTop == A_IF || parentASTop == A_WHILE) return (cgcompare_and_jump(n->op, leftreg, rightreg, label)); else return (cgcompare_and_set(n->op, leftreg, rightreg)); case A_INTLIT: return (cgloadint(n->v.intvalue, n->type)); case A_STRLIT: return (cgloadglobstr(n->v.id)); case A_IDENT: // Load our value if we are an rvalue // or we are being dereferenced if (n->rvalue || parentASTop == A_DEREF) { if (Symtable[n->v.id].class == C_GLOBAL) { return (cgloadglob(n->v.id, n->op)); } else { return (cgloadlocal(n->v.id, n->op)); } } else return (NOREG); case A_ASSIGN: // Are we assigning to an identifier or through a pointer? switch (n->right->op) { case A_IDENT: if (Symtable[n->right->v.id].class == C_GLOBAL) return (cgstorglob(leftreg, n->right->v.id)); else return (cgstorlocal(leftreg, n->right->v.id)); case A_DEREF: return (cgstorderef(leftreg, rightreg, n->right->type)); default: fatald("Can't A_ASSIGN in genAST(), op", n->op); } case A_WIDEN: // Widen the child's type to the parent's type return (cgwiden(leftreg, n->left->type, n->type)); case A_RETURN: cgreturn(leftreg, Functionid); return (NOREG); case A_ADDR: return (cgaddress(n->v.id)); case A_DEREF: // If we are an rvalue, dereference to get the value we point at, // otherwise leave it for A_ASSIGN to store through the pointer if (n->rvalue) return (cgderef(leftreg, n->left->type)); else return (leftreg); case A_SCALE: // Small optimisation: use shift if the // scale value is a known power of two switch (n->v.size) { case 2: return (cgshlconst(leftreg, 1)); case 4: return (cgshlconst(leftreg, 2)); case 8: return (cgshlconst(leftreg, 3)); default: // Load a register with the size and // multiply the leftreg by this size rightreg = cgloadint(n->v.size, P_INT); return (cgmul(leftreg, rightreg)); } case A_POSTINC: // Load the variable's value into a register, // then increment it return (cgloadglob(n->v.id, n->op)); case A_POSTDEC: // Load the variable's value into a register, // then decrement it return 
(cgloadglob(n->v.id, n->op)); case A_PREINC: // Load and increment the variable's value into a register return (cgloadglob(n->left->v.id, n->op)); case A_PREDEC: // Load and decrement the variable's value into a register return (cgloadglob(n->left->v.id, n->op)); case A_NEGATE: return (cgnegate(leftreg)); case A_INVERT: return (cginvert(leftreg)); case A_LOGNOT: return (cglognot(leftreg)); case A_TOBOOL: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, set the register // to 0 or 1 based on it's zeroeness or non-zeroeness return (cgboolean(leftreg, parentASTop, label)); default: fatald("Unknown AST operator", n->op); } return (NOREG); // Keep -Wall happy } void genpreamble() { cgpreamble(); } void genpostamble() { cgpostamble(); } void genfreeregs() { freeall_registers(); } void genglobsym(int id) { cgglobsym(id); } int genglobstr(char *strvalue) { int l = genlabel(); cgglobstr(l, strvalue); return (l); } int genprimsize(int type) { return (cgprimsize(type)); } ================================================ FILE: 26_Prototypes/lib/printint.c ================================================ #include void printint(long x) { printf("%ld\n", x); } void printchar(long x) { putc((char)(x & 0x7f), stdout); } ================================================ FILE: 26_Prototypes/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Initialise global variables static void init() { Line = 1; Putback = '\n'; Globs = 0; Locls = NSYMBOLS - 1; O_dumpAST = 0; } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s [-T] infile\n", prog); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. 
int main(int argc, char *argv[]) { int i; // Initialise the globals init(); // Scan for command-line options for (i = 1; i < argc; i++) { if (*argv[i] != '-') break; for (int j = 1; argv[i][j]; j++) { switch (argv[i][j]) { case 'T': O_dumpAST = 1; break; default: usage(argv[0]); } } } // Ensure we have an input file argument if (i >= argc) usage(argv[0]); // Open up the input file if ((Infile = fopen(argv[i], "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", argv[i], strerror(errno)); exit(1); } // Create the output file if ((Outfile = fopen("out.s", "w")) == NULL) { fprintf(stderr, "Unable to create out.s: %s\n", strerror(errno)); exit(1); } // For now, ensure that printint() and printchar() are defined addglob("printint", P_INT, S_FUNCTION, C_GLOBAL, 0, 0); addglob("printchar", P_VOID, S_FUNCTION, C_GLOBAL, 0, 0); scan(&Token); // Get the first token from the input genpreamble(); // Output the preamble global_declarations(); // Parse the global declarations genpostamble(); // Output the postamble fclose(Outfile); // Close the output file and exit return (0); } ================================================ FILE: 26_Prototypes/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current token is t, // and fetch the next token. 
Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d\n", s, Line); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d\n", s1, s2, Line); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d\n", s, d, Line); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d\n", s, c, Line); exit(1); } ================================================ FILE: 26_Prototypes/scan.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { char *p; p = strchr(s, c); return (p ? p - s : -1); } // Get the next character from the input file. static int next(void) { int c; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return (c); } c = fgetc(Infile); // Read from input file if ('\n' == c) Line++; // Increment line count return (c); } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. 
static int skip(void) { int c; c = next(); while (' ' == c || '\t' == c || '\n' == c || '\r' == c || '\f' == c) { c = next(); } return (c); } // Return the next character from a character // or string literal static int scanch(void) { int c; // Get the next input character and interpret // metacharacters that start with a backslash c = next(); if (c == '\\') { switch (c = next()) { case 'a': return '\a'; case 'b': return '\b'; case 'f': return '\f'; case 'n': return '\n'; case 'r': return '\r'; case 't': return '\t'; case 'v': return '\v'; case '\\': return '\\'; case '"': return '"'; case '\'': return '\''; default: fatalc("unknown escape sequence", c); } } return (c); // Just an ordinary old character! } // Scan and return an integer literal // value from the input file. static int scanint(int c) { int k, val = 0; // Convert each character into an int value while ((k = chrpos("0123456789", c)) >= 0) { val = val * 10 + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan in a string literal from the input file, // and store it in buf[]. Return the length of // the string. static int scanstr(char *buf) { int i, c; // Loop while we have enough buffer space for (i = 0; i < TEXTLEN - 1; i++) { // Get the next char and append to buf // Return when we hit the ending double quote if ((c = scanch()) == '"') { buf[i] = 0; return (i); } buf[i] = c; } // Ran out of buf[] space fatal("String literal too long"); return (0); } // Scan an identifier from the input file and // store it in buf[]. Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { fatal("Identifier too long"); } else if (i < lim - 1) { buf[i++] = c; } c = next(); } // We hit a non-valid character, put it back. 
// NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. static int keyword(char *s) { switch (*s) { case 'c': if (!strcmp(s, "char")) return (T_CHAR); break; case 'e': if (!strcmp(s, "else")) return (T_ELSE); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'l': if (!strcmp(s, "long")) return (T_LONG); break; case 'r': if (!strcmp(s, "return")) return (T_RETURN); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; case 'v': if (!strcmp(s, "void")) return (T_VOID); break; } return (0); } // A pointer to a rejected token static struct token *Rejtoken = NULL; // Reject the token that we just scanned void reject_token(struct token *t) { if (Rejtoken != NULL) fatal("Can't reject token twice"); Rejtoken = t; } // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. 
int scan(struct token *t) { int c, tokentype; // If we have any rejected token, return it if (Rejtoken != NULL) { t = Rejtoken; Rejtoken = NULL; return (1); } // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': if ((c = next()) == '+') { t->token = T_INC; } else { putback(c); t->token = T_PLUS; } break; case '-': if ((c = next()) == '-') { t->token = T_DEC; } else { putback(c); t->token = T_MINUS; } break; case '*': t->token = T_STAR; break; case '/': t->token = T_SLASH; break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case '[': t->token = T_LBRACKET; break; case ']': t->token = T_RBRACKET; break; case '~': t->token = T_INVERT; break; case '^': t->token = T_XOR; break; case ',': t->token = T_COMMA; break; case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { putback(c); t->token = T_LOGNOT; } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else if (c == '<') { t->token = T_LSHIFT; } else { putback(c); t->token = T_LT; } break; case '>': if ((c = next()) == '=') { t->token = T_GE; } else if (c == '>') { t->token = T_RSHIFT; } else { putback(c); t->token = T_GT; } break; case '&': if ((c = next()) == '&') { t->token = T_LOGAND; } else { putback(c); t->token = T_AMPER; } break; case '|': if ((c = next()) == '|') { t->token = T_LOGOR; } else { putback(c); t->token = T_OR; } break; case '\'': // If it's a quote, scan in the // literal character value and // the trailing quote t->intvalue = scanch(); t->token = T_INTLIT; if (next() != '\'') fatal("Expected '\\'' at end of char literal"); break; case '"': // Scan in a literal string scanstr(Text); t->token = T_STRLIT; break; default: // If it's a digit, 
scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if ((tokentype = keyword(Text)) != 0) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token return (1); } ================================================ FILE: 26_Prototypes/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // Prototypes static struct ASTnode *single_statement(void); // compound_statement: // empty, i.e. no statement // | statement // | statement statements // ; // // statement: declaration // | expression_statement // | function_call // | if_statement // | while_statement // | for_statement // | return_statement // ; // if_statement: if_head // | if_head 'else' compound_statement // ; // // if_head: 'if' '(' true_false_expression ')' compound_statement ; // // Parse an IF statement including any // optional ELSE clause and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. 
condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, 0); rparen(); // Get the AST for the compound statement trueAST = compound_statement(); // If we have an 'else', skip it // and get the AST for the compound statement if (Token.token == T_ELSE) { scan(&Token); falseAST = compound_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, P_NONE, condAST, trueAST, falseAST, 0)); } // while_statement: 'while' '(' true_false_expression ')' compound_statement ; // // Parse a WHILE statement and return its AST static struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, 0); rparen(); // Get the AST for the compound statement bodyAST = compound_statement(); // Build and return the AST for this statement return (mkastnode(A_WHILE, P_NONE, condAST, NULL, bodyAST, 0)); } // for_statement: 'for' '(' preop_statement ';' // true_false_expression ';' // postop_statement ')' compound_statement ; // // preop_statement: statement (for now) // postop_statement: statement (for now) // // Parse a FOR statement and return its AST static struct ASTnode *for_statement(void) { struct ASTnode *condAST, *bodyAST; struct ASTnode *preopAST, *postopAST; struct ASTnode *tree; // Ensure we have 'for' '(' match(T_FOR, "for"); lparen(); // Get the pre_op statement and the ';' preopAST = single_statement(); semi(); // Get the condition and the ';'. // Force a non-comparison to be boolean // the tree's operation is a comparison. 
condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, 0); semi(); // Get the post_op statement and the ')' postopAST = single_statement(); rparen(); // Get the compound statement which is the body bodyAST = compound_statement(); // For now, all four sub-trees have to be non-NULL. // Later on, we'll change the semantics for when some are missing // Glue the compound statement and the postop tree tree = mkastnode(A_GLUE, P_NONE, bodyAST, NULL, postopAST, 0); // Make a WHILE loop with the condition and this new body tree = mkastnode(A_WHILE, P_NONE, condAST, NULL, tree, 0); // And glue the preop tree to the A_WHILE tree return (mkastnode(A_GLUE, P_NONE, preopAST, NULL, tree, 0)); } // return_statement: 'return' '(' expression ')' ; // // Parse a return statement and return its AST static struct ASTnode *return_statement(void) { struct ASTnode *tree; // Can't return a value if function returns P_VOID if (Symtable[Functionid].type == P_VOID) fatal("Can't return from a void function"); // Ensure we have 'return' '(' match(T_RETURN, "return"); lparen(); // Parse the following expression tree = binexpr(0); // Ensure this is compatible with the function's type tree = modify_type(tree, Symtable[Functionid].type, 0); if (tree == NULL) fatal("Incompatible type to return"); // Add on the A_RETURN node tree = mkastunary(A_RETURN, P_NONE, tree, 0); // Get the ')' rparen(); return (tree); } // Parse a single statement and return its AST static struct ASTnode *single_statement(void) { int type; switch (Token.token) { case T_CHAR: case T_INT: case T_LONG: // The beginning of a variable declaration. // Parse the type and get the identifier. 
// Then parse the rest of the declaration // and skip over the semicolon type = parse_type(); ident(); var_declaration(type, C_LOCAL); semi(); return (NULL); // No AST generated here case T_IF: return (if_statement()); case T_WHILE: return (while_statement()); case T_FOR: return (for_statement()); case T_RETURN: return (return_statement()); default: // For now, see if this is an expression. // This catches assignment statements. return (binexpr(0)); } return (NULL); // Keep -Wall happy } // Parse a compound statement // and return its AST struct ASTnode *compound_statement(void) { struct ASTnode *left = NULL; struct ASTnode *tree; // Require a left curly bracket lbrace(); while (1) { // Parse a single statement tree = single_statement(); // Some statements must be followed by a semicolon if (tree != NULL && (tree->op == A_ASSIGN || tree->op == A_RETURN || tree->op == A_FUNCCALL)) semi(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, P_NONE, left, NULL, tree, 0); } // When we hit a right curly bracket, // skip past it and return the AST if (Token.token == T_RBRACE) { rbrace(); return (left); } } } ================================================ FILE: 26_Prototypes/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 // Determine if the symbol s is in the global symbol table. // Return its slot position or -1 if not found. // Skip C_PARAM entries int findglob(char *s) { int i; for (i = 0; i < Globs; i++) { if (Symtable[i].class == C_PARAM) continue; if (*s == *Symtable[i].name && !strcmp(s, Symtable[i].name)) return (i); } return (-1); } // Get the position of a new global symbol slot, or die // if we've run out of positions. 
static int newglob(void) { int p; if ((p = Globs++) >= Locls) fatal("Too many global symbols"); return (p); } // Determine if the symbol s is in the local symbol table. // Return its slot position or -1 if not found. int findlocl(char *s) { int i; for (i = Locls + 1; i < NSYMBOLS; i++) { if (*s == *Symtable[i].name && !strcmp(s, Symtable[i].name)) return (i); } return (-1); } // Get the position of a new local symbol slot, or die // if we've run out of positions. static int newlocl(void) { int p; if ((p = Locls--) <= Globs) fatal("Too many local symbols"); return (p); } // Clear all the entries in the // local symbol table void freeloclsyms(void) { Locls = NSYMBOLS - 1; } // Update a symbol at the given slot number in the symbol table. Set up its: // + type: char, int etc. // + structural type: var, function, array etc. // + size: number of elements // + endlabel: if this is a function // + posn: Position information for local symbols static void updatesym(int slot, char *name, int type, int stype, int class, int endlabel, int size, int posn) { if (slot < 0 || slot >= NSYMBOLS) fatal("Invalid symbol slot number in updatesym()"); Symtable[slot].name = strdup(name); Symtable[slot].type = type; Symtable[slot].stype = stype; Symtable[slot].class = class; Symtable[slot].endlabel = endlabel; Symtable[slot].size = size; Symtable[slot].posn = posn; } // Add a global symbol to the symbol table. Set up its: // + type: char, int etc. // + structural type: var, function, array etc. 
// + class of the symbol // + size: number of elements // + endlabel: if this is a function // Return the slot number in the symbol table int addglob(char *name, int type, int stype, int class, int endlabel, int size) { int slot; // If this is already in the symbol table, return the existing slot if ((slot = findglob(name)) != -1) return (slot); // Otherwise get a new slot and fill it in slot = newglob(); updatesym(slot, name, type, stype, class, endlabel, size, 0); // Generate the assembly for the symbol if it's global if (class == C_GLOBAL) genglobsym(slot); // Return the slot number return (slot); } // Add a local symbol to the symbol table. Set up its: // + type: char, int etc. // + structural type: var, function, array etc. // + size: number of elements // Return the slot number in the symbol table, -1 if a duplicate entry int addlocl(char *name, int type, int stype, int class, int size) { int localslot; // If this is already in the symbol table, return an error if ((localslot = findlocl(name)) != -1) return (-1); // Otherwise get a new symbol slot and a position for this local. // Update the local symbol table entry. localslot = newlocl(); updatesym(localslot, name, type, stype, class, 0, size, 0); // Return the local symbol's slot return (localslot); } // Given a function's slot number, copy the global parameters // from its prototype to be local parameters void copyfuncparams(int slot) { int i, id = slot + 1; for (i = 0; i < Symtable[slot].nelems; i++, id++) { addlocl(Symtable[id].name, Symtable[id].type, Symtable[id].stype, Symtable[id].class, Symtable[id].size); } } // Determine if the symbol s is in the symbol table. // Return its slot position or -1 if not found. 
int findsymbol(char *s) {
  // Local symbols take priority; fall back to the globals
  int slot = findlocl(s);

  return ((slot != -1) ? slot : findglob(s));
}

================================================
FILE: 26_Prototypes/tests/input01.c
================================================
void main()
{
  printint(12 * 3);
  printint(18 - 2 * 4);
  printint(1 + 2 + 9 - 5/2 + 3*5);
}

================================================
FILE: 26_Prototypes/tests/input02.c
================================================
void main()
{
  int fred;
  int jim;
  fred= 5;
  jim= 12;
  printint(fred + jim);
}

================================================
FILE: 26_Prototypes/tests/input03.c
================================================
void main()
{
  int x;
  x= 1; printint(x);
  x= x + 1; printint(x);
  x= x + 1; printint(x);
  x= x + 1; printint(x);
  x= x + 1; printint(x);
}

================================================
FILE: 26_Prototypes/tests/input04.c
================================================
void main()
{
  int x;
  x= 7 < 9; printint(x);
  x= 7 <= 9; printint(x);
  x= 7 != 9; printint(x);
  x= 7 == 7; printint(x);
  x= 7 >= 7; printint(x);
  x= 7 <= 7; printint(x);
  x= 9 > 7; printint(x);
  x= 9 >= 7; printint(x);
  x= 9 != 7; printint(x);
}

================================================
FILE: 26_Prototypes/tests/input05.c
================================================
void main()
{
  int i; int j;
  i=6; j=12;
  if (i < j) {
    printint(i);
  } else {
    printint(j);
  }
}

================================================
FILE: 26_Prototypes/tests/input06.c
================================================
void main()
{
  int i;
  i=1;
  while (i <= 10) {
    printint(i);
    i= i + 1;
  }
}

================================================
FILE: 26_Prototypes/tests/input07.c
================================================
void main()
{
  int i;
  for (i= 1; i <= 10; i= i + 1) {
    printint(i);
  }
}

================================================
FILE: 26_Prototypes/tests/input08.c
================================================
void main()
{
  int i;
  for (i= 1; i <= 10; i= i + 1)
{ printint(i); } } ================================================ FILE: 26_Prototypes/tests/input09.c ================================================ void main() { int i; for (i= 1; i <= 10; i= i + 1) { printint(i); } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { printint(2 * b - a); } } ================================================ FILE: 26_Prototypes/tests/input10.c ================================================ void main() { int i; char j; j= 20; printint(j); i= 10; printint(i); for (i= 1; i <= 5; i= i + 1) { printint(i); } for (j= 253; j != 2; j= j + 1) { printint(j); } } ================================================ FILE: 26_Prototypes/tests/input11.c ================================================ int main() { int i; char j; long k; i= 10; printint(i); j= 20; printint(j); k= 30; printint(k); for (i= 1; i <= 5; i= i + 1) { printint(i); } for (j= 253; j != 4; j= j + 1) { printint(j); } for (k= 1; k <= 5; k= k + 1) { printint(k); } return(i); printint(12345); return(3); } ================================================ FILE: 26_Prototypes/tests/input12.c ================================================ int fred() { return(5); } void main() { int x; x= fred(2); printint(x); } ================================================ FILE: 26_Prototypes/tests/input13.c ================================================ int fred() { return(56); } void main() { int dummy; int result; dummy= printint(23); result= fred(10); dummy= printint(result); } ================================================ FILE: 26_Prototypes/tests/input14.c ================================================ int fred() { return(20); } int main() { int result; printint(10); result= fred(15); printint(result); printint(fred(15)+10); return(0); } ================================================ FILE: 26_Prototypes/tests/input15.c ================================================ int main() { char a; char *b; char c; int d; int *e; int f; a= 18; printint(a); b= &a; c= *b; 
printint(c); d= 12; printint(d); e= &d; f= *e; printint(f); return(0); } ================================================ FILE: 26_Prototypes/tests/input16.c ================================================ int c; int d; int *e; int f; int main() { c= 12; d=18; printint(c); e= &c + 1; f= *e; printint(f); return(0); } ================================================ FILE: 26_Prototypes/tests/input17.c ================================================ int main() { char a; char *b; int d; int *e; b= &a; *b= 19; printint(a); e= &d; *e= 12; printint(d); return(0); } ================================================ FILE: 26_Prototypes/tests/input18.c ================================================ int main() { int a; int b; a= b= 34; printint(a); printint(b); return(0); } ================================================ FILE: 26_Prototypes/tests/input18a.c ================================================ int a; int *b; char c; char *d; int main() { b= &a; *b= 15; printint(a); d= &c; *d= 16; printint(c); return(0); } ================================================ FILE: 26_Prototypes/tests/input19.c ================================================ int a; int b; int c; int d; int e; int main() { a= 2; b= 4; c= 3; d= 2; e= (a+b) * (c+d); printint(e); return(0); } ================================================ FILE: 26_Prototypes/tests/input20.c ================================================ int a; int b[25]; int main() { b[3]= 12; a= b[3]; printint(a); return(0); } ================================================ FILE: 26_Prototypes/tests/input21.c ================================================ char c; char *str; int main() { c= '\n'; printint(c); for (str= "Hello world\n"; *str != 0; str= str + 1) { printchar(*str); } return(0); } ================================================ FILE: 26_Prototypes/tests/input22.c ================================================ char a; char b; char c; int d; int e; int f; long g; long h; long i; int main() { b= 5; c= 7; a= b + c++; 
printint(a); e= 5; f= 7; d= e + f++; printint(d); h= 5; i= 7; g= h + i++; printint(g); a= b-- + c; printint(a); d= e-- + f; printint(d); g= h-- + i; printint(g); a= ++b + c; printint(a); d= ++e + f; printint(d); g= ++h + i; printint(g); a= b * --c; printint(a); d= e * --f; printint(d); g= h * --i; printint(g); return(0); } ================================================ FILE: 26_Prototypes/tests/input23.c ================================================ char *str; int x; int main() { x= -23; printint(x); printint(-10 * -10); x= 1; x= ~x; printint(x); x= 2 > 5; printint(x); x= !x; printint(x); x= !x; printint(x); x= 13; if (x) { printint(13); } x= 0; if (!x) { printint(14); } for (str= "Hello world\n"; *str; str++) { printchar(*str); } return(0); } ================================================ FILE: 26_Prototypes/tests/input24.c ================================================ int a; int b; int c; int main() { a= 42; b= 19; printint(a & b); printint(a | b); printint(a ^ b); printint(1 << 3); printint(63 >> 3); return(0); } ================================================ FILE: 26_Prototypes/tests/input25.c ================================================ int a; int b; int c; int main() { char z; int y; int x; x= 10; y= 20; z= 30; printint(x); printint(y); printint(z); a= 5; b= 15; c= 25; printint(a); printint(b); printint(c); return(0); } ================================================ FILE: 26_Prototypes/tests/input26.c ================================================ int main(int a, char b, long c, int d, int e, int f, int g, int h) { int i; int j; int k; a= 13; printint(a); b= 23; printint(b); c= 34; printint(c); d= 44; printint(d); e= 54; printint(e); f= 64; printint(f); g= 74; printint(g); h= 84; printint(h); i= 94; printint(i); j= 95; printint(j); k= 96; printint(k); return(0); } ================================================ FILE: 26_Prototypes/tests/input27.c ================================================ int param8(int a, int b, int c, int d, int 
e, int f, int g, int h) { printint(a); printint(b); printint(c); printint(d); printint(e); printint(f); printint(g); printint(h); return(0); } int param5(int a, int b, int c, int d, int e) { printint(a); printint(b); printint(c); printint(d); printint(e); return(0); } int param2(int a, int b) { int c; int d; int e; c= 3; d= 4; e= 5; printint(a); printint(b); printint(c); printint(d); printint(e); return(0); } int param0() { int a; int b; int c; int d; int e; a= 1; b= 2; c= 3; d= 4; e= 5; printint(a); printint(b); printint(c); printint(d); printint(e); return(0); } int main() { param8(1,2,3,4,5,6,7,8); param5(1,2,3,4,5); param2(1,2); param0(); return(0); } ================================================ FILE: 26_Prototypes/tests/input28.c ================================================ int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printint(a); printint(b); printint(c); printint(d); printint(e); printint(f); printint(g); printint(h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printint(x); return(0); } ================================================ FILE: 26_Prototypes/tests/input29.c ================================================ int param8(int a, int b, int c, int d, int e, int f, int g, int h); int fred(int a, int b, int c); int main(); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printint(a); printint(b); printint(c); printint(d); printint(e); printint(f); printint(g); printint(h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printint(x); return(0); } ================================================ FILE: 26_Prototypes/tests/input30.c ================================================ int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int 
main() { int zin; int cnt; buf= " "; zin = open("input30.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 26_Prototypes/tests/mktests ================================================ #!/bin/sh # Make the output files for each test for i in input*c do if [ ! -f "out.$i" ] then cc -o out $i ../lib/printint.c ./out > out.$i rm -f out fi done ================================================ FILE: 26_Prototypes/tests/out.input01.c ================================================ 36 10 25 ================================================ FILE: 26_Prototypes/tests/out.input02.c ================================================ 17 ================================================ FILE: 26_Prototypes/tests/out.input03.c ================================================ 1 2 3 4 5 ================================================ FILE: 26_Prototypes/tests/out.input04.c ================================================ 1 1 1 1 1 1 1 1 1 ================================================ FILE: 26_Prototypes/tests/out.input05.c ================================================ 6 ================================================ FILE: 26_Prototypes/tests/out.input06.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 26_Prototypes/tests/out.input07.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 26_Prototypes/tests/out.input08.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 26_Prototypes/tests/out.input09.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 26_Prototypes/tests/out.input10.c ================================================ 20 10 1 2 3 4 5 
253 254 255 0 1 ================================================ FILE: 26_Prototypes/tests/out.input11.c ================================================ 10 20 30 1 2 3 4 5 253 254 255 0 1 2 3 1 2 3 4 5 ================================================ FILE: 26_Prototypes/tests/out.input12.c ================================================ 5 ================================================ FILE: 26_Prototypes/tests/out.input13.c ================================================ 23 56 ================================================ FILE: 26_Prototypes/tests/out.input14.c ================================================ 10 20 30 ================================================ FILE: 26_Prototypes/tests/out.input15.c ================================================ 18 18 12 12 ================================================ FILE: 26_Prototypes/tests/out.input16.c ================================================ 12 18 ================================================ FILE: 26_Prototypes/tests/out.input17.c ================================================ 19 12 ================================================ FILE: 26_Prototypes/tests/out.input18.c ================================================ 34 34 ================================================ FILE: 26_Prototypes/tests/out.input18a.c ================================================ 15 16 ================================================ FILE: 26_Prototypes/tests/out.input19.c ================================================ 30 ================================================ FILE: 26_Prototypes/tests/out.input20.c ================================================ 12 ================================================ FILE: 26_Prototypes/tests/out.input21.c ================================================ 10 Hello world ================================================ FILE: 26_Prototypes/tests/out.input22.c ================================================ 12 12 12 13 13 13 13 13 13 35 35 35 
================================================ FILE: 26_Prototypes/tests/out.input23.c ================================================ -23 100 -2 0 1 0 13 14 Hello world ================================================ FILE: 26_Prototypes/tests/out.input24.c ================================================ 2 59 57 8 7 ================================================ FILE: 26_Prototypes/tests/out.input25.c ================================================ 10 20 30 5 15 25 ================================================ FILE: 26_Prototypes/tests/out.input26.c ================================================ 13 23 34 44 54 64 74 84 94 95 96 ================================================ FILE: 26_Prototypes/tests/out.input27.c ================================================ 1 2 3 4 5 6 7 8 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 ================================================ FILE: 26_Prototypes/tests/out.input28.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 26_Prototypes/tests/out.input29.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 26_Prototypes/tests/out.input30.c ================================================ int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input30.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 26_Prototypes/tests/runtests ================================================ #!/bin/sh # Run each test and compare # against known good output if [ ! -f ../comp1 ] then echo "Need to build ../comp1 first!"; exit 1 fi for i in input* do if [ ! -f "out.$i" ] then echo "Can't run test on $i, no output file!" 
else echo -n $i ../comp1 $i cc -o out out.s ../lib/printint.c ./out > trial.$i cmp -s "out.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo else echo ": OK" fi rm -f out out.s "trial.$i" fi done ================================================ FILE: 26_Prototypes/tests/runtestsn ================================================ #!/bin/sh # Run each test and compare # against known good output if [ ! -f ../compn ] then echo "Need to build ../compn first!"; exit 1 fi for i in input* do if [ ! -f "out.$i" ] then echo "Can't run test on $i, no output file!" else echo -n $i ../compn $i nasm -f elf64 out.s cc -no-pie -fno-plt -Wall -o out out.o ../lib/printint.c ./out > trial.$i cmp -s "out.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo else echo ": OK" fi rm -f out out.o out.s "trial.$i" fi done ================================================ FILE: 26_Prototypes/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->type = type; n->left = left; n->mid = mid; n->right = right; n->v.intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, int intvalue) { return (mkastnode(op, type, NULL, NULL, NULL, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, int intvalue) { return (mkastnode(op, type, left, NULL, NULL, intvalue)); } // Generate and return a new label number // just for AST dumping 
purposes static int gendumplabel(void) { static int id = 1; return (id++); } // Given an AST tree, print it out and follow the // traversal of the tree that genAST() follows void dumpAST(struct ASTnode *n, int label, int level) { int Lfalse, Lstart, Lend; switch (n->op) { case A_IF: Lfalse = gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_IF"); if (n->right) { Lend = gendumplabel(); fprintf(stdout, ", end L%d", Lend); } fprintf(stdout, "\n"); dumpAST(n->left, Lfalse, level + 2); dumpAST(n->mid, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); return; case A_WHILE: Lstart = gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_WHILE, start L%d\n", Lstart); Lend = gendumplabel(); dumpAST(n->left, Lend, level + 2); dumpAST(n->right, NOLABEL, level + 2); return; } // Reset level to -2 for A_GLUE if (n->op == A_GLUE) level = -2; // General AST node handling if (n->left) dumpAST(n->left, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); for (int i = 0; i < level; i++) fprintf(stdout, " "); switch (n->op) { case A_GLUE: fprintf(stdout, "\n\n"); return; case A_FUNCTION: fprintf(stdout, "A_FUNCTION %s\n", Symtable[n->v.id].name); return; case A_ADD: fprintf(stdout, "A_ADD\n"); return; case A_SUBTRACT: fprintf(stdout, "A_SUBTRACT\n"); return; case A_MULTIPLY: fprintf(stdout, "A_MULTIPLY\n"); return; case A_DIVIDE: fprintf(stdout, "A_DIVIDE\n"); return; case A_EQ: fprintf(stdout, "A_EQ\n"); return; case A_NE: fprintf(stdout, "A_NE\n"); return; case A_LT: fprintf(stdout, "A_LE\n"); return; case A_GT: fprintf(stdout, "A_GT\n"); return; case A_LE: fprintf(stdout, "A_LE\n"); return; case A_GE: fprintf(stdout, "A_GE\n"); return; case A_INTLIT: fprintf(stdout, "A_INTLIT %d\n", n->v.intvalue); return; case A_STRLIT: fprintf(stdout, "A_STRLIT rval label L%d\n", n->v.id); return; case A_IDENT: if (n->rvalue) fprintf(stdout, "A_IDENT rval %s\n", 
Symtable[n->v.id].name); else fprintf(stdout, "A_IDENT %s\n", Symtable[n->v.id].name); return; case A_ASSIGN: fprintf(stdout, "A_ASSIGN\n"); return; case A_WIDEN: fprintf(stdout, "A_WIDEN\n"); return; case A_RETURN: fprintf(stdout, "A_RETURN\n"); return; case A_FUNCCALL: fprintf(stdout, "A_FUNCCALL %s\n", Symtable[n->v.id].name); return; case A_ADDR: fprintf(stdout, "A_ADDR %s\n", Symtable[n->v.id].name); return; case A_DEREF: if (n->rvalue) fprintf(stdout, "A_DEREF rval\n"); else fprintf(stdout, "A_DEREF\n"); return; case A_SCALE: fprintf(stdout, "A_SCALE %d\n", n->v.size); return; case A_PREINC: fprintf(stdout, "A_PREINC %s\n", Symtable[n->v.id].name); return; case A_PREDEC: fprintf(stdout, "A_PREDEC %s\n", Symtable[n->v.id].name); return; case A_POSTINC: fprintf(stdout, "A_POSTINC\n"); return; case A_POSTDEC: fprintf(stdout, "A_POSTDEC\n"); return; case A_NEGATE: fprintf(stdout, "A_NEGATE\n"); return; default: fatald("Unknown dumpAST operator", n->op); } } ================================================ FILE: 26_Prototypes/types.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Types and type handling // Copyright (c) 2019 Warren Toomey, GPL3 // Return true if a type is an int type // of any size, false otherwise int inttype(int type) { if (type == P_CHAR || type == P_INT || type == P_LONG) return (1); return (0); } // Return true if a type is of pointer type int ptrtype(int type) { if (type == P_VOIDPTR || type == P_CHARPTR || type == P_INTPTR || type == P_LONGPTR) return (1); return (0); } // Given a primitive type, return // the type which is a pointer to it int pointer_to(int type) { int newtype; switch (type) { case P_VOID: newtype = P_VOIDPTR; break; case P_CHAR: newtype = P_CHARPTR; break; case P_INT: newtype = P_INTPTR; break; case P_LONG: newtype = P_LONGPTR; break; default: fatald("Unrecognised in pointer_to: type", type); } return (newtype); } // Given a primitive pointer type, return // the 
type which it points to int value_at(int type) { int newtype; switch (type) { case P_VOIDPTR: newtype = P_VOID; break; case P_CHARPTR: newtype = P_CHAR; break; case P_INTPTR: newtype = P_INT; break; case P_LONGPTR: newtype = P_LONG; break; default: fatald("Unrecognised in value_at: type", type); } return (newtype); } // Given an AST tree and a type which we want it to become, // possibly modify the tree by widening or scaling so that // it is compatible with this type. Return the original tree // if no changes occurred, a modified tree, or NULL if the // tree is not compatible with the given type. // If this will be part of a binary operation, the AST op is not zero. struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op) { int ltype; int lsize, rsize; ltype = tree->type; // Compare scalar int types if (inttype(ltype) && inttype(rtype)) { // Both types same, nothing to do if (ltype == rtype) return (tree); // Get the sizes for each type lsize = genprimsize(ltype); rsize = genprimsize(rtype); // Tree's size is too big if (lsize > rsize) return (NULL); // Widen to the right if (rsize > lsize) return (mkastunary(A_WIDEN, rtype, tree, 0)); } // For pointers on the left if (ptrtype(ltype)) { // OK is same type on right and not doing a binary op if (op == 0 && ltype == rtype) return (tree); } // We can scale only on A_ADD or A_SUBTRACT operation if (op == A_ADD || op == A_SUBTRACT) { // Left is int type, right is pointer type and the size // of the original type is >1: scale the left if (inttype(ltype) && ptrtype(rtype)) { rsize = genprimsize(value_at(rtype)); if (rsize > 1) return (mkastunary(A_SCALE, rtype, tree, rsize)); else return (tree); // Size 1, no need to scale } } // If we get here, the types are not compatible return (NULL); } ================================================ FILE: 27_Testing_Errors/Makefile ================================================ HSRCS= data.h decl.h defs.h SRCS= cg.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c 
sym.c tree.c types.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c ARMSRCS= cg_arm.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c comp1: $(SRCS) $(HSRCS) cc -o comp1 -g -Wall $(SRCS) compn: $(SRCN) $(HSRCS) cc -o compn -g -Wall $(SRCN) comp1arm: $(ARMSRCS) $(HSRCS) cc -o comp1arm -g -Wall $(ARMSRCS) cp comp1arm comp1 clean: rm -f comp1 comp1arm compn *.o *.s out test: comp1 tests/runtests (cd tests; chmod +x runtests; ./runtests) armtest: comp1arm tests/runtests (cd tests; chmod +x runtests; ./runtests) testn: compn tests/runtestsn (cd tests; chmod +x runtestsn; ./runtestsn) ================================================ FILE: 27_Testing_Errors/Readme.md ================================================ # Part 27: Regression Testing and a Nice Surprise We've had a few large-ish steps recently in our compiler writing journey, so I thought we should have a bit of a breather in this step. We can slow down a bit and review our progress so far. In the last step I noticed that we didn't have a way to confirm that our syntax and semantic error checking was working correctly. So I've just rewritten the scripts in the `tests/` folder to do this. I've been using Unix since the late 1980s, so my go-to automation tools are shell scripts and Makefiles or, if I need more complex tools, scripts written in Python or Perl (yes, I'm that old). So let's quickly look at the `runtest` script in the `tests/` directory. Even though I said I'd been using Unix scripts forever, I'm definitely not an uber script writer. ## The `runtest` Script The job of this script is to take a set of input programs, get our compiler to compile them, run the executable and compare its output against known-good output. If they match, the test is a success. If not, it's a failure. I've just extended it so that, if there is an "error" file associated with an input, we run our compiler and capture its error output. 
If this error output matches the expected error output, the test is a success as the compiler correctly detected the bad input. So let's look at the sections of the `runtest` script in stages. ``` # Build our compiler if needed if [ ! -f ../comp1 ] then (cd ..; make) fi ``` I'm using the '( ... )' syntax here to create a *sub-shell*. This can change its working directory without affecting the original shell, so we can move up a directory and rebuild our compiler. ``` # Try to use each input source file for i in input* # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" ``` The '[' thing is actually the external Unix tool, *test(1)*. Oh, if you've never seen this syntax before, *test(1)* means the manual page for *test* is in Section One of the man pages, and you can do: ``` $ man 1 test ``` to read the manual for *test* in Section One of the man pages. The `/usr/bin/[` executable is usually linked to `/usr/bin/test`, so that when you use '[' in a shell script, it's the same as running the *test* command. We can read the line `[ ! -f "out.$i" -a ! -f "err.$i" ]` as saying: test if there is no file "out.$i" and no file "err.$i". If both don't exist, we can give the error message. ``` # Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../comp1 $i # Assemble the output, run it # and get the output in trial.$i cc -o out out.s ../lib/printint.c ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi ``` This is the bulk of the script. 
I think the comments explain what is going on, but perhaps there are some subtleties to flesh out. `cmp -s` compares two text files; the `-s` flag means produce no output but set the exit value that `cmp` gives when it exits to: > 0 if inputs are the same, 1 if different, 2 if trouble. (from the man page) The line `if [ "$?" -eq "1" ]` says: if the exit value of the last command is equal to the number 1. So, if the compiler's output is different to the known-good output, we announce this and use the `diff` tool to show the differences between the two files. ``` # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../comp1 $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" ... ``` This section gets executed when there is an error document, "err.$i". This time, we use the shell syntax `2>` to capture our compiler's standard error output to the file "trial.$i" and compare that against the correct error output. The logic after this is the same as before. ## What We Are Doing: Regression Testing I haven't talked much before about testing, but now's the time. I've taught software development in the past so it would be remiss of me not to cover testing at some point. What we are doing here is [**regression testing**](https://en.wikipedia.org/wiki/Regression_testing). Wikipedia gives this definition: > Regression testing is the action of re-running functional and non-functional tests > to ensure that previously developed and tested software still performs after a change. As our compiler is changing at each step, we have to ensure that each new change doesn't break the functionality (and the error checking) of the previous steps. So each time I introduce a change, I add one or more tests to a) prove that it works and b) re-run this test on future changes. As long as all the tests pass, I'm sure that the new code hasn't broken the old code. 
### Functional Tests

The `runtests` script looks for files with the `out` prefix to do the functional testing. Right now, we have:

```
tests/out.input01.c  tests/out.input12.c   tests/out.input22.c
tests/out.input02.c  tests/out.input13.c   tests/out.input23.c
tests/out.input03.c  tests/out.input14.c   tests/out.input24.c
tests/out.input04.c  tests/out.input15.c   tests/out.input25.c
tests/out.input05.c  tests/out.input16.c   tests/out.input26.c
tests/out.input06.c  tests/out.input17.c   tests/out.input27.c
tests/out.input07.c  tests/out.input18a.c  tests/out.input28.c
tests/out.input08.c  tests/out.input18.c   tests/out.input29.c
tests/out.input09.c  tests/out.input19.c   tests/out.input30.c
tests/out.input10.c  tests/out.input20.c   tests/out.input53.c
tests/out.input11.c  tests/out.input21.c   tests/out.input54.c
```

That's 33 separate tests of the compiler's functionality. Right now, I know for a fact that our compiler is a bit fragile. None of these tests really stress the compiler in any way: they are simple tests of a few lines each. Later on, we will start to add some nasty stress tests to help strengthen the compiler and make it more resilient.

### Non-Functional Tests

The `runtests` script looks for files with the `err` prefix to do the non-functional testing. Right now, we have:

```
tests/err.input31.c  tests/err.input39.c  tests/err.input47.c
tests/err.input32.c  tests/err.input40.c  tests/err.input48.c
tests/err.input33.c  tests/err.input41.c  tests/err.input49.c
tests/err.input34.c  tests/err.input42.c  tests/err.input50.c
tests/err.input35.c  tests/err.input43.c  tests/err.input51.c
tests/err.input36.c  tests/err.input44.c  tests/err.input52.c
tests/err.input37.c  tests/err.input45.c
tests/err.input38.c  tests/err.input46.c
```

I created these 22 tests of the compiler's error checking in this step of our journey by looking for `fatal()` calls in the compiler. For each one, I've tried to write a small input file which would trigger it.
Have a read of the matching source files and see if you can work out what syntax or semantic error each one triggers.

## Other Forms of Testing

This isn't a course on software development methodologies, so I won't give too much more coverage on testing. But I'll give you links to a few more things that I would highly recommend that you look at:

+ [Unit testing](https://en.wikipedia.org/wiki/Unit_testing)
+ [Test-driven development](https://en.wikipedia.org/wiki/Test-driven_development)
+ [Continuous integration](https://en.wikipedia.org/wiki/Continuous_integration)
+ [Version control](https://en.wikipedia.org/wiki/Version_control)

I haven't done any unit testing with our compiler. The main reason here is that the code is very fluid in terms of the APIs for the functions. I'm not using a traditional waterfall model of development, so I'd be spending too much time rewriting my unit tests to match the latest APIs of all the functions. So, in some sense I am living dangerously here: there will be a number of latent bugs in the code which we haven't detected yet.

However, there are guaranteed to be *many* more bugs where the compiler looks like it accepts the C language, but of course this isn't true. The compiler is failing the [principle of least astonishment](https://en.wikipedia.org/wiki/Principle_of_least_astonishment). We will need to spend some time adding in functionality that a "normal" C programmer expects to see.

## And a Nice Surprise

Finally, we have a nice functional surprise with the compiler as it stands. A while back, I purposefully left out the code to test that the number and type of arguments to a function call matches the function's prototype (in `expr.c`):

```
  // XXX Check type of each argument against the function's prototype
```

I left this out as I didn't want to add too much new code in one of our steps.
Now that we have prototypes, I've wanted to finally add support for `printf()` so that we can ditch our homegrown `printint()` and `printchar()` functions. But we can't do this just yet, because `printf()` is a [variadic function](https://en.wikipedia.org/wiki/Variadic_function): it can accept a variable number of parameters. And, right now, our compiler only allows a function declaration with a fixed number of parameters. *However* (and this is the nice surprise), because we don't check the number of arguments in a function call, we can pass *any* number of arguments to `printf()` as long as we have given it an existing prototype. So, at present, this code (`tests/input53.c`) works: ```c int printf(char *fmt); int main() { printf("Hello world, %d\n", 23); return(0); } ``` And that's a nice thing! There is a gotcha. With the given `printf()` prototype, the cleanup code in `cgcall()` won't adjust the stack pointer when the function returns, as there are less than six parameters in the prototype. But we could call `printf()` with ten arguments: we'd push four of them on the stack, but `cgcall()` wouldn't clean up these four arguments when `printf()` returns. ## Conclusion and What's Next There is no new compiler code in this step, but we are now testing the error checking capability of the compiler, and we now have 54 regression tests to help ensure we don't break the compiler when we add new functionality. And, fortuitously, we can now use `printf()` as well as the other external fixed parameter count functions. 
In the next part of our compiler writing journey, I think I'll try to: + add support for an external pre-processor + allow the compiler to compile multiple files named on the command line + add the `-o`, `-c` and `-S` flags to the compiler to make it feel more like a "normal" C compiler [Next step](../28_Runtime_Flags/Readme.md) ================================================ FILE: 27_Testing_Errors/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\t.text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\t.data\n", Outfile); currSeg = data_seg; } } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. static int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. 
// We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "%r10", "%r11", "%r12", "%r13", "%r9", "%r8", "%rcx", "%rdx", "%rsi", "%rdi" }; static char *breglist[] = { "%r10b", "%r11b", "%r12b", "%r13b", "%r9b", "%r8b", "%cl", "%dl", "%sil", "%dil" }; static char *dreglist[] = { "%r10d", "%r11d", "%r12d", "%r13d", "%r9d", "%r8d", "%ecx", "%edx", "%esi", "%edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
// Return a register to the list of available registers.
// Dies via fatald() if the register is already marked free.
// NOTE(review): reg is used as an index without a range check.
static void free_register(int reg) {
  if (freereg[reg] != 0)
    fatald("Error trying to free register", reg);
  freereg[reg] = 1;
}

// Print out the assembly preamble. Nothing is
// written: this only marks every register as free.
void cgpreamble() {
  freeall_registers();
}

// Print out the assembly postamble: nothing to do
void cgpostamble() {
}

// Print out a function preamble for the function whose
// symbol slot is id, and give every parameter and local
// between NSYMBOLS - 1 and Locls a position (posn)
// relative to %rbp.
void cgfuncpreamble(int id) {
  char *name = Symtable[id].name;
  int i;
  int paramOffset = 16;		// Any pushed params start at this stack offset
  int paramReg = FIRSTPARAMREG;	// Index to the first param register in above reg lists

  // Output in the text segment, reset local offset
  cgtextseg();
  localOffset = 0;

  // Output the function start, save the caller's %rbp
  // and make %rbp point at the new stack frame
  fprintf(Outfile,
	  "\t.globl\t%s\n"
	  "\t.type\t%s, @function\n"
	  "%s:\n" "\tpushq\t%%rbp\n"
	  "\tmovq\t%%rsp, %%rbp\n", name, name, name);

  // Copy any in-register parameters to the stack
  // Stop after no more than six parameter registers.
  // paramReg walks down the register lists from FIRSTPARAMREG.
  for (i = NSYMBOLS - 1; i > Locls; i--) {
    if (Symtable[i].class != C_PARAM)
      break;
    if (i < NSYMBOLS - 6)
      break;
    Symtable[i].posn = newlocaloffset(Symtable[i].type);
    cgstorlocal(paramReg--, i);
  }

  // For the remainder, if they are a parameter then they are
  // already on the stack. If only a local, make a stack position.
  for (; i > Locls; i--) {
    if (Symtable[i].class == C_PARAM) {
      Symtable[i].posn = paramOffset;
      paramOffset += 8;
    } else {
      Symtable[i].posn = newlocaloffset(Symtable[i].type);
    }
  }

  // Align the stack pointer to be a multiple of 16
  // less than its previous value. The amount is kept in
  // stackOffset so that cgfuncpostamble() can undo it.
  stackOffset = (localOffset + 15) & ~15;
  fprintf(Outfile, "\taddq\t$%d,%%rsp\n", -stackOffset);
}

// Print out a function postamble: the function's end label,
// then give back the stack space taken in the preamble,
// restore %rbp and return
void cgfuncpostamble(int id) {
  cglabel(Symtable[id].endlabel);
  fprintf(Outfile, "\taddq\t$%d,%%rsp\n", stackOffset);
  fputs("\tpopq %rbp\n" "\tret\n", Outfile);
}
// Load an integer literal value into a register.
// Return the number of the register.
// For x86-64, we don't need to worry about the type:
// the type argument is not used.
int cgloadint(int value, int type) {
  // Get a new register
  int r = alloc_register();

  fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]);
  return (r);
}

// Load a value from a global variable into a register.
// Return the number of the register. If the
// operation is pre- or post-increment/decrement,
// also perform this action: the inc/dec is applied to the
// variable in memory, before the load for A_PREINC/A_PREDEC
// and after the load for A_POSTINC/A_POSTDEC. Any other
// op value results in a plain load.
// Dies via fatald() on a type which isn't listed below.
int cgloadglob(int id, int op) {
  // Get a new register
  int r = alloc_register();

  // Print out the code to initialise it
  switch (Symtable[id].type) {
    case P_CHAR:
      // One byte, zero-extended into the 64-bit register
      if (op == A_PREINC)
	fprintf(Outfile, "\tincb\t%s(%%rip)\n", Symtable[id].name);
      if (op == A_PREDEC)
	fprintf(Outfile, "\tdecb\t%s(%%rip)\n", Symtable[id].name);
      fprintf(Outfile, "\tmovzbq\t%s(%%rip), %s\n", Symtable[id].name,
	      reglist[r]);
      if (op == A_POSTINC)
	fprintf(Outfile, "\tincb\t%s(%%rip)\n", Symtable[id].name);
      if (op == A_POSTDEC)
	fprintf(Outfile, "\tdecb\t%s(%%rip)\n", Symtable[id].name);
      break;
    case P_INT:
      // Four bytes, sign-extended into the 64-bit register
      if (op == A_PREINC)
	fprintf(Outfile, "\tincl\t%s(%%rip)\n", Symtable[id].name);
      if (op == A_PREDEC)
	fprintf(Outfile, "\tdecl\t%s(%%rip)\n", Symtable[id].name);
      fprintf(Outfile, "\tmovslq\t%s(%%rip), %s\n", Symtable[id].name,
	      reglist[r]);
      if (op == A_POSTINC)
	fprintf(Outfile, "\tincl\t%s(%%rip)\n", Symtable[id].name);
      if (op == A_POSTDEC)
	fprintf(Outfile, "\tdecl\t%s(%%rip)\n", Symtable[id].name);
      break;
    case P_LONG:
    case P_CHARPTR:
    case P_INTPTR:
    case P_LONGPTR:
      // Eight bytes: a plain 64-bit load
      if (op == A_PREINC)
	fprintf(Outfile, "\tincq\t%s(%%rip)\n", Symtable[id].name);
      if (op == A_PREDEC)
	fprintf(Outfile, "\tdecq\t%s(%%rip)\n", Symtable[id].name);
      fprintf(Outfile, "\tmovq\t%s(%%rip), %s\n", Symtable[id].name,
	      reglist[r]);
      if (op == A_POSTINC)
	fprintf(Outfile, "\tincq\t%s(%%rip)\n", Symtable[id].name);
      if (op == A_POSTDEC)
	fprintf(Outfile, "\tdecq\t%s(%%rip)\n", Symtable[id].name);
      break;
    default:
      fatald("Bad type in cgloadglob:", Symtable[id].type);
  }
  return (r);
}
// Load a value from a local variable into a register.
// Return the number of the register. If the
// operation is pre- or post-increment/decrement,
// also perform this action. The variable is addressed
// as an offset (its posn) from %rbp.
// Dies via fatald() on a type which isn't listed below.
int cgloadlocal(int id, int op) {
  // Get a new register
  int r = alloc_register();

  // Print out the code to initialise it
  switch (Symtable[id].type) {
    case P_CHAR:
      // One byte, zero-extended into the 64-bit register
      if (op == A_PREINC)
	fprintf(Outfile, "\tincb\t%d(%%rbp)\n", Symtable[id].posn);
      if (op == A_PREDEC)
	fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", Symtable[id].posn);
      fprintf(Outfile, "\tmovzbq\t%d(%%rbp), %s\n", Symtable[id].posn,
	      reglist[r]);
      if (op == A_POSTINC)
	fprintf(Outfile, "\tincb\t%d(%%rbp)\n", Symtable[id].posn);
      if (op == A_POSTDEC)
	fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", Symtable[id].posn);
      break;
    case P_INT:
      // Four bytes, sign-extended into the 64-bit register
      if (op == A_PREINC)
	fprintf(Outfile, "\tincl\t%d(%%rbp)\n", Symtable[id].posn);
      if (op == A_PREDEC)
	fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", Symtable[id].posn);
      fprintf(Outfile, "\tmovslq\t%d(%%rbp), %s\n", Symtable[id].posn,
	      reglist[r]);
      if (op == A_POSTINC)
	fprintf(Outfile, "\tincl\t%d(%%rbp)\n", Symtable[id].posn);
      if (op == A_POSTDEC)
	fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", Symtable[id].posn);
      break;
    case P_LONG:
    case P_CHARPTR:
    case P_INTPTR:
    case P_LONGPTR:
      // Eight bytes: a plain 64-bit load
      if (op == A_PREINC)
	fprintf(Outfile, "\tincq\t%d(%%rbp)\n", Symtable[id].posn);
      if (op == A_PREDEC)
	fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", Symtable[id].posn);
      fprintf(Outfile, "\tmovq\t%d(%%rbp), %s\n", Symtable[id].posn,
	      reglist[r]);
      if (op == A_POSTINC)
	fprintf(Outfile, "\tincq\t%d(%%rbp)\n", Symtable[id].posn);
      if (op == A_POSTDEC)
	fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", Symtable[id].posn);
      break;
    default:
      fatald("Bad type in cgloadlocal:", Symtable[id].type);
  }
  return (r);
}

// Given the label number of a global string,
// load its address into a new register
int cgloadglobstr(int id) {
  // Get a new register
  int r = alloc_register();

  fprintf(Outfile, "\tleaq\tL%d(%%rip), %s\n", id, reglist[r]);
  return (r);
}

// Add two registers together and return
// the number of the register with the result.
// The result is left in r2 and r1 is freed.
int cgadd(int r1, int r2) {
  fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r1], reglist[r2]);
  free_register(r1);
  return (r2);
}

// Subtract the second register from the first and
// return the number of the register with the result.
// The result is left in r1 and r2 is freed.
int cgsub(int r1, int r2) {
  fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]);
  free_register(r2);
  return (r1);
}

// Multiply two registers together and return
// the number of the register with the result.
// The result is left in r2 and r1 is freed.
int cgmul(int r1, int r2) {
  fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]);
  free_register(r1);
  return (r2);
}

// Divide the first register by the second and
// return the number of the register with the result.
// The dividend goes through %rax (sign-extended by cqo)
// and the quotient is copied back from %rax into r1.
// r2 is freed.
int cgdiv(int r1, int r2) {
  fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]);
  fprintf(Outfile, "\tcqo\n");
  fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]);
  fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]);
  free_register(r2);
  return (r1);
}

// Bitwise AND two registers. The result is
// left in r2, which is returned; r1 is freed.
int cgand(int r1, int r2) {
  fprintf(Outfile, "\tandq\t%s, %s\n", reglist[r1], reglist[r2]);
  free_register(r1);
  return (r2);
}

// Bitwise OR two registers. The result is
// left in r2, which is returned; r1 is freed.
int cgor(int r1, int r2) {
  fprintf(Outfile, "\torq\t%s, %s\n", reglist[r1], reglist[r2]);
  free_register(r1);
  return (r2);
}

// Bitwise XOR two registers. The result is
// left in r2, which is returned; r1 is freed.
int cgxor(int r1, int r2) {
  fprintf(Outfile, "\txorq\t%s, %s\n", reglist[r1], reglist[r2]);
  free_register(r1);
  return (r2);
}

// Shift r1 left by the amount in r2. The low byte of
// r2 is first copied into %cl to act as the shift count.
// The result is left in r1, which is returned; r2 is freed.
int cgshl(int r1, int r2) {
  fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]);
  fprintf(Outfile, "\tshlq\t%%cl, %s\n", reglist[r1]);
  free_register(r2);
  return (r1);
}

// Shift r1 right by the amount in r2, using shrq
// (a logical shift). The low byte of r2 is first copied
// into %cl to act as the shift count.
// The result is left in r1, which is returned; r2 is freed.
int cgshr(int r1, int r2) {
  fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]);
  fprintf(Outfile, "\tshrq\t%%cl, %s\n", reglist[r1]);
  free_register(r2);
  return (r1);
}

// Negate a register's value
int cgnegate(int r) {
  fprintf(Outfile, "\tnegq\t%s\n", reglist[r]);
  return (r);
}

// Invert a register's value
int cginvert(int r) {
  fprintf(Outfile, "\tnotq\t%s\n", reglist[r]);
  return (r);
}

// Logically negate a register's value:
// the register ends up as 1 if it was zero, else 0
int cglognot(int r) {
  fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]);
  fprintf(Outfile, "\tsete\t%s\n", breglist[r]);
  fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]);
  return (r);
}

// Convert an integer value to a boolean value. Jump if
// it's an IF or WHILE operation: in that case the code
// jumps to the label when the value is zero. Otherwise
// the register is set to 1 if it was non-zero, else 0.
int cgboolean(int r, int op, int label) {
  fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]);
  if (op == A_IF || op == A_WHILE)
    fprintf(Outfile, "\tje\tL%d\n", label);
  else {
    fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]);
    fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]);
  }
  return (r);
}

// Call a function with the given symbol id
// Pop off any arguments pushed on the stack
// Return the register with the result
int cgcall(int id, int numargs) {
  // Get a new register
  int outr = alloc_register();
  // Call the function
  fprintf(Outfile, "\tcall\t%s@PLT\n", Symtable[id].name);
  // Remove any arguments pushed on the stack:
  // 8 bytes for each argument after the sixth
  if (numargs > 6)
    fprintf(Outfile, "\taddq\t$%d, %%rsp\n", 8 * (numargs - 6));
  // and copy the return value into our register
  fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]);
  return (outr);
}

// Given a register with an argument value,
// copy this argument into the argposn'th
// parameter in preparation for a future function
// call. Note that argposn is 1, 2, 3, 4, ..., never zero.
void cgcopyarg(int r, int argposn) {

  // If this is above the sixth argument, simply push the
  // register on the stack. We rely on being called with
  // successive arguments in the correct order for x86-64
  if (argposn > 6) {
    fprintf(Outfile, "\tpushq\t%s\n", reglist[r]);
  } else {
    // Otherwise, copy the value into one of the six registers
    // used to hold parameter values. Argument 1 maps to index
    // FIRSTPARAMREG of reglist, argument 2 to the index below
    // it, and so on.
    fprintf(Outfile, "\tmovq\t%s, %s\n", reglist[r],
	    reglist[FIRSTPARAMREG - argposn + 1]);
  }
}

// Shift a register left by a constant
int cgshlconst(int r, int val) {
  fprintf(Outfile, "\tsalq\t$%d, %s\n", val, reglist[r]);
  return (r);
}

// Store a register's value into a global variable, using
// a store of the variable's own width. Returns r.
// Dies via fatald() on a type which isn't listed below.
int cgstorglob(int r, int id) {
  switch (Symtable[id].type) {
    case P_CHAR:
      fprintf(Outfile, "\tmovb\t%s, %s(%%rip)\n", breglist[r],
	      Symtable[id].name);
      break;
    case P_INT:
      fprintf(Outfile, "\tmovl\t%s, %s(%%rip)\n", dreglist[r],
	      Symtable[id].name);
      break;
    case P_LONG:
    case P_CHARPTR:
    case P_INTPTR:
    case P_LONGPTR:
      fprintf(Outfile, "\tmovq\t%s, %s(%%rip)\n", reglist[r],
	      Symtable[id].name);
      break;
    default:
      fatald("Bad type in cgstorglob:", Symtable[id].type);
  }
  return (r);
}

// Store a register's value into a local variable, using
// a store of the variable's own width. Returns r.
// Dies via fatald() on a type which isn't listed below.
int cgstorlocal(int r, int id) {
  switch (Symtable[id].type) {
    case P_CHAR:
      fprintf(Outfile, "\tmovb\t%s, %d(%%rbp)\n", breglist[r],
	      Symtable[id].posn);
      break;
    case P_INT:
      fprintf(Outfile, "\tmovl\t%s, %d(%%rbp)\n", dreglist[r],
	      Symtable[id].posn);
      break;
    case P_LONG:
    case P_CHARPTR:
    case P_INTPTR:
    case P_LONGPTR:
      fprintf(Outfile, "\tmovq\t%s, %d(%%rbp)\n", reglist[r],
	      Symtable[id].posn);
      break;
    default:
      fatald("Bad type in cgstorlocal:", Symtable[id].type);
  }
  return (r);
}

// Array of type sizes in P_XXX order.
// 0 means no size.
static int psize[] = { 0, 0, 1, 4, 8, 8, 8, 8, 8 };
// Given a P_XXX type value, return the
// size of a primitive type in bytes, as
// listed in the psize[] table.
// Dies via fatal() if the type is out of range.
int cgprimsize(int type) {
  // Check the type is valid
  if (type < P_NONE || type > P_LONGPTR)
    fatal("Bad type in cgprimsize()");
  return (psize[type]);
}

// Generate a global symbol but not functions:
// a .globl directive, a label, and one zero-filled
// element for each of the symbol's size elements
void cgglobsym(int id) {
  int typesize;

  if (Symtable[id].stype == S_FUNCTION)
    return;

  // Get the size of the type
  typesize = cgprimsize(Symtable[id].type);

  // Generate the global identity and the label.
  // No newline follows the label: the first
  // directive below is on the same output line.
  cgdataseg();
  fprintf(Outfile, "\t.globl\t%s\n", Symtable[id].name);
  fprintf(Outfile, "%s:", Symtable[id].name);

  // Generate the space
  for (int i = 0; i < Symtable[id].size; i++) {
    switch (typesize) {
      case 1:
	fprintf(Outfile, "\t.byte\t0\n");
	break;
      case 4:
	fprintf(Outfile, "\t.long\t0\n");
	break;
      case 8:
	fprintf(Outfile, "\t.quad\t0\n");
	break;
      default:
	fatald("Unknown typesize in cgglobsym: ", typesize);
    }
  }
}

// Generate a global string and its start label:
// one .byte per character, then a terminating zero byte
void cgglobstr(int l, char *strvalue) {
  char *cptr;

  cglabel(l);
  for (cptr = strvalue; *cptr; cptr++) {
    fprintf(Outfile, "\t.byte\t%d\n", *cptr);
  }
  fprintf(Outfile, "\t.byte\t0\n");
}

// List of comparison instructions,
// in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE
static char *cmplist[] =
  { "sete", "setne", "setl", "setg", "setle", "setge" };
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { // Generate code depending on the function's type switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzbl\t%s, %%eax\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %%eax\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", Symtable[id].type); } cgjump(Symtable[id].endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(int id) { int r = alloc_register(); if (Symtable[id].class == C_LOCAL) fprintf(Outfile, "\tleaq\t%d(%%rbp), %s\n", Symtable[id].posn, reglist[r]); else fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", Symtable[id].name, reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tmovzbq\t(%s), %s\n", reglist[r], reglist[r]); break; case P_INTPTR: fprintf(Outfile, "\tmovslq\t(%s), %s\n", reglist[r], reglist[r]); break; case P_LONGPTR: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, (%s)\n", breglist[r1], reglist[r2]); break; case P_INT: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 27_Testing_Errors/cg_arm.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for ARMv6 on Raspberry Pi // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. static int freereg[4]; static char *reglist[4] = { "r4", "r5", "r6", "r7" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. 
static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // We have to store large integer literal values in memory. // Keep a list of them which will be output in the postamble #define MAXINTS 1024 int Intlist[MAXINTS]; static int Intslot = 0; // Determine the offset of a large integer // literal from the .L3 label. If the integer // isn't in the list, add it. static void set_int_offset(int val) { int offset = -1; // See if it is already there for (int i = 0; i < Intslot; i++) { if (Intlist[i] == val) { offset = 4 * i; break; } } // Not in the list, so add it if (offset == -1) { offset = 4 * Intslot; if (Intslot == MAXINTS) fatal("Out of int slots in set_int_offset()"); Intlist[Intslot++] = val; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L3+%d\n", offset); } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Print out the assembly postamble void cgpostamble() { // Print out the global variables fprintf(Outfile, ".L2:\n"); for (int i = 0; i < Globs; i++) { if (Symtable[i].stype == S_VARIABLE) fprintf(Outfile, "\t.word %s\n", Symtable[i].name); } // Print out the integer literals fprintf(Outfile, ".L3:\n"); for (int i = 0; i < Intslot; i++) { fprintf(Outfile, "\t.word %d\n", Intlist[i]); } } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, \%%function\n" "%s:\n" "\tpush\t{fp, lr}\n" "\tadd\tfp, sp, #4\n" "\tsub\tsp, sp, #8\n" "\tstr\tr0, [fp, #-8]\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { 
cglabel(Symtable[id].endlabel); fputs("\tsub\tsp, fp, #4\n" "\tpop\t{fp, pc}\n" "\t.align\t2\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); // If the literal value is small, do it with one instruction if (value <= 1000) fprintf(Outfile, "\tmov\t%s, #%d\n", reglist[r], value); else { set_int_offset(value); fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); } return (r); } // Determine the offset of a variable from the .L2 // label. Yes, this is inefficient code. static void set_var_offset(int id) { int offset = 0; // Walk the symbol table up to id. // Find S_VARIABLEs and add on 4 until // we get to our variable for (int i = 0; i < id; i++) { if (Symtable[i].stype == S_VARIABLE) offset += 4; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L2+%d\n", offset); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tldrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s, %s\n", reglist[r1], reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register 
with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\tmul\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { // To do a divide: r0 holds the dividend, r1 holds the divisor. // The quotient is in r0. fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r1]); fprintf(Outfile, "\tmov\tr1, %s\n", reglist[r2]); fprintf(Outfile, "\tbl\t__aeabi_idiv\n"); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r1]); free_register(r2); return (r1); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]); fprintf(Outfile, "\tbl\t%s\n", Symtable[id].name); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r]); return (r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tlsl\t%s, %s, #%d\n", reglist[r], reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tstr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgstorglob:", Symtable[id].type); } return (r); } // Array of type sizes in P_XXX order. // 0 means no size. static int psize[] = { 0, 0, 1, 4, 4, 4, 4, 4 }; // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { // Check the type is valid if (type < P_NONE || type > P_LONGPTR) fatal("Bad type in cgprimsize()"); return (psize[type]); } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Symtable[id].type); fprintf(Outfile, "\t.data\n" "\t.globl\t%s\n", Symtable[id].name); switch (typesize) { case 1: fprintf(Outfile, "%s:\t.byte\t0\n", Symtable[id].name); break; case 4: fprintf(Outfile, "%s:\t.long\t0\n", Symtable[id].name); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "moveq", "movne", "movlt", "movgt", "movle", "movge" }; // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "movne", "moveq", "movge", "movle", "movgt", "movlt" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #1\n", cmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #0\n", invcmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\tuxtb\t%s, %s\n", reglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tb\tL%d\n", l); } // List of inverted branch instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *brlist[] = { "bne", "beq", "bge", "ble", "bgt", "blt" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", brlist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[reg]); cgjump(Symtable[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. Return a new register int cgaddress(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); fprintf(Outfile, "\tmov\t%s, r3\n", reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tldrb\t%s, [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [%s]\n", reglist[r1], reglist[r2]); break; case P_INT: case P_LONG: fprintf(Outfile, "\tstr\t%s, [%s]\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 27_Testing_Errors/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { 
no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\tsection .text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\tsection .data\n", Outfile); currSeg = data_seg; } } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. // We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "r10", "r11", "r12", "r13", "r9", "r8", "rcx", "rdx", "rsi", "rdi" }; static char *breglist[] = { "r10b", "r11b", "r12b", "r13b", "r9b", "r8b", "cl", "dl", "sil", "dil" }; static char *dreglist[] = { "r10d", "r11d", "r12d", "r13d", "r9d", "r8d", "ecx", "edx", "esi", "edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\textern\tprintint\n", Outfile); fputs("\textern\tprintchar\n", Outfile); fputs("\textern\topen\n", Outfile); fputs("\textern\tclose\n", Outfile); fputs("\textern\tread\n", Outfile); fputs("\textern\twrite\n", Outfile); fputs("\textern\tprintf\n", Outfile); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; int i; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the rsp and rbp fprintf(Outfile, "\tglobal\t%s\n" "%s:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n", name, name); // Copy any in-register parameters to the stack // Stop after no more than six parameter registers for (i = NSYMBOLS - 1; i > Locls; i--) { if (Symtable[i].class != C_PARAM) break; if (i < NSYMBOLS - 6) break; Symtable[i].posn = newlocaloffset(Symtable[i].type); cgstorlocal(paramReg--, i); } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. 
for (; i > Locls; i--) { if (Symtable[i].class == C_PARAM) { Symtable[i].posn = paramOffset; paramOffset += 8; } else { Symtable[i].posn = newlocaloffset(Symtable[i].type); } } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\tadd\trsp, %d\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Symtable[id].endlabel); fprintf(Outfile, "\tadd\trsp, %d\n", stackOffset); fputs("\tpop rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. 
int cgloadglob(int id, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it switch (Symtable[id].type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", Symtable[id].name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", Symtable[id].name); fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], Symtable[id].name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", Symtable[id].name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", Symtable[id].name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword [%s]\n", Symtable[id].name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tdword [%s]\n", Symtable[id].name); fprintf(Outfile, "\tmovsx\t%s, word [%s]\n", dreglist[r], Symtable[id].name); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword [%s]\n", Symtable[id].name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword [%s]\n", Symtable[id].name); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword [%s]\n", Symtable[id].name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", Symtable[id].name); fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], Symtable[id].name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword [%s]\n", Symtable[id].name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", Symtable[id].name); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. 
int cgloadlocal(int id, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it switch (Symtable[id].type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", Symtable[id].posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", Symtable[id].posn); fprintf(Outfile, "\tmovzx\t%s, byte [rbp+%d]\n", reglist[r], Symtable[id].posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", Symtable[id].posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", Symtable[id].posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", Symtable[id].posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", Symtable[id].posn); fprintf(Outfile, "\tmovsx\t%s, word [rbp+%d]\n", reglist[r], Symtable[id].posn); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", Symtable[id].posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", Symtable[id].posn); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", Symtable[id].posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", Symtable[id].posn); fprintf(Outfile, "\tmov\t%s, [rbp+%d]\n", reglist[r], Symtable[id].posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", Symtable[id].posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", Symtable[id].posn); break; default: fatald("Bad type in cgloadlocal:", Symtable[id].type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int id) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, L%d\n", reglist[r], id); return (r); } // Add two registers together and return // the number of the register with the result 
int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tand\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\tor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshl\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshr\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tneg\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnot\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", 
breglist[r]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r], breglist[r]); return (r); } // Convert an integer value to a boolean value. Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, byte %s\n", reglist[r], breglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(int id, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s\n", Symtable[id].name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\tadd\trsp, %d\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmov\t%s, rax\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpush\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmov\t%s, %s\n", reglist[FIRSTPARAMREG - argposn + 1], reglist[r]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsal\t%s, %d\n", reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], %s\n", Symtable[id].name, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", Symtable[id].name, dreglist[r]); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tmov\t[%s], %s\n", Symtable[id].name, reglist[r]); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, int id) { switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tmov\tbyte\t[rbp+%d], %s\n", Symtable[id].posn, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\tdword\t[rbp+%d], %s\n", Symtable[id].posn, dreglist[r]); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tmov\tqword\t[rbp+%d], %s\n", Symtable[id].posn, reglist[r]); break; default: fatald("Bad type in cgstorlocal:", Symtable[id].type); } return (r); } // Array of type sizes in P_XXX order. // 0 means no size. static int psize[] = { 0, 0, 1, 4, 8, 8, 8, 8, 8 }; // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { // Check the type is valid if (type < P_NONE || type > P_LONGPTR) fatal("Bad type in cgprimsize()"); return (psize[type]); } // Generate a global symbol but not functions void cgglobsym(int id) { int typesize; if (Symtable[id].stype == S_FUNCTION) return; // Get the size of the type typesize = cgprimsize(Symtable[id].type); // Generate the global identity and the label cgdataseg(); fprintf(Outfile, "\tsection\t.data\n" "\tglobal\t%s\n", Symtable[id].name); fprintf(Outfile, "%s:", Symtable[id].name); // Generate the space // original version for (int i = 0; i < Symtable[id].size; i++) { switch(typesize) { case 1: fprintf(Outfile, "\tdb\t0\n"); break; case 4: fprintf(Outfile, "\tdd\t0\n"); break; case 8: fprintf(Outfile, "\tdq\t0\n"); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } /* compact version using times instead of loop switch(typesize) { case 1: fprintf(Outfile, "\ttimes\t%d\tdb\t0\n", Symtable[id].size); break; case 4: fprintf(Outfile, "\ttimes\t%d\tdd\t0\n", Symtable[id].size); break; case 8: fprintf(Outfile, "\ttimes\t%d\tdq\t0\n", Symtable[id].size); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } */ } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\tdb\t%d\n", *cptr); } fprintf(Outfile, "\tdb\t0\n"); /* put string in readable format on a single line // probably went overboard with error checking int comma = 0, quote = 0, start = 1; fprintf(Outfile, "\tdb\t"); for (cptr=strvalue; *cptr; cptr++) { if ( ! 
isprint(*cptr) ) if (comma || start) { fprintf(Outfile, "%d, ", *cptr); start = 0; comma = 1; } else if (quote) { fprintf(Outfile, "\', %d, ", *cptr); comma = 1; quote = 0; } else { fprintf(Outfile, "%d, ", *cptr); comma = 1; quote = 0; } else if (start || comma) { fprintf(Outfile, "\'%c", *cptr); start = comma = 0; quote = 1; } else { fprintf(Outfile, "%c", *cptr); comma = 0; quote = 1; } } if (comma || start) fprintf(Outfile, "0\n"); else fprintf(Outfile, "\', 0\n"); */ } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { // Generate code depending on the function's type switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzx\teax, %s\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmov\teax, %s\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", Symtable[id].type); } cgjump(Symtable[id].endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(int id) { int r = alloc_register(); if (Symtable[id].class == C_LOCAL) fprintf(Outfile, "\tlea\t%s, [rbp+%d]\n", reglist[r], Symtable[id].posn); else fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r], Symtable[id].name); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: fprintf(Outfile, "\tmovsx\t%s, dword [%s]\n", reglist[r], reglist[r]); break; case P_LONGPTR: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], byte %s\n", reglist[r2], breglist[r1]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; case P_LONG: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 27_Testing_Errors/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Putback; // Character put back by scanner extern_ int Functionid; // Symbol id of the current function extern_ int Globs; // Position of next free global symbol slot extern_ int Locls; // Position of next free local symbol slot extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ struct token Token; // Last token scanned extern_ char Text[TEXTLEN + 1]; // Last identifier scanned extern_ struct symtable Symtable[NSYMBOLS]; // Global symbol table extern_ int O_dumpAST; 
================================================ FILE: 27_Testing_Errors/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 // Parse the current token and return // a primitive type enum value. Also // scan in the next token int parse_type(void) { int type; switch (Token.token) { case T_VOID: type = P_VOID; break; case T_CHAR: type = P_CHAR; break; case T_INT: type = P_INT; break; case T_LONG: type = P_LONG; break; default: fatald("Illegal type, token", Token.token); } // Scan in one or more further '*' tokens // and determine the correct pointer type while (1) { scan(&Token); if (Token.token != T_STAR) break; type = pointer_to(type); } // We leave with the next token already scanned return (type); } // variable_declaration: type identifier ';' // | type identifier '[' INTLIT ']' ';' // ; // // Parse the declaration of a scalar variable or an array // with a given size. // The identifier has been scanned & we have the type. // class is the variable's class void var_declaration(int type, int class) { // Text now has the identifier's name. // If the next token is a '[' if (Token.token == T_LBRACKET) { // Skip past the '[' scan(&Token); // Check we have an array size if (Token.token == T_INTLIT) { // Add this as a known array and generate its space in assembly. 
// We treat the array as a pointer to its elements' type if (class == C_LOCAL) { fatal("For now, declaration of local arrays is not implemented"); } else { addglob(Text, pointer_to(type), S_ARRAY, class, 0, Token.intvalue); } } // Ensure we have a following ']' scan(&Token); match(T_RBRACKET, "]"); } else { // Add this as a known scalar // and generate its space in assembly if (class == C_LOCAL) { if (addlocl(Text, type, S_VARIABLE, class, 1) == -1) fatals("Duplicate local variable declaration", Text); } else { addglob(Text, type, S_VARIABLE, class, 0, 1); } } } // param_declaration: // | variable_declaration // | variable_declaration ',' param_declaration // // Parse the parameters in parentheses after the function name. // Add them as symbols to the symbol table and return the number // of parameters. If id is not -1, there is an existing function // prototype, and the function has this symbol slot number. static int param_declaration(int id) { int type, param_id; int orig_paramcnt; int paramcnt = 0; // Add 1 to id so that it's either zero (no prototype), or // it's the position of the zeroth existing parameter in // the symbol table param_id = id + 1; // Get any existing prototype parameter count if (param_id) orig_paramcnt = Symtable[id].nelems; // Loop until the final right parentheses while (Token.token != T_RPAREN) { // Get the type and identifier // and add it to the symbol table type = parse_type(); ident(); // We have an existing prototype. // Check that this type matches the prototype. 
if (param_id) { if (type != Symtable[id].type) fatald("Type doesn't match prototype for parameter", paramcnt + 1); param_id++; } else { // Add a new parameter to the new prototype var_declaration(type, C_PARAM); } paramcnt++; // Must have a ',' or ')' at this point switch (Token.token) { case T_COMMA: scan(&Token); break; case T_RPAREN: break; default: fatald("Unexpected token in parameter list", Token.token); } } // Check that the number of parameters in this list matches // any existing prototype if ((id != -1) && (paramcnt != orig_paramcnt)) fatals("Parameter count mismatch for function", Symtable[id].name); // Return the count of parameters return (paramcnt); } // // function_declaration: type identifier '(' parameter_list ')' ; // | type identifier '(' parameter_list ')' compound_statement ; // // Parse the declaration of function. // The identifier has been scanned & we have the type. struct ASTnode *function_declaration(int type) { struct ASTnode *tree, *finalstmt; int id; int nameslot, endlabel, paramcnt; // Text has the identifier's name. If this exists and is a // function, get the id. Otherwise, set id to -1 if ((id = findsymbol(Text)) != -1) if (Symtable[id].stype != S_FUNCTION) id = -1; // If this is a new function declaration, get a // label-id for the end label, and add the function // to the symbol table, if (id == -1) { endlabel = genlabel(); nameslot = addglob(Text, type, S_FUNCTION, C_GLOBAL, endlabel, 0); } // Scan in the '(', any parameters and the ')'. // Pass in any existing function prototype symbol slot number lparen(); paramcnt = param_declaration(id); rparen(); // If this is a new function declaration, update the // function symbol entry with the number of parameters if (id == -1) Symtable[nameslot].nelems = paramcnt; // Declaration ends in a semicolon, only a prototype. if (Token.token == T_SEMI) { scan(&Token); return (NULL); } // This is not just a prototype. 
// Copy the global parameters to be local parameters if (id == -1) id = nameslot; copyfuncparams(id); // Set the Functionid global to the function's symbol-id Functionid = id; // Get the AST tree for the compound statement tree = compound_statement(); // If the function type isn't P_VOID .. if (type != P_VOID) { // Error if no statements in the function if (tree == NULL) fatal("No statements in function with non-void type"); // Check that the last AST operation in the // compound statement was a return statement finalstmt = (tree->op == A_GLUE) ? tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); } // Return an A_FUNCTION node which has the function's id // and the compound statement sub-tree return (mkastunary(A_FUNCTION, type, tree, id)); } // Parse one or more global declarations, either // variables or functions void global_declarations(void) { struct ASTnode *tree; int type; while (1) { // We have to read past the type and identifier // to see either a '(' for a function declaration // or a ',' or ';' for a variable declaration. // Text is filled in by the ident() call. 
type = parse_type(); ident(); if (Token.token == T_LPAREN) { // Parse the function declaration tree = function_declaration(type); // Only a function prototype, no code if (tree == NULL) continue; // A real function, generate the assembly code for it if (O_dumpAST) { dumpAST(tree, NOLABEL, 0); fprintf(stdout, "\n\n"); } genAST(tree, NOLABEL, 0); // Now free the symbols associated // with this function freeloclsyms(); } else { // Parse the global variable declaration // and skip past the trailing semicolon var_declaration(type, C_GLOBAL); semi(); } // Stop when we have reached EOF if (Token.token == T_EOF) break; } } ================================================ FILE: 27_Testing_Errors/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c void reject_token(struct token *t); int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, int intvalue); struct ASTnode *mkastleaf(int op, int type, int intvalue); struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, int intvalue); void dumpAST(struct ASTnode *n, int label, int parentASTop); // gen.c int genlabel(void); int genAST(struct ASTnode *n, int reg, int parentASTop); void genpreamble(); void genpostamble(); void genfreeregs(); void genglobsym(int id); int genglobstr(char *strvalue); int genprimsize(int type); void genreturn(int reg, int id); // cg.c void cgtextseg(); void cgdataseg(); void freeall_registers(void); void cgpreamble(); void cgpostamble(); void cgfuncpreamble(int id); void cgfuncpostamble(int id); int cgloadint(int value, int type); int cgloadglob(int id, int op); int cgloadlocal(int id, int op); int cgloadglobstr(int id); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdiv(int r1, int r2); int cgshlconst(int r, int val); int cgcall(int id, int numargs); void 
cgcopyarg(int r, int argposn); int cgstorglob(int r, int id); int cgstorlocal(int r, int id); void cgglobsym(int id); void cgglobstr(int l, char *strvalue); int cgcompare_and_set(int ASTop, int r1, int r2); int cgcompare_and_jump(int ASTop, int r1, int r2, int label); void cglabel(int l); void cgjump(int l); int cgwiden(int r, int oldtype, int newtype); int cgprimsize(int type); void cgreturn(int reg, int id); int cgaddress(int id); int cgderef(int r, int type); int cgstorderef(int r1, int r2, int type); int cgnegate(int r); int cginvert(int r); int cglognot(int r); int cgboolean(int r, int op, int label); int cgand(int r1, int r2); int cgor(int r1, int r2); int cgxor(int r1, int r2); int cgshl(int r1, int r2); int cgshr(int r1, int r2); // expr.c struct ASTnode *binexpr(int ptp); // stmt.c struct ASTnode *compound_statement(void); // misc.c void match(int t, char *what); void semi(void); void lbrace(void); void rbrace(void); void lparen(void); void rparen(void); void ident(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c int findglob(char *s); int findlocl(char *s); int findsymbol(char *s); int addglob(char *name, int type, int stype, int class, int endlabel, int size); int addlocl(char *name, int type, int stype, int class, int size); void copyfuncparams(int slot); void freeloclsyms(void); // decl.c void var_declaration(int type, int class); struct ASTnode *function_declaration(int type); void global_declarations(void); // types.c int inttype(int type); int parse_type(void); int pointer_to(int type); int value_at(int type); struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op); ================================================ FILE: 27_Testing_Errors/defs.h ================================================ #include #include #include #include // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 #define TEXTLEN 512 // Length of symbols in input #define 
NSYMBOLS 1024 // Number of symbol table entries // Token types enum { T_EOF, // Binary operators T_ASSIGN, T_LOGOR, T_LOGAND, T_OR, T_XOR, T_AMPER, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, T_LSHIFT, T_RSHIFT, T_PLUS, T_MINUS, T_STAR, T_SLASH, // Other operators T_INC, T_DEC, T_INVERT, T_LOGNOT, // Type keywords T_VOID, T_CHAR, T_INT, T_LONG, // Other keywords T_IF, T_ELSE, T_WHILE, T_FOR, T_RETURN, // Structural tokens T_INTLIT, T_STRLIT, T_SEMI, T_IDENT, T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, T_LBRACKET, T_RBRACKET, T_COMMA }; // Token structure struct token { int token; // Token type, from the enum list above int intvalue; // For T_INTLIT, the integer value }; // AST node types. The first few line up // with the related tokens enum { A_ASSIGN= 1, A_LOGOR, A_LOGAND, A_OR, A_XOR, A_AND, A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_LSHIFT, A_RSHIFT, A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_INTLIT, A_STRLIT, A_IDENT, A_GLUE, A_IF, A_WHILE, A_FUNCTION, A_WIDEN, A_RETURN, A_FUNCCALL, A_DEREF, A_ADDR, A_SCALE, A_PREINC, A_PREDEC, A_POSTINC, A_POSTDEC, A_NEGATE, A_INVERT, A_LOGNOT, A_TOBOOL }; // Primitive types enum { P_NONE, P_VOID, P_CHAR, P_INT, P_LONG, P_VOIDPTR, P_CHARPTR, P_INTPTR, P_LONGPTR }; // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates int rvalue; // True if the node is an rvalue struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; union { // For A_INTLIT, the integer value int intvalue; // For A_IDENT, the symbol slot number int id; // For A_FUNCTION, the symbol slot number int size; // For A_SCALE, the size to scale by } v; // For A_FUNCCALL, the symbol slot number }; #define NOREG -1 // Use NOREG when the AST generation // functions have no register to return #define NOLABEL 0 // Use NOLABEL when we have no label to // pass to genAST() // Structural types enum { S_VARIABLE, S_FUNCTION, S_ARRAY }; // 
Storage classes enum { C_GLOBAL = 1, // Globally visible symbol C_LOCAL, // Locally visible symbol C_PARAM // Locally visible function parameter }; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol int stype; // Structural type for the symbol int class; // Storage class for the symbol int endlabel; // For S_FUNCTIONs, the end label int size; // Number of elements in the symbol int posn; // For locals, either the negative offset // from stack base pointer, or register id #define nelems posn // For functions, # of params // For structs, # of fields }; ================================================ FILE: 27_Testing_Errors/expr.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of expressions // Copyright (c) 2019 Warren Toomey, GPL3 // expression_list: // | expression // | expression ',' expression_list // ; // Parse a list of zero or more comma-separated expressions and // return an AST composed of A_GLUE nodes with the left-hand child // being the sub-tree of previous expressions (or NULL) and the right-hand // child being the next expression. Each A_GLUE node will have size field // set to the number of expressions in the tree at this point. If no // expressions are parsed, NULL is returned static struct ASTnode *expression_list(void) { struct ASTnode *tree = NULL; struct ASTnode *child = NULL; int exprcount = 0; // Loop until the final right parentheses while (Token.token != T_RPAREN) { // Parse the next expression and increment the expression count child = binexpr(0); exprcount++; // Build an A_GLUE AST node with the previous tree as the left child // and the new expression as the right child. Store the expression count. 
tree = mkastnode(A_GLUE, P_NONE, tree, NULL, child, exprcount); // Must have a ',' or ')' at this point switch (Token.token) { case T_COMMA: scan(&Token); break; case T_RPAREN: break; default: fatald("Unexpected token in expression list", Token.token); } } // Return the tree of expressions return (tree); } // Parse a function call and return its AST static struct ASTnode *funccall(void) { struct ASTnode *tree; int id; // Check that the identifier has been defined as a function, // then make a leaf node for it. if ((id = findsymbol(Text)) == -1 || Symtable[id].stype != S_FUNCTION) { fatals("Undeclared function", Text); } // Get the '(' lparen(); // Parse the argument expression list tree = expression_list(); // XXX Check type of each argument against the function's prototype // Build the function call AST node. Store the // function's return type as this node's type. // Also record the function's symbol-id tree = mkastunary(A_FUNCCALL, Symtable[id].type, tree, id); // Get the ')' rparen(); return (tree); } // Parse the index into an array and return an AST tree for it static struct ASTnode *array_access(void) { struct ASTnode *left, *right; int id; // Check that the identifier has been defined as an array // then make a leaf node for it that points at the base if ((id = findsymbol(Text)) == -1 || Symtable[id].stype != S_ARRAY) { fatals("Undeclared array", Text); } left = mkastleaf(A_ADDR, Symtable[id].type, id); // Get the '[' scan(&Token); // Parse the following expression right = binexpr(0); // Get the ']' match(T_RBRACKET, "]"); // Ensure that this is of int type if (!inttype(right->type)) fatal("Array index is not of integer type"); // Scale the index by the size of the element's type right = modify_type(right, left->type, A_ADD); // Return an AST tree where the array's base has the offset // added to it, and dereference the element. Still an lvalue // at this point. 
left = mkastnode(A_ADD, Symtable[id].type, left, NULL, right, 0); left = mkastunary(A_DEREF, value_at(left->type), left, 0); return (left); } // Parse a postfix expression and return // an AST node representing it. The // identifier is already in Text. static struct ASTnode *postfix(void) { struct ASTnode *n; int id; // Scan in the next token to see if we have a postfix expression scan(&Token); // Function call if (Token.token == T_LPAREN) return (funccall()); // An array reference if (Token.token == T_LBRACKET) return (array_access()); // A variable. Check that the variable exists. id = findsymbol(Text); if (id == -1 || Symtable[id].stype != S_VARIABLE) fatals("Unknown variable", Text); switch (Token.token) { // Post-increment: skip over the token case T_INC: scan(&Token); n = mkastleaf(A_POSTINC, Symtable[id].type, id); break; // Post-decrement: skip over the token case T_DEC: scan(&Token); n = mkastleaf(A_POSTDEC, Symtable[id].type, id); break; // Just a variable reference default: n = mkastleaf(A_IDENT, Symtable[id].type, id); } return (n); } // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; int id; switch (Token.token) { case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. // Make it a P_CHAR if it's within the P_CHAR range if ((Token.intvalue) >= 0 && (Token.intvalue < 256)) n = mkastleaf(A_INTLIT, P_CHAR, Token.intvalue); else n = mkastleaf(A_INTLIT, P_INT, Token.intvalue); break; case T_STRLIT: // For a STRLIT token, generate the assembly for it. // Then make a leaf AST node for it. id is the string's label. id = genglobstr(Text); n = mkastleaf(A_STRLIT, P_CHARPTR, id); break; case T_IDENT: return (postfix()); case T_LPAREN: // Beginning of a parenthesised expression, skip the '('. 
// Scan in the expression and the right parenthesis scan(&Token); n = binexpr(0); rparen(); return (n); default: fatald("Expecting a primary expression, got token", Token.token); } // Scan in the next token and return the leaf node scan(&Token); return (n); } // Convert a binary operator token into a binary AST operation. // We rely on a 1:1 mapping from token to AST operation static int binastop(int tokentype) { if (tokentype > T_EOF && tokentype <= T_SLASH) return (tokentype); fatald("Syntax error, token", tokentype); return (0); // Keep -Wall happy } // Return true if a token is right-associative, // false otherwise. static int rightassoc(int tokentype) { if (tokentype == T_ASSIGN) return (1); return (0); } // Operator precedence for each token. Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 20, 30, // T_EOF, T_ASSIGN, T_LOGOR, T_LOGAND 40, 50, 60, // T_OR, T_XOR, T_AMPER 70, 70, // T_EQ, T_NE 80, 80, 80, 80, // T_LT, T_GT, T_LE, T_GE 90, 90, // T_LSHIFT, T_RSHIFT 100, 100, // T_PLUS, T_MINUS 110, 110 // T_STAR, T_SLASH }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec; if (tokentype > T_SLASH) fatald("Token with no precedence in op_precedence:", tokentype); prec = OpPrec[tokentype]; if (prec == 0) fatald("Syntax error, token", tokentype); return (prec); } // prefix_expression: primary // | '*' prefix_expression // | '&' prefix_expression // | '-' prefix_expression // | '++' prefix_expression // | '--' prefix_expression // ; // Parse a prefix expression and return // a sub-tree representing it. 
struct ASTnode *prefix(void) { struct ASTnode *tree; switch (Token.token) { case T_AMPER: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Ensure that it's an identifier if (tree->op != A_IDENT) fatal("& operator must be followed by an identifier"); // Now change the operator to A_ADDR and the type to // a pointer to the original type tree->op = A_ADDR; tree->type = pointer_to(tree->type); break; case T_STAR: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's either another deref or an // identifier if (tree->op != A_IDENT && tree->op != A_DEREF) fatal("* operator must be followed by an identifier or *"); // Prepend an A_DEREF operation to the tree tree = mkastunary(A_DEREF, value_at(tree->type), tree, 0); break; case T_MINUS: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_NEGATE operation to the tree and // make the child an rvalue. Because chars are unsigned, // also widen this to int so that it's signed tree->rvalue = 1; tree = modify_type(tree, P_INT, 0); tree = mkastunary(A_NEGATE, tree->type, tree, 0); break; case T_INVERT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_INVERT operation to the tree and // make the child an rvalue. tree->rvalue = 1; tree = mkastunary(A_INVERT, tree->type, tree, 0); break; case T_LOGNOT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_LOGNOT operation to the tree and // make the child an rvalue. 
tree->rvalue = 1; tree = mkastunary(A_LOGNOT, tree->type, tree, 0); break; case T_INC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("++ operator must be followed by an identifier"); // Prepend an A_PREINC operation to the tree tree = mkastunary(A_PREINC, tree->type, tree, 0); break; case T_DEC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("-- operator must be followed by an identifier"); // Prepend an A_PREDEC operation to the tree tree = mkastunary(A_PREDEC, tree->type, tree, 0); break; default: tree = primary(); } return (tree); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; struct ASTnode *ltemp, *rtemp; int ASTop; int tokentype; // Get the tree on the left. // Fetch the next token at the same time. 
left = prefix(); // If we hit one of several terminating tokens, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA) { left->rvalue = 1; return (left); } // While the precedence of this token is more than that of the // previous token precedence, or it's right associative and // equal to the previous token's precedence while ((op_precedence(tokentype) > ptp) || (rightassoc(tokentype) && op_precedence(tokentype) == ptp)) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Determine the operation to be performed on the sub-trees ASTop = binastop(tokentype); if (ASTop == A_ASSIGN) { // Assignment // Make the right tree into an rvalue right->rvalue = 1; // Ensure the right's type matches the left right = modify_type(right, left->type, 0); if (right == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree. However, switch // left and right around, so that the right expression's // code will be generated before the left expression ltemp = left; left = right; right = ltemp; } else { // We are not doing an assignment, so both trees should be rvalues // Convert both trees into rvalue if they are lvalue trees left->rvalue = 1; right->rvalue = 1; // Ensure the two types are compatible by trying // to modify each tree to match the other's type. ltemp = modify_type(left, right->type, ASTop); rtemp = modify_type(right, left->type, ASTop); if (ltemp == NULL && rtemp == NULL) fatal("Incompatible types in binary expression"); if (ltemp != NULL) left = ltemp; if (rtemp != NULL) right = rtemp; } // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. left = mkastnode(binastop(tokentype), left->type, left, NULL, right, 0); // Update the details of the current token. 
// If we hit a terminating token, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA) { left->rvalue = 1; return (left); } } // Return the tree we have when the precedence // is the same or lower left->rvalue = 1; return (left); } ================================================ FILE: 27_Testing_Errors/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number int genlabel(void) { static int id = 1; return (id++); } // Generate the code for an IF statement // and an optional ELSE clause static int genIF(struct ASTnode *n) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. // When there is no ELSE clause, Lfalse _is_ // the ending label! Lfalse = genlabel(); if (n->right) Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. genAST(n->left, Lfalse, n->op); genfreeregs(); // Generate the true compound statement genAST(n->mid, NOLABEL, n->op); genfreeregs(); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOLABEL, n->op); genfreeregs(); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = genlabel(); Lend = genlabel(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. 
genAST(n->left, Lend, n->op); genfreeregs(); // Generate the compound statement for the body genAST(n->right, NOLABEL, n->op); genfreeregs(); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Generate the code to copy the arguments of a // function call to its parameters, then call the // function itself. Return the register that holds // the function's return value. static int gen_funccall(struct ASTnode *n) { struct ASTnode *gluetree = n->left; int reg; int numargs = 0; // If there is a list of arguments, walk this list // from the last argument (right-hand child) to the // first while (gluetree) { // Calculate the expression's value reg = genAST(gluetree->right, NOLABEL, gluetree->op); // Copy this into the n'th function parameter: size is 1, 2, 3, ... cgcopyarg(reg, gluetree->v.size); // Keep the first (highest) number of arguments if (numargs == 0) numargs = gluetree->v.size; genfreeregs(); gluetree = gluetree->left; } // Call the function, clean up the stack (based on numargs), // and return its result return (cgcall(n->v.id, numargs)); } // Given an AST, an optional label, and the AST op // of the parent, generate assembly code recursively. // Return the register id with the tree's final value. 
int genAST(struct ASTnode *n, int label, int parentASTop) { int leftreg, rightreg; // We have some specific AST node handling at the top // so that we don't evaluate the child sub-trees immediately switch (n->op) { case A_IF: return (genIF(n)); case A_WHILE: return (genWHILE(n)); case A_FUNCCALL: return (gen_funccall(n)); case A_GLUE: // Do each child statement, and free the // registers after each child genAST(n->left, NOLABEL, n->op); genfreeregs(); genAST(n->right, NOLABEL, n->op); genfreeregs(); return (NOREG); case A_FUNCTION: // Generate the function's preamble before the code // in the child sub-tree cgfuncpreamble(n->v.id); genAST(n->left, NOLABEL, n->op); cgfuncpostamble(n->v.id); return (NOREG); } // General AST node handling below // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, NOLABEL, n->op); if (n->right) rightreg = genAST(n->right, NOLABEL, n->op); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdiv(leftreg, rightreg)); case A_AND: return (cgand(leftreg, rightreg)); case A_OR: return (cgor(leftreg, rightreg)); case A_XOR: return (cgxor(leftreg, rightreg)); case A_LSHIFT: return (cgshl(leftreg, rightreg)); case A_RSHIFT: return (cgshr(leftreg, rightreg)); case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, compare registers // and set one to 1 or 0 based on the comparison. 
if (parentASTop == A_IF || parentASTop == A_WHILE) return (cgcompare_and_jump(n->op, leftreg, rightreg, label)); else return (cgcompare_and_set(n->op, leftreg, rightreg)); case A_INTLIT: return (cgloadint(n->v.intvalue, n->type)); case A_STRLIT: return (cgloadglobstr(n->v.id)); case A_IDENT: // Load our value if we are an rvalue // or we are being dereferenced if (n->rvalue || parentASTop == A_DEREF) { if (Symtable[n->v.id].class == C_GLOBAL) { return (cgloadglob(n->v.id, n->op)); } else { return (cgloadlocal(n->v.id, n->op)); } } else return (NOREG); case A_ASSIGN: // Are we assigning to an identifier or through a pointer? switch (n->right->op) { case A_IDENT: if (Symtable[n->right->v.id].class == C_GLOBAL) return (cgstorglob(leftreg, n->right->v.id)); else return (cgstorlocal(leftreg, n->right->v.id)); case A_DEREF: return (cgstorderef(leftreg, rightreg, n->right->type)); default: fatald("Can't A_ASSIGN in genAST(), op", n->op); } case A_WIDEN: // Widen the child's type to the parent's type return (cgwiden(leftreg, n->left->type, n->type)); case A_RETURN: cgreturn(leftreg, Functionid); return (NOREG); case A_ADDR: return (cgaddress(n->v.id)); case A_DEREF: // If we are an rvalue, dereference to get the value we point at, // otherwise leave it for A_ASSIGN to store through the pointer if (n->rvalue) return (cgderef(leftreg, n->left->type)); else return (leftreg); case A_SCALE: // Small optimisation: use shift if the // scale value is a known power of two switch (n->v.size) { case 2: return (cgshlconst(leftreg, 1)); case 4: return (cgshlconst(leftreg, 2)); case 8: return (cgshlconst(leftreg, 3)); default: // Load a register with the size and // multiply the leftreg by this size rightreg = cgloadint(n->v.size, P_INT); return (cgmul(leftreg, rightreg)); } case A_POSTINC: case A_POSTDEC: // Load and decrement the variable's value into a register // and post increment/decrement it if (Symtable[n->v.id].class == C_GLOBAL) return (cgloadglob(n->v.id, n->op)); else return 
(cgloadlocal(n->v.id, n->op)); case A_PREINC: case A_PREDEC: // Load and decrement the variable's value into a register // and pre increment/decrement it if (Symtable[n->left->v.id].class == C_GLOBAL) return (cgloadglob(n->left->v.id, n->op)); else return (cgloadlocal(n->left->v.id, n->op)); case A_NEGATE: return (cgnegate(leftreg)); case A_INVERT: return (cginvert(leftreg)); case A_LOGNOT: return (cglognot(leftreg)); case A_TOBOOL: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, set the register // to 0 or 1 based on it's zeroeness or non-zeroeness return (cgboolean(leftreg, parentASTop, label)); default: fatald("Unknown AST operator", n->op); } return (NOREG); // Keep -Wall happy } void genpreamble() { cgpreamble(); } void genpostamble() { cgpostamble(); } void genfreeregs() { freeall_registers(); } void genglobsym(int id) { cgglobsym(id); } int genglobstr(char *strvalue) { int l = genlabel(); cgglobstr(l, strvalue); return (l); } int genprimsize(int type) { return (cgprimsize(type)); } ================================================ FILE: 27_Testing_Errors/lib/printint.c ================================================ #include void printint(long x) { printf("%ld\n", x); } void printchar(long x) { putc((char)(x & 0x7f), stdout); } ================================================ FILE: 27_Testing_Errors/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Initialise global variables static void init() { Line = 1; Putback = '\n'; Globs = 0; Locls = NSYMBOLS - 1; O_dumpAST = 0; } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s [-T] infile\n", prog); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. 
Open up the input // file and call scanfile() to scan the tokens in it. int main(int argc, char *argv[]) { int i; // Initialise the globals init(); // Scan for command-line options for (i = 1; i < argc; i++) { if (*argv[i] != '-') break; for (int j = 1; argv[i][j]; j++) { switch (argv[i][j]) { case 'T': O_dumpAST = 1; break; default: usage(argv[0]); } } } // Ensure we have an input file argument if (i >= argc) usage(argv[0]); // Open up the input file if ((Infile = fopen(argv[i], "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", argv[i], strerror(errno)); exit(1); } // Create the output file if ((Outfile = fopen("out.s", "w")) == NULL) { fprintf(stderr, "Unable to create out.s: %s\n", strerror(errno)); exit(1); } // For now, ensure that printint() and printchar() are defined addglob("printint", P_INT, S_FUNCTION, C_GLOBAL, 0, 0); addglob("printchar", P_VOID, S_FUNCTION, C_GLOBAL, 0, 0); scan(&Token); // Get the first token from the input genpreamble(); // Output the preamble global_declarations(); // Parse the global declarations genpostamble(); // Output the postamble fclose(Outfile); // Close the output file and exit return (0); } ================================================ FILE: 27_Testing_Errors/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current token is t, // and fetch the next token. 
Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d\n", s, Line); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d\n", s1, s2, Line); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d\n", s, d, Line); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d\n", s, c, Line); exit(1); } ================================================ FILE: 27_Testing_Errors/scan.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { char *p; p = strchr(s, c); return (p ? p - s : -1); } // Get the next character from the input file. static int next(void) { int c; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return (c); } c = fgetc(Infile); // Read from input file if ('\n' == c) Line++; // Increment line count return (c); } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. 
static int skip(void) { int c; c = next(); while (' ' == c || '\t' == c || '\n' == c || '\r' == c || '\f' == c) { c = next(); } return (c); } // Return the next character from a character // or string literal static int scanch(void) { int c; // Get the next input character and interpret // metacharacters that start with a backslash c = next(); if (c == '\\') { switch (c = next()) { case 'a': return '\a'; case 'b': return '\b'; case 'f': return '\f'; case 'n': return '\n'; case 'r': return '\r'; case 't': return '\t'; case 'v': return '\v'; case '\\': return '\\'; case '"': return '"'; case '\'': return '\''; default: fatalc("unknown escape sequence", c); } } return (c); // Just an ordinary old character! } // Scan and return an integer literal // value from the input file. static int scanint(int c) { int k, val = 0; // Convert each character into an int value while ((k = chrpos("0123456789", c)) >= 0) { val = val * 10 + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan in a string literal from the input file, // and store it in buf[]. Return the length of // the string. static int scanstr(char *buf) { int i, c; // Loop while we have enough buffer space for (i = 0; i < TEXTLEN - 1; i++) { // Get the next char and append to buf // Return when we hit the ending double quote if ((c = scanch()) == '"') { buf[i] = 0; return (i); } buf[i] = c; } // Ran out of buf[] space fatal("String literal too long"); return (0); } // Scan an identifier from the input file and // store it in buf[]. Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { fatal("Identifier too long"); } else if (i < lim - 1) { buf[i++] = c; } c = next(); } // We hit a non-valid character, put it back. 
// NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. static int keyword(char *s) { switch (*s) { case 'c': if (!strcmp(s, "char")) return (T_CHAR); break; case 'e': if (!strcmp(s, "else")) return (T_ELSE); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'l': if (!strcmp(s, "long")) return (T_LONG); break; case 'r': if (!strcmp(s, "return")) return (T_RETURN); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; case 'v': if (!strcmp(s, "void")) return (T_VOID); break; } return (0); } // A pointer to a rejected token static struct token *Rejtoken = NULL; // Reject the token that we just scanned void reject_token(struct token *t) { if (Rejtoken != NULL) fatal("Can't reject token twice"); Rejtoken = t; } // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. 
int scan(struct token *t) { int c, tokentype; // If we have any rejected token, return it if (Rejtoken != NULL) { t = Rejtoken; Rejtoken = NULL; return (1); } // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': if ((c = next()) == '+') { t->token = T_INC; } else { putback(c); t->token = T_PLUS; } break; case '-': if ((c = next()) == '-') { t->token = T_DEC; } else { putback(c); t->token = T_MINUS; } break; case '*': t->token = T_STAR; break; case '/': t->token = T_SLASH; break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case '[': t->token = T_LBRACKET; break; case ']': t->token = T_RBRACKET; break; case '~': t->token = T_INVERT; break; case '^': t->token = T_XOR; break; case ',': t->token = T_COMMA; break; case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { putback(c); t->token = T_LOGNOT; } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else if (c == '<') { t->token = T_LSHIFT; } else { putback(c); t->token = T_LT; } break; case '>': if ((c = next()) == '=') { t->token = T_GE; } else if (c == '>') { t->token = T_RSHIFT; } else { putback(c); t->token = T_GT; } break; case '&': if ((c = next()) == '&') { t->token = T_LOGAND; } else { putback(c); t->token = T_AMPER; } break; case '|': if ((c = next()) == '|') { t->token = T_LOGOR; } else { putback(c); t->token = T_OR; } break; case '\'': // If it's a quote, scan in the // literal character value and // the trailing quote t->intvalue = scanch(); t->token = T_INTLIT; if (next() != '\'') fatal("Expected '\\'' at end of char literal"); break; case '"': // Scan in a literal string scanstr(Text); t->token = T_STRLIT; break; default: // If it's a digit, 
scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if ((tokentype = keyword(Text)) != 0) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token return (1); } ================================================ FILE: 27_Testing_Errors/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // Prototypes static struct ASTnode *single_statement(void); // compound_statement: // empty, i.e. no statement // | statement // | statement statements // ; // // statement: declaration // | expression_statement // | function_call // | if_statement // | while_statement // | for_statement // | return_statement // ; // if_statement: if_head // | if_head 'else' compound_statement // ; // // if_head: 'if' '(' true_false_expression ')' compound_statement ; // // Parse an IF statement including any // optional ELSE clause and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. 
condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, 0); rparen(); // Get the AST for the compound statement trueAST = compound_statement(); // If we have an 'else', skip it // and get the AST for the compound statement if (Token.token == T_ELSE) { scan(&Token); falseAST = compound_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, P_NONE, condAST, trueAST, falseAST, 0)); } // while_statement: 'while' '(' true_false_expression ')' compound_statement ; // // Parse a WHILE statement and return its AST static struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, 0); rparen(); // Get the AST for the compound statement bodyAST = compound_statement(); // Build and return the AST for this statement return (mkastnode(A_WHILE, P_NONE, condAST, NULL, bodyAST, 0)); } // for_statement: 'for' '(' preop_statement ';' // true_false_expression ';' // postop_statement ')' compound_statement ; // // preop_statement: statement (for now) // postop_statement: statement (for now) // // Parse a FOR statement and return its AST static struct ASTnode *for_statement(void) { struct ASTnode *condAST, *bodyAST; struct ASTnode *preopAST, *postopAST; struct ASTnode *tree; // Ensure we have 'for' '(' match(T_FOR, "for"); lparen(); // Get the pre_op statement and the ';' preopAST = single_statement(); semi(); // Get the condition and the ';'. // Force a non-comparison to be boolean // the tree's operation is a comparison. 
condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, 0); semi(); // Get the post_op statement and the ')' postopAST = single_statement(); rparen(); // Get the compound statement which is the body bodyAST = compound_statement(); // For now, all four sub-trees have to be non-NULL. // Later on, we'll change the semantics for when some are missing // Glue the compound statement and the postop tree tree = mkastnode(A_GLUE, P_NONE, bodyAST, NULL, postopAST, 0); // Make a WHILE loop with the condition and this new body tree = mkastnode(A_WHILE, P_NONE, condAST, NULL, tree, 0); // And glue the preop tree to the A_WHILE tree return (mkastnode(A_GLUE, P_NONE, preopAST, NULL, tree, 0)); } // return_statement: 'return' '(' expression ')' ; // // Parse a return statement and return its AST static struct ASTnode *return_statement(void) { struct ASTnode *tree; // Can't return a value if function returns P_VOID if (Symtable[Functionid].type == P_VOID) fatal("Can't return from a void function"); // Ensure we have 'return' '(' match(T_RETURN, "return"); lparen(); // Parse the following expression tree = binexpr(0); // Ensure this is compatible with the function's type tree = modify_type(tree, Symtable[Functionid].type, 0); if (tree == NULL) fatal("Incompatible type to return"); // Add on the A_RETURN node tree = mkastunary(A_RETURN, P_NONE, tree, 0); // Get the ')' rparen(); return (tree); } // Parse a single statement and return its AST static struct ASTnode *single_statement(void) { int type; switch (Token.token) { case T_CHAR: case T_INT: case T_LONG: // The beginning of a variable declaration. // Parse the type and get the identifier. 
// Then parse the rest of the declaration // and skip over the semicolon type = parse_type(); ident(); var_declaration(type, C_LOCAL); semi(); return (NULL); // No AST generated here case T_IF: return (if_statement()); case T_WHILE: return (while_statement()); case T_FOR: return (for_statement()); case T_RETURN: return (return_statement()); default: // For now, see if this is an expression. // This catches assignment statements. return (binexpr(0)); } return (NULL); // Keep -Wall happy } // Parse a compound statement // and return its AST struct ASTnode *compound_statement(void) { struct ASTnode *left = NULL; struct ASTnode *tree; // Require a left curly bracket lbrace(); while (1) { // Parse a single statement tree = single_statement(); // Some statements must be followed by a semicolon if (tree != NULL && (tree->op == A_ASSIGN || tree->op == A_RETURN || tree->op == A_FUNCCALL)) semi(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, P_NONE, left, NULL, tree, 0); } // When we hit a right curly bracket, // skip past it and return the AST if (Token.token == T_RBRACE) { rbrace(); return (left); } } } ================================================ FILE: 27_Testing_Errors/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 // Determine if the symbol s is in the global symbol table. // Return its slot position or -1 if not found. // Skip C_PARAM entries int findglob(char *s) { int i; for (i = 0; i < Globs; i++) { if (Symtable[i].class == C_PARAM) continue; if (*s == *Symtable[i].name && !strcmp(s, Symtable[i].name)) return (i); } return (-1); } // Get the position of a new global symbol slot, or die // if we've run out of positions. 
static int newglob(void) { int p; if ((p = Globs++) >= Locls) fatal("Too many global symbols"); return (p); } // Determine if the symbol s is in the local symbol table. // Return its slot position or -1 if not found. int findlocl(char *s) { int i; for (i = Locls + 1; i < NSYMBOLS; i++) { if (*s == *Symtable[i].name && !strcmp(s, Symtable[i].name)) return (i); } return (-1); } // Get the position of a new local symbol slot, or die // if we've run out of positions. static int newlocl(void) { int p; if ((p = Locls--) <= Globs) fatal("Too many local symbols"); return (p); } // Clear all the entries in the // local symbol table void freeloclsyms(void) { Locls = NSYMBOLS - 1; } // Update a symbol at the given slot number in the symbol table. Set up its: // + type: char, int etc. // + structural type: var, function, array etc. // + size: number of elements // + endlabel: if this is a function // + posn: Position information for local symbols static void updatesym(int slot, char *name, int type, int stype, int class, int endlabel, int size, int posn) { if (slot < 0 || slot >= NSYMBOLS) fatal("Invalid symbol slot number in updatesym()"); Symtable[slot].name = strdup(name); Symtable[slot].type = type; Symtable[slot].stype = stype; Symtable[slot].class = class; Symtable[slot].endlabel = endlabel; Symtable[slot].size = size; Symtable[slot].posn = posn; } // Add a global symbol to the symbol table. Set up its: // + type: char, int etc. // + structural type: var, function, array etc. 
// + class of the symbol // + size: number of elements // + endlabel: if this is a function // Return the slot number in the symbol table int addglob(char *name, int type, int stype, int class, int endlabel, int size) { int slot; // If this is already in the symbol table, return the existing slot if ((slot = findglob(name)) != -1) return (slot); // Otherwise get a new slot and fill it in slot = newglob(); updatesym(slot, name, type, stype, class, endlabel, size, 0); // Generate the assembly for the symbol if it's global if (class == C_GLOBAL) genglobsym(slot); // Return the slot number return (slot); } // Add a local symbol to the symbol table. Set up its: // + type: char, int etc. // + structural type: var, function, array etc. // + size: number of elements // Return the slot number in the symbol table, -1 if a duplicate entry int addlocl(char *name, int type, int stype, int class, int size) { int localslot; // If this is already in the symbol table, return an error if ((localslot = findlocl(name)) != -1) return (-1); // Otherwise get a new symbol slot and a position for this local. // Update the local symbol table entry. localslot = newlocl(); updatesym(localslot, name, type, stype, class, 0, size, 0); // Return the local symbol's slot return (localslot); } // Given a function's slot number, copy the global parameters // from its prototype to be local parameters void copyfuncparams(int slot) { int i, id = slot + 1; for (i = 0; i < Symtable[slot].nelems; i++, id++) { addlocl(Symtable[id].name, Symtable[id].type, Symtable[id].stype, Symtable[id].class, Symtable[id].size); } } // Determine if the symbol s is in the symbol table. // Return its slot position or -1 if not found. 
int findsymbol(char *s) { int slot; slot = findlocl(s); if (slot == -1) slot = findglob(s); return (slot); } ================================================ FILE: 27_Testing_Errors/tests/err.input31.c ================================================ Expecting a primary expression, got token:15 on line 3 ================================================ FILE: 27_Testing_Errors/tests/err.input32.c ================================================ Unknown variable:cow on line 2 ================================================ FILE: 27_Testing_Errors/tests/err.input33.c ================================================ Incompatible type to return on line 2 ================================================ FILE: 27_Testing_Errors/tests/err.input34.c ================================================ For now, declaration of local arrays is not implemented on line 2 ================================================ FILE: 27_Testing_Errors/tests/err.input35.c ================================================ Duplicate local variable declaration:a on line 2 ================================================ FILE: 27_Testing_Errors/tests/err.input36.c ================================================ Type doesn't match prototype for parameter:3 on line 2 ================================================ FILE: 27_Testing_Errors/tests/err.input37.c ================================================ Unexpected token in parameter list:15 on line 1 ================================================ FILE: 27_Testing_Errors/tests/err.input38.c ================================================ Type doesn't match prototype for parameter:3 on line 2 ================================================ FILE: 27_Testing_Errors/tests/err.input39.c ================================================ No statements in function with non-void type on line 2 ================================================ FILE: 27_Testing_Errors/tests/err.input40.c ================================================ No return for 
function with non-void type on line 2 ================================================ FILE: 27_Testing_Errors/tests/err.input41.c ================================================ Can't return from a void function on line 1 ================================================ FILE: 27_Testing_Errors/tests/err.input42.c ================================================ Undeclared function:fred on line 1 ================================================ FILE: 27_Testing_Errors/tests/err.input43.c ================================================ Undeclared array:b on line 1 ================================================ FILE: 27_Testing_Errors/tests/err.input44.c ================================================ Unknown variable:z on line 1 ================================================ FILE: 27_Testing_Errors/tests/err.input45.c ================================================ & operator must be followed by an identifier on line 1 ================================================ FILE: 27_Testing_Errors/tests/err.input46.c ================================================ * operator must be followed by an identifier or * on line 1 ================================================ FILE: 27_Testing_Errors/tests/err.input47.c ================================================ ++ operator must be followed by an identifier on line 1 ================================================ FILE: 27_Testing_Errors/tests/err.input48.c ================================================ -- operator must be followed by an identifier on line 1 ================================================ FILE: 27_Testing_Errors/tests/err.input49.c ================================================ Incompatible expression in assignment on line 4 ================================================ FILE: 27_Testing_Errors/tests/err.input50.c ================================================ Incompatible types in binary expression on line 4 ================================================ FILE: 
27_Testing_Errors/tests/err.input51.c ================================================ Expected '\'' at end of char literal on line 2 ================================================ FILE: 27_Testing_Errors/tests/err.input52.c ================================================ Unrecognised character:$ on line 3 ================================================ FILE: 27_Testing_Errors/tests/input01.c ================================================ void main() { printint(12 * 3); printint(18 - 2 * 4); printint(1 + 2 + 9 - 5/2 + 3*5); } ================================================ FILE: 27_Testing_Errors/tests/input02.c ================================================ void main() { int fred; int jim; fred= 5; jim= 12; printint(fred + jim); } ================================================ FILE: 27_Testing_Errors/tests/input03.c ================================================ void main() { int x; x= 1; printint(x); x= x + 1; printint(x); x= x + 1; printint(x); x= x + 1; printint(x); x= x + 1; printint(x); } ================================================ FILE: 27_Testing_Errors/tests/input04.c ================================================ void main() { int x; x= 7 < 9; printint(x); x= 7 <= 9; printint(x); x= 7 != 9; printint(x); x= 7 == 7; printint(x); x= 7 >= 7; printint(x); x= 7 <= 7; printint(x); x= 9 > 7; printint(x); x= 9 >= 7; printint(x); x= 9 != 7; printint(x); } ================================================ FILE: 27_Testing_Errors/tests/input05.c ================================================ void main() { int i; int j; i=6; j=12; if (i < j) { printint(i); } else { printint(j); } } ================================================ FILE: 27_Testing_Errors/tests/input06.c ================================================ void main() { int i; i=1; while (i <= 10) { printint(i); i= i + 1; } } ================================================ FILE: 27_Testing_Errors/tests/input07.c ================================================ void main() { int i; for 
(i= 1; i <= 10; i= i + 1) { printint(i); } } ================================================ FILE: 27_Testing_Errors/tests/input08.c ================================================ void main() { int i; for (i= 1; i <= 10; i= i + 1) { printint(i); } } ================================================ FILE: 27_Testing_Errors/tests/input09.c ================================================ void main() { int i; for (i= 1; i <= 10; i= i + 1) { printint(i); } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { printint(2 * b - a); } } ================================================ FILE: 27_Testing_Errors/tests/input10.c ================================================ void main() { int i; char j; j= 20; printint(j); i= 10; printint(i); for (i= 1; i <= 5; i= i + 1) { printint(i); } for (j= 253; j != 2; j= j + 1) { printint(j); } } ================================================ FILE: 27_Testing_Errors/tests/input11.c ================================================ int main() { int i; char j; long k; i= 10; printint(i); j= 20; printint(j); k= 30; printint(k); for (i= 1; i <= 5; i= i + 1) { printint(i); } for (j= 253; j != 4; j= j + 1) { printint(j); } for (k= 1; k <= 5; k= k + 1) { printint(k); } return(i); printint(12345); return(3); } ================================================ FILE: 27_Testing_Errors/tests/input12.c ================================================ int fred() { return(5); } void main() { int x; x= fred(2); printint(x); } ================================================ FILE: 27_Testing_Errors/tests/input13.c ================================================ int fred() { return(56); } void main() { int dummy; int result; dummy= printint(23); result= fred(10); dummy= printint(result); } ================================================ FILE: 27_Testing_Errors/tests/input14.c ================================================ int fred() { return(20); } int main() { int result; printint(10); result= fred(15); printint(result); 
printint(fred(15)+10); return(0); } ================================================ FILE: 27_Testing_Errors/tests/input15.c ================================================ int main() { char a; char *b; char c; int d; int *e; int f; a= 18; printint(a); b= &a; c= *b; printint(c); d= 12; printint(d); e= &d; f= *e; printint(f); return(0); } ================================================ FILE: 27_Testing_Errors/tests/input16.c ================================================ int c; int d; int *e; int f; int main() { c= 12; d=18; printint(c); e= &c + 1; f= *e; printint(f); return(0); } ================================================ FILE: 27_Testing_Errors/tests/input17.c ================================================ int main() { char a; char *b; int d; int *e; b= &a; *b= 19; printint(a); e= &d; *e= 12; printint(d); return(0); } ================================================ FILE: 27_Testing_Errors/tests/input18.c ================================================ int main() { int a; int b; a= b= 34; printint(a); printint(b); return(0); } ================================================ FILE: 27_Testing_Errors/tests/input18a.c ================================================ int a; int *b; char c; char *d; int main() { b= &a; *b= 15; printint(a); d= &c; *d= 16; printint(c); return(0); } ================================================ FILE: 27_Testing_Errors/tests/input19.c ================================================ int a; int b; int c; int d; int e; int main() { a= 2; b= 4; c= 3; d= 2; e= (a+b) * (c+d); printint(e); return(0); } ================================================ FILE: 27_Testing_Errors/tests/input20.c ================================================ int a; int b[25]; int main() { b[3]= 12; a= b[3]; printint(a); return(0); } ================================================ FILE: 27_Testing_Errors/tests/input21.c ================================================ char c; char *str; int main() { c= '\n'; printint(c); for (str= "Hello world\n"; 
*str != 0; str= str + 1) { printchar(*str); } return(0); } ================================================ FILE: 27_Testing_Errors/tests/input22.c ================================================ char a; char b; char c; int d; int e; int f; long g; long h; long i; int main() { b= 5; c= 7; a= b + c++; printint(a); e= 5; f= 7; d= e + f++; printint(d); h= 5; i= 7; g= h + i++; printint(g); a= b-- + c; printint(a); d= e-- + f; printint(d); g= h-- + i; printint(g); a= ++b + c; printint(a); d= ++e + f; printint(d); g= ++h + i; printint(g); a= b * --c; printint(a); d= e * --f; printint(d); g= h * --i; printint(g); return(0); } ================================================ FILE: 27_Testing_Errors/tests/input23.c ================================================ char *str; int x; int main() { x= -23; printint(x); printint(-10 * -10); x= 1; x= ~x; printint(x); x= 2 > 5; printint(x); x= !x; printint(x); x= !x; printint(x); x= 13; if (x) { printint(13); } x= 0; if (!x) { printint(14); } for (str= "Hello world\n"; *str; str++) { printchar(*str); } return(0); } ================================================ FILE: 27_Testing_Errors/tests/input24.c ================================================ int a; int b; int c; int main() { a= 42; b= 19; printint(a & b); printint(a | b); printint(a ^ b); printint(1 << 3); printint(63 >> 3); return(0); } ================================================ FILE: 27_Testing_Errors/tests/input25.c ================================================ int a; int b; int c; int main() { char z; int y; int x; x= 10; y= 20; z= 30; printint(x); printint(y); printint(z); a= 5; b= 15; c= 25; printint(a); printint(b); printint(c); return(0); } ================================================ FILE: 27_Testing_Errors/tests/input26.c ================================================ int main(int a, char b, long c, int d, int e, int f, int g, int h) { int i; int j; int k; a= 13; printint(a); b= 23; printint(b); c= 34; printint(c); d= 44; printint(d); e= 54; 
printint(e); f= 64; printint(f); g= 74; printint(g); h= 84; printint(h); i= 94; printint(i); j= 95; printint(j); k= 96; printint(k); return(0); } ================================================ FILE: 27_Testing_Errors/tests/input27.c ================================================ int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printint(a); printint(b); printint(c); printint(d); printint(e); printint(f); printint(g); printint(h); return(0); } int param5(int a, int b, int c, int d, int e) { printint(a); printint(b); printint(c); printint(d); printint(e); return(0); } int param2(int a, int b) { int c; int d; int e; c= 3; d= 4; e= 5; printint(a); printint(b); printint(c); printint(d); printint(e); return(0); } int param0() { int a; int b; int c; int d; int e; a= 1; b= 2; c= 3; d= 4; e= 5; printint(a); printint(b); printint(c); printint(d); printint(e); return(0); } int main() { param8(1,2,3,4,5,6,7,8); param5(1,2,3,4,5); param2(1,2); param0(); return(0); } ================================================ FILE: 27_Testing_Errors/tests/input28.c ================================================ int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printint(a); printint(b); printint(c); printint(d); printint(e); printint(f); printint(g); printint(h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printint(x); return(0); } ================================================ FILE: 27_Testing_Errors/tests/input29.c ================================================ int param8(int a, int b, int c, int d, int e, int f, int g, int h); int fred(int a, int b, int c); int main(); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printint(a); printint(b); printint(c); printint(d); printint(e); printint(f); printint(g); printint(h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 
34); x= fred(2, 3, 4); printint(x); return(0); } ================================================ FILE: 27_Testing_Errors/tests/input30.c ================================================ int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input30.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 27_Testing_Errors/tests/input31.c ================================================ int main() { int x; x= 2 + + 3 - * / ; } ================================================ FILE: 27_Testing_Errors/tests/input32.c ================================================ int main() { pizza cow llama sausage; } ================================================ FILE: 27_Testing_Errors/tests/input33.c ================================================ int main() { char *z; return(z); } ================================================ FILE: 27_Testing_Errors/tests/input34.c ================================================ int main() { int a[12]; return(0); } ================================================ FILE: 27_Testing_Errors/tests/input35.c ================================================ int fred(int a, int b) { int a; return(a); } ================================================ FILE: 27_Testing_Errors/tests/input36.c ================================================ int fred(int a, char b, int c); int fred(int a, int b, char c); ================================================ FILE: 27_Testing_Errors/tests/input37.c ================================================ int fred(int a, char b +, int z); ================================================ FILE: 27_Testing_Errors/tests/input38.c ================================================ int fred(int a, char b, int c); int fred(int a, int b, char c, int g); 
================================================ FILE: 27_Testing_Errors/tests/input39.c ================================================ int main() { int a; } ================================================ FILE: 27_Testing_Errors/tests/input40.c ================================================ int main() { int a; a= 5; } ================================================ FILE: 27_Testing_Errors/tests/input41.c ================================================ void fred() { return(5); } ================================================ FILE: 27_Testing_Errors/tests/input42.c ================================================ int main() { fred(5); } ================================================ FILE: 27_Testing_Errors/tests/input43.c ================================================ int main() { int a; a= b[4]; } ================================================ FILE: 27_Testing_Errors/tests/input44.c ================================================ int main() { int a; a= z; } ================================================ FILE: 27_Testing_Errors/tests/input45.c ================================================ int main() { int a; a= &5; } ================================================ FILE: 27_Testing_Errors/tests/input46.c ================================================ int main() { int a; a= *5; } ================================================ FILE: 27_Testing_Errors/tests/input47.c ================================================ int main() { int a; a= ++5; } ================================================ FILE: 27_Testing_Errors/tests/input48.c ================================================ int main() { int a; a= --5; } ================================================ FILE: 27_Testing_Errors/tests/input49.c ================================================ int main() { int x; char y; y= x; } ================================================ FILE: 27_Testing_Errors/tests/input50.c ================================================ int main() { char *a; char 
*b; a= a + b; }

================================================
FILE: 27_Testing_Errors/tests/input51.c
================================================
int main() { char a; a= 'fred'; }

================================================
FILE: 27_Testing_Errors/tests/input52.c
================================================
int main() { int a; a= $5.00; }

================================================
FILE: 27_Testing_Errors/tests/input53.c
================================================
int printf(char *fmt); int main() { printf("Hello world, %d\n", 23); return(0); }

================================================
FILE: 27_Testing_Errors/tests/input54.c
================================================
int printf(char *fmt); int main() { int i; for (i=0; i < 20; i++) { printf("Hello world, %d\n", i); } return(0); }

================================================
FILE: 27_Testing_Errors/tests/mktests
================================================
#!/bin/sh
# Make the output files for each test.
# An input that our compiler rejects gets an err.<name> file holding
# the compiler's stderr; any other input gets an out.<name> file
# holding the output of the same program built with the system cc.

# Build our compiler if needed.
# Only run make if the cd actually succeeded.
if [ ! -f ../comp1 ]
then (cd .. && make)
fi

for i in input*c
# Skip inputs that already have an expected-result file.
# Two tests joined with && replace the obsolescent "-a" operator.
do if [ ! -f "out.$i" ] && [ ! -f "err.$i" ]
   then
     # Quote the file name so that it is always passed as one word
     ../comp1 "$i" 2> "err.$i"
     # If the err file is empty
     if [ ! -s "err.$i" ]
     then
        rm -f "err.$i"
        cc -o out "$i" ../lib/printint.c
        ./out > "out.$i"
     fi
   fi
   # Remove the build products whether or not a result file was made
   rm -f out out.s
done

================================================
FILE: 27_Testing_Errors/tests/out.input01.c
================================================
36 10 25

================================================
FILE: 27_Testing_Errors/tests/out.input02.c
================================================
17

================================================
FILE: 27_Testing_Errors/tests/out.input03.c
================================================
1 2 3 4 5

================================================
FILE: 27_Testing_Errors/tests/out.input04.c
================================================
1 1 1 1 1 1 1 1 1

================================================
FILE: 27_Testing_Errors/tests/out.input05.c
================================================
6

================================================
FILE: 27_Testing_Errors/tests/out.input06.c
================================================
1 2 3 4 5 6 7 8 9 10

================================================
FILE: 27_Testing_Errors/tests/out.input07.c
================================================
1 2 3 4 5 6 7 8 9 10

================================================
FILE: 27_Testing_Errors/tests/out.input08.c
================================================
1 2 3 4 5 6 7 8 9 10

================================================
FILE: 27_Testing_Errors/tests/out.input09.c
================================================
1 2 3 4 5 6 7 8 9 10

================================================
FILE: 27_Testing_Errors/tests/out.input10.c
================================================
20 10 1 2 3 4 5 253 254 255 0 1

================================================
FILE: 27_Testing_Errors/tests/out.input11.c
================================================
10 20 30 1 2 3 4 5 253 254 255 0 1 2 3 1 2 3 4 5

================================================
FILE: 27_Testing_Errors/tests/out.input12.c
================================================ 5 ================================================ FILE: 27_Testing_Errors/tests/out.input13.c ================================================ 23 56 ================================================ FILE: 27_Testing_Errors/tests/out.input14.c ================================================ 10 20 30 ================================================ FILE: 27_Testing_Errors/tests/out.input15.c ================================================ 18 18 12 12 ================================================ FILE: 27_Testing_Errors/tests/out.input16.c ================================================ 12 18 ================================================ FILE: 27_Testing_Errors/tests/out.input17.c ================================================ 19 12 ================================================ FILE: 27_Testing_Errors/tests/out.input18.c ================================================ 34 34 ================================================ FILE: 27_Testing_Errors/tests/out.input18a.c ================================================ 15 16 ================================================ FILE: 27_Testing_Errors/tests/out.input19.c ================================================ 30 ================================================ FILE: 27_Testing_Errors/tests/out.input20.c ================================================ 12 ================================================ FILE: 27_Testing_Errors/tests/out.input21.c ================================================ 10 Hello world ================================================ FILE: 27_Testing_Errors/tests/out.input22.c ================================================ 12 12 12 13 13 13 13 13 13 35 35 35 ================================================ FILE: 27_Testing_Errors/tests/out.input23.c ================================================ -23 100 -2 0 1 0 13 14 Hello world ================================================ FILE: 27_Testing_Errors/tests/out.input24.c 
================================================ 2 59 57 8 7 ================================================ FILE: 27_Testing_Errors/tests/out.input25.c ================================================ 10 20 30 5 15 25 ================================================ FILE: 27_Testing_Errors/tests/out.input26.c ================================================ 13 23 34 44 54 64 74 84 94 95 96 ================================================ FILE: 27_Testing_Errors/tests/out.input27.c ================================================ 1 2 3 4 5 6 7 8 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 ================================================ FILE: 27_Testing_Errors/tests/out.input28.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 27_Testing_Errors/tests/out.input29.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 27_Testing_Errors/tests/out.input30.c ================================================ int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input30.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 27_Testing_Errors/tests/out.input53.c ================================================ Hello world, 23 ================================================ FILE: 27_Testing_Errors/tests/out.input54.c ================================================ Hello world, 0 Hello world, 1 Hello world, 2 Hello world, 3 Hello world, 4 Hello world, 5 Hello world, 6 Hello world, 7 Hello world, 8 Hello world, 9 Hello world, 10 Hello world, 11 Hello world, 12 Hello world, 13 Hello world, 14 Hello world, 15 Hello world, 16 Hello world, 17 Hello world, 18 Hello world, 19 
================================================ FILE: 27_Testing_Errors/tests/runtests ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! -f ../comp1 ] then (cd ..; make) fi # Try to use each input source file for i in input* # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" # Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../comp1 $i # Assemble the output, run it # and get the output in trial.$i cc -o out out.s ../lib/printint.c ./out > trial.$i # Compare this against the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../comp1 $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.s "trial.$i" done ================================================ FILE: 27_Testing_Errors/tests/runtestsn ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! -f ../compn ] then (cd ..; make) fi # Try to use each input source file for i in input* # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!"
# Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../compn $i # Assemble the output, run it # and get the output in trial.$i nasm -f elf64 out.s cc -no-pie -fno-plt -Wall -o out out.o ../lib/printint.c ./out > trial.$i # Compare this against the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../compn $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.o out.s "trial.$i" done ================================================ FILE: 27_Testing_Errors/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->type = type; n->left = left; n->mid = mid; n->right = right; n->v.intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, int intvalue) { return (mkastnode(op, type, NULL, NULL, NULL, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, int intvalue) {
return (mkastnode(op, type, left, NULL, NULL, intvalue)); } // Generate and return a new label number // just for AST dumping purposes static int gendumplabel(void) { static int id = 1; return (id++); } // Given an AST tree, print it out and follow the // traversal of the tree that genAST() follows. // 'level' is the number of spaces of indentation for this node; // 'label' is accepted but never read by this function void dumpAST(struct ASTnode *n, int label, int level) { int Lfalse, Lstart, Lend; switch (n->op) { case A_IF: Lfalse = gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_IF"); if (n->right) { Lend = gendumplabel(); fprintf(stdout, ", end L%d", Lend); } fprintf(stdout, "\n"); dumpAST(n->left, Lfalse, level + 2); dumpAST(n->mid, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); return; case A_WHILE: Lstart = gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_WHILE, start L%d\n", Lstart); Lend = gendumplabel(); dumpAST(n->left, Lend, level + 2); dumpAST(n->right, NOLABEL, level + 2); return; } // Reset level to -2 for A_GLUE if (n->op == A_GLUE) level = -2; // General AST node handling if (n->left) dumpAST(n->left, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); for (int i = 0; i < level; i++) fprintf(stdout, " "); switch (n->op) { case A_GLUE: fprintf(stdout, "\n\n"); return; case A_FUNCTION: fprintf(stdout, "A_FUNCTION %s\n", Symtable[n->v.id].name); return; case A_ADD: fprintf(stdout, "A_ADD\n"); return; case A_SUBTRACT: fprintf(stdout, "A_SUBTRACT\n"); return; case A_MULTIPLY: fprintf(stdout, "A_MULTIPLY\n"); return; case A_DIVIDE: fprintf(stdout, "A_DIVIDE\n"); return; case A_EQ: fprintf(stdout, "A_EQ\n"); return; case A_NE: fprintf(stdout, "A_NE\n"); return; case A_LT: fprintf(stdout, "A_LT\n"); return; case A_GT: fprintf(stdout, "A_GT\n"); return; case A_LE: fprintf(stdout, "A_LE\n"); return; case A_GE: fprintf(stdout, "A_GE\n"); return; case A_INTLIT: fprintf(stdout, "A_INTLIT %d\n", n->v.intvalue); return; case A_STRLIT: fprintf(stdout,
"A_STRLIT rval label L%d\n", n->v.id); return; case A_IDENT: if (n->rvalue) fprintf(stdout, "A_IDENT rval %s\n", Symtable[n->v.id].name); else fprintf(stdout, "A_IDENT %s\n", Symtable[n->v.id].name); return; case A_ASSIGN: fprintf(stdout, "A_ASSIGN\n"); return; case A_WIDEN: fprintf(stdout, "A_WIDEN\n"); return; case A_RETURN: fprintf(stdout, "A_RETURN\n"); return; case A_FUNCCALL: fprintf(stdout, "A_FUNCCALL %s\n", Symtable[n->v.id].name); return; case A_ADDR: fprintf(stdout, "A_ADDR %s\n", Symtable[n->v.id].name); return; case A_DEREF: if (n->rvalue) fprintf(stdout, "A_DEREF rval\n"); else fprintf(stdout, "A_DEREF\n"); return; case A_SCALE: fprintf(stdout, "A_SCALE %d\n", n->v.size); return; case A_PREINC: fprintf(stdout, "A_PREINC %s\n", Symtable[n->v.id].name); return; case A_PREDEC: fprintf(stdout, "A_PREDEC %s\n", Symtable[n->v.id].name); return; case A_POSTINC: fprintf(stdout, "A_POSTINC\n"); return; case A_POSTDEC: fprintf(stdout, "A_POSTDEC\n"); return; case A_NEGATE: fprintf(stdout, "A_NEGATE\n"); return; default: fatald("Unknown dumpAST operator", n->op); } } ================================================ FILE: 27_Testing_Errors/types.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Types and type handling // Copyright (c) 2019 Warren Toomey, GPL3 // Return true if a type is an int type // of any size, false otherwise int inttype(int type) { if (type == P_CHAR || type == P_INT || type == P_LONG) return (1); return (0); } // Return true if a type is of pointer type int ptrtype(int type) { if (type == P_VOIDPTR || type == P_CHARPTR || type == P_INTPTR || type == P_LONGPTR) return (1); return (0); } // Given a primitive type, return // the type which is a pointer to it int pointer_to(int type) { int newtype; switch (type) { case P_VOID: newtype = P_VOIDPTR; break; case P_CHAR: newtype = P_CHARPTR; break; case P_INT: newtype = P_INTPTR; break; case P_LONG: newtype = P_LONGPTR; break; default: 
fatald("Unrecognised in pointer_to: type", type); } return (newtype); } // Given a primitive pointer type, return // the type which it points to int value_at(int type) { int newtype; switch (type) { case P_VOIDPTR: newtype = P_VOID; break; case P_CHARPTR: newtype = P_CHAR; break; case P_INTPTR: newtype = P_INT; break; case P_LONGPTR: newtype = P_LONG; break; default: fatald("Unrecognised in value_at: type", type); } return (newtype); } // Given an AST tree and a type which we want it to become, // possibly modify the tree by widening or scaling so that // it is compatible with this type. Return the original tree // if no changes occurred, a modified tree, or NULL if the // tree is not compatible with the given type. // If this will be part of a binary operation, the AST op is not zero. struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op) { int ltype; int lsize, rsize; ltype = tree->type; // Compare scalar int types if (inttype(ltype) && inttype(rtype)) { // Both types same, nothing to do if (ltype == rtype) return (tree); // Get the sizes for each type lsize = genprimsize(ltype); rsize = genprimsize(rtype); // Tree's size is too big if (lsize > rsize) return (NULL); // Widen to the right if (rsize > lsize) return (mkastunary(A_WIDEN, rtype, tree, 0)); } // For pointers on the left if (ptrtype(ltype)) { // OK is same type on right and not doing a binary op if (op == 0 && ltype == rtype) return (tree); } // We can scale only on A_ADD or A_SUBTRACT operation if (op == A_ADD || op == A_SUBTRACT) { // Left is int type, right is pointer type and the size // of the original type is >1: scale the left if (inttype(ltype) && ptrtype(rtype)) { rsize = genprimsize(value_at(rtype)); if (rsize > 1) return (mkastunary(A_SCALE, rtype, tree, rsize)); else return (tree); // Size 1, no need to scale } } // If we get here, the types are not compatible return (NULL); } ================================================ FILE: 28_Runtime_Flags/Makefile 
================================================ HSRCS= data.h decl.h defs.h SRCS= cg.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c ARMSRCS= cg_arm.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c cwj: $(SRCS) $(HSRCS) cc -o cwj -g -Wall $(SRCS) compn: $(SRCN) $(HSRCS) cc -D__NASM__ -o compn -g -Wall $(SRCN) cwjarm: $(ARMSRCS) $(HSRCS) cc -o cwjarm -g -Wall $(ARMSRCS) cp cwjarm cwj clean: rm -f cwj cwjarm compn *.o *.s out test: cwj tests/runtests (cd tests; chmod +x runtests; ./runtests) armtest: cwjarm tests/runtests (cd tests; chmod +x runtests; ./runtests) testn: compn tests/runtestsn (cd tests; chmod +x runtestsn; ./runtestsn) ================================================ FILE: 28_Runtime_Flags/Readme.md ================================================ # Part 28: Adding More Run-time Flags This part of our compiler writing journey really doesn't have anything to do with scanning, parsing, semantic analysis or code generation. In this part, I add the `-c`, `-S` and `-o` run-time flags to the compiler so that it behaves more like a traditional Unix C compiler. So, if that's not interesting, feel free to skip to the next part of the journey. ## Compilation Steps Up to now, our compiler has only been outputting assembly files. But there are more steps to convert a source code file in a high-level language to an executable file: + Scan and parse the source code file to generate assembly output + Assemble the assembly output to an [object file](https://en.wikipedia.org/wiki/Object_file) + [Link](https://en.wikipedia.org/wiki/Linker_(computing)) one or more object files to produce the executable file We've been doing the last two steps manually or with our Makefile, but I'm going to modify the compiler to call an external assembler and linker to perform the last two steps. 
To do this, I'm going to rearrange some of the code in `main.c` and also write more functions in `main.c` to do the assembling and linking. Most of this code is typical string and file handling code done in C, so I'll go through the code but it may only be interesting if you've never seen this sort of code. ## Parsing the Command-Line Flags I've renamed the compiler to be `cwj` to reflect the name of the project. When you run it with no command-line arguments, it now gives this usage message: ``` $ ./cwj Usage: ./cwj [-vcST] [-o outfile] file [file ...] -v give verbose output of the compilation stages -c generate object files but don't link them -S generate assembly files but don't link them -T dump the AST trees for each input file -o outfile, produce the outfile executable file ``` We now allow multiple source code files as inputs. We have four boolean flags, `-v`, `-c`, `-S` and `-T`, and we can now name the output executable file. The `argv[]` parsing code in `main()` is now changed to deal with this, and there are several more option variables to hold the results. ```c // Initialise our variables O_dumpAST = 0; // If true, dump the AST trees O_keepasm = 0; // If true, keep any assembly files O_assemble = 0; // If true, assemble the assembly files O_dolink = 1; // If true, link the object files O_verbose = 0; // If true, print info on compilation stages // Scan for command-line options for (i = 1; i < argc; i++) { // No leading '-', stop scanning for options if (*argv[i] != '-') break; // For each option in this argument for (int j = 1; (*argv[i] == '-') && argv[i][j]; j++) { switch (argv[i][j]) { case 'o': outfilename = argv[++i]; break; // Save & skip to next argument case 'T': O_dumpAST = 1; break; case 'c': O_assemble = 1; O_keepasm = 0; O_dolink = 0; break; case 'S': O_keepasm = 1; O_assemble = 0; O_dolink = 0; break; case 'v': O_verbose = 1; break; default: usage(argv[0]); } } } ``` Note that some options are mutually exclusive, e.g. 
if we only want assembly output with `-S`, then we don't want to link or create object files. ## Performing the Compilation Stages With the command-line flags parsed, we can now run the compilation stages. We can compile and assemble each input file easily, but there may be a number of object files that we need to link together at the end. So we have some local variables in `main()` to store the object file names: ```c #define MAXOBJ 100 char *objlist[MAXOBJ]; // List of object file names int objcnt = 0; // Position to insert next name ``` We first process all the input source files in turn: ```c // Work on each input file in turn while (i < argc) { asmfile = do_compile(argv[i]); // Compile the source file if (O_dolink || O_assemble) { objfile = do_assemble(asmfile); // Assemble it to object format if (objcnt == (MAXOBJ - 2)) { fprintf(stderr, "Too many object files for the compiler to handle\n"); exit(1); } objlist[objcnt++] = objfile; // Add the object file's name objlist[objcnt] = NULL; // to the list of object files } if (!O_keepasm) // Remove the assembly file if unlink(asmfile); // we don't need to keep it i++; } ``` `do_compile()` has the code that used to be in `main()` to open the file, parse it ourselves and generate the assembly file. But we can't open up the hard-coded filename `out.s` like we used to; we now need to convert `filename.c` to `filename.s`. ## Altering the Input Filename We have a helper function to alter filenames. ```c // Given a string with a '.' and at least a 1-character suffix // after the '.', change the suffix to be the given character. // Return the new string or NULL if the original string could // not be modified char *alter_suffix(char *str, char suffix) { char *posn; char *newstr; // Clone the string if ((newstr = strdup(str)) == NULL) return (NULL); // Find the '.' 
if ((posn = strrchr(newstr, '.')) == NULL) return (NULL); // Ensure there is a suffix posn++; if (*posn == '\0') return (NULL); // Change the suffix and NUL-terminate the string *posn++ = suffix; *posn = '\0'; return (newstr); } ``` Only the `strdup()`, `strrchr()` and the last two lines do any real work; the rest is error checking. ## Doing the Compilation Here is the code that we used to have, now repackaged into a new function. ```c // Given an input filename, compile that file // down to assembly code. Return the new file's name static char *do_compile(char *filename) { Outfilename = alter_suffix(filename, 's'); if (Outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .c on the end\n", filename); exit(1); } // Open up the input file if ((Infile = fopen(filename, "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", filename, strerror(errno)); exit(1); } // Create the output file if ((Outfile = fopen(Outfilename, "w")) == NULL) { fprintf(stderr, "Unable to create %s: %s\n", Outfilename, strerror(errno)); exit(1); } Line = 1; // Reset the scanner Putback = '\n'; clear_symtable(); // Clear the symbol table if (O_verbose) printf("compiling %s\n", filename); scan(&Token); // Get the first token from the input genpreamble(); // Output the preamble global_declarations(); // Parse the global declarations genpostamble(); // Output the postamble fclose(Outfile); // Close the output file return (Outfilename); } ``` There's very little new code here, just the call to `alter_suffix()` to get the correct output file's name. There is one important change: the assembly output file is now a global variable called `Outfilename`. This allows the `fatal()` function and friends in `misc.c` to remove assembly files if we never fully generated them, e.g. 
```c // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d\n", s, Line); fclose(Outfile); unlink(Outfilename); exit(1); } ``` ## Assembling the Above Output Now that we have assembly output files, we can now call an external assembler to do this. This is defined as ASCMD in `defs.h`. Here's the function to do this: ```c #define ASCMD "as -o " // Given an input filename, assemble that file // down to object code. Return the object filename char *do_assemble(char *filename) { char cmd[TEXTLEN]; int err; char *outfilename = alter_suffix(filename, 'o'); if (outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .s on the end\n", filename); exit(1); } // Build the assembly command and run it snprintf(cmd, TEXTLEN, "%s %s %s", ASCMD, outfilename, filename); if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Assembly of %s failed\n", filename); exit(1); } return (outfilename); } ``` I'm using `snprintf()` to build the assembly command which we will run. If the user used the `-v` command-line flag, this command will be shown to them. Then we use `system()` to execute this Linux command. Example: ``` $ ./cwj -v -c tests/input54.c compiling tests/input54.c as -o tests/input54.o tests/input54.s ``` ## Linking the Object Files Down in `main()` we build up a list of object files that `do_assemble()` returns to us: ```c objlist[objcnt++] = objfile; // Add the object file's name objlist[objcnt] = NULL; // to the list of object files ``` So, when we need to link them all together, we need to pass this list to the `do_link()` function. The code is similar to `do_assemble()` in that it uses `snprintf()` and `system()`. The difference is that we must track where we are up to in our command buffer, and how much room is left to do more `snprintf()`ing. ```c #define LDCMD "cc -o " // Given a list of object files and an output filename, // link all of the object filenames together. 
void do_link(char *outfilename, char *objlist[]) { int cnt, size = TEXTLEN; char cmd[TEXTLEN], *cptr; int err; // Start with the linker command and the output file cptr = cmd; cnt = snprintf(cptr, size, "%s %s ", LDCMD, outfilename); cptr += cnt; size -= cnt; // Now append each object file while (*objlist != NULL) { cnt = snprintf(cptr, size, "%s ", *objlist); cptr += cnt; size -= cnt; objlist++; } if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Linking failed\n"); exit(1); } } ``` One annoyance is that I'm still calling the external C compiler `cc` to do the linking. We really should be able to break this dependency on another compiler. A long time ago, it was possible to link a set of object files manually by doing, e.g. ``` $ ld -o out /lib/crt0.o file1.o file.o /usr/lib/libc.a ``` I assume that it should be possible to do a similar command on current Linux, but so far my Google-fu isn't enough to work this out. If you read this and know the answer, let me know! ## Losing `printint()` and `printchar()` Now that we can call `printf()` directly in the programs that we can compile, we no longer need our hand-written `printint()` and `printchar()` functions. I've removed `lib/printint.c`, and I've updated all of the tests in the `tests/` directory to use `printf()`. I've also updated the `tests/mktests` and `tests/runtests` scripts so that they use the new compiler command-line arguments, and ditto the top-level `Makefile`. So a `make test` still runs our regression tests OK. ## Conclusion and What's Next That's about it for this part of our journey. Our compiler now feels like the traditional Unix compilers that I'm used to. I did promise to add in support for an external pre-processor in this step, but I decided against it. The main reason is that I would need to parse the filenames and line numbers that the pre-processor embeds in its output, e.g.
```c # 1 "tests/input54.c" # 1 "<built-in>" # 1 "<command-line>" # 31 "<command-line>" # 1 "/usr/include/stdc-predef.h" 1 3 4 # 32 "<command-line>" 2 # 1 "tests/input54.c" int printf(char *fmt); int main() { int i; for (i=0; i < 20; i++) { printf("Hello world, %d\n", i); } return(0); } ``` In the next part of our compiler writing journey, we will look at adding support for structs to our compiler. I think we might have to do another design step first before we get to implementing the changes. [Next step](../29_Refactoring/Readme.md) ================================================ FILE: 28_Runtime_Flags/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section we are outputting into enum { no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\t.text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\t.data\n", Outfile); currSeg = data_seg; } } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. static int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names.
// We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "%r10", "%r11", "%r12", "%r13", "%r9", "%r8", "%rcx", "%rdx", "%rsi", "%rdi" }; static char *breglist[] = { "%r10b", "%r11b", "%r12b", "%r13b", "%r9b", "%r8b", "%cl", "%dl", "%sil", "%dil" }; static char *dreglist[] = { "%r10d", "%r11d", "%r12d", "%r13d", "%r9d", "%r8d", "%ecx", "%edx", "%esi", "%edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Assembly preamble: no output is generated here, // we only mark every register as free void cgpreamble() { freeall_registers(); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; int i; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the old %rbp // and copy %rsp into %rbp to set up the new frame fprintf(Outfile, "\t.globl\t%s\n" "\t.type\t%s, @function\n" "%s:\n" "\tpushq\t%%rbp\n" "\tmovq\t%%rsp, %%rbp\n", name, name, name); // Copy any in-register parameters to the stack // Stop after no more than six parameter registers for (i = NSYMBOLS - 1; i > Locls; i--) { if (Symtable[i].class != C_PARAM) break; if (i < NSYMBOLS - 6) break; Symtable[i].posn = newlocaloffset(Symtable[i].type); cgstorlocal(paramReg--, i); } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. for (; i > Locls; i--) { if (Symtable[i].class == C_PARAM) { Symtable[i].posn = paramOffset; paramOffset += 8; } else { Symtable[i].posn = newlocaloffset(Symtable[i].type); } } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\taddq\t$%d,%%rsp\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Symtable[id].endlabel); fprintf(Outfile, "\taddq\t$%d,%%rsp\n", stackOffset); fputs("\tpopq %rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type.
int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadglob(int id, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it switch (Symtable[id].type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", Symtable[id].name); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", Symtable[id].name); fprintf(Outfile, "\tmovzbq\t%s(%%rip), %s\n", Symtable[id].name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", Symtable[id].name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", Symtable[id].name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", Symtable[id].name); if (op == A_PREDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", Symtable[id].name); fprintf(Outfile, "\tmovslq\t%s(%%rip), %s\n", Symtable[id].name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", Symtable[id].name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", Symtable[id].name); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: if (op == A_PREINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", Symtable[id].name); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", Symtable[id].name); fprintf(Outfile, "\tmovq\t%s(%%rip), %s\n", Symtable[id].name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", Symtable[id].name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", Symtable[id].name); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. 
If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadlocal(int id, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it switch (Symtable[id].type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", Symtable[id].posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", Symtable[id].posn); fprintf(Outfile, "\tmovzbq\t%d(%%rbp), %s\n", Symtable[id].posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", Symtable[id].posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", Symtable[id].posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", Symtable[id].posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", Symtable[id].posn); fprintf(Outfile, "\tmovslq\t%d(%%rbp), %s\n", Symtable[id].posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", Symtable[id].posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", Symtable[id].posn); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: if (op == A_PREINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", Symtable[id].posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", Symtable[id].posn); fprintf(Outfile, "\tmovq\t%d(%%rbp), %s\n", Symtable[id].posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", Symtable[id].posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", Symtable[id].posn); break; default: fatald("Bad type in cgloadlocal:", Symtable[id].type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int id) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tleaq\tL%d(%%rip), %s\n", id, reglist[r]); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, 
"\taddq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tandq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\torq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txorq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshlq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshrq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tnegq\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnotq\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, 
"\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); return (r); } // Convert an integer value to a boolean value. Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(int id, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s@PLT\n", Symtable[id].name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\taddq\t$%d, %%rsp\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpushq\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmovq\t%s, %s\n", reglist[r], reglist[FIRSTPARAMREG - argposn + 1]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsalq\t$%d, %s\n", val, reglist[r]); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %s(%%rip)\n", breglist[r], Symtable[id].name); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %s(%%rip)\n", dreglist[r], Symtable[id].name); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tmovq\t%s, %s(%%rip)\n", reglist[r], Symtable[id].name); break; default: fatald("Bad type in cgstorglob:", Symtable[id].type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, int id) { switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %d(%%rbp)\n", breglist[r], Symtable[id].posn); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %d(%%rbp)\n", dreglist[r], Symtable[id].posn); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tmovq\t%s, %d(%%rbp)\n", reglist[r], Symtable[id].posn); break; default: fatald("Bad type in cgstorlocal:", Symtable[id].type); } return (r); } // Array of type sizes in P_XXX order. // 0 means no size. static int psize[] = { 0, 0, 1, 4, 8, 8, 8, 8, 8 }; // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { // Check the type is valid if (type < P_NONE || type > P_LONGPTR) fatal("Bad type in cgprimsize()"); return (psize[type]); } // Generate a global symbol but not functions void cgglobsym(int id) { int typesize; if (Symtable[id].stype == S_FUNCTION) return; // Get the size of the type typesize = cgprimsize(Symtable[id].type); // Generate the global identity and the label cgdataseg(); fprintf(Outfile, "\t.globl\t%s\n", Symtable[id].name); fprintf(Outfile, "%s:", Symtable[id].name); // Generate the space for (int i = 0; i < Symtable[id].size; i++) { switch (typesize) { case 1: fprintf(Outfile, "\t.byte\t0\n"); break; case 4: fprintf(Outfile, "\t.long\t0\n"); break; case 8: fprintf(Outfile, "\t.quad\t0\n"); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\t.byte\t%d\n", *cptr); } fprintf(Outfile, "\t.byte\t0\n"); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { // Generate code depending on the function's type switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzbl\t%s, %%eax\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %%eax\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", Symtable[id].type); } cgjump(Symtable[id].endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(int id) { int r = alloc_register(); if (Symtable[id].class == C_GLOBAL) fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", Symtable[id].name, reglist[r]); else fprintf(Outfile, "\tleaq\t%d(%%rbp), %s\n", Symtable[id].posn, reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tmovzbq\t(%s), %s\n", reglist[r], reglist[r]); break; case P_INTPTR: fprintf(Outfile, "\tmovslq\t(%s), %s\n", reglist[r], reglist[r]); break; case P_LONGPTR: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, (%s)\n", breglist[r1], reglist[r2]); break; case P_INT: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 28_Runtime_Flags/cg_arm.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for ARMv6 on Raspberry Pi // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. static int freereg[4]; static char *reglist[4] = { "r4", "r5", "r6", "r7" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. 
static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // We have to store large integer literal values in memory. // Keep a list of them which will be output in the postamble #define MAXINTS 1024 int Intlist[MAXINTS]; static int Intslot = 0; // Determine the offset of a large integer // literal from the .L3 label. If the integer // isn't in the list, add it. static void set_int_offset(int val) { int offset = -1; // See if it is already there for (int i = 0; i < Intslot; i++) { if (Intlist[i] == val) { offset = 4 * i; break; } } // Not in the list, so add it if (offset == -1) { offset = 4 * Intslot; if (Intslot == MAXINTS) fatal("Out of int slots in set_int_offset()"); Intlist[Intslot++] = val; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L3+%d\n", offset); } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Print out the assembly postamble void cgpostamble() { // Print out the global variables fprintf(Outfile, ".L2:\n"); for (int i = 0; i < Globs; i++) { if (Symtable[i].stype == S_VARIABLE) fprintf(Outfile, "\t.word %s\n", Symtable[i].name); } // Print out the integer literals fprintf(Outfile, ".L3:\n"); for (int i = 0; i < Intslot; i++) { fprintf(Outfile, "\t.word %d\n", Intlist[i]); } } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, \%%function\n" "%s:\n" "\tpush\t{fp, lr}\n" "\tadd\tfp, sp, #4\n" "\tsub\tsp, sp, #8\n" "\tstr\tr0, [fp, #-8]\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { 
cglabel(Symtable[id].endlabel); fputs("\tsub\tsp, fp, #4\n" "\tpop\t{fp, pc}\n" "\t.align\t2\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); // If the literal value is small, do it with one instruction if (value <= 1000) fprintf(Outfile, "\tmov\t%s, #%d\n", reglist[r], value); else { set_int_offset(value); fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); } return (r); } // Determine the offset of a variable from the .L2 // label. Yes, this is inefficient code. static void set_var_offset(int id) { int offset = 0; // Walk the symbol table up to id. // Find S_VARIABLEs and add on 4 until // we get to our variable for (int i = 0; i < id; i++) { if (Symtable[i].stype == S_VARIABLE) offset += 4; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L2+%d\n", offset); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tldrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s, %s\n", reglist[r1], reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register 
with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\tmul\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { // To do a divide: r0 holds the dividend, r1 holds the divisor. // The quotient is in r0. fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r1]); fprintf(Outfile, "\tmov\tr1, %s\n", reglist[r2]); fprintf(Outfile, "\tbl\t__aeabi_idiv\n"); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r1]); free_register(r2); return (r1); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]); fprintf(Outfile, "\tbl\t%s\n", Symtable[id].name); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r]); return (r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tlsl\t%s, %s, #%d\n", reglist[r], reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tstr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgstorglob:", Symtable[id].type); } return (r); } // Array of type sizes in P_XXX order. // 0 means no size. static int psize[] = { 0, 0, 1, 4, 4, 4, 4, 4 }; // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { // Check the type is valid if (type < P_NONE || type > P_LONGPTR) fatal("Bad type in cgprimsize()"); return (psize[type]); } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Symtable[id].type); fprintf(Outfile, "\t.data\n" "\t.globl\t%s\n", Symtable[id].name); switch (typesize) { case 1: fprintf(Outfile, "%s:\t.byte\t0\n", Symtable[id].name); break; case 4: fprintf(Outfile, "%s:\t.long\t0\n", Symtable[id].name); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "moveq", "movne", "movlt", "movgt", "movle", "movge" }; // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "movne", "moveq", "movge", "movle", "movgt", "movlt" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #1\n", cmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #0\n", invcmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\tuxtb\t%s, %s\n", reglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tb\tL%d\n", l); } // List of inverted branch instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *brlist[] = { "bne", "beq", "bge", "ble", "bgt", "blt" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", brlist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[reg]); cgjump(Symtable[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. Return a new register int cgaddress(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); fprintf(Outfile, "\tmov\t%s, r3\n", reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tldrb\t%s, [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [%s]\n", reglist[r1], reglist[r2]); break; case P_INT: case P_LONG: fprintf(Outfile, "\tstr\t%s, [%s]\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 28_Runtime_Flags/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { 
no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\tsection .text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\tsection .data\n", Outfile); currSeg = data_seg; } } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. // We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "r10", "r11", "r12", "r13", "r9", "r8", "rcx", "rdx", "rsi", "rdi" }; static char *breglist[] = { "r10b", "r11b", "r12b", "r13b", "r9b", "r8b", "cl", "dl", "sil", "dil" }; static char *dreglist[] = { "r10d", "r11d", "r12d", "r13d", "r9d", "r8d", "ecx", "edx", "esi", "edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\textern\tprintint\n", Outfile); fputs("\textern\tprintchar\n", Outfile); fputs("\textern\topen\n", Outfile); fputs("\textern\tclose\n", Outfile); fputs("\textern\tread\n", Outfile); fputs("\textern\twrite\n", Outfile); fputs("\textern\tprintf\n", Outfile); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; int i; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the rsp and rbp fprintf(Outfile, "\tglobal\t%s\n" "%s:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n", name, name); // Copy any in-register parameters to the stack // Stop after no more than six parameter registers for (i = NSYMBOLS - 1; i > Locls; i--) { if (Symtable[i].class != C_PARAM) break; if (i < NSYMBOLS - 6) break; Symtable[i].posn = newlocaloffset(Symtable[i].type); cgstorlocal(paramReg--, i); } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. 
for (; i > Locls; i--) { if (Symtable[i].class == C_PARAM) { Symtable[i].posn = paramOffset; paramOffset += 8; } else { Symtable[i].posn = newlocaloffset(Symtable[i].type); } } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\tadd\trsp, %d\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Symtable[id].endlabel); fprintf(Outfile, "\tadd\trsp, %d\n", stackOffset); fputs("\tpop rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. 
int cgloadglob(int id, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it switch (Symtable[id].type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", Symtable[id].name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", Symtable[id].name); fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], Symtable[id].name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", Symtable[id].name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", Symtable[id].name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword [%s]\n", Symtable[id].name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tdword [%s]\n", Symtable[id].name); fprintf(Outfile, "\tmovsx\t%s, word [%s]\n", dreglist[r], Symtable[id].name); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword [%s]\n", Symtable[id].name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword [%s]\n", Symtable[id].name); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword [%s]\n", Symtable[id].name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", Symtable[id].name); fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], Symtable[id].name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword [%s]\n", Symtable[id].name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", Symtable[id].name); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. 
int cgloadlocal(int id, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it switch (Symtable[id].type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", Symtable[id].posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", Symtable[id].posn); fprintf(Outfile, "\tmovzx\t%s, byte [rbp+%d]\n", reglist[r], Symtable[id].posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", Symtable[id].posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", Symtable[id].posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", Symtable[id].posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", Symtable[id].posn); fprintf(Outfile, "\tmovsx\t%s, word [rbp+%d]\n", reglist[r], Symtable[id].posn); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", Symtable[id].posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", Symtable[id].posn); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", Symtable[id].posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", Symtable[id].posn); fprintf(Outfile, "\tmov\t%s, [rbp+%d]\n", reglist[r], Symtable[id].posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", Symtable[id].posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", Symtable[id].posn); break; default: fatald("Bad type in cgloadlocal:", Symtable[id].type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int id) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, L%d\n", reglist[r], id); return (r); } // Add two registers together and return // the number of the register with the result 
int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tand\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\tor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshl\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshr\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tneg\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnot\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", 
breglist[r]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r], breglist[r]); return (r); } // Convert an integer value to a boolean value. Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, byte %s\n", reglist[r], breglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(int id, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s\n", Symtable[id].name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\tadd\trsp, %d\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmov\t%s, rax\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpush\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmov\t%s, %s\n", reglist[FIRSTPARAMREG - argposn + 1], reglist[r]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsal\t%s, %d\n", reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], %s\n", Symtable[id].name, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", Symtable[id].name, dreglist[r]); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tmov\t[%s], %s\n", Symtable[id].name, reglist[r]); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, int id) { switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tmov\tbyte\t[rbp+%d], %s\n", Symtable[id].posn, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\tdword\t[rbp+%d], %s\n", Symtable[id].posn, dreglist[r]); break; case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tmov\tqword\t[rbp+%d], %s\n", Symtable[id].posn, reglist[r]); break; default: fatald("Bad type in cgstorlocal:", Symtable[id].type); } return (r); } // Array of type sizes in P_XXX order. // 0 means no size. static int psize[] = { 0, 0, 1, 4, 8, 8, 8, 8, 8 }; // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { // Check the type is valid if (type < P_NONE || type > P_LONGPTR) fatal("Bad type in cgprimsize()"); return (psize[type]); } // Generate a global symbol but not functions void cgglobsym(int id) { int typesize; if (Symtable[id].stype == S_FUNCTION) return; // Get the size of the type typesize = cgprimsize(Symtable[id].type); // Generate the global identity and the label cgdataseg(); fprintf(Outfile, "\tsection\t.data\n" "\tglobal\t%s\n", Symtable[id].name); fprintf(Outfile, "%s:", Symtable[id].name); // Generate the space // original version for (int i = 0; i < Symtable[id].size; i++) { switch(typesize) { case 1: fprintf(Outfile, "\tdb\t0\n"); break; case 4: fprintf(Outfile, "\tdd\t0\n"); break; case 8: fprintf(Outfile, "\tdq\t0\n"); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } /* compact version using times instead of loop switch(typesize) { case 1: fprintf(Outfile, "\ttimes\t%d\tdb\t0\n", Symtable[id].size); break; case 4: fprintf(Outfile, "\ttimes\t%d\tdd\t0\n", Symtable[id].size); break; case 8: fprintf(Outfile, "\ttimes\t%d\tdq\t0\n", Symtable[id].size); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } */ } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\tdb\t%d\n", *cptr); } fprintf(Outfile, "\tdb\t0\n"); /* put string in readable format on a single line // probably went overboard with error checking int comma = 0, quote = 0, start = 1; fprintf(Outfile, "\tdb\t"); for (cptr=strvalue; *cptr; cptr++) { if ( ! 
isprint(*cptr) ) if (comma || start) { fprintf(Outfile, "%d, ", *cptr); start = 0; comma = 1; } else if (quote) { fprintf(Outfile, "\', %d, ", *cptr); comma = 1; quote = 0; } else { fprintf(Outfile, "%d, ", *cptr); comma = 1; quote = 0; } else if (start || comma) { fprintf(Outfile, "\'%c", *cptr); start = comma = 0; quote = 1; } else { fprintf(Outfile, "%c", *cptr); comma = 0; quote = 1; } } if (comma || start) fprintf(Outfile, "0\n"); else fprintf(Outfile, "\', 0\n"); */ } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { // Generate code depending on the function's type switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzx\teax, %s\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmov\teax, %s\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", Symtable[id].type); } cgjump(Symtable[id].endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(int id) { int r = alloc_register(); if (Symtable[id].class == C_GLOBAL) fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r], Symtable[id].name); else fprintf(Outfile, "\tlea\t%s, [rbp+%d]\n", reglist[r], Symtable[id].posn); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: fprintf(Outfile, "\tmovsx\t%s, dword [%s]\n", reglist[r], reglist[r]); break; case P_LONGPTR: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], byte %s\n", reglist[r2], breglist[r1]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; case P_LONG: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 28_Runtime_Flags/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Putback; // Character put back by scanner extern_ int Functionid; // Symbol id of the current function extern_ int Globs; // Position of next free global symbol slot extern_ int Locls; // Position of next free local symbol slot extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ char *Outfilename; // Name of file we opened as Outfile extern_ struct token Token; // Last token scanned extern_ char Text[TEXTLEN + 1]; // Last identifier scanned extern_ struct symtable Symtable[NSYMBOLS]; // Global symbol table 
extern_ int O_dumpAST; // If true, dump the AST trees extern_ int O_keepasm; // If true, keep any assembly files extern_ int O_assemble; // If true, assemble the assembly files extern_ int O_dolink; // If true, link the object files extern_ int O_verbose; // If true, print info on compilation stages ================================================ FILE: 28_Runtime_Flags/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 // Parse the current token and return // a primitive type enum value. Also // scan in the next token int parse_type(void) { int type; switch (Token.token) { case T_VOID: type = P_VOID; break; case T_CHAR: type = P_CHAR; break; case T_INT: type = P_INT; break; case T_LONG: type = P_LONG; break; default: fatald("Illegal type, token", Token.token); } // Scan in one or more further '*' tokens // and determine the correct pointer type while (1) { scan(&Token); if (Token.token != T_STAR) break; type = pointer_to(type); } // We leave with the next token already scanned return (type); } // variable_declaration: type identifier ';' // | type identifier '[' INTLIT ']' ';' // ; // // Parse the declaration of a scalar variable or an array // with a given size. // The identifier has been scanned & we have the type. // class is the variable's class void var_declaration(int type, int class) { // Text now has the identifier's name. // If the next token is a '[' if (Token.token == T_LBRACKET) { // Skip past the '[' scan(&Token); // Check we have an array size if (Token.token == T_INTLIT) { // Add this as a known array and generate its space in assembly. 
// We treat the array as a pointer to its elements' type if (class == C_LOCAL) { fatal("For now, declaration of local arrays is not implemented"); } else { addglob(Text, pointer_to(type), S_ARRAY, class, 0, Token.intvalue); } } // Ensure we have a following ']' scan(&Token); match(T_RBRACKET, "]"); } else { // Add this as a known scalar // and generate its space in assembly if (class == C_LOCAL) { if (addlocl(Text, type, S_VARIABLE, class, 1) == -1) fatals("Duplicate local variable declaration", Text); } else { addglob(Text, type, S_VARIABLE, class, 0, 1); } } } // param_declaration: // | variable_declaration // | variable_declaration ',' param_declaration // // Parse the parameters in parentheses after the function name. // Add them as symbols to the symbol table and return the number // of parameters. If id is not -1, there is an existing function // prototype, and the function has this symbol slot number. static int param_declaration(int id) { int type, param_id; int orig_paramcnt; int paramcnt = 0; // Add 1 to id so that it's either zero (no prototype), or // it's the position of the zeroth existing parameter in // the symbol table param_id = id + 1; // Get any existing prototype parameter count if (param_id) orig_paramcnt = Symtable[id].nelems; // Loop until the final right parentheses while (Token.token != T_RPAREN) { // Get the type and identifier // and add it to the symbol table type = parse_type(); ident(); // We have an existing prototype. // Check that this type matches the prototype. 
if (param_id) { if (type != Symtable[id].type) fatald("Type doesn't match prototype for parameter", paramcnt + 1); param_id++; } else { // Add a new parameter to the new prototype var_declaration(type, C_PARAM); } paramcnt++; // Must have a ',' or ')' at this point switch (Token.token) { case T_COMMA: scan(&Token); break; case T_RPAREN: break; default: fatald("Unexpected token in parameter list", Token.token); } } // Check that the number of parameters in this list matches // any existing prototype if ((id != -1) && (paramcnt != orig_paramcnt)) fatals("Parameter count mismatch for function", Symtable[id].name); // Return the count of parameters return (paramcnt); } // // function_declaration: type identifier '(' parameter_list ')' ; // | type identifier '(' parameter_list ')' compound_statement ; // // Parse the declaration of function. // The identifier has been scanned & we have the type. struct ASTnode *function_declaration(int type) { struct ASTnode *tree, *finalstmt; int id; int nameslot, endlabel, paramcnt; // Text has the identifier's name. If this exists and is a // function, get the id. Otherwise, set id to -1 if ((id = findsymbol(Text)) != -1) if (Symtable[id].stype != S_FUNCTION) id = -1; // If this is a new function declaration, get a // label-id for the end label, and add the function // to the symbol table, if (id == -1) { endlabel = genlabel(); nameslot = addglob(Text, type, S_FUNCTION, C_GLOBAL, endlabel, 0); } // Scan in the '(', any parameters and the ')'. // Pass in any existing function prototype symbol slot number lparen(); paramcnt = param_declaration(id); rparen(); // If this is a new function declaration, update the // function symbol entry with the number of parameters if (id == -1) Symtable[nameslot].nelems = paramcnt; // Declaration ends in a semicolon, only a prototype. if (Token.token == T_SEMI) { scan(&Token); return (NULL); } // This is not just a prototype. 
// Copy the global parameters to be local parameters if (id == -1) id = nameslot; copyfuncparams(id); // Set the Functionid global to the function's symbol-id Functionid = id; // Get the AST tree for the compound statement tree = compound_statement(); // If the function type isn't P_VOID .. if (type != P_VOID) { // Error if no statements in the function if (tree == NULL) fatal("No statements in function with non-void type"); // Check that the last AST operation in the // compound statement was a return statement finalstmt = (tree->op == A_GLUE) ? tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); } // Return an A_FUNCTION node which has the function's id // and the compound statement sub-tree return (mkastunary(A_FUNCTION, type, tree, id)); } // Parse one or more global declarations, either // variables or functions void global_declarations(void) { struct ASTnode *tree; int type; while (1) { // We have to read past the type and identifier // to see either a '(' for a function declaration // or a ',' or ';' for a variable declaration. // Text is filled in by the ident() call. 
type = parse_type(); ident(); if (Token.token == T_LPAREN) { // Parse the function declaration tree = function_declaration(type); // Only a function prototype, no code if (tree == NULL) continue; // A real function, generate the assembly code for it if (O_dumpAST) { dumpAST(tree, NOLABEL, 0); fprintf(stdout, "\n\n"); } genAST(tree, NOLABEL, 0); // Now free the symbols associated // with this function freeloclsyms(); } else { // Parse the global variable declaration // and skip past the trailing semicolon var_declaration(type, C_GLOBAL); semi(); } // Stop when we have reached EOF if (Token.token == T_EOF) break; } } ================================================ FILE: 28_Runtime_Flags/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c void reject_token(struct token *t); int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, int intvalue); struct ASTnode *mkastleaf(int op, int type, int intvalue); struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, int intvalue); void dumpAST(struct ASTnode *n, int label, int parentASTop); // gen.c int genlabel(void); int genAST(struct ASTnode *n, int reg, int parentASTop); void genpreamble(); void genpostamble(); void genfreeregs(); void genglobsym(int id); int genglobstr(char *strvalue); int genprimsize(int type); void genreturn(int reg, int id); // cg.c void cgtextseg(); void cgdataseg(); void freeall_registers(void); void cgpreamble(); void cgpostamble(); void cgfuncpreamble(int id); void cgfuncpostamble(int id); int cgloadint(int value, int type); int cgloadglob(int id, int op); int cgloadlocal(int id, int op); int cgloadglobstr(int id); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdiv(int r1, int r2); int cgshlconst(int r, int val); int cgcall(int id, int numargs); void 
cgcopyarg(int r, int argposn); int cgstorglob(int r, int id); int cgstorlocal(int r, int id); void cgglobsym(int id); void cgglobstr(int l, char *strvalue); int cgcompare_and_set(int ASTop, int r1, int r2); int cgcompare_and_jump(int ASTop, int r1, int r2, int label); void cglabel(int l); void cgjump(int l); int cgwiden(int r, int oldtype, int newtype); int cgprimsize(int type); void cgreturn(int reg, int id); int cgaddress(int id); int cgderef(int r, int type); int cgstorderef(int r1, int r2, int type); int cgnegate(int r); int cginvert(int r); int cglognot(int r); int cgboolean(int r, int op, int label); int cgand(int r1, int r2); int cgor(int r1, int r2); int cgxor(int r1, int r2); int cgshl(int r1, int r2); int cgshr(int r1, int r2); // expr.c struct ASTnode *binexpr(int ptp); // stmt.c struct ASTnode *compound_statement(void); // misc.c void match(int t, char *what); void semi(void); void lbrace(void); void rbrace(void); void lparen(void); void rparen(void); void ident(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c int findglob(char *s); int findlocl(char *s); int findsymbol(char *s); int addglob(char *name, int type, int stype, int class, int endlabel, int size); int addlocl(char *name, int type, int stype, int class, int size); void copyfuncparams(int slot); void freeloclsyms(void); void clear_symtable(void); // decl.c void var_declaration(int type, int class); struct ASTnode *function_declaration(int type); void global_declarations(void); // types.c int inttype(int type); int parse_type(void); int pointer_to(int type); int value_at(int type); struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op); ================================================ FILE: 28_Runtime_Flags/defs.h ================================================ #include #include #include #include // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 #define TEXTLEN 512 // Length 
of symbols in input #define NSYMBOLS 1024 // Number of symbol table entries // Commands and default filenames #define AOUT "a.out" #ifdef __NASM__ #define ASCMD "nasm -f elf64 -o " #define LDCMD "cc -no-pie -fno-plt -Wall -o " #else #define ASCMD "as -o " #define LDCMD "cc -o " #endif // Token types enum { T_EOF, // Binary operators T_ASSIGN, T_LOGOR, T_LOGAND, T_OR, T_XOR, T_AMPER, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, T_LSHIFT, T_RSHIFT, T_PLUS, T_MINUS, T_STAR, T_SLASH, // Other operators T_INC, T_DEC, T_INVERT, T_LOGNOT, // Type keywords T_VOID, T_CHAR, T_INT, T_LONG, // Other keywords T_IF, T_ELSE, T_WHILE, T_FOR, T_RETURN, // Structural tokens T_INTLIT, T_STRLIT, T_SEMI, T_IDENT, T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, T_LBRACKET, T_RBRACKET, T_COMMA }; // Token structure struct token { int token; // Token type, from the enum list above int intvalue; // For T_INTLIT, the integer value }; // AST node types. The first few line up // with the related tokens enum { A_ASSIGN= 1, A_LOGOR, A_LOGAND, A_OR, A_XOR, A_AND, A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_LSHIFT, A_RSHIFT, A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_INTLIT, A_STRLIT, A_IDENT, A_GLUE, A_IF, A_WHILE, A_FUNCTION, A_WIDEN, A_RETURN, A_FUNCCALL, A_DEREF, A_ADDR, A_SCALE, A_PREINC, A_PREDEC, A_POSTINC, A_POSTDEC, A_NEGATE, A_INVERT, A_LOGNOT, A_TOBOOL }; // Primitive types enum { P_NONE, P_VOID, P_CHAR, P_INT, P_LONG, P_VOIDPTR, P_CHARPTR, P_INTPTR, P_LONGPTR }; // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates int rvalue; // True if the node is an rvalue struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; union { // For A_INTLIT, the integer value int intvalue; // For A_IDENT, the symbol slot number int id; // For A_FUNCTION, the symbol slot number int size; // For A_SCALE, the size to scale by } v; // For A_FUNCCALL, the symbol slot number }; 
#define NOREG -1 // Use NOREG when the AST generation // functions have no register to return #define NOLABEL 0 // Use NOLABEL when we have no label to // pass to genAST() // Structural types enum { S_VARIABLE, S_FUNCTION, S_ARRAY }; // Storage classes enum { C_GLOBAL = 1, // Globally visible symbol C_LOCAL, // Locally visible symbol C_PARAM // Locally visible function parameter }; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol int stype; // Structural type for the symbol int class; // Storage class for the symbol int endlabel; // For S_FUNCTIONs, the end label int size; // Number of elements in the symbol int posn; // For locals, either the negative offset // from stack base pointer, or register id #define nelems posn // For functions, # of params // For structs, # of fields }; ================================================ FILE: 28_Runtime_Flags/expr.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of expressions // Copyright (c) 2019 Warren Toomey, GPL3 // expression_list: // | expression // | expression ',' expression_list // ; // Parse a list of zero or more comma-separated expressions and // return an AST composed of A_GLUE nodes with the left-hand child // being the sub-tree of previous expressions (or NULL) and the right-hand // child being the next expression. Each A_GLUE node will have size field // set to the number of expressions in the tree at this point. If no // expressions are parsed, NULL is returned static struct ASTnode *expression_list(void) { struct ASTnode *tree = NULL; struct ASTnode *child = NULL; int exprcount = 0; // Loop until the final right parentheses while (Token.token != T_RPAREN) { // Parse the next expression and increment the expression count child = binexpr(0); exprcount++; // Build an A_GLUE AST node with the previous tree as the left child // and the new expression as the right child. 
Store the expression count. tree = mkastnode(A_GLUE, P_NONE, tree, NULL, child, exprcount); // Must have a ',' or ')' at this point switch (Token.token) { case T_COMMA: scan(&Token); break; case T_RPAREN: break; default: fatald("Unexpected token in expression list", Token.token); } } // Return the tree of expressions return (tree); } // Parse a function call and return its AST static struct ASTnode *funccall(void) { struct ASTnode *tree; int id; // Check that the identifier has been defined as a function, // then make a leaf node for it. if ((id = findsymbol(Text)) == -1 || Symtable[id].stype != S_FUNCTION) { fatals("Undeclared function", Text); } // Get the '(' lparen(); // Parse the argument expression list tree = expression_list(); // XXX Check type of each argument against the function's prototype // Build the function call AST node. Store the // function's return type as this node's type. // Also record the function's symbol-id tree = mkastunary(A_FUNCCALL, Symtable[id].type, tree, id); // Get the ')' rparen(); return (tree); } // Parse the index into an array and return an AST tree for it static struct ASTnode *array_access(void) { struct ASTnode *left, *right; int id; // Check that the identifier has been defined as an array // then make a leaf node for it that points at the base if ((id = findsymbol(Text)) == -1 || Symtable[id].stype != S_ARRAY) { fatals("Undeclared array", Text); } left = mkastleaf(A_ADDR, Symtable[id].type, id); // Get the '[' scan(&Token); // Parse the following expression right = binexpr(0); // Get the ']' match(T_RBRACKET, "]"); // Ensure that this is of int type if (!inttype(right->type)) fatal("Array index is not of integer type"); // Scale the index by the size of the element's type right = modify_type(right, left->type, A_ADD); // Return an AST tree where the array's base has the offset // added to it, and dereference the element. Still an lvalue // at this point. 
left = mkastnode(A_ADD, Symtable[id].type, left, NULL, right, 0); left = mkastunary(A_DEREF, value_at(left->type), left, 0); return (left); } // Parse a postfix expression and return // an AST node representing it. The // identifier is already in Text. static struct ASTnode *postfix(void) { struct ASTnode *n; int id; // Scan in the next token to see if we have a postfix expression scan(&Token); // Function call if (Token.token == T_LPAREN) return (funccall()); // An array reference if (Token.token == T_LBRACKET) return (array_access()); // A variable. Check that the variable exists. id = findsymbol(Text); if (id == -1 || Symtable[id].stype != S_VARIABLE) fatals("Unknown variable", Text); switch (Token.token) { // Post-increment: skip over the token case T_INC: scan(&Token); n = mkastleaf(A_POSTINC, Symtable[id].type, id); break; // Post-decrement: skip over the token case T_DEC: scan(&Token); n = mkastleaf(A_POSTDEC, Symtable[id].type, id); break; // Just a variable reference default: n = mkastleaf(A_IDENT, Symtable[id].type, id); } return (n); } // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; int id; switch (Token.token) { case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. // Make it a P_CHAR if it's within the P_CHAR range if ((Token.intvalue) >= 0 && (Token.intvalue < 256)) n = mkastleaf(A_INTLIT, P_CHAR, Token.intvalue); else n = mkastleaf(A_INTLIT, P_INT, Token.intvalue); break; case T_STRLIT: // For a STRLIT token, generate the assembly for it. // Then make a leaf AST node for it. id is the string's label. id = genglobstr(Text); n = mkastleaf(A_STRLIT, P_CHARPTR, id); break; case T_IDENT: return (postfix()); case T_LPAREN: // Beginning of a parenthesised expression, skip the '('. 
// Scan in the expression and the right parenthesis scan(&Token); n = binexpr(0); rparen(); return (n); default: fatald("Expecting a primary expression, got token", Token.token); } // Scan in the next token and return the leaf node scan(&Token); return (n); } // Convert a binary operator token into a binary AST operation. // We rely on a 1:1 mapping from token to AST operation static int binastop(int tokentype) { if (tokentype > T_EOF && tokentype <= T_SLASH) return (tokentype); fatald("Syntax error, token", tokentype); return (0); // Keep -Wall happy } // Return true if a token is right-associative, // false otherwise. static int rightassoc(int tokentype) { if (tokentype == T_ASSIGN) return (1); return (0); } // Operator precedence for each token. Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 20, 30, // T_EOF, T_ASSIGN, T_LOGOR, T_LOGAND 40, 50, 60, // T_OR, T_XOR, T_AMPER 70, 70, // T_EQ, T_NE 80, 80, 80, 80, // T_LT, T_GT, T_LE, T_GE 90, 90, // T_LSHIFT, T_RSHIFT 100, 100, // T_PLUS, T_MINUS 110, 110 // T_STAR, T_SLASH }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec; if (tokentype > T_SLASH) fatald("Token with no precedence in op_precedence:", tokentype); prec = OpPrec[tokentype]; if (prec == 0) fatald("Syntax error, token", tokentype); return (prec); } // prefix_expression: primary // | '*' prefix_expression // | '&' prefix_expression // | '-' prefix_expression // | '++' prefix_expression // | '--' prefix_expression // ; // Parse a prefix expression and return // a sub-tree representing it. 
struct ASTnode *prefix(void) { struct ASTnode *tree; switch (Token.token) { case T_AMPER: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Ensure that it's an identifier if (tree->op != A_IDENT) fatal("& operator must be followed by an identifier"); // Now change the operator to A_ADDR and the type to // a pointer to the original type tree->op = A_ADDR; tree->type = pointer_to(tree->type); break; case T_STAR: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's either another deref or an // identifier if (tree->op != A_IDENT && tree->op != A_DEREF) fatal("* operator must be followed by an identifier or *"); // Prepend an A_DEREF operation to the tree tree = mkastunary(A_DEREF, value_at(tree->type), tree, 0); break; case T_MINUS: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_NEGATE operation to the tree and // make the child an rvalue. Because chars are unsigned, // also widen this to int so that it's signed tree->rvalue = 1; tree = modify_type(tree, P_INT, 0); tree = mkastunary(A_NEGATE, tree->type, tree, 0); break; case T_INVERT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_INVERT operation to the tree and // make the child an rvalue. tree->rvalue = 1; tree = mkastunary(A_INVERT, tree->type, tree, 0); break; case T_LOGNOT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_LOGNOT operation to the tree and // make the child an rvalue. 
tree->rvalue = 1; tree = mkastunary(A_LOGNOT, tree->type, tree, 0); break; case T_INC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("++ operator must be followed by an identifier"); // Prepend an A_PREINC operation to the tree tree = mkastunary(A_PREINC, tree->type, tree, 0); break; case T_DEC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("-- operator must be followed by an identifier"); // Prepend an A_PREDEC operation to the tree tree = mkastunary(A_PREDEC, tree->type, tree, 0); break; default: tree = primary(); } return (tree); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; struct ASTnode *ltemp, *rtemp; int ASTop; int tokentype; // Get the tree on the left. // Fetch the next token at the same time. 
left = prefix(); // If we hit one of several terminating tokens, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA) { left->rvalue = 1; return (left); } // While the precedence of this token is more than that of the // previous token precedence, or it's right associative and // equal to the previous token's precedence while ((op_precedence(tokentype) > ptp) || (rightassoc(tokentype) && op_precedence(tokentype) == ptp)) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Determine the operation to be performed on the sub-trees ASTop = binastop(tokentype); if (ASTop == A_ASSIGN) { // Assignment // Make the right tree into an rvalue right->rvalue = 1; // Ensure the right's type matches the left right = modify_type(right, left->type, 0); if (right == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree. However, switch // left and right around, so that the right expression's // code will be generated before the left expression ltemp = left; left = right; right = ltemp; } else { // We are not doing an assignment, so both trees should be rvalues // Convert both trees into rvalue if they are lvalue trees left->rvalue = 1; right->rvalue = 1; // Ensure the two types are compatible by trying // to modify each tree to match the other's type. ltemp = modify_type(left, right->type, ASTop); rtemp = modify_type(right, left->type, ASTop); if (ltemp == NULL && rtemp == NULL) fatal("Incompatible types in binary expression"); if (ltemp != NULL) left = ltemp; if (rtemp != NULL) right = rtemp; } // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. left = mkastnode(binastop(tokentype), left->type, left, NULL, right, 0); // Update the details of the current token. 
// If we hit a terminating token, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA) { left->rvalue = 1; return (left); } } // Return the tree we have when the precedence // is the same or lower left->rvalue = 1; return (left); } ================================================ FILE: 28_Runtime_Flags/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number int genlabel(void) { static int id = 1; return (id++); } // Generate the code for an IF statement // and an optional ELSE clause static int genIF(struct ASTnode *n) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. // When there is no ELSE clause, Lfalse _is_ // the ending label! Lfalse = genlabel(); if (n->right) Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. genAST(n->left, Lfalse, n->op); genfreeregs(); // Generate the true compound statement genAST(n->mid, NOLABEL, n->op); genfreeregs(); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOLABEL, n->op); genfreeregs(); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = genlabel(); Lend = genlabel(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. 
genAST(n->left, Lend, n->op); genfreeregs(); // Generate the compound statement for the body genAST(n->right, NOLABEL, n->op); genfreeregs(); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Generate the code to copy the arguments of a // function call to its parameters, then call the // function itself. Return the register that holds // the function's return value. static int gen_funccall(struct ASTnode *n) { struct ASTnode *gluetree = n->left; int reg; int numargs = 0; // If there is a list of arguments, walk this list // from the last argument (right-hand child) to the // first while (gluetree) { // Calculate the expression's value reg = genAST(gluetree->right, NOLABEL, gluetree->op); // Copy this into the n'th function parameter: size is 1, 2, 3, ... cgcopyarg(reg, gluetree->v.size); // Keep the first (highest) number of arguments if (numargs == 0) numargs = gluetree->v.size; genfreeregs(); gluetree = gluetree->left; } // Call the function, clean up the stack (based on numargs), // and return its result return (cgcall(n->v.id, numargs)); } // Given an AST, an optional label, and the AST op // of the parent, generate assembly code recursively. // Return the register id with the tree's final value. 
int genAST(struct ASTnode *n, int label, int parentASTop) { int leftreg, rightreg; // We have some specific AST node handling at the top // so that we don't evaluate the child sub-trees immediately switch (n->op) { case A_IF: return (genIF(n)); case A_WHILE: return (genWHILE(n)); case A_FUNCCALL: return (gen_funccall(n)); case A_GLUE: // Do each child statement, and free the // registers after each child genAST(n->left, NOLABEL, n->op); genfreeregs(); genAST(n->right, NOLABEL, n->op); genfreeregs(); return (NOREG); case A_FUNCTION: // Generate the function's preamble before the code // in the child sub-tree cgfuncpreamble(n->v.id); genAST(n->left, NOLABEL, n->op); cgfuncpostamble(n->v.id); return (NOREG); } // General AST node handling below // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, NOLABEL, n->op); if (n->right) rightreg = genAST(n->right, NOLABEL, n->op); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdiv(leftreg, rightreg)); case A_AND: return (cgand(leftreg, rightreg)); case A_OR: return (cgor(leftreg, rightreg)); case A_XOR: return (cgxor(leftreg, rightreg)); case A_LSHIFT: return (cgshl(leftreg, rightreg)); case A_RSHIFT: return (cgshr(leftreg, rightreg)); case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, compare registers // and set one to 1 or 0 based on the comparison. 
if (parentASTop == A_IF || parentASTop == A_WHILE) return (cgcompare_and_jump(n->op, leftreg, rightreg, label)); else return (cgcompare_and_set(n->op, leftreg, rightreg)); case A_INTLIT: return (cgloadint(n->v.intvalue, n->type)); case A_STRLIT: return (cgloadglobstr(n->v.id)); case A_IDENT: // Load our value if we are an rvalue // or we are being dereferenced if (n->rvalue || parentASTop == A_DEREF) { if (Symtable[n->v.id].class == C_GLOBAL) { return (cgloadglob(n->v.id, n->op)); } else { return (cgloadlocal(n->v.id, n->op)); } } else return (NOREG); case A_ASSIGN: // Are we assigning to an identifier or through a pointer? switch (n->right->op) { case A_IDENT: if (Symtable[n->right->v.id].class == C_GLOBAL) return (cgstorglob(leftreg, n->right->v.id)); else return (cgstorlocal(leftreg, n->right->v.id)); case A_DEREF: return (cgstorderef(leftreg, rightreg, n->right->type)); default: fatald("Can't A_ASSIGN in genAST(), op", n->op); } case A_WIDEN: // Widen the child's type to the parent's type return (cgwiden(leftreg, n->left->type, n->type)); case A_RETURN: cgreturn(leftreg, Functionid); return (NOREG); case A_ADDR: return (cgaddress(n->v.id)); case A_DEREF: // If we are an rvalue, dereference to get the value we point at, // otherwise leave it for A_ASSIGN to store through the pointer if (n->rvalue) return (cgderef(leftreg, n->left->type)); else return (leftreg); case A_SCALE: // Small optimisation: use shift if the // scale value is a known power of two switch (n->v.size) { case 2: return (cgshlconst(leftreg, 1)); case 4: return (cgshlconst(leftreg, 2)); case 8: return (cgshlconst(leftreg, 3)); default: // Load a register with the size and // multiply the leftreg by this size rightreg = cgloadint(n->v.size, P_INT); return (cgmul(leftreg, rightreg)); } case A_POSTINC: case A_POSTDEC: // Load and decrement the variable's value into a register // and post increment/decrement it if (Symtable[n->v.id].class == C_GLOBAL) return (cgloadglob(n->v.id, n->op)); else return 
(cgloadlocal(n->v.id, n->op)); case A_PREINC: case A_PREDEC: // Load and decrement the variable's value into a register // and pre increment/decrement it if (Symtable[n->left->v.id].class == C_GLOBAL) return (cgloadglob(n->left->v.id, n->op)); else return (cgloadlocal(n->left->v.id, n->op)); case A_NEGATE: return (cgnegate(leftreg)); case A_INVERT: return (cginvert(leftreg)); case A_LOGNOT: return (cglognot(leftreg)); case A_TOBOOL: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, set the register // to 0 or 1 based on it's zeroeness or non-zeroeness return (cgboolean(leftreg, parentASTop, label)); default: fatald("Unknown AST operator", n->op); } return (NOREG); // Keep -Wall happy } void genpreamble() { cgpreamble(); } void genpostamble() { cgpostamble(); } void genfreeregs() { freeall_registers(); } void genglobsym(int id) { cgglobsym(id); } int genglobstr(char *strvalue) { int l = genlabel(); cgglobstr(l, strvalue); return (l); } int genprimsize(int type) { return (cgprimsize(type)); } ================================================ FILE: 28_Runtime_Flags/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Given a string with a '.' and at least a 1-character suffix // after the '.', change the suffix to be the given character. // Return the new string or NULL if the original string could // not be modified char *alter_suffix(char *str, char suffix) { char *posn; char *newstr; // Clone the string if ((newstr = strdup(str)) == NULL) return (NULL); // Find the '.' 
if ((posn = strrchr(newstr, '.')) == NULL) return (NULL); // Ensure there is a suffix posn++; if (*posn == '\0') return (NULL); // Change the suffix and NUL-terminate the string *posn++ = suffix; *posn = '\0'; return (newstr); } // Given an input filename, compile that file // down to assembly code. Return the new file's name static char *do_compile(char *filename) { Outfilename = alter_suffix(filename, 's'); if (Outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .c on the end\n", filename); exit(1); } // Open up the input file if ((Infile = fopen(filename, "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", filename, strerror(errno)); exit(1); } // Create the output file if ((Outfile = fopen(Outfilename, "w")) == NULL) { fprintf(stderr, "Unable to create %s: %s\n", Outfilename, strerror(errno)); exit(1); } Line = 1; // Reset the scanner Putback = '\n'; clear_symtable(); // Clear the symbol table if (O_verbose) printf("compiling %s\n", filename); scan(&Token); // Get the first token from the input genpreamble(); // Output the preamble global_declarations(); // Parse the global declarations genpostamble(); // Output the postamble fclose(Outfile); // Close the output file return (Outfilename); } // Given an input filename, assemble that file // down to object code. Return the object filename char *do_assemble(char *filename) { char cmd[TEXTLEN]; int err; char *outfilename = alter_suffix(filename, 'o'); if (outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .s on the end\n", filename); exit(1); } // Build the assembly command and run it snprintf(cmd, TEXTLEN, "%s %s %s", ASCMD, outfilename, filename); if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Assembly of %s failed\n", filename); exit(1); } return (outfilename); } // Given a list of object files and an output filename, // link all of the object filenames together. 
void do_link(char *outfilename, char *objlist[]) { int cnt, size = TEXTLEN; char cmd[TEXTLEN], *cptr; int err; // Start with the linker command and the output file cptr = cmd; cnt = snprintf(cptr, size, "%s %s ", LDCMD, outfilename); cptr += cnt; size -= cnt; // Now append each object file while (*objlist != NULL) { cnt = snprintf(cptr, size, "%s ", *objlist); cptr += cnt; size -= cnt; objlist++; } if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Linking failed\n"); exit(1); } } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s [-vcST] [-o outfile] file [file ...]\n", prog); fprintf(stderr, " -v give verbose output of the compilation stages\n"); fprintf(stderr, " -c generate object files but don't link them\n"); fprintf(stderr, " -S generate assembly files but don't link them\n"); fprintf(stderr, " -T dump the AST trees for each input file\n"); fprintf(stderr, " -o outfile, produce the outfile executable file\n"); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. 
#define MAXOBJ 100 int main(int argc, char *argv[]) { char *outfilename = AOUT; char *asmfile, *objfile; char *objlist[MAXOBJ]; int i, objcnt = 0; // Initialise our variables O_dumpAST = 0; O_keepasm = 0; O_assemble = 0; O_verbose = 0; O_dolink = 1; // Scan for command-line options for (i = 1; i < argc; i++) { // No leading '-', stop scanning for options if (*argv[i] != '-') break; // For each option in this argument for (int j = 1; (*argv[i] == '-') && argv[i][j]; j++) { switch (argv[i][j]) { case 'o': outfilename = argv[++i]; // Save & skip to next argument break; case 'T': O_dumpAST = 1; break; case 'c': O_assemble = 1; O_keepasm = 0; O_dolink = 0; break; case 'S': O_keepasm = 1; O_assemble = 0; O_dolink = 0; break; case 'v': O_verbose = 1; break; default: usage(argv[0]); } } } // Ensure we have at lease one input file argument if (i >= argc) usage(argv[0]); // Work on each input file in turn while (i < argc) { asmfile = do_compile(argv[i]); // Compile the source file if (O_dolink || O_assemble) { objfile = do_assemble(asmfile); // Assemble it to object forma if (objcnt == (MAXOBJ - 2)) { fprintf(stderr, "Too many object files for the compiler to handle\n"); exit(1); } objlist[objcnt++] = objfile; // Add the object file's name objlist[objcnt] = NULL; // to the list of object files } if (!O_keepasm) // Remove the assembly file if unlink(asmfile); // we don't need to keep it i++; } // Now link all the object files together if (O_dolink) { do_link(outfilename, objlist); // If we don't need to keep the object // files, then remove them if (!O_assemble) { for (i = 0; objlist[i] != NULL; i++) unlink(objlist[i]); } } return (0); } ================================================ FILE: 28_Runtime_Flags/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" #include // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current token is t, // and fetch the next token. 
Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d\n", s, Line); fclose(Outfile); unlink(Outfilename); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d\n", s1, s2, Line); fclose(Outfile); unlink(Outfilename); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d\n", s, d, Line); fclose(Outfile); unlink(Outfilename); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d\n", s, c, Line); fclose(Outfile); unlink(Outfilename); exit(1); } ================================================ FILE: 28_Runtime_Flags/scan.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { char *p; p = strchr(s, c); return (p ? p - s : -1); } // Get the next character from the input file. 
static int next(void) { int c; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return (c); } c = fgetc(Infile); // Read from input file if ('\n' == c) Line++; // Increment line count return (c); } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. static int skip(void) { int c; c = next(); while (' ' == c || '\t' == c || '\n' == c || '\r' == c || '\f' == c) { c = next(); } return (c); } // Return the next character from a character // or string literal static int scanch(void) { int c; // Get the next input character and interpret // metacharacters that start with a backslash c = next(); if (c == '\\') { switch (c = next()) { case 'a': return '\a'; case 'b': return '\b'; case 'f': return '\f'; case 'n': return '\n'; case 'r': return '\r'; case 't': return '\t'; case 'v': return '\v'; case '\\': return '\\'; case '"': return '"'; case '\'': return '\''; default: fatalc("unknown escape sequence", c); } } return (c); // Just an ordinary old character! } // Scan and return an integer literal // value from the input file. static int scanint(int c) { int k, val = 0; // Convert each character into an int value while ((k = chrpos("0123456789", c)) >= 0) { val = val * 10 + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan in a string literal from the input file, // and store it in buf[]. Return the length of // the string. 
static int scanstr(char *buf) { int i, c; // Loop while we have enough buffer space for (i = 0; i < TEXTLEN - 1; i++) { // Get the next char and append to buf // Return when we hit the ending double quote if ((c = scanch()) == '"') { buf[i] = 0; return (i); } buf[i] = c; } // Ran out of buf[] space fatal("String literal too long"); return (0); } // Scan an identifier from the input file and // store it in buf[]. Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { fatal("Identifier too long"); } else if (i < lim - 1) { buf[i++] = c; } c = next(); } // We hit a non-valid character, put it back. // NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. static int keyword(char *s) { switch (*s) { case 'c': if (!strcmp(s, "char")) return (T_CHAR); break; case 'e': if (!strcmp(s, "else")) return (T_ELSE); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'l': if (!strcmp(s, "long")) return (T_LONG); break; case 'r': if (!strcmp(s, "return")) return (T_RETURN); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; case 'v': if (!strcmp(s, "void")) return (T_VOID); break; } return (0); } // A pointer to a rejected token static struct token *Rejtoken = NULL; // Reject the token that we just scanned void reject_token(struct token *t) { if (Rejtoken != NULL) fatal("Can't reject token twice"); Rejtoken = t; } // Scan and return the next token found in the input. 
// Return 1 if token valid, 0 if no tokens left. int scan(struct token *t) { int c, tokentype; // If we have any rejected token, return it if (Rejtoken != NULL) { t = Rejtoken; Rejtoken = NULL; return (1); } // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': if ((c = next()) == '+') { t->token = T_INC; } else { putback(c); t->token = T_PLUS; } break; case '-': if ((c = next()) == '-') { t->token = T_DEC; } else { putback(c); t->token = T_MINUS; } break; case '*': t->token = T_STAR; break; case '/': t->token = T_SLASH; break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case '[': t->token = T_LBRACKET; break; case ']': t->token = T_RBRACKET; break; case '~': t->token = T_INVERT; break; case '^': t->token = T_XOR; break; case ',': t->token = T_COMMA; break; case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { putback(c); t->token = T_LOGNOT; } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else if (c == '<') { t->token = T_LSHIFT; } else { putback(c); t->token = T_LT; } break; case '>': if ((c = next()) == '=') { t->token = T_GE; } else if (c == '>') { t->token = T_RSHIFT; } else { putback(c); t->token = T_GT; } break; case '&': if ((c = next()) == '&') { t->token = T_LOGAND; } else { putback(c); t->token = T_AMPER; } break; case '|': if ((c = next()) == '|') { t->token = T_LOGOR; } else { putback(c); t->token = T_OR; } break; case '\'': // If it's a quote, scan in the // literal character value and // the trailing quote t->intvalue = scanch(); t->token = T_INTLIT; if (next() != '\'') fatal("Expected '\\'' at end of char literal"); break; case '"': // Scan in a literal string scanstr(Text); t->token = 
T_STRLIT; break; default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if ((tokentype = keyword(Text)) != 0) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token return (1); } ================================================ FILE: 28_Runtime_Flags/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // Prototypes static struct ASTnode *single_statement(void); // compound_statement: // empty, i.e. no statement // | statement // | statement statements // ; // // statement: declaration // | expression_statement // | function_call // | if_statement // | while_statement // | for_statement // | return_statement // ; // if_statement: if_head // | if_head 'else' compound_statement // ; // // if_head: 'if' '(' true_false_expression ')' compound_statement ; // // Parse an IF statement including any // optional ELSE clause and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. 
condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, 0); rparen(); // Get the AST for the compound statement trueAST = compound_statement(); // If we have an 'else', skip it // and get the AST for the compound statement if (Token.token == T_ELSE) { scan(&Token); falseAST = compound_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, P_NONE, condAST, trueAST, falseAST, 0)); } // while_statement: 'while' '(' true_false_expression ')' compound_statement ; // // Parse a WHILE statement and return its AST static struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, 0); rparen(); // Get the AST for the compound statement bodyAST = compound_statement(); // Build and return the AST for this statement return (mkastnode(A_WHILE, P_NONE, condAST, NULL, bodyAST, 0)); } // for_statement: 'for' '(' preop_statement ';' // true_false_expression ';' // postop_statement ')' compound_statement ; // // preop_statement: statement (for now) // postop_statement: statement (for now) // // Parse a FOR statement and return its AST static struct ASTnode *for_statement(void) { struct ASTnode *condAST, *bodyAST; struct ASTnode *preopAST, *postopAST; struct ASTnode *tree; // Ensure we have 'for' '(' match(T_FOR, "for"); lparen(); // Get the pre_op statement and the ';' preopAST = single_statement(); semi(); // Get the condition and the ';'. // Force a non-comparison to be boolean // the tree's operation is a comparison. 
condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, 0); semi(); // Get the post_op statement and the ')' postopAST = single_statement(); rparen(); // Get the compound statement which is the body bodyAST = compound_statement(); // For now, all four sub-trees have to be non-NULL. // Later on, we'll change the semantics for when some are missing // Glue the compound statement and the postop tree tree = mkastnode(A_GLUE, P_NONE, bodyAST, NULL, postopAST, 0); // Make a WHILE loop with the condition and this new body tree = mkastnode(A_WHILE, P_NONE, condAST, NULL, tree, 0); // And glue the preop tree to the A_WHILE tree return (mkastnode(A_GLUE, P_NONE, preopAST, NULL, tree, 0)); } // return_statement: 'return' '(' expression ')' ; // // Parse a return statement and return its AST static struct ASTnode *return_statement(void) { struct ASTnode *tree; // Can't return a value if function returns P_VOID if (Symtable[Functionid].type == P_VOID) fatal("Can't return from a void function"); // Ensure we have 'return' '(' match(T_RETURN, "return"); lparen(); // Parse the following expression tree = binexpr(0); // Ensure this is compatible with the function's type tree = modify_type(tree, Symtable[Functionid].type, 0); if (tree == NULL) fatal("Incompatible type to return"); // Add on the A_RETURN node tree = mkastunary(A_RETURN, P_NONE, tree, 0); // Get the ')' rparen(); return (tree); } // Parse a single statement and return its AST static struct ASTnode *single_statement(void) { int type; switch (Token.token) { case T_CHAR: case T_INT: case T_LONG: // The beginning of a variable declaration. // Parse the type and get the identifier. 
// Then parse the rest of the declaration // and skip over the semicolon type = parse_type(); ident(); var_declaration(type, C_LOCAL); semi(); return (NULL); // No AST generated here case T_IF: return (if_statement()); case T_WHILE: return (while_statement()); case T_FOR: return (for_statement()); case T_RETURN: return (return_statement()); default: // For now, see if this is an expression. // This catches assignment statements. return (binexpr(0)); } return (NULL); // Keep -Wall happy } // Parse a compound statement // and return its AST struct ASTnode *compound_statement(void) { struct ASTnode *left = NULL; struct ASTnode *tree; // Require a left curly bracket lbrace(); while (1) { // Parse a single statement tree = single_statement(); // Some statements must be followed by a semicolon if (tree != NULL && (tree->op == A_ASSIGN || tree->op == A_RETURN || tree->op == A_FUNCCALL)) semi(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, P_NONE, left, NULL, tree, 0); } // When we hit a right curly bracket, // skip past it and return the AST if (Token.token == T_RBRACE) { rbrace(); return (left); } } } ================================================ FILE: 28_Runtime_Flags/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 // Determine if the symbol s is in the global symbol table. // Return its slot position or -1 if not found. // Skip C_PARAM entries int findglob(char *s) { int i; for (i = 0; i < Globs; i++) { if (Symtable[i].class == C_PARAM) continue; if (*s == *Symtable[i].name && !strcmp(s, Symtable[i].name)) return (i); } return (-1); } // Get the position of a new global symbol slot, or die // if we've run out of positions. 
static int newglob(void) { int p; if ((p = Globs++) >= Locls) fatal("Too many global symbols"); return (p); } // Determine if the symbol s is in the local symbol table. // Return its slot position or -1 if not found. int findlocl(char *s) { int i; for (i = Locls + 1; i < NSYMBOLS; i++) { if (*s == *Symtable[i].name && !strcmp(s, Symtable[i].name)) return (i); } return (-1); } // Get the position of a new local symbol slot, or die // if we've run out of positions. static int newlocl(void) { int p; if ((p = Locls--) <= Globs) fatal("Too many local symbols"); return (p); } // Clear all the entries in the // local symbol table void freeloclsyms(void) { Locls = NSYMBOLS - 1; } // Update a symbol at the given slot number in the symbol table. Set up its: // + type: char, int etc. // + structural type: var, function, array etc. // + size: number of elements // + endlabel: if this is a function // + posn: Position information for local symbols static void updatesym(int slot, char *name, int type, int stype, int class, int endlabel, int size, int posn) { if (slot < 0 || slot >= NSYMBOLS) fatal("Invalid symbol slot number in updatesym()"); Symtable[slot].name = strdup(name); Symtable[slot].type = type; Symtable[slot].stype = stype; Symtable[slot].class = class; Symtable[slot].endlabel = endlabel; Symtable[slot].size = size; Symtable[slot].posn = posn; } // Add a global symbol to the symbol table. Set up its: // + type: char, int etc. // + structural type: var, function, array etc. 
// + class of the symbol // + size: number of elements // + endlabel: if this is a function // Return the slot number in the symbol table int addglob(char *name, int type, int stype, int class, int endlabel, int size) { int slot; // If this is already in the symbol table, return the existing slot if ((slot = findglob(name)) != -1) return (slot); // Otherwise get a new slot and fill it in slot = newglob(); updatesym(slot, name, type, stype, class, endlabel, size, 0); // Generate the assembly for the symbol if it's global if (class == C_GLOBAL) genglobsym(slot); // Return the slot number return (slot); } // Add a local symbol to the symbol table. Set up its: // + type: char, int etc. // + structural type: var, function, array etc. // + size: number of elements // Return the slot number in the symbol table, -1 if a duplicate entry int addlocl(char *name, int type, int stype, int class, int size) { int localslot; // If this is already in the symbol table, return an error if ((localslot = findlocl(name)) != -1) return (-1); // Otherwise get a new symbol slot and a position for this local. // Update the local symbol table entry. localslot = newlocl(); updatesym(localslot, name, type, stype, class, 0, size, 0); // Return the local symbol's slot return (localslot); } // Given a function's slot number, copy the global parameters // from its prototype to be local parameters void copyfuncparams(int slot) { int i, id = slot + 1; for (i = 0; i < Symtable[slot].nelems; i++, id++) { addlocl(Symtable[id].name, Symtable[id].type, Symtable[id].stype, Symtable[id].class, Symtable[id].size); } } // Determine if the symbol s is in the symbol table. // Return its slot position or -1 if not found. 
int findsymbol(char *s) { int slot; slot = findlocl(s); if (slot == -1) slot = findglob(s); return (slot); } // Reset the contents of the symbol table void clear_symtable(void) { Globs = 0; Locls = NSYMBOLS - 1; } ================================================ FILE: 28_Runtime_Flags/tests/err.input31.c ================================================ Expecting a primary expression, got token:15 on line 5 ================================================ FILE: 28_Runtime_Flags/tests/err.input32.c ================================================ Unknown variable:cow on line 4 ================================================ FILE: 28_Runtime_Flags/tests/err.input33.c ================================================ Incompatible type to return on line 4 ================================================ FILE: 28_Runtime_Flags/tests/err.input34.c ================================================ For now, declaration of local arrays is not implemented on line 4 ================================================ FILE: 28_Runtime_Flags/tests/err.input35.c ================================================ Duplicate local variable declaration:a on line 4 ================================================ FILE: 28_Runtime_Flags/tests/err.input36.c ================================================ Type doesn't match prototype for parameter:3 on line 4 ================================================ FILE: 28_Runtime_Flags/tests/err.input37.c ================================================ Unexpected token in parameter list:15 on line 3 ================================================ FILE: 28_Runtime_Flags/tests/err.input38.c ================================================ Type doesn't match prototype for parameter:3 on line 4 ================================================ FILE: 28_Runtime_Flags/tests/err.input39.c ================================================ No statements in function with non-void type on line 4 ================================================ FILE: 
28_Runtime_Flags/tests/err.input40.c ================================================ No return for function with non-void type on line 4 ================================================ FILE: 28_Runtime_Flags/tests/err.input41.c ================================================ Can't return from a void function on line 3 ================================================ FILE: 28_Runtime_Flags/tests/err.input42.c ================================================ Undeclared function:fred on line 3 ================================================ FILE: 28_Runtime_Flags/tests/err.input43.c ================================================ Undeclared array:b on line 3 ================================================ FILE: 28_Runtime_Flags/tests/err.input44.c ================================================ Unknown variable:z on line 3 ================================================ FILE: 28_Runtime_Flags/tests/err.input45.c ================================================ & operator must be followed by an identifier on line 3 ================================================ FILE: 28_Runtime_Flags/tests/err.input46.c ================================================ * operator must be followed by an identifier or * on line 3 ================================================ FILE: 28_Runtime_Flags/tests/err.input47.c ================================================ ++ operator must be followed by an identifier on line 3 ================================================ FILE: 28_Runtime_Flags/tests/err.input48.c ================================================ -- operator must be followed by an identifier on line 3 ================================================ FILE: 28_Runtime_Flags/tests/err.input49.c ================================================ Incompatible expression in assignment on line 6 ================================================ FILE: 28_Runtime_Flags/tests/err.input50.c ================================================ Incompatible types in binary expression 
on line 6 ================================================ FILE: 28_Runtime_Flags/tests/err.input51.c ================================================ Expected '\'' at end of char literal on line 4 ================================================ FILE: 28_Runtime_Flags/tests/err.input52.c ================================================ Unrecognised character:$ on line 5 ================================================ FILE: 28_Runtime_Flags/tests/input01.c ================================================ int printf(char *fmt); void main() { printf("%d\n", 12 * 3); printf("%d\n", 18 - 2 * 4); printf("%d\n", 1 + 2 + 9 - 5/2 + 3*5); } ================================================ FILE: 28_Runtime_Flags/tests/input02.c ================================================ int printf(char *fmt); void main() { int fred; int jim; fred= 5; jim= 12; printf("%d\n", fred + jim); } ================================================ FILE: 28_Runtime_Flags/tests/input03.c ================================================ int printf(char *fmt); void main() { int x; x= 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); } ================================================ FILE: 28_Runtime_Flags/tests/input04.c ================================================ int printf(char *fmt); void main() { int x; x= 7 < 9; printf("%d\n", x); x= 7 <= 9; printf("%d\n", x); x= 7 != 9; printf("%d\n", x); x= 7 == 7; printf("%d\n", x); x= 7 >= 7; printf("%d\n", x); x= 7 <= 7; printf("%d\n", x); x= 9 > 7; printf("%d\n", x); x= 9 >= 7; printf("%d\n", x); x= 9 != 7; printf("%d\n", x); } ================================================ FILE: 28_Runtime_Flags/tests/input05.c ================================================ int printf(char *fmt); void main() { int i; int j; i=6; j=12; if (i < j) { printf("%d\n", i); } else { printf("%d\n", j); } } ================================================ FILE: 
28_Runtime_Flags/tests/input06.c ================================================ int printf(char *fmt); void main() { int i; i=1; while (i <= 10) { printf("%d\n", i); i= i + 1; } } ================================================ FILE: 28_Runtime_Flags/tests/input07.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 28_Runtime_Flags/tests/input08.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 28_Runtime_Flags/tests/input09.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { printf("%d\n", 2 * b - a); } } ================================================ FILE: 28_Runtime_Flags/tests/input10.c ================================================ int printf(char *fmt); void main() { int i; char j; j= 20; printf("%d\n", j); i= 10; printf("%d\n", i); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 2; j= j + 1) { printf("%d\n", j); } } ================================================ FILE: 28_Runtime_Flags/tests/input11.c ================================================ int printf(char *fmt); int main() { int i; char j; long k; i= 10; printf("%d\n", i); j= 20; printf("%d\n", j); k= 30; printf("%d\n", k); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 4; j= j + 1) { printf("%d\n", j); } for (k= 1; k <= 5; k= k + 1) { printf("%d\n", k); } return(i); printf("%d\n", 12345); return(3); } ================================================ FILE: 28_Runtime_Flags/tests/input12.c ================================================ int printf(char *fmt); int fred() { return(5); } void 
main() { int x; x= fred(2); printf("%d\n", x); } ================================================ FILE: 28_Runtime_Flags/tests/input13.c ================================================ int printf(char *fmt); int fred() { return(56); } void main() { int dummy; int result; dummy= printf("%d\n", 23); result= fred(10); dummy= printf("%d\n", result); } ================================================ FILE: 28_Runtime_Flags/tests/input14.c ================================================ int printf(char *fmt); int fred() { return(20); } int main() { int result; printf("%d\n", 10); result= fred(15); printf("%d\n", result); printf("%d\n", fred(15)+10); return(0); } ================================================ FILE: 28_Runtime_Flags/tests/input15.c ================================================ int printf(char *fmt); int main() { char a; char *b; char c; int d; int *e; int f; a= 18; printf("%d\n", a); b= &a; c= *b; printf("%d\n", c); d= 12; printf("%d\n", d); e= &d; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 28_Runtime_Flags/tests/input16.c ================================================ int printf(char *fmt); int c; int d; int *e; int f; int main() { c= 12; d=18; printf("%d\n", c); e= &c + 1; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 28_Runtime_Flags/tests/input17.c ================================================ int printf(char *fmt); int main() { char a; char *b; int d; int *e; b= &a; *b= 19; printf("%d\n", a); e= &d; *e= 12; printf("%d\n", d); return(0); } ================================================ FILE: 28_Runtime_Flags/tests/input18.c ================================================ int printf(char *fmt); int main() { int a; int b; a= b= 34; printf("%d\n", a); printf("%d\n", b); return(0); } ================================================ FILE: 28_Runtime_Flags/tests/input18a.c ================================================ int printf(char *fmt); 
int a; int *b; char c; char *d; int main() { b= &a; *b= 15; printf("%d\n", a); d= &c; *d= 16; printf("%d\n", c); return(0); } ================================================ FILE: 28_Runtime_Flags/tests/input19.c ================================================ int printf(char *fmt); int a; int b; int c; int d; int e; int main() { a= 2; b= 4; c= 3; d= 2; e= (a+b) * (c+d); printf("%d\n", e); return(0); } ================================================ FILE: 28_Runtime_Flags/tests/input20.c ================================================ int printf(char *fmt); int a; int b[25]; int main() { b[3]= 12; a= b[3]; printf("%d\n", a); return(0); } ================================================ FILE: 28_Runtime_Flags/tests/input21.c ================================================ int printf(char *fmt); char c; char *str; int main() { c= '\n'; printf("%d\n", c); for (str= "Hello world\n"; *str != 0; str= str + 1) { printf("%c", *str); } return(0); } ================================================ FILE: 28_Runtime_Flags/tests/input22.c ================================================ int printf(char *fmt); char a; char b; char c; int d; int e; int f; long g; long h; long i; int main() { b= 5; c= 7; a= b + c++; printf("%d\n", a); e= 5; f= 7; d= e + f++; printf("%d\n", d); h= 5; i= 7; g= h + i++; printf("%d\n", g); a= b-- + c; printf("%d\n", a); d= e-- + f; printf("%d\n", d); g= h-- + i; printf("%d\n", g); a= ++b + c; printf("%d\n", a); d= ++e + f; printf("%d\n", d); g= ++h + i; printf("%d\n", g); a= b * --c; printf("%d\n", a); d= e * --f; printf("%d\n", d); g= h * --i; printf("%d\n", g); return(0); } ================================================ FILE: 28_Runtime_Flags/tests/input23.c ================================================ int printf(char *fmt); char *str; int x; int main() { x= -23; printf("%d\n", x); printf("%d\n", -10 * -10); x= 1; x= ~x; printf("%d\n", x); x= 2 > 5; printf("%d\n", x); x= !x; printf("%d\n", x); x= !x; printf("%d\n", x); x= 13; if (x) { 
printf("%d\n", 13); } x= 0; if (!x) { printf("%d\n", 14); } for (str= "Hello world\n"; *str; str++) { printf("%c", *str); } return(0); } ================================================ FILE: 28_Runtime_Flags/tests/input24.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { a= 42; b= 19; printf("%d\n", a & b); printf("%d\n", a | b); printf("%d\n", a ^ b); printf("%d\n", 1 << 3); printf("%d\n", 63 >> 3); return(0); } ================================================ FILE: 28_Runtime_Flags/tests/input25.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { char z; int y; int x; x= 10; y= 20; z= 30; printf("%d\n", x); printf("%d\n", y); printf("%d\n", z); a= 5; b= 15; c= 25; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); return(0); } ================================================ FILE: 28_Runtime_Flags/tests/input26.c ================================================ int printf(char *fmt); int main(int a, char b, long c, int d, int e, int f, int g, int h) { int i; int j; int k; a= 13; printf("%d\n", a); b= 23; printf("%d\n", b); c= 34; printf("%d\n", c); d= 44; printf("%d\n", d); e= 54; printf("%d\n", e); f= 64; printf("%d\n", f); g= 74; printf("%d\n", g); h= 84; printf("%d\n", h); i= 94; printf("%d\n", i); j= 95; printf("%d\n", j); k= 96; printf("%d\n", k); return(0); } ================================================ FILE: 28_Runtime_Flags/tests/input27.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int param5(int a, int b, int c, int d, int e) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param2(int a, int b) { int c; int d; 
int e; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param0() { int a; int b; int c; int d; int e; a= 1; b= 2; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int main() { param8(1,2,3,4,5,6,7,8); param5(1,2,3,4,5); param2(1,2); param0(); return(0); } ================================================ FILE: 28_Runtime_Flags/tests/input28.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 28_Runtime_Flags/tests/input29.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h); int fred(int a, int b, int c); int main(); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 28_Runtime_Flags/tests/input30.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input30.c", 0); 
if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 28_Runtime_Flags/tests/input31.c ================================================ int printf(char *fmt); int main() { int x; x= 2 + + 3 - * / ; } ================================================ FILE: 28_Runtime_Flags/tests/input32.c ================================================ int printf(char *fmt); int main() { pizza cow llama sausage; } ================================================ FILE: 28_Runtime_Flags/tests/input33.c ================================================ int printf(char *fmt); int main() { char *z; return(z); } ================================================ FILE: 28_Runtime_Flags/tests/input34.c ================================================ int printf(char *fmt); int main() { int a[12]; return(0); } ================================================ FILE: 28_Runtime_Flags/tests/input35.c ================================================ int printf(char *fmt); int fred(int a, int b) { int a; return(a); } ================================================ FILE: 28_Runtime_Flags/tests/input36.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c); ================================================ FILE: 28_Runtime_Flags/tests/input37.c ================================================ int printf(char *fmt); int fred(int a, char b +, int z); ================================================ FILE: 28_Runtime_Flags/tests/input38.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c, int g); ================================================ FILE: 28_Runtime_Flags/tests/input39.c ================================================ int printf(char *fmt); int main() { int a; } ================================================ 
FILE: 28_Runtime_Flags/tests/input40.c ================================================ int printf(char *fmt); int main() { int a; a= 5; } ================================================ FILE: 28_Runtime_Flags/tests/input41.c ================================================ int printf(char *fmt); void fred() { return(5); } ================================================ FILE: 28_Runtime_Flags/tests/input42.c ================================================ int printf(char *fmt); int main() { fred(5); } ================================================ FILE: 28_Runtime_Flags/tests/input43.c ================================================ int printf(char *fmt); int main() { int a; a= b[4]; } ================================================ FILE: 28_Runtime_Flags/tests/input44.c ================================================ int printf(char *fmt); int main() { int a; a= z; } ================================================ FILE: 28_Runtime_Flags/tests/input45.c ================================================ int printf(char *fmt); int main() { int a; a= &5; } ================================================ FILE: 28_Runtime_Flags/tests/input46.c ================================================ int printf(char *fmt); int main() { int a; a= *5; } ================================================ FILE: 28_Runtime_Flags/tests/input47.c ================================================ int printf(char *fmt); int main() { int a; a= ++5; } ================================================ FILE: 28_Runtime_Flags/tests/input48.c ================================================ int printf(char *fmt); int main() { int a; a= --5; } ================================================ FILE: 28_Runtime_Flags/tests/input49.c ================================================ int printf(char *fmt); int main() { int x; char y; y= x; } ================================================ FILE: 28_Runtime_Flags/tests/input50.c ================================================ int printf(char 
*fmt); int main() { char *a; char *b; a= a + b; } ================================================ FILE: 28_Runtime_Flags/tests/input51.c ================================================ int printf(char *fmt); int main() { char a; a= 'fred'; } ================================================ FILE: 28_Runtime_Flags/tests/input52.c ================================================ int printf(char *fmt); int main() { int a; a= $5.00; } ================================================ FILE: 28_Runtime_Flags/tests/input53.c ================================================ int printf(char *fmt); int main() { printf("Hello world, %d\n", 23); return(0); } ================================================ FILE: 28_Runtime_Flags/tests/input54.c ================================================ int printf(char *fmt); int main() { int i; for (i=0; i < 20; i++) { printf("Hello world, %d\n", i); } return(0); } ================================================ FILE: 28_Runtime_Flags/tests/mktests ================================================
#!/bin/sh
# Make the output files for each test.
#
# For every input*c file that has neither an out.* nor an err.* file yet:
# compile it with our compiler and keep anything written to stderr as the
# expected error file. If there were no errors, build the same source with
# the system C compiler and keep that program's output as the expected
# output file.

# Build our compiler if needed
if [ ! -f ../cwj ]
then (cd ..; make)
fi

for i in input*c
do
  if [ ! -f "out.$i" ] && [ ! -f "err.$i" ]
  then
    ../cwj -o out "$i" 2> "err.$i"

    # If the err file is empty
    if [ ! -s "err.$i" ]
    then
      rm -f "err.$i"

      # Only record the output when the reference build succeeds. If it
      # fails, ./out is still the binary our own compiler produced above,
      # and its output must not be saved as the known-good result.
      if cc -o out "$i" ../lib/printint.c
      then ./out > "out.$i"
      fi
    fi
  fi
  rm -f out out.s
done
================================================ FILE: 28_Runtime_Flags/tests/out.input01.c ================================================ 36 10 25 ================================================ FILE: 28_Runtime_Flags/tests/out.input02.c ================================================ 17 ================================================ FILE: 28_Runtime_Flags/tests/out.input03.c ================================================ 1 2 3 4 5 ================================================ FILE: 28_Runtime_Flags/tests/out.input04.c ================================================ 1 1 1 1 1 1 1 1 1 ================================================ FILE: 28_Runtime_Flags/tests/out.input05.c ================================================ 6 ================================================ FILE: 28_Runtime_Flags/tests/out.input06.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 28_Runtime_Flags/tests/out.input07.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 28_Runtime_Flags/tests/out.input08.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 28_Runtime_Flags/tests/out.input09.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 28_Runtime_Flags/tests/out.input10.c ================================================ 20 10 1 2 3 4 5 253 254 255 0 1 ================================================ FILE: 28_Runtime_Flags/tests/out.input11.c ================================================ 10 20 30 1 2 3 4 5 253 254 255 0 1 2 3 1 2 3 4 5 ================================================ FILE: 28_Runtime_Flags/tests/out.input12.c
================================================ 5 ================================================ FILE: 28_Runtime_Flags/tests/out.input13.c ================================================ 23 56 ================================================ FILE: 28_Runtime_Flags/tests/out.input14.c ================================================ 10 20 30 ================================================ FILE: 28_Runtime_Flags/tests/out.input15.c ================================================ 18 18 12 12 ================================================ FILE: 28_Runtime_Flags/tests/out.input16.c ================================================ 12 18 ================================================ FILE: 28_Runtime_Flags/tests/out.input17.c ================================================ 19 12 ================================================ FILE: 28_Runtime_Flags/tests/out.input18.c ================================================ 34 34 ================================================ FILE: 28_Runtime_Flags/tests/out.input18a.c ================================================ 15 16 ================================================ FILE: 28_Runtime_Flags/tests/out.input19.c ================================================ 30 ================================================ FILE: 28_Runtime_Flags/tests/out.input20.c ================================================ 12 ================================================ FILE: 28_Runtime_Flags/tests/out.input21.c ================================================ 10 Hello world ================================================ FILE: 28_Runtime_Flags/tests/out.input22.c ================================================ 12 12 12 13 13 13 13 13 13 35 35 35 ================================================ FILE: 28_Runtime_Flags/tests/out.input23.c ================================================ -23 100 -2 0 1 0 13 14 Hello world ================================================ FILE: 28_Runtime_Flags/tests/out.input24.c 
================================================ 2 59 57 8 7 ================================================ FILE: 28_Runtime_Flags/tests/out.input25.c ================================================ 10 20 30 5 15 25 ================================================ FILE: 28_Runtime_Flags/tests/out.input26.c ================================================ 13 23 34 44 54 64 74 84 94 95 96 ================================================ FILE: 28_Runtime_Flags/tests/out.input27.c ================================================ 1 2 3 4 5 6 7 8 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 ================================================ FILE: 28_Runtime_Flags/tests/out.input28.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 28_Runtime_Flags/tests/out.input29.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 28_Runtime_Flags/tests/out.input30.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input30.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 28_Runtime_Flags/tests/out.input53.c ================================================ Hello world, 23 ================================================ FILE: 28_Runtime_Flags/tests/out.input54.c ================================================ Hello world, 0 Hello world, 1 Hello world, 2 Hello world, 3 Hello world, 4 Hello world, 5 Hello world, 6 Hello world, 7 Hello world, 8 Hello world, 9 Hello world, 10 Hello world, 11 Hello world, 12 Hello world, 13 Hello world, 14 Hello world, 15 Hello world, 16 Hello world, 17 Hello world, 18 
Hello world, 19 ================================================ FILE: 28_Runtime_Flags/tests/runtests ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! -f ../cwj ] then (cd ..; make) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" # Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../cwj -o out $i ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../cwj $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.s "trial.$i" done ================================================ FILE: 28_Runtime_Flags/tests/runtestsn ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! -f ../compn ] then (cd ..; make) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" 
# Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../compn -o out $i ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../compn $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.o out.s "trial.$i" done ================================================ FILE: 28_Runtime_Flags/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->type = type; n->left = left; n->mid = mid; n->right = right; n->v.intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, int intvalue) { return (mkastnode(op, type, NULL, NULL, NULL, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, int intvalue) { return (mkastnode(op, type, left, NULL, NULL, intvalue)); } // Generate and return a new label number // just for AST dumping 
purposes static int gendumplabel(void) { static int id = 1; return (id++); } // Given an AST tree, print it out and follow the // traversal of the tree that genAST() follows void dumpAST(struct ASTnode *n, int label, int level) { int Lfalse, Lstart, Lend; switch (n->op) { case A_IF: Lfalse = gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_IF"); if (n->right) { Lend = gendumplabel(); fprintf(stdout, ", end L%d", Lend); } fprintf(stdout, "\n"); dumpAST(n->left, Lfalse, level + 2); dumpAST(n->mid, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); return; case A_WHILE: Lstart = gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_WHILE, start L%d\n", Lstart); Lend = gendumplabel(); dumpAST(n->left, Lend, level + 2); dumpAST(n->right, NOLABEL, level + 2); return; } // Reset level to -2 for A_GLUE if (n->op == A_GLUE) level = -2; // General AST node handling if (n->left) dumpAST(n->left, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); for (int i = 0; i < level; i++) fprintf(stdout, " "); switch (n->op) { case A_GLUE: fprintf(stdout, "\n\n"); return; case A_FUNCTION: fprintf(stdout, "A_FUNCTION %s\n", Symtable[n->v.id].name); return; case A_ADD: fprintf(stdout, "A_ADD\n"); return; case A_SUBTRACT: fprintf(stdout, "A_SUBTRACT\n"); return; case A_MULTIPLY: fprintf(stdout, "A_MULTIPLY\n"); return; case A_DIVIDE: fprintf(stdout, "A_DIVIDE\n"); return; case A_EQ: fprintf(stdout, "A_EQ\n"); return; case A_NE: fprintf(stdout, "A_NE\n"); return; case A_LT: fprintf(stdout, "A_LE\n"); return; case A_GT: fprintf(stdout, "A_GT\n"); return; case A_LE: fprintf(stdout, "A_LE\n"); return; case A_GE: fprintf(stdout, "A_GE\n"); return; case A_INTLIT: fprintf(stdout, "A_INTLIT %d\n", n->v.intvalue); return; case A_STRLIT: fprintf(stdout, "A_STRLIT rval label L%d\n", n->v.id); return; case A_IDENT: if (n->rvalue) fprintf(stdout, "A_IDENT rval %s\n", 
Symtable[n->v.id].name); else fprintf(stdout, "A_IDENT %s\n", Symtable[n->v.id].name); return; case A_ASSIGN: fprintf(stdout, "A_ASSIGN\n"); return; case A_WIDEN: fprintf(stdout, "A_WIDEN\n"); return; case A_RETURN: fprintf(stdout, "A_RETURN\n"); return; case A_FUNCCALL: fprintf(stdout, "A_FUNCCALL %s\n", Symtable[n->v.id].name); return; case A_ADDR: fprintf(stdout, "A_ADDR %s\n", Symtable[n->v.id].name); return; case A_DEREF: if (n->rvalue) fprintf(stdout, "A_DEREF rval\n"); else fprintf(stdout, "A_DEREF\n"); return; case A_SCALE: fprintf(stdout, "A_SCALE %d\n", n->v.size); return; case A_PREINC: fprintf(stdout, "A_PREINC %s\n", Symtable[n->v.id].name); return; case A_PREDEC: fprintf(stdout, "A_PREDEC %s\n", Symtable[n->v.id].name); return; case A_POSTINC: fprintf(stdout, "A_POSTINC\n"); return; case A_POSTDEC: fprintf(stdout, "A_POSTDEC\n"); return; case A_NEGATE: fprintf(stdout, "A_NEGATE\n"); return; default: fatald("Unknown dumpAST operator", n->op); } } ================================================ FILE: 28_Runtime_Flags/types.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Types and type handling // Copyright (c) 2019 Warren Toomey, GPL3 // Return true if a type is an int type // of any size, false otherwise int inttype(int type) { if (type == P_CHAR || type == P_INT || type == P_LONG) return (1); return (0); } // Return true if a type is of pointer type int ptrtype(int type) { if (type == P_VOIDPTR || type == P_CHARPTR || type == P_INTPTR || type == P_LONGPTR) return (1); return (0); } // Given a primitive type, return // the type which is a pointer to it int pointer_to(int type) { int newtype; switch (type) { case P_VOID: newtype = P_VOIDPTR; break; case P_CHAR: newtype = P_CHARPTR; break; case P_INT: newtype = P_INTPTR; break; case P_LONG: newtype = P_LONGPTR; break; default: fatald("Unrecognised in pointer_to: type", type); } return (newtype); } // Given a primitive pointer type, return // 
the type which it points to int value_at(int type) { int newtype; switch (type) { case P_VOIDPTR: newtype = P_VOID; break; case P_CHARPTR: newtype = P_CHAR; break; case P_INTPTR: newtype = P_INT; break; case P_LONGPTR: newtype = P_LONG; break; default: fatald("Unrecognised in value_at: type", type); } return (newtype); } // Given an AST tree and a type which we want it to become, // possibly modify the tree by widening or scaling so that // it is compatible with this type. Return the original tree // if no changes occurred, a modified tree, or NULL if the // tree is not compatible with the given type. // If this will be part of a binary operation, the AST op is not zero. struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op) { int ltype; int lsize, rsize; ltype = tree->type; // Compare scalar int types if (inttype(ltype) && inttype(rtype)) { // Both types same, nothing to do if (ltype == rtype) return (tree); // Get the sizes for each type lsize = genprimsize(ltype); rsize = genprimsize(rtype); // Tree's size is too big if (lsize > rsize) return (NULL); // Widen to the right if (rsize > lsize) return (mkastunary(A_WIDEN, rtype, tree, 0)); } // For pointers on the left if (ptrtype(ltype)) { // OK is same type on right and not doing a binary op if (op == 0 && ltype == rtype) return (tree); } // We can scale only on A_ADD or A_SUBTRACT operation if (op == A_ADD || op == A_SUBTRACT) { // Left is int type, right is pointer type and the size // of the original type is >1: scale the left if (inttype(ltype) && ptrtype(rtype)) { rsize = genprimsize(value_at(rtype)); if (rsize > 1) return (mkastunary(A_SCALE, rtype, tree, rsize)); else return (tree); // Size 1, no need to scale } } // If we get here, the types are not compatible return (NULL); } ================================================ FILE: 29_Refactoring/Makefile ================================================ HSRCS= data.h decl.h defs.h SRCS= cg.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c 
sym.c tree.c types.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c ARMSRCS= cg_arm.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c cwj: $(SRCS) $(HSRCS) cc -o cwj -g -Wall $(SRCS) compn: $(SRCN) $(HSRCS) cc -D__NASM__ -o compn -g -Wall $(SRCN) cwjarm: $(ARMSRCS) $(HSRCS) cc -o cwjarm -g -Wall $(ARMSRCS) cp cwjarm cwj clean: rm -f cwj cwjarm compn *.o *.s out test: cwj tests/runtests (cd tests; chmod +x runtests; ./runtests) armtest: cwjarm tests/runtests (cd tests; chmod +x runtests; ./runtests) testn: compn tests/runtestsn (cd tests; chmod +x runtestsn; ./runtestsn) ================================================ FILE: 29_Refactoring/Readme.md ================================================ # Part 29: A Bit of Refactoring I started thinking about the design side of implementing structs, unions and enums in our compiler, and then I had a good idea on how to improve the symbol table, and that led me to doing a bit of refactoring of the compiler's code. So in this part of the journey there is no new functionality, but I feel a bit happier about some of the code in the compiler. If you are more interested in my design ideas for structs, unions and enums, feel free to skip to the next part. ## Refactoring the Symbol Table When I started writing our compiler, I had just finished reading through the [SubC](http://www.t3x.org/subc/) compiler's code and adding my own comments. Thus, I borrowed many of my initial ideas from this code base. One of them was to have an array of elements for the symbol table, with global symbols at one end and local symbols at the other. We've seen, for function prototypes and parameters, that we have to copy a function's prototype from the global end over to the local end so that the function has local parameter variables. And we have to worry if one end of the symbol table crashes into the other end. 
So, at some point, we should convert the symbol table into a number of singly-linked lists: at least one for the global symbols and one for the local symbols. When we get to implementing enums, I might have a third one for the enum values. Now, I haven't done this refactoring in this part of the journey as the changes look to be substantial, so I'll wait until I really need to do it. But one more change I will make is this. Each symbol node will have a `next` pointer to form the singly-linked list, but also a `param` pointer. This will allow functions to have a separate singly-linked list for their parameters which we can skip past when searching for global symbols. Then, when we need to "copy" a function's prototype to be its list of parameters, we can simply copy the pointer to the prototype list of parameters. Anyway, this change is for the future. ## Types, Revisited Another thing that I borrowed from SubC is the enumeration of types (in `defs.h`): ```c // Primitive types enum { P_NONE, P_VOID, P_CHAR, P_INT, P_LONG, P_VOIDPTR, P_CHARPTR, P_INTPTR, P_LONGPTR }; ``` SubC only allows one level of indirection, thus the list of types above. I had the idea, why not encode the level of indirection in the primitive type value? So I've changed our code so that the bottom four bits in a `type` integer hold the level of indirection, and the higher bits encode the actual type: ```c // Primitive types. The bottom 4 bits is an integer // value that represents the level of indirection, // e.g. 0= no pointer, 1= pointer, 2= pointer pointer etc. enum { P_NONE, P_VOID=16, P_CHAR=32, P_INT=48, P_LONG=64 }; ``` I've been able to completely refactor out all of the old `P_XXXPTR` references in the old code. Let's see what changes there have been. Firstly, we have to deal with scalar and pointer types in `types.c`. 
The code now is actually smaller than before: ```c // Return true if a type is an int type // of any size, false otherwise int inttype(int type) { return ((type & 0xf) == 0); } // Return true if a type is of pointer type int ptrtype(int type) { return ((type & 0xf) != 0); } // Given a primitive type, return // the type which is a pointer to it int pointer_to(int type) { if ((type & 0xf) == 0xf) fatald("Unrecognised in pointer_to: type", type); return (type + 1); } // Given a primitive pointer type, return // the type which it points to int value_at(int type) { if ((type & 0xf) == 0x0) fatald("Unrecognised in value_at: type", type); return (type - 1); } ``` And `modify_type()` hasn't changed whatsoever. In `expr.c`, when dealing with literal strings, I was using `P_CHARPTR` but now I can write: ```c n = mkastleaf(A_STRLIT, pointer_to(P_CHAR), id); ``` One other substantial area where the `P_XXXPTR` values were used is the code in the hardware-dependent code in `cg.c`. We start by rewriting `cgprimsize()` to use `ptrtype()`: ```c // Given a P_XXX type value, return the // size of a primitive type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } ``` With this code, the other functions in `cg.c` can now call `cgprimsize()`, `ptrtype()`, `inttype()`, `pointer_to()` and `value_at()` as required, instead of referring to specific types. 
Here's an example from `cg.c`: ```c // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzbq\t(%s), %s\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovslq\t(%s), %s\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } ``` Have a quick read through `cg.c` and look for the calls to `cgprimsize()`. ### An Example Use of Double Pointers Now that we have up to fifteen levels of indirection (the four-bit field holds 0 to 15, and 0 means "not a pointer"), I wrote a test program to confirm that they work, `tests/input55.c`: ```c int printf(char *fmt); int main(int argc, char **argv) { int i; char *argument; printf("Hello world\n"); for (i=0; i < argc; i++) { argument= *argv; argv= argv + 1; printf("Argument %d is %s\n", i, argument); } return(0); } ``` Note that `argv++` doesn't yet work, and `argv[i]` also doesn't yet work. But we can work around these missing features as shown above. 
## Changes to the Symbol Table Structure While I didn't refactor the symbol table into lists, I did tweak the symbol table structure itself, now that I realised that I can use unions and not have to give the union a variable name: ```c // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol int stype; // Structural type for the symbol int class; // Storage class for the symbol union { int size; // Number of elements in the symbol int endlabel; // For functions, the end label }; union { int nelems; // For functions, # of params int posn; // For locals, the negative offset // from the stack base pointer }; }; ``` I used to have a `#define` for `nelems`, but the above is the same result and prevents the global definition of `nelems` from polluting the namespace. I also realised that `size` and `endlabel` could occupy the same position in the structure, and added that union. There are a few cosmetic changes to the parameters to `addglob()`, but not much else. 
## Changes to the AST Structure Similarly, I've modified the AST node structure so that the union doesn't have a variable name: ```c // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates int rvalue; // True if the node is an rvalue struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; union { // For A_INTLIT, the integer value int intvalue; // For A_IDENT, the symbol slot number int id; // For A_FUNCTION, the symbol slot number int size; // For A_SCALE, the size to scale by }; // For A_FUNCCALL, the symbol slot number }; ``` and this means that I can, e.g., write the second line instead of the first one: ```c return (cgloadglob(n->left->v.id, n->op)); // Old code return (cgloadglob(n->left->id, n->op)); // New code ``` ## Conclusion and What's Next That's about it for this part of our compiler writing journey. I might have done a few more small code changes here and there, but I can't think of anything else that was major. I will get to changing the symbol table to be a linked list; this will probably happen in the part where we implement enumerated values. In the next part of our compiler writing journey, I'll get back to what I wanted to cover in this part: the design side of implementing structs, unions and enums in our compiler. 
[Next step](../30_Design_Composites/Readme.md) ================================================ FILE: 29_Refactoring/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\t.text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\t.data\n", Outfile); currSeg = data_seg; } } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. static int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. // We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "%r10", "%r11", "%r12", "%r13", "%r9", "%r8", "%rcx", "%rdx", "%rsi", "%rdi" }; static char *breglist[] = { "%r10b", "%r11b", "%r12b", "%r13b", "%r9b", "%r8b", "%cl", "%dl", "%sil", "%dil" }; static char *dreglist[] = { "%r10d", "%r11d", "%r12d", "%r13d", "%r9d", "%r8d", "%ecx", "%edx", "%esi", "%edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. 
// Print out a function preamble.
//
// id - symbol table slot of the function being generated
//
// Emits the function's label and frame set-up, assigns a stack
// position (posn) to every symbol table entry from NSYMBOLS - 1 down
// to Locls + 1, spills register-passed parameters into the frame,
// and lowers %rsp by the 16-byte-rounded size of the locals. That
// rounded size is remembered in stackOffset.
void cgfuncpreamble(int id) {
  char *name = Symtable[id].name;
  int i;
  int paramOffset = 16;		// Any pushed params start at this stack offset
  int paramReg = FIRSTPARAMREG;	// Index to the first param register in above reg lists

  // Output in the text segment, reset local offset
  cgtextseg();
  localOffset = 0;

  // Output the function start, save the %rbp and set it from %rsp
  fprintf(Outfile,
	  "\t.globl\t%s\n"
	  "\t.type\t%s, @function\n"
	  "%s:\n" "\tpushq\t%%rbp\n"
	  "\tmovq\t%%rsp, %%rbp\n", name, name, name);

  // Copy any in-register parameters to the stack
  // Stop after no more than six parameter registers.
  // Each parameter handled here gets a fresh negative offset and
  // is stored from the next register, walking paramReg downwards.
  for (i = NSYMBOLS - 1; i > Locls; i--) {
    if (Symtable[i].class != C_PARAM)
      break;
    if (i < NSYMBOLS - 6)
      break;
    Symtable[i].posn = newlocaloffset(Symtable[i].type);
    cgstorlocal(paramReg--, i);
  }

  // For the remainder, if they are a parameter then they are
  // already on the stack. If only a local, make a stack position.
  // The loop index deliberately carries on from where the loop
  // above stopped; pushed parameters are 8 bytes apart.
  for (; i > Locls; i--) {
    if (Symtable[i].class == C_PARAM) {
      Symtable[i].posn = paramOffset;
      paramOffset += 8;
    } else {
      Symtable[i].posn = newlocaloffset(Symtable[i].type);
    }
  }

  // Align the stack pointer to be a multiple of 16
  // less than its previous value
  stackOffset = (localOffset + 15) & ~15;
  fprintf(Outfile, "\taddq\t$%d,%%rsp\n", -stackOffset);
}
int cgloadglob(int id, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(Symtable[id].type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", Symtable[id].name); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", Symtable[id].name); fprintf(Outfile, "\tmovq\t%s(%%rip), %s\n", Symtable[id].name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", Symtable[id].name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", Symtable[id].name); } else // Print out the code to initialise it switch (Symtable[id].type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", Symtable[id].name); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", Symtable[id].name); fprintf(Outfile, "\tmovzbq\t%s(%%rip), %s\n", Symtable[id].name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", Symtable[id].name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", Symtable[id].name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", Symtable[id].name); if (op == A_PREDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", Symtable[id].name); fprintf(Outfile, "\tmovslq\t%s(%%rip), %s\n", Symtable[id].name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", Symtable[id].name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", Symtable[id].name); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. 
int cgloadlocal(int id, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(Symtable[id].type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", Symtable[id].posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", Symtable[id].posn); fprintf(Outfile, "\tmovq\t%d(%%rbp), %s\n", Symtable[id].posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", Symtable[id].posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", Symtable[id].posn); } else switch (Symtable[id].type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", Symtable[id].posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", Symtable[id].posn); fprintf(Outfile, "\tmovzbq\t%d(%%rbp), %s\n", Symtable[id].posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", Symtable[id].posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", Symtable[id].posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", Symtable[id].posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", Symtable[id].posn); fprintf(Outfile, "\tmovslq\t%d(%%rbp), %s\n", Symtable[id].posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", Symtable[id].posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", Symtable[id].posn); break; default: fatald("Bad type in cgloadlocal:", Symtable[id].type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int id) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tleaq\tL%d(%%rip), %s\n", id, reglist[r]); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register 
from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tandq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\torq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txorq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshlq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshrq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tnegq\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnotq\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); return (r); } // Convert an integer value to a boolean value. 
Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(int id, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s@PLT\n", Symtable[id].name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\taddq\t$%d, %%rsp\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpushq\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmovq\t%s, %s\n", reglist[r], reglist[FIRSTPARAMREG - argposn + 1]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsalq\t$%d, %s\n", val, reglist[r]); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { if (cgprimsize(Symtable[id].type) == 8) { fprintf(Outfile, "\tmovq\t%s, %s(%%rip)\n", reglist[r], Symtable[id].name); } else switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %s(%%rip)\n", breglist[r], Symtable[id].name); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %s(%%rip)\n", dreglist[r], Symtable[id].name); break; default: fatald("Bad type in cgstorglob:", Symtable[id].type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, int id) { if (cgprimsize(Symtable[id].type) == 8) { fprintf(Outfile, "\tmovq\t%s, %d(%%rbp)\n", reglist[r], Symtable[id].posn); } else switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %d(%%rbp)\n", breglist[r], Symtable[id].posn); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %d(%%rbp)\n", dreglist[r], Symtable[id].posn); break; default: fatald("Bad type in cgstorlocal:", Symtable[id].type); } return (r); } // Given a P_XXX type value, return the // size of a primitive type in bytes. 
// Given a P_XXX type value, return the size in bytes of that
// primitive type: 8 for any pointer type and for P_LONG, 4 for
// P_INT, 1 for P_CHAR. Any other type is reported through fatald().
int cgprimsize(int type) {
  // Every pointer has the same size, whatever it points to
  if (ptrtype(type))
    return (8);

  if (type == P_CHAR)
    return (1);
  if (type == P_INT)
    return (4);
  if (type == P_LONG)
    return (8);

  fatald("Bad type in cgprimsize:", type);
  return (0);			// Keep -Wall happy
}
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { // Generate code depending on the function's type switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzbl\t%s, %%eax\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %%eax\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", Symtable[id].type); } cgjump(Symtable[id].endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(int id) { int r = alloc_register(); if (Symtable[id].class == C_GLOBAL) fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", Symtable[id].name, reglist[r]); else fprintf(Outfile, "\tleaq\t%d(%%rbp), %s\n", Symtable[id].posn, reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzbq\t(%s), %s\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovslq\t(%s), %s\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { // Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmovb\t%s, (%s)\n", breglist[r1], reglist[r2]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 29_Refactoring/cg_arm.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for ARMv6 on Raspberry Pi // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. static int freereg[4]; static char *reglist[4] = { "r4", "r5", "r6", "r7" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. 
static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // We have to store large integer literal values in memory. // Keep a list of them which will be output in the postamble #define MAXINTS 1024 int Intlist[MAXINTS]; static int Intslot = 0; // Determine the offset of a large integer // literal from the .L3 label. If the integer // isn't in the list, add it. static void set_int_offset(int val) { int offset = -1; // See if it is already there for (int i = 0; i < Intslot; i++) { if (Intlist[i] == val) { offset = 4 * i; break; } } // Not in the list, so add it if (offset == -1) { offset = 4 * Intslot; if (Intslot == MAXINTS) fatal("Out of int slots in set_int_offset()"); Intlist[Intslot++] = val; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L3+%d\n", offset); } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Print out the assembly postamble void cgpostamble() { // Print out the global variables fprintf(Outfile, ".L2:\n"); for (int i = 0; i < Globs; i++) { if (Symtable[i].stype == S_VARIABLE) fprintf(Outfile, "\t.word %s\n", Symtable[i].name); } // Print out the integer literals fprintf(Outfile, ".L3:\n"); for (int i = 0; i < Intslot; i++) { fprintf(Outfile, "\t.word %d\n", Intlist[i]); } } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, \%%function\n" "%s:\n" "\tpush\t{fp, lr}\n" "\tadd\tfp, sp, #4\n" "\tsub\tsp, sp, #8\n" "\tstr\tr0, [fp, #-8]\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { 
cglabel(Symtable[id].endlabel); fputs("\tsub\tsp, fp, #4\n" "\tpop\t{fp, pc}\n" "\t.align\t2\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); // If the literal value is small, do it with one instruction if (value <= 1000) fprintf(Outfile, "\tmov\t%s, #%d\n", reglist[r], value); else { set_int_offset(value); fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); } return (r); } // Determine the offset of a variable from the .L2 // label. Yes, this is inefficient code. static void set_var_offset(int id) { int offset = 0; // Walk the symbol table up to id. // Find S_VARIABLEs and add on 4 until // we get to our variable for (int i = 0; i < id; i++) { if (Symtable[i].stype == S_VARIABLE) offset += 4; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L2+%d\n", offset); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tldrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s, %s\n", reglist[r1], reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register 
with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\tmul\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { // To do a divide: r0 holds the dividend, r1 holds the divisor. // The quotient is in r0. fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r1]); fprintf(Outfile, "\tmov\tr1, %s\n", reglist[r2]); fprintf(Outfile, "\tbl\t__aeabi_idiv\n"); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r1]); free_register(r2); return (r1); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]); fprintf(Outfile, "\tbl\t%s\n", Symtable[id].name); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r]); return (r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tlsl\t%s, %s, #%d\n", reglist[r], reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tstr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgstorglob:", Symtable[id].type); } return (r); } // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { if (ptrtype(type)) return (4); switch (type) { case P_CHAR: return (1); case P_INT: case P_LONG: return (4); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Symtable[id].type); fprintf(Outfile, "\t.data\n" "\t.globl\t%s\n", Symtable[id].name); switch (typesize) { case 1: fprintf(Outfile, "%s:\t.byte\t0\n", Symtable[id].name); break; case 4: fprintf(Outfile, "%s:\t.long\t0\n", Symtable[id].name); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "moveq", "movne", "movlt", "movgt", "movle", "movge" }; // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "movne", "moveq", "movge", "movle", "movgt", "movlt" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #1\n", cmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #0\n", invcmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\tuxtb\t%s, %s\n", reglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tb\tL%d\n", l); } // List of inverted branch instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *brlist[] = { "bne", "beq", "bge", "ble", "bgt", "blt" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", brlist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[reg]); cgjump(Symtable[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. Return a new register int cgaddress(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); fprintf(Outfile, "\tmov\t%s, r3\n", reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tldrb\t%s, [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [%s]\n", reglist[r1], reglist[r2]); break; case P_INT: case P_LONG: fprintf(Outfile, "\tstr\t%s, [%s]\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 29_Refactoring/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { no_seg, 
text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\tsection .text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\tsection .data\n", Outfile); currSeg = data_seg; } } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. // We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "r10", "r11", "r12", "r13", "r9", "r8", "rcx", "rdx", "rsi", "rdi" }; static char *breglist[] = { "r10b", "r11b", "r12b", "r13b", "r9b", "r8b", "cl", "dl", "sil", "dil" }; static char *dreglist[] = { "r10d", "r11d", "r12d", "r13d", "r9d", "r8d", "ecx", "edx", "esi", "edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\textern\tprintint\n", Outfile); fputs("\textern\tprintchar\n", Outfile); fputs("\textern\topen\n", Outfile); fputs("\textern\tclose\n", Outfile); fputs("\textern\tread\n", Outfile); fputs("\textern\twrite\n", Outfile); fputs("\textern\tprintf\n", Outfile); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; int i; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the rsp and rbp fprintf(Outfile, "\tglobal\t%s\n" "%s:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n", name, name); // Copy any in-register parameters to the stack // Stop after no more than six parameter registers for (i = NSYMBOLS - 1; i > Locls; i--) { if (Symtable[i].class != C_PARAM) break; if (i < NSYMBOLS - 6) break; Symtable[i].posn = newlocaloffset(Symtable[i].type); cgstorlocal(paramReg--, i); } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. 
for (; i > Locls; i--) { if (Symtable[i].class == C_PARAM) { Symtable[i].posn = paramOffset; paramOffset += 8; } else { Symtable[i].posn = newlocaloffset(Symtable[i].type); } } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\tadd\trsp, %d\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Symtable[id].endlabel); fprintf(Outfile, "\tadd\trsp, %d\n", stackOffset); fputs("\tpop rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. 
int cgloadglob(int id, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(Symtable[id].type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword [%s]\n", Symtable[id].name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", Symtable[id].name); fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], Symtable[id].name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword [%s]\n", Symtable[id].name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", Symtable[id].name); } else // Print out the code to initialise it switch (Symtable[id].type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", Symtable[id].name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", Symtable[id].name); fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], Symtable[id].name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", Symtable[id].name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", Symtable[id].name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword [%s]\n", Symtable[id].name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tdword [%s]\n", Symtable[id].name); fprintf(Outfile, "\tmovsx\t%s, word [%s]\n", dreglist[r], Symtable[id].name); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword [%s]\n", Symtable[id].name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword [%s]\n", Symtable[id].name); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. 
int cgloadlocal(int id, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(Symtable[id].type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", Symtable[id].posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", Symtable[id].posn); fprintf(Outfile, "\tmov\t%s, [rbp+%d]\n", reglist[r], Symtable[id].posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", Symtable[id].posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", Symtable[id].posn); } else switch (Symtable[id].type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", Symtable[id].posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", Symtable[id].posn); fprintf(Outfile, "\tmovzx\t%s, byte [rbp+%d]\n", reglist[r], Symtable[id].posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", Symtable[id].posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", Symtable[id].posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", Symtable[id].posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", Symtable[id].posn); fprintf(Outfile, "\tmovsx\t%s, word [rbp+%d]\n", reglist[r], Symtable[id].posn); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", Symtable[id].posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", Symtable[id].posn); break; default: fatald("Bad type in cgloadlocal:", Symtable[id].type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int id) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, L%d\n", reglist[r], id); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, 
int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tand\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\tor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshl\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshr\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tneg\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnot\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, 
"\tmovzx\t%s, %s\n", reglist[r], breglist[r]); return (r); } // Convert an integer value to a boolean value. Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, byte %s\n", reglist[r], breglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(int id, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s\n", Symtable[id].name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\tadd\trsp, %d\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmov\t%s, rax\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpush\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmov\t%s, %s\n", reglist[FIRSTPARAMREG - argposn + 1], reglist[r]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsal\t%s, %d\n", reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { if (cgprimsize(Symtable[id].type) == 8) { fprintf(Outfile, "\tmov\t[%s], %s\n", Symtable[id].name, reglist[r]); } else switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], %s\n", Symtable[id].name, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", Symtable[id].name, dreglist[r]); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, int id) { if (cgprimsize(Symtable[id].type) == 8) { fprintf(Outfile, "\tmov\tqword\t[rbp+%d], %s\n", Symtable[id].posn, reglist[r]); } else switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tmov\tbyte\t[rbp+%d], %s\n", Symtable[id].posn, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\tdword\t[rbp+%d], %s\n", Symtable[id].posn, dreglist[r]); break; default: fatald("Bad type in cgstorlocal:", Symtable[id].type); } return (r); } // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Generate a global symbol but not functions void cgglobsym(int id) { int typesize; if (Symtable[id].stype == S_FUNCTION) return; // Get the size of the type typesize = cgprimsize(Symtable[id].type); // Generate the global identity and the label cgdataseg(); fprintf(Outfile, "\tsection\t.data\n" "\tglobal\t%s\n", Symtable[id].name); fprintf(Outfile, "%s:", Symtable[id].name); // Generate the space // original version for (int i = 0; i < Symtable[id].size; i++) { switch(typesize) { case 1: fprintf(Outfile, "\tdb\t0\n"); break; case 4: fprintf(Outfile, "\tdd\t0\n"); break; case 8: fprintf(Outfile, "\tdq\t0\n"); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } /* compact version using times instead of loop switch(typesize) { case 1: fprintf(Outfile, "\ttimes\t%d\tdb\t0\n", Symtable[id].size); break; case 4: fprintf(Outfile, "\ttimes\t%d\tdd\t0\n", Symtable[id].size); break; case 8: fprintf(Outfile, "\ttimes\t%d\tdq\t0\n", Symtable[id].size); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } */ } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\tdb\t%d\n", *cptr); } fprintf(Outfile, "\tdb\t0\n"); /* put string in readable format on a single line // probably went overboard with error checking int comma = 0, quote = 0, start = 1; fprintf(Outfile, "\tdb\t"); for (cptr=strvalue; *cptr; cptr++) { if ( ! 
isprint(*cptr) ) if (comma || start) { fprintf(Outfile, "%d, ", *cptr); start = 0; comma = 1; } else if (quote) { fprintf(Outfile, "\', %d, ", *cptr); comma = 1; quote = 0; } else { fprintf(Outfile, "%d, ", *cptr); comma = 1; quote = 0; } else if (start || comma) { fprintf(Outfile, "\'%c", *cptr); start = comma = 0; quote = 1; } else { fprintf(Outfile, "%c", *cptr); comma = 0; quote = 1; } } if (comma || start) fprintf(Outfile, "0\n"); else fprintf(Outfile, "\', 0\n"); */ } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { // Generate code depending on the function's type switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tmovzx\teax, %s\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmov\teax, %s\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", Symtable[id].type); } cgjump(Symtable[id].endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(int id) { int r = alloc_register(); if (Symtable[id].class == C_GLOBAL) fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r], Symtable[id].name); else fprintf(Outfile, "\tlea\t%s, [rbp+%d]\n", reglist[r], Symtable[id].posn); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovsx\t%s, dword [%s]\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { //Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmov\t[%s], byte %s\n", reglist[r2], breglist[r1]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 29_Refactoring/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Putback; // Character put back by scanner extern_ int Functionid; // Symbol id of the current function extern_ int Globs; // Position of next free global symbol slot extern_ int Locls; // Position of next free local symbol slot extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ char *Outfilename; // Name of file we opened as Outfile extern_ struct token Token; // Last token scanned extern_ char Text[TEXTLEN + 1]; // 
Last identifier scanned extern_ struct symtable Symtable[NSYMBOLS]; // Global symbol table extern_ int O_dumpAST; // If true, dump the AST trees extern_ int O_keepasm; // If true, keep any assembly files extern_ int O_assemble; // If true, assemble the assembly files extern_ int O_dolink; // If true, link the object files extern_ int O_verbose; // If true, print info on compilation stages ================================================ FILE: 29_Refactoring/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 // Parse the current token and return // a primitive type enum value. Also // scan in the next token int parse_type(void) { int type; switch (Token.token) { case T_VOID: type = P_VOID; break; case T_CHAR: type = P_CHAR; break; case T_INT: type = P_INT; break; case T_LONG: type = P_LONG; break; default: fatald("Illegal type, token", Token.token); } // Scan in one or more further '*' tokens // and determine the correct pointer type while (1) { scan(&Token); if (Token.token != T_STAR) break; type = pointer_to(type); } // We leave with the next token already scanned return (type); } // variable_declaration: type identifier ';' // | type identifier '[' INTLIT ']' ';' // ; // // Parse the declaration of a scalar variable or an array // with a given size. // The identifier has been scanned & we have the type. // class is the variable's class void var_declaration(int type, int class) { // Text now has the identifier's name. // If the next token is a '[' if (Token.token == T_LBRACKET) { // Skip past the '[' scan(&Token); // Check we have an array size if (Token.token == T_INTLIT) { // Add this as a known array and generate its space in assembly. 
// We treat the array as a pointer to its elements' type if (class == C_LOCAL) { fatal("For now, declaration of local arrays is not implemented"); } else { addglob(Text, pointer_to(type), S_ARRAY, class, Token.intvalue); } } // Ensure we have a following ']' scan(&Token); match(T_RBRACKET, "]"); } else { // Add this as a known scalar // and generate its space in assembly if (class == C_LOCAL) { if (addlocl(Text, type, S_VARIABLE, class, 1) == -1) fatals("Duplicate local variable declaration", Text); } else { addglob(Text, type, S_VARIABLE, class, 1); } } } // param_declaration: // | variable_declaration // | variable_declaration ',' param_declaration // // Parse the parameters in parentheses after the function name. // Add them as symbols to the symbol table and return the number // of parameters. If id is not -1, there is an existing function // prototype, and the function has this symbol slot number. static int param_declaration(int id) { int type, param_id; int orig_paramcnt; int paramcnt = 0; // Add 1 to id so that it's either zero (no prototype), or // it's the position of the zeroth existing parameter in // the symbol table param_id = id + 1; // Get any existing prototype parameter count if (param_id) orig_paramcnt = Symtable[id].nelems; // Loop until the final right parentheses while (Token.token != T_RPAREN) { // Get the type and identifier // and add it to the symbol table type = parse_type(); ident(); // We have an existing prototype. // Check that this type matches the prototype. 
if (param_id) { if (type != Symtable[id].type) fatald("Type doesn't match prototype for parameter", paramcnt + 1); param_id++; } else { // Add a new parameter to the new prototype var_declaration(type, C_PARAM); } paramcnt++; // Must have a ',' or ')' at this point switch (Token.token) { case T_COMMA: scan(&Token); break; case T_RPAREN: break; default: fatald("Unexpected token in parameter list", Token.token); } } // Check that the number of parameters in this list matches // any existing prototype if ((id != -1) && (paramcnt != orig_paramcnt)) fatals("Parameter count mismatch for function", Symtable[id].name); // Return the count of parameters return (paramcnt); } // // function_declaration: type identifier '(' parameter_list ')' ; // | type identifier '(' parameter_list ')' compound_statement ; // // Parse the declaration of function. // The identifier has been scanned & we have the type. struct ASTnode *function_declaration(int type) { struct ASTnode *tree, *finalstmt; int id; int nameslot, endlabel, paramcnt; // Text has the identifier's name. If this exists and is a // function, get the id. Otherwise, set id to -1 if ((id = findsymbol(Text)) != -1) if (Symtable[id].stype != S_FUNCTION) id = -1; // If this is a new function declaration, get a // label-id for the end label, and add the function // to the symbol table, if (id == -1) { endlabel = genlabel(); nameslot = addglob(Text, type, S_FUNCTION, C_GLOBAL, endlabel); } // Scan in the '(', any parameters and the ')'. // Pass in any existing function prototype symbol slot number lparen(); paramcnt = param_declaration(id); rparen(); // If this is a new function declaration, update the // function symbol entry with the number of parameters if (id == -1) Symtable[nameslot].nelems = paramcnt; // Declaration ends in a semicolon, only a prototype. if (Token.token == T_SEMI) { scan(&Token); return (NULL); } // This is not just a prototype. 
// Copy the global parameters to be local parameters if (id == -1) id = nameslot; copyfuncparams(id); // Set the Functionid global to the function's symbol-id Functionid = id; // Get the AST tree for the compound statement tree = compound_statement(); // If the function type isn't P_VOID .. if (type != P_VOID) { // Error if no statements in the function if (tree == NULL) fatal("No statements in function with non-void type"); // Check that the last AST operation in the // compound statement was a return statement finalstmt = (tree->op == A_GLUE) ? tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); } // Return an A_FUNCTION node which has the function's id // and the compound statement sub-tree return (mkastunary(A_FUNCTION, type, tree, id)); } // Parse one or more global declarations, either // variables or functions void global_declarations(void) { struct ASTnode *tree; int type; while (1) { // We have to read past the type and identifier // to see either a '(' for a function declaration // or a ',' or ';' for a variable declaration. // Text is filled in by the ident() call. 
type = parse_type(); ident(); if (Token.token == T_LPAREN) { // Parse the function declaration tree = function_declaration(type); // Only a function prototype, no code if (tree == NULL) continue; // A real function, generate the assembly code for it if (O_dumpAST) { dumpAST(tree, NOLABEL, 0); fprintf(stdout, "\n\n"); } genAST(tree, NOLABEL, 0); // Now free the symbols associated // with this function freeloclsyms(); } else { // Parse the global variable declaration // and skip past the trailing semicolon var_declaration(type, C_GLOBAL); semi(); } // Stop when we have reached EOF if (Token.token == T_EOF) break; } } ================================================ FILE: 29_Refactoring/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c void reject_token(struct token *t); int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, int intvalue); struct ASTnode *mkastleaf(int op, int type, int intvalue); struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, int intvalue); void dumpAST(struct ASTnode *n, int label, int parentASTop); // gen.c int genlabel(void); int genAST(struct ASTnode *n, int reg, int parentASTop); void genpreamble(); void genpostamble(); void genfreeregs(); void genglobsym(int id); int genglobstr(char *strvalue); int genprimsize(int type); void genreturn(int reg, int id); // cg.c void cgtextseg(); void cgdataseg(); void freeall_registers(void); void cgpreamble(); void cgpostamble(); void cgfuncpreamble(int id); void cgfuncpostamble(int id); int cgloadint(int value, int type); int cgloadglob(int id, int op); int cgloadlocal(int id, int op); int cgloadglobstr(int id); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdiv(int r1, int r2); int cgshlconst(int r, int val); int cgcall(int id, int numargs); void 
cgcopyarg(int r, int argposn); int cgstorglob(int r, int id); int cgstorlocal(int r, int id); void cgglobsym(int id); void cgglobstr(int l, char *strvalue); int cgcompare_and_set(int ASTop, int r1, int r2); int cgcompare_and_jump(int ASTop, int r1, int r2, int label); void cglabel(int l); void cgjump(int l); int cgwiden(int r, int oldtype, int newtype); int cgprimsize(int type); void cgreturn(int reg, int id); int cgaddress(int id); int cgderef(int r, int type); int cgstorderef(int r1, int r2, int type); int cgnegate(int r); int cginvert(int r); int cglognot(int r); int cgboolean(int r, int op, int label); int cgand(int r1, int r2); int cgor(int r1, int r2); int cgxor(int r1, int r2); int cgshl(int r1, int r2); int cgshr(int r1, int r2); // expr.c struct ASTnode *binexpr(int ptp); // stmt.c struct ASTnode *compound_statement(void); // misc.c void match(int t, char *what); void semi(void); void lbrace(void); void rbrace(void); void lparen(void); void rparen(void); void ident(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c int findglob(char *s); int findlocl(char *s); int findsymbol(char *s); int addglob(char *name, int type, int stype, int class, int size); int addlocl(char *name, int type, int stype, int class, int size); void copyfuncparams(int slot); void freeloclsyms(void); void clear_symtable(void); // decl.c void var_declaration(int type, int class); struct ASTnode *function_declaration(int type); void global_declarations(void); // types.c int inttype(int type); int ptrtype(int type); int parse_type(void); int pointer_to(int type); int value_at(int type); struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op); ================================================ FILE: 29_Refactoring/defs.h ================================================ #include #include #include #include // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 enum { TEXTLEN=512, // 
Length of symbols in input NSYMBOLS=1024 // Number of symbol table entries }; // Commands and default filenames #define AOUT "a.out" #ifdef __NASM__ #define ASCMD "nasm -f elf64 -o " #define LDCMD "cc -no-pie -fno-plt -Wall -o " #else #define ASCMD "as -o " #define LDCMD "cc -o " #endif // Token types enum { T_EOF, // Binary operators T_ASSIGN, T_LOGOR, T_LOGAND, T_OR, T_XOR, T_AMPER, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, T_LSHIFT, T_RSHIFT, T_PLUS, T_MINUS, T_STAR, T_SLASH, // Other operators T_INC, T_DEC, T_INVERT, T_LOGNOT, // Type keywords T_VOID, T_CHAR, T_INT, T_LONG, // Other keywords T_IF, T_ELSE, T_WHILE, T_FOR, T_RETURN, // Structural tokens T_INTLIT, T_STRLIT, T_SEMI, T_IDENT, T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, T_LBRACKET, T_RBRACKET, T_COMMA }; // Token structure struct token { int token; // Token type, from the enum list above int intvalue; // For T_INTLIT, the integer value }; // AST node types. The first few line up // with the related tokens enum { A_ASSIGN= 1, A_LOGOR, A_LOGAND, A_OR, A_XOR, A_AND, A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_LSHIFT, A_RSHIFT, A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_INTLIT, A_STRLIT, A_IDENT, A_GLUE, A_IF, A_WHILE, A_FUNCTION, A_WIDEN, A_RETURN, A_FUNCCALL, A_DEREF, A_ADDR, A_SCALE, A_PREINC, A_PREDEC, A_POSTINC, A_POSTDEC, A_NEGATE, A_INVERT, A_LOGNOT, A_TOBOOL }; // Primitive types. The bottom 4 bits is an integer // value that represents the level of indirection, // e.g. 0= no pointer, 1= pointer, 2= pointer pointer etc. 
enum { P_NONE, P_VOID=16, P_CHAR=32, P_INT=48, P_LONG=64 }; // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates int rvalue; // True if the node is an rvalue struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; union { // For A_INTLIT, the integer value int intvalue; // For A_IDENT, the symbol slot number int id; // For A_FUNCTION, the symbol slot number int size; // For A_SCALE, the size to scale by }; // For A_FUNCCALL, the symbol slot number }; enum { NOREG= -1, // Use NOREG when the AST generation // functions have no register to return NOLABEL= 0 // Use NOLABEL when we have no label to // pass to genAST() }; // Structural types enum { S_VARIABLE, S_FUNCTION, S_ARRAY }; // Storage classes enum { C_GLOBAL = 1, // Globally visible symbol C_LOCAL, // Locally visible symbol C_PARAM // Locally visible function parameter }; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol int stype; // Structural type for the symbol int class; // Storage class for the symbol union { int size; // Number of elements in the symbol int endlabel; // For functions, the end label }; union { int nelems; // For functions, # of params int posn; // For locals, the negative offset // from the stack base pointer }; }; ================================================ FILE: 29_Refactoring/expr.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of expressions // Copyright (c) 2019 Warren Toomey, GPL3 // expression_list: // | expression // | expression ',' expression_list // ; // Parse a list of zero or more comma-separated expressions and // return an AST composed of A_GLUE nodes with the left-hand child // being the sub-tree of previous expressions (or NULL) and the right-hand // child being the next expression. 
Each A_GLUE node will have size field // set to the number of expressions in the tree at this point. If no // expressions are parsed, NULL is returned static struct ASTnode *expression_list(void) { struct ASTnode *tree = NULL; struct ASTnode *child = NULL; int exprcount = 0; // Loop until the final right parentheses while (Token.token != T_RPAREN) { // Parse the next expression and increment the expression count child = binexpr(0); exprcount++; // Build an A_GLUE AST node with the previous tree as the left child // and the new expression as the right child. Store the expression count. tree = mkastnode(A_GLUE, P_NONE, tree, NULL, child, exprcount); // Must have a ',' or ')' at this point switch (Token.token) { case T_COMMA: scan(&Token); break; case T_RPAREN: break; default: fatald("Unexpected token in expression list", Token.token); } } // Return the tree of expressions return (tree); } // Parse a function call and return its AST static struct ASTnode *funccall(void) { struct ASTnode *tree; int id; // Check that the identifier has been defined as a function, // then make a leaf node for it. if ((id = findsymbol(Text)) == -1 || Symtable[id].stype != S_FUNCTION) { fatals("Undeclared function", Text); } // Get the '(' lparen(); // Parse the argument expression list tree = expression_list(); // XXX Check type of each argument against the function's prototype // Build the function call AST node. Store the // function's return type as this node's type. 
// Also record the function's symbol-id tree = mkastunary(A_FUNCCALL, Symtable[id].type, tree, id); // Get the ')' rparen(); return (tree); } // Parse the index into an array and return an AST tree for it static struct ASTnode *array_access(void) { struct ASTnode *left, *right; int id; // Check that the identifier has been defined as an array // then make a leaf node for it that points at the base if ((id = findsymbol(Text)) == -1 || Symtable[id].stype != S_ARRAY) { fatals("Undeclared array", Text); } left = mkastleaf(A_ADDR, Symtable[id].type, id); // Get the '[' scan(&Token); // Parse the following expression right = binexpr(0); // Get the ']' match(T_RBRACKET, "]"); // Ensure that this is of int type if (!inttype(right->type)) fatal("Array index is not of integer type"); // Scale the index by the size of the element's type right = modify_type(right, left->type, A_ADD); // Return an AST tree where the array's base has the offset // added to it, and dereference the element. Still an lvalue // at this point. left = mkastnode(A_ADD, Symtable[id].type, left, NULL, right, 0); left = mkastunary(A_DEREF, value_at(left->type), left, 0); return (left); } // Parse a postfix expression and return // an AST node representing it. The // identifier is already in Text. static struct ASTnode *postfix(void) { struct ASTnode *n; int id; // Scan in the next token to see if we have a postfix expression scan(&Token); // Function call if (Token.token == T_LPAREN) return (funccall()); // An array reference if (Token.token == T_LBRACKET) return (array_access()); // A variable. Check that the variable exists. 
id = findsymbol(Text); if (id == -1 || Symtable[id].stype != S_VARIABLE) fatals("Unknown variable", Text); switch (Token.token) { // Post-increment: skip over the token case T_INC: scan(&Token); n = mkastleaf(A_POSTINC, Symtable[id].type, id); break; // Post-decrement: skip over the token case T_DEC: scan(&Token); n = mkastleaf(A_POSTDEC, Symtable[id].type, id); break; // Just a variable reference default: n = mkastleaf(A_IDENT, Symtable[id].type, id); } return (n); } // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; int id; switch (Token.token) { case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. // Make it a P_CHAR if it's within the P_CHAR range if ((Token.intvalue) >= 0 && (Token.intvalue < 256)) n = mkastleaf(A_INTLIT, P_CHAR, Token.intvalue); else n = mkastleaf(A_INTLIT, P_INT, Token.intvalue); break; case T_STRLIT: // For a STRLIT token, generate the assembly for it. // Then make a leaf AST node for it. id is the string's label. id = genglobstr(Text); n = mkastleaf(A_STRLIT, pointer_to(P_CHAR), id); break; case T_IDENT: return (postfix()); case T_LPAREN: // Beginning of a parenthesised expression, skip the '('. // Scan in the expression and the right parenthesis scan(&Token); n = binexpr(0); rparen(); return (n); default: fatald("Expecting a primary expression, got token", Token.token); } // Scan in the next token and return the leaf node scan(&Token); return (n); } // Convert a binary operator token into a binary AST operation. // We rely on a 1:1 mapping from token to AST operation static int binastop(int tokentype) { if (tokentype > T_EOF && tokentype <= T_SLASH) return (tokentype); fatald("Syntax error, token", tokentype); return (0); // Keep -Wall happy } // Return true if a token is right-associative, // false otherwise. static int rightassoc(int tokentype) { if (tokentype == T_ASSIGN) return (1); return (0); } // Operator precedence for each token. 
Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 20, 30, // T_EOF, T_ASSIGN, T_LOGOR, T_LOGAND 40, 50, 60, // T_OR, T_XOR, T_AMPER 70, 70, // T_EQ, T_NE 80, 80, 80, 80, // T_LT, T_GT, T_LE, T_GE 90, 90, // T_LSHIFT, T_RSHIFT 100, 100, // T_PLUS, T_MINUS 110, 110 // T_STAR, T_SLASH }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec; if (tokentype > T_SLASH) fatald("Token with no precedence in op_precedence:", tokentype); prec = OpPrec[tokentype]; if (prec == 0) fatald("Syntax error, token", tokentype); return (prec); } // prefix_expression: primary // | '*' prefix_expression // | '&' prefix_expression // | '-' prefix_expression // | '++' prefix_expression // | '--' prefix_expression // ; // Parse a prefix expression and return // a sub-tree representing it. struct ASTnode *prefix(void) { struct ASTnode *tree; switch (Token.token) { case T_AMPER: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Ensure that it's an identifier if (tree->op != A_IDENT) fatal("& operator must be followed by an identifier"); // Now change the operator to A_ADDR and the type to // a pointer to the original type tree->op = A_ADDR; tree->type = pointer_to(tree->type); break; case T_STAR: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's either another deref or an // identifier if (tree->op != A_IDENT && tree->op != A_DEREF) fatal("* operator must be followed by an identifier or *"); // Prepend an A_DEREF operation to the tree tree = mkastunary(A_DEREF, value_at(tree->type), tree, 0); break; case T_MINUS: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_NEGATE operation to the tree and // make the child an rvalue. 
Because chars are unsigned, // also widen this to int so that it's signed tree->rvalue = 1; tree = modify_type(tree, P_INT, 0); tree = mkastunary(A_NEGATE, tree->type, tree, 0); break; case T_INVERT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_INVERT operation to the tree and // make the child an rvalue. tree->rvalue = 1; tree = mkastunary(A_INVERT, tree->type, tree, 0); break; case T_LOGNOT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_LOGNOT operation to the tree and // make the child an rvalue. tree->rvalue = 1; tree = mkastunary(A_LOGNOT, tree->type, tree, 0); break; case T_INC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("++ operator must be followed by an identifier"); // Prepend an A_PREINC operation to the tree tree = mkastunary(A_PREINC, tree->type, tree, 0); break; case T_DEC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("-- operator must be followed by an identifier"); // Prepend an A_PREDEC operation to the tree tree = mkastunary(A_PREDEC, tree->type, tree, 0); break; default: tree = primary(); } return (tree); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; struct ASTnode *ltemp, *rtemp; int ASTop; int tokentype; // Get the tree on the left. // Fetch the next token at the same time. 
left = prefix(); // If we hit one of several terminating tokens, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA) { left->rvalue = 1; return (left); } // While the precedence of this token is more than that of the // previous token precedence, or it's right associative and // equal to the previous token's precedence while ((op_precedence(tokentype) > ptp) || (rightassoc(tokentype) && op_precedence(tokentype) == ptp)) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Determine the operation to be performed on the sub-trees ASTop = binastop(tokentype); if (ASTop == A_ASSIGN) { // Assignment // Make the right tree into an rvalue right->rvalue = 1; // Ensure the right's type matches the left right = modify_type(right, left->type, 0); if (right == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree. However, switch // left and right around, so that the right expression's // code will be generated before the left expression ltemp = left; left = right; right = ltemp; } else { // We are not doing an assignment, so both trees should be rvalues // Convert both trees into rvalue if they are lvalue trees left->rvalue = 1; right->rvalue = 1; // Ensure the two types are compatible by trying // to modify each tree to match the other's type. ltemp = modify_type(left, right->type, ASTop); rtemp = modify_type(right, left->type, ASTop); if (ltemp == NULL && rtemp == NULL) fatal("Incompatible types in binary expression"); if (ltemp != NULL) left = ltemp; if (rtemp != NULL) right = rtemp; } // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. left = mkastnode(binastop(tokentype), left->type, left, NULL, right, 0); // Update the details of the current token. 
// If we hit a terminating token, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA) { left->rvalue = 1; return (left); } } // Return the tree we have when the precedence // is the same or lower left->rvalue = 1; return (left); } ================================================ FILE: 29_Refactoring/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number int genlabel(void) { static int id = 1; return (id++); } // Generate the code for an IF statement // and an optional ELSE clause static int genIF(struct ASTnode *n) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. // When there is no ELSE clause, Lfalse _is_ // the ending label! Lfalse = genlabel(); if (n->right) Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. genAST(n->left, Lfalse, n->op); genfreeregs(); // Generate the true compound statement genAST(n->mid, NOLABEL, n->op); genfreeregs(); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOLABEL, n->op); genfreeregs(); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = genlabel(); Lend = genlabel(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. 
genAST(n->left, Lend, n->op); genfreeregs(); // Generate the compound statement for the body genAST(n->right, NOLABEL, n->op); genfreeregs(); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Generate the code to copy the arguments of a // function call to its parameters, then call the // function itself. Return the register that holds // the function's return value. static int gen_funccall(struct ASTnode *n) { struct ASTnode *gluetree = n->left; int reg; int numargs = 0; // If there is a list of arguments, walk this list // from the last argument (right-hand child) to the // first while (gluetree) { // Calculate the expression's value reg = genAST(gluetree->right, NOLABEL, gluetree->op); // Copy this into the n'th function parameter: size is 1, 2, 3, ... cgcopyarg(reg, gluetree->size); // Keep the first (highest) number of arguments if (numargs == 0) numargs = gluetree->size; genfreeregs(); gluetree = gluetree->left; } // Call the function, clean up the stack (based on numargs), // and return its result return (cgcall(n->id, numargs)); } // Given an AST, an optional label, and the AST op // of the parent, generate assembly code recursively. // Return the register id with the tree's final value. 
int genAST(struct ASTnode *n, int label, int parentASTop) { int leftreg, rightreg; // We have some specific AST node handling at the top // so that we don't evaluate the child sub-trees immediately switch (n->op) { case A_IF: return (genIF(n)); case A_WHILE: return (genWHILE(n)); case A_FUNCCALL: return (gen_funccall(n)); case A_GLUE: // Do each child statement, and free the // registers after each child genAST(n->left, NOLABEL, n->op); genfreeregs(); genAST(n->right, NOLABEL, n->op); genfreeregs(); return (NOREG); case A_FUNCTION: // Generate the function's preamble before the code // in the child sub-tree cgfuncpreamble(n->id); genAST(n->left, NOLABEL, n->op); cgfuncpostamble(n->id); return (NOREG); } // General AST node handling below // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, NOLABEL, n->op); if (n->right) rightreg = genAST(n->right, NOLABEL, n->op); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdiv(leftreg, rightreg)); case A_AND: return (cgand(leftreg, rightreg)); case A_OR: return (cgor(leftreg, rightreg)); case A_XOR: return (cgxor(leftreg, rightreg)); case A_LSHIFT: return (cgshl(leftreg, rightreg)); case A_RSHIFT: return (cgshr(leftreg, rightreg)); case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, compare registers // and set one to 1 or 0 based on the comparison. 
if (parentASTop == A_IF || parentASTop == A_WHILE) return (cgcompare_and_jump(n->op, leftreg, rightreg, label)); else return (cgcompare_and_set(n->op, leftreg, rightreg)); case A_INTLIT: return (cgloadint(n->intvalue, n->type)); case A_STRLIT: return (cgloadglobstr(n->id)); case A_IDENT: // Load our value if we are an rvalue // or we are being dereferenced if (n->rvalue || parentASTop == A_DEREF) { if (Symtable[n->id].class == C_GLOBAL) { return (cgloadglob(n->id, n->op)); } else { return (cgloadlocal(n->id, n->op)); } } else return (NOREG); case A_ASSIGN: // Are we assigning to an identifier or through a pointer? switch (n->right->op) { case A_IDENT: if (Symtable[n->right->id].class == C_GLOBAL) return (cgstorglob(leftreg, n->right->id)); else return (cgstorlocal(leftreg, n->right->id)); case A_DEREF: return (cgstorderef(leftreg, rightreg, n->right->type)); default: fatald("Can't A_ASSIGN in genAST(), op", n->op); } case A_WIDEN: // Widen the child's type to the parent's type return (cgwiden(leftreg, n->left->type, n->type)); case A_RETURN: cgreturn(leftreg, Functionid); return (NOREG); case A_ADDR: return (cgaddress(n->id)); case A_DEREF: // If we are an rvalue, dereference to get the value we point at, // otherwise leave it for A_ASSIGN to store through the pointer if (n->rvalue) return (cgderef(leftreg, n->left->type)); else return (leftreg); case A_SCALE: // Small optimisation: use shift if the // scale value is a known power of two switch (n->size) { case 2: return (cgshlconst(leftreg, 1)); case 4: return (cgshlconst(leftreg, 2)); case 8: return (cgshlconst(leftreg, 3)); default: // Load a register with the size and // multiply the leftreg by this size rightreg = cgloadint(n->size, P_INT); return (cgmul(leftreg, rightreg)); } case A_POSTINC: case A_POSTDEC: // Load and decrement the variable's value into a register // and post increment/decrement it if (Symtable[n->id].class == C_GLOBAL) return (cgloadglob(n->id, n->op)); else return (cgloadlocal(n->id, 
n->op)); case A_PREINC: case A_PREDEC: // Load and decrement the variable's value into a register // and pre increment/decrement it if (Symtable[n->left->id].class == C_GLOBAL) return (cgloadglob(n->left->id, n->op)); else return (cgloadlocal(n->left->id, n->op)); case A_NEGATE: return (cgnegate(leftreg)); case A_INVERT: return (cginvert(leftreg)); case A_LOGNOT: return (cglognot(leftreg)); case A_TOBOOL: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, set the register // to 0 or 1 based on it's zeroeness or non-zeroeness return (cgboolean(leftreg, parentASTop, label)); default: fatald("Unknown AST operator", n->op); } return (NOREG); // Keep -Wall happy } void genpreamble() { cgpreamble(); } void genpostamble() { cgpostamble(); } void genfreeregs() { freeall_registers(); } void genglobsym(int id) { cgglobsym(id); } int genglobstr(char *strvalue) { int l = genlabel(); cgglobstr(l, strvalue); return (l); } int genprimsize(int type) { return (cgprimsize(type)); } ================================================ FILE: 29_Refactoring/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Given a string with a '.' and at least a 1-character suffix // after the '.', change the suffix to be the given character. // Return the new string or NULL if the original string could // not be modified char *alter_suffix(char *str, char suffix) { char *posn; char *newstr; // Clone the string if ((newstr = strdup(str)) == NULL) return (NULL); // Find the '.' 
if ((posn = strrchr(newstr, '.')) == NULL) return (NULL); // Ensure there is a suffix posn++; if (*posn == '\0') return (NULL); // Change the suffix and NUL-terminate the string *posn++ = suffix; *posn = '\0'; return (newstr); } // Given an input filename, compile that file // down to assembly code. Return the new file's name static char *do_compile(char *filename) { Outfilename = alter_suffix(filename, 's'); if (Outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .c on the end\n", filename); exit(1); } // Open up the input file if ((Infile = fopen(filename, "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", filename, strerror(errno)); exit(1); } // Create the output file if ((Outfile = fopen(Outfilename, "w")) == NULL) { fprintf(stderr, "Unable to create %s: %s\n", Outfilename, strerror(errno)); exit(1); } Line = 1; // Reset the scanner Putback = '\n'; clear_symtable(); // Clear the symbol table if (O_verbose) printf("compiling %s\n", filename); scan(&Token); // Get the first token from the input genpreamble(); // Output the preamble global_declarations(); // Parse the global declarations genpostamble(); // Output the postamble fclose(Outfile); // Close the output file return (Outfilename); } // Given an input filename, assemble that file // down to object code. Return the object filename char *do_assemble(char *filename) { char cmd[TEXTLEN]; int err; char *outfilename = alter_suffix(filename, 'o'); if (outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .s on the end\n", filename); exit(1); } // Build the assembly command and run it snprintf(cmd, TEXTLEN, "%s %s %s", ASCMD, outfilename, filename); if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Assembly of %s failed\n", filename); exit(1); } return (outfilename); } // Given a list of object files and an output filename, // link all of the object filenames together. 
void do_link(char *outfilename, char *objlist[]) { int cnt, size = TEXTLEN; char cmd[TEXTLEN], *cptr; int err; // Start with the linker command and the output file cptr = cmd; cnt = snprintf(cptr, size, "%s %s ", LDCMD, outfilename); cptr += cnt; size -= cnt; // Now append each object file while (*objlist != NULL) { cnt = snprintf(cptr, size, "%s ", *objlist); cptr += cnt; size -= cnt; objlist++; } if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Linking failed\n"); exit(1); } } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s [-vcST] [-o outfile] file [file ...]\n", prog); fprintf(stderr, " -v give verbose output of the compilation stages\n"); fprintf(stderr, " -c generate object files but don't link them\n"); fprintf(stderr, " -S generate assembly files but don't link them\n"); fprintf(stderr, " -T dump the AST trees for each input file\n"); fprintf(stderr, " -o outfile, produce the outfile executable file\n"); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. 
enum { MAXOBJ= 100 }; int main(int argc, char *argv[]) { char *outfilename = AOUT; char *asmfile, *objfile; char *objlist[MAXOBJ]; int i, objcnt = 0; // Initialise our variables O_dumpAST = 0; O_keepasm = 0; O_assemble = 0; O_verbose = 0; O_dolink = 1; // Scan for command-line options for (i = 1; i < argc; i++) { // No leading '-', stop scanning for options if (*argv[i] != '-') break; // For each option in this argument for (int j = 1; (*argv[i] == '-') && argv[i][j]; j++) { switch (argv[i][j]) { case 'o': outfilename = argv[++i]; // Save & skip to next argument break; case 'T': O_dumpAST = 1; break; case 'c': O_assemble = 1; O_keepasm = 0; O_dolink = 0; break; case 'S': O_keepasm = 1; O_assemble = 0; O_dolink = 0; break; case 'v': O_verbose = 1; break; default: usage(argv[0]); } } } // Ensure we have at lease one input file argument if (i >= argc) usage(argv[0]); // Work on each input file in turn while (i < argc) { asmfile = do_compile(argv[i]); // Compile the source file if (O_dolink || O_assemble) { objfile = do_assemble(asmfile); // Assemble it to object forma if (objcnt == (MAXOBJ - 2)) { fprintf(stderr, "Too many object files for the compiler to handle\n"); exit(1); } objlist[objcnt++] = objfile; // Add the object file's name objlist[objcnt] = NULL; // to the list of object files } if (!O_keepasm) // Remove the assembly file if unlink(asmfile); // we don't need to keep it i++; } // Now link all the object files together if (O_dolink) { do_link(outfilename, objlist); // If we don't need to keep the object // files, then remove them if (!O_assemble) { for (i = 0; objlist[i] != NULL; i++) unlink(objlist[i]); } } return (0); } ================================================ FILE: 29_Refactoring/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" #include // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current token is t, // and fetch the next token. 
Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d\n", s, Line); fclose(Outfile); unlink(Outfilename); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d\n", s1, s2, Line); fclose(Outfile); unlink(Outfilename); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d\n", s, d, Line); fclose(Outfile); unlink(Outfilename); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d\n", s, c, Line); fclose(Outfile); unlink(Outfilename); exit(1); } ================================================ FILE: 29_Refactoring/scan.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { char *p; p = strchr(s, c); return (p ? p - s : -1); } // Get the next character from the input file. 
static int next(void) { int c; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return (c); } c = fgetc(Infile); // Read from input file if ('\n' == c) Line++; // Increment line count return (c); } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. static int skip(void) { int c; c = next(); while (' ' == c || '\t' == c || '\n' == c || '\r' == c || '\f' == c) { c = next(); } return (c); } // Return the next character from a character // or string literal static int scanch(void) { int c; // Get the next input character and interpret // metacharacters that start with a backslash c = next(); if (c == '\\') { switch (c = next()) { case 'a': return '\a'; case 'b': return '\b'; case 'f': return '\f'; case 'n': return '\n'; case 'r': return '\r'; case 't': return '\t'; case 'v': return '\v'; case '\\': return '\\'; case '"': return '"'; case '\'': return '\''; default: fatalc("unknown escape sequence", c); } } return (c); // Just an ordinary old character! } // Scan and return an integer literal // value from the input file. static int scanint(int c) { int k, val = 0; // Convert each character into an int value while ((k = chrpos("0123456789", c)) >= 0) { val = val * 10 + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan in a string literal from the input file, // and store it in buf[]. Return the length of // the string. 
static int scanstr(char *buf) { int i, c; // Loop while we have enough buffer space for (i = 0; i < TEXTLEN - 1; i++) { // Get the next char and append to buf // Return when we hit the ending double quote if ((c = scanch()) == '"') { buf[i] = 0; return (i); } buf[i] = c; } // Ran out of buf[] space fatal("String literal too long"); return (0); } // Scan an identifier from the input file and // store it in buf[]. Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { fatal("Identifier too long"); } else if (i < lim - 1) { buf[i++] = c; } c = next(); } // We hit a non-valid character, put it back. // NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. static int keyword(char *s) { switch (*s) { case 'c': if (!strcmp(s, "char")) return (T_CHAR); break; case 'e': if (!strcmp(s, "else")) return (T_ELSE); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'l': if (!strcmp(s, "long")) return (T_LONG); break; case 'r': if (!strcmp(s, "return")) return (T_RETURN); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; case 'v': if (!strcmp(s, "void")) return (T_VOID); break; } return (0); } // A pointer to a rejected token static struct token *Rejtoken = NULL; // Reject the token that we just scanned void reject_token(struct token *t) { if (Rejtoken != NULL) fatal("Can't reject token twice"); Rejtoken = t; } // Scan and return the next token found in the input. 
// Return 1 if token valid, 0 if no tokens left. int scan(struct token *t) { int c, tokentype; // If we have any rejected token, return it if (Rejtoken != NULL) { t = Rejtoken; Rejtoken = NULL; return (1); } // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': if ((c = next()) == '+') { t->token = T_INC; } else { putback(c); t->token = T_PLUS; } break; case '-': if ((c = next()) == '-') { t->token = T_DEC; } else { putback(c); t->token = T_MINUS; } break; case '*': t->token = T_STAR; break; case '/': t->token = T_SLASH; break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case '[': t->token = T_LBRACKET; break; case ']': t->token = T_RBRACKET; break; case '~': t->token = T_INVERT; break; case '^': t->token = T_XOR; break; case ',': t->token = T_COMMA; break; case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { putback(c); t->token = T_LOGNOT; } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else if (c == '<') { t->token = T_LSHIFT; } else { putback(c); t->token = T_LT; } break; case '>': if ((c = next()) == '=') { t->token = T_GE; } else if (c == '>') { t->token = T_RSHIFT; } else { putback(c); t->token = T_GT; } break; case '&': if ((c = next()) == '&') { t->token = T_LOGAND; } else { putback(c); t->token = T_AMPER; } break; case '|': if ((c = next()) == '|') { t->token = T_LOGOR; } else { putback(c); t->token = T_OR; } break; case '\'': // If it's a quote, scan in the // literal character value and // the trailing quote t->intvalue = scanch(); t->token = T_INTLIT; if (next() != '\'') fatal("Expected '\\'' at end of char literal"); break; case '"': // Scan in a literal string scanstr(Text); t->token = 
T_STRLIT; break; default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if ((tokentype = keyword(Text)) != 0) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token return (1); } ================================================ FILE: 29_Refactoring/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // Prototypes static struct ASTnode *single_statement(void); // compound_statement: // empty, i.e. no statement // | statement // | statement statements // ; // // statement: declaration // | expression_statement // | function_call // | if_statement // | while_statement // | for_statement // | return_statement // ; // if_statement: if_head // | if_head 'else' compound_statement // ; // // if_head: 'if' '(' true_false_expression ')' compound_statement ; // // Parse an IF statement including any // optional ELSE clause and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. 
condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, 0); rparen(); // Get the AST for the compound statement trueAST = compound_statement(); // If we have an 'else', skip it // and get the AST for the compound statement if (Token.token == T_ELSE) { scan(&Token); falseAST = compound_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, P_NONE, condAST, trueAST, falseAST, 0)); } // while_statement: 'while' '(' true_false_expression ')' compound_statement ; // // Parse a WHILE statement and return its AST static struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, 0); rparen(); // Get the AST for the compound statement bodyAST = compound_statement(); // Build and return the AST for this statement return (mkastnode(A_WHILE, P_NONE, condAST, NULL, bodyAST, 0)); } // for_statement: 'for' '(' preop_statement ';' // true_false_expression ';' // postop_statement ')' compound_statement ; // // preop_statement: statement (for now) // postop_statement: statement (for now) // // Parse a FOR statement and return its AST static struct ASTnode *for_statement(void) { struct ASTnode *condAST, *bodyAST; struct ASTnode *preopAST, *postopAST; struct ASTnode *tree; // Ensure we have 'for' '(' match(T_FOR, "for"); lparen(); // Get the pre_op statement and the ';' preopAST = single_statement(); semi(); // Get the condition and the ';'. // Force a non-comparison to be boolean // the tree's operation is a comparison. 
condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, 0); semi(); // Get the post_op statement and the ')' postopAST = single_statement(); rparen(); // Get the compound statement which is the body bodyAST = compound_statement(); // For now, all four sub-trees have to be non-NULL. // Later on, we'll change the semantics for when some are missing // Glue the compound statement and the postop tree tree = mkastnode(A_GLUE, P_NONE, bodyAST, NULL, postopAST, 0); // Make a WHILE loop with the condition and this new body tree = mkastnode(A_WHILE, P_NONE, condAST, NULL, tree, 0); // And glue the preop tree to the A_WHILE tree return (mkastnode(A_GLUE, P_NONE, preopAST, NULL, tree, 0)); } // return_statement: 'return' '(' expression ')' ; // // Parse a return statement and return its AST static struct ASTnode *return_statement(void) { struct ASTnode *tree; // Can't return a value if function returns P_VOID if (Symtable[Functionid].type == P_VOID) fatal("Can't return from a void function"); // Ensure we have 'return' '(' match(T_RETURN, "return"); lparen(); // Parse the following expression tree = binexpr(0); // Ensure this is compatible with the function's type tree = modify_type(tree, Symtable[Functionid].type, 0); if (tree == NULL) fatal("Incompatible type to return"); // Add on the A_RETURN node tree = mkastunary(A_RETURN, P_NONE, tree, 0); // Get the ')' rparen(); return (tree); } // Parse a single statement and return its AST static struct ASTnode *single_statement(void) { int type; switch (Token.token) { case T_CHAR: case T_INT: case T_LONG: // The beginning of a variable declaration. // Parse the type and get the identifier. 
// Then parse the rest of the declaration // and skip over the semicolon type = parse_type(); ident(); var_declaration(type, C_LOCAL); semi(); return (NULL); // No AST generated here case T_IF: return (if_statement()); case T_WHILE: return (while_statement()); case T_FOR: return (for_statement()); case T_RETURN: return (return_statement()); default: // For now, see if this is an expression. // This catches assignment statements. return (binexpr(0)); } return (NULL); // Keep -Wall happy } // Parse a compound statement // and return its AST struct ASTnode *compound_statement(void) { struct ASTnode *left = NULL; struct ASTnode *tree; // Require a left curly bracket lbrace(); while (1) { // Parse a single statement tree = single_statement(); // Some statements must be followed by a semicolon if (tree != NULL && (tree->op == A_ASSIGN || tree->op == A_RETURN || tree->op == A_FUNCCALL)) semi(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, P_NONE, left, NULL, tree, 0); } // When we hit a right curly bracket, // skip past it and return the AST if (Token.token == T_RBRACE) { rbrace(); return (left); } } } ================================================ FILE: 29_Refactoring/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 // Determine if the symbol s is in the global symbol table. // Return its slot position or -1 if not found. // Skip C_PARAM entries int findglob(char *s) { int i; for (i = 0; i < Globs; i++) { if (Symtable[i].class == C_PARAM) continue; if (*s == *Symtable[i].name && !strcmp(s, Symtable[i].name)) return (i); } return (-1); } // Get the position of a new global symbol slot, or die // if we've run out of positions. 
static int newglob(void) { int p; if ((p = Globs++) >= Locls) fatal("Too many global symbols"); return (p); } // Determine if the symbol s is in the local symbol table. // Return its slot position or -1 if not found. int findlocl(char *s) { int i; for (i = Locls + 1; i < NSYMBOLS; i++) { if (*s == *Symtable[i].name && !strcmp(s, Symtable[i].name)) return (i); } return (-1); } // Get the position of a new local symbol slot, or die // if we've run out of positions. static int newlocl(void) { int p; if ((p = Locls--) <= Globs) fatal("Too many local symbols"); return (p); } // Clear all the entries in the // local symbol table void freeloclsyms(void) { Locls = NSYMBOLS - 1; } // Update a symbol at the given slot number in the symbol table. Set up its: // + type: char, int etc. // + structural type: var, function, array etc. // + size: number of elements, or endlabel: end label for a function // + posn: Position information for local symbols static void updatesym(int slot, char *name, int type, int stype, int class, int size, int posn) { if (slot < 0 || slot >= NSYMBOLS) fatal("Invalid symbol slot number in updatesym()"); Symtable[slot].name = strdup(name); Symtable[slot].type = type; Symtable[slot].stype = stype; Symtable[slot].class = class; Symtable[slot].size = size; Symtable[slot].posn = posn; } // Add a global symbol to the symbol table. Set up its: // + type: char, int etc. // + structural type: var, function, array etc. 
// + class of the symbol // + size: number of elements, or endlabel: end label for a function // Return the slot number in the symbol table int addglob(char *name, int type, int stype, int class, int size) { int slot; // If this is already in the symbol table, return the existing slot if ((slot = findglob(name)) != -1) return (slot); // Otherwise get a new slot and fill it in slot = newglob(); updatesym(slot, name, type, stype, class, size, 0); // Generate the assembly for the symbol if it's global if (class == C_GLOBAL) genglobsym(slot); // Return the slot number return (slot); } // Add a local symbol to the symbol table. Set up its: // + type: char, int etc. // + structural type: var, function, array etc. // + size: number of elements // Return the slot number in the symbol table, -1 if a duplicate entry int addlocl(char *name, int type, int stype, int class, int size) { int localslot; // If this is already in the symbol table, return an error if ((localslot = findlocl(name)) != -1) return (-1); // Otherwise get a new symbol slot and a position for this local. // Update the local symbol table entry. localslot = newlocl(); updatesym(localslot, name, type, stype, class, size, 0); // Return the local symbol's slot return (localslot); } // Given a function's slot number, copy the global parameters // from its prototype to be local parameters void copyfuncparams(int slot) { int i, id = slot + 1; for (i = 0; i < Symtable[slot].nelems; i++, id++) { addlocl(Symtable[id].name, Symtable[id].type, Symtable[id].stype, Symtable[id].class, Symtable[id].size); } } // Determine if the symbol s is in the symbol table. // Return its slot position or -1 if not found. 
int findsymbol(char *s) { int slot; slot = findlocl(s); if (slot == -1) slot = findglob(s); return (slot); } // Reset the contents of the symbol table void clear_symtable(void) { Globs = 0; Locls = NSYMBOLS - 1; } ================================================ FILE: 29_Refactoring/tests/err.input31.c ================================================ Expecting a primary expression, got token:15 on line 5 ================================================ FILE: 29_Refactoring/tests/err.input32.c ================================================ Unknown variable:cow on line 4 ================================================ FILE: 29_Refactoring/tests/err.input33.c ================================================ Incompatible type to return on line 4 ================================================ FILE: 29_Refactoring/tests/err.input34.c ================================================ For now, declaration of local arrays is not implemented on line 4 ================================================ FILE: 29_Refactoring/tests/err.input35.c ================================================ Duplicate local variable declaration:a on line 4 ================================================ FILE: 29_Refactoring/tests/err.input36.c ================================================ Type doesn't match prototype for parameter:3 on line 4 ================================================ FILE: 29_Refactoring/tests/err.input37.c ================================================ Unexpected token in parameter list:15 on line 3 ================================================ FILE: 29_Refactoring/tests/err.input38.c ================================================ Type doesn't match prototype for parameter:3 on line 4 ================================================ FILE: 29_Refactoring/tests/err.input39.c ================================================ No statements in function with non-void type on line 4 ================================================ FILE: 
29_Refactoring/tests/err.input40.c ================================================ No return for function with non-void type on line 4 ================================================ FILE: 29_Refactoring/tests/err.input41.c ================================================ Can't return from a void function on line 3 ================================================ FILE: 29_Refactoring/tests/err.input42.c ================================================ Undeclared function:fred on line 3 ================================================ FILE: 29_Refactoring/tests/err.input43.c ================================================ Undeclared array:b on line 3 ================================================ FILE: 29_Refactoring/tests/err.input44.c ================================================ Unknown variable:z on line 3 ================================================ FILE: 29_Refactoring/tests/err.input45.c ================================================ & operator must be followed by an identifier on line 3 ================================================ FILE: 29_Refactoring/tests/err.input46.c ================================================ * operator must be followed by an identifier or * on line 3 ================================================ FILE: 29_Refactoring/tests/err.input47.c ================================================ ++ operator must be followed by an identifier on line 3 ================================================ FILE: 29_Refactoring/tests/err.input48.c ================================================ -- operator must be followed by an identifier on line 3 ================================================ FILE: 29_Refactoring/tests/err.input49.c ================================================ Incompatible expression in assignment on line 6 ================================================ FILE: 29_Refactoring/tests/err.input50.c ================================================ Incompatible types in binary expression on line 6 
================================================ FILE: 29_Refactoring/tests/err.input51.c ================================================ Expected '\'' at end of char literal on line 4 ================================================ FILE: 29_Refactoring/tests/err.input52.c ================================================ Unrecognised character:$ on line 5 ================================================ FILE: 29_Refactoring/tests/input01.c ================================================ int printf(char *fmt); void main() { printf("%d\n", 12 * 3); printf("%d\n", 18 - 2 * 4); printf("%d\n", 1 + 2 + 9 - 5/2 + 3*5); } ================================================ FILE: 29_Refactoring/tests/input02.c ================================================ int printf(char *fmt); void main() { int fred; int jim; fred= 5; jim= 12; printf("%d\n", fred + jim); } ================================================ FILE: 29_Refactoring/tests/input03.c ================================================ int printf(char *fmt); void main() { int x; x= 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); } ================================================ FILE: 29_Refactoring/tests/input04.c ================================================ int printf(char *fmt); void main() { int x; x= 7 < 9; printf("%d\n", x); x= 7 <= 9; printf("%d\n", x); x= 7 != 9; printf("%d\n", x); x= 7 == 7; printf("%d\n", x); x= 7 >= 7; printf("%d\n", x); x= 7 <= 7; printf("%d\n", x); x= 9 > 7; printf("%d\n", x); x= 9 >= 7; printf("%d\n", x); x= 9 != 7; printf("%d\n", x); } ================================================ FILE: 29_Refactoring/tests/input05.c ================================================ int printf(char *fmt); void main() { int i; int j; i=6; j=12; if (i < j) { printf("%d\n", i); } else { printf("%d\n", j); } } ================================================ FILE: 29_Refactoring/tests/input06.c 
================================================ int printf(char *fmt); void main() { int i; i=1; while (i <= 10) { printf("%d\n", i); i= i + 1; } } ================================================ FILE: 29_Refactoring/tests/input07.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 29_Refactoring/tests/input08.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 29_Refactoring/tests/input09.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { printf("%d\n", 2 * b - a); } } ================================================ FILE: 29_Refactoring/tests/input10.c ================================================ int printf(char *fmt); void main() { int i; char j; j= 20; printf("%d\n", j); i= 10; printf("%d\n", i); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 2; j= j + 1) { printf("%d\n", j); } } ================================================ FILE: 29_Refactoring/tests/input11.c ================================================ int printf(char *fmt); int main() { int i; char j; long k; i= 10; printf("%d\n", i); j= 20; printf("%d\n", j); k= 30; printf("%d\n", k); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 4; j= j + 1) { printf("%d\n", j); } for (k= 1; k <= 5; k= k + 1) { printf("%d\n", k); } return(i); printf("%d\n", 12345); return(3); } ================================================ FILE: 29_Refactoring/tests/input12.c ================================================ int printf(char *fmt); int fred() { return(5); } void main() { int x; x= fred(2); printf("%d\n", 
x); } ================================================ FILE: 29_Refactoring/tests/input13.c ================================================ int printf(char *fmt); int fred() { return(56); } void main() { int dummy; int result; dummy= printf("%d\n", 23); result= fred(10); dummy= printf("%d\n", result); } ================================================ FILE: 29_Refactoring/tests/input14.c ================================================ int printf(char *fmt); int fred() { return(20); } int main() { int result; printf("%d\n", 10); result= fred(15); printf("%d\n", result); printf("%d\n", fred(15)+10); return(0); } ================================================ FILE: 29_Refactoring/tests/input15.c ================================================ int printf(char *fmt); int main() { char a; char *b; char c; int d; int *e; int f; a= 18; printf("%d\n", a); b= &a; c= *b; printf("%d\n", c); d= 12; printf("%d\n", d); e= &d; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 29_Refactoring/tests/input16.c ================================================ int printf(char *fmt); int c; int d; int *e; int f; int main() { c= 12; d=18; printf("%d\n", c); e= &c + 1; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 29_Refactoring/tests/input17.c ================================================ int printf(char *fmt); int main() { char a; char *b; int d; int *e; b= &a; *b= 19; printf("%d\n", a); e= &d; *e= 12; printf("%d\n", d); return(0); } ================================================ FILE: 29_Refactoring/tests/input18.c ================================================ int printf(char *fmt); int main() { int a; int b; a= b= 34; printf("%d\n", a); printf("%d\n", b); return(0); } ================================================ FILE: 29_Refactoring/tests/input18a.c ================================================ int printf(char *fmt); int a; int *b; char c; char *d; int main() { b= &a; *b= 15; 
printf("%d\n", a); d= &c; *d= 16; printf("%d\n", c); return(0); } ================================================ FILE: 29_Refactoring/tests/input19.c ================================================ int printf(char *fmt); int a; int b; int c; int d; int e; int main() { a= 2; b= 4; c= 3; d= 2; e= (a+b) * (c+d); printf("%d\n", e); return(0); } ================================================ FILE: 29_Refactoring/tests/input20.c ================================================ int printf(char *fmt); int a; int b[25]; int main() { b[3]= 12; a= b[3]; printf("%d\n", a); return(0); } ================================================ FILE: 29_Refactoring/tests/input21.c ================================================ int printf(char *fmt); char c; char *str; int main() { c= '\n'; printf("%d\n", c); for (str= "Hello world\n"; *str != 0; str= str + 1) { printf("%c", *str); } return(0); } ================================================ FILE: 29_Refactoring/tests/input22.c ================================================ int printf(char *fmt); char a; char b; char c; int d; int e; int f; long g; long h; long i; int main() { b= 5; c= 7; a= b + c++; printf("%d\n", a); e= 5; f= 7; d= e + f++; printf("%d\n", d); h= 5; i= 7; g= h + i++; printf("%d\n", g); a= b-- + c; printf("%d\n", a); d= e-- + f; printf("%d\n", d); g= h-- + i; printf("%d\n", g); a= ++b + c; printf("%d\n", a); d= ++e + f; printf("%d\n", d); g= ++h + i; printf("%d\n", g); a= b * --c; printf("%d\n", a); d= e * --f; printf("%d\n", d); g= h * --i; printf("%d\n", g); return(0); } ================================================ FILE: 29_Refactoring/tests/input23.c ================================================ int printf(char *fmt); char *str; int x; int main() { x= -23; printf("%d\n", x); printf("%d\n", -10 * -10); x= 1; x= ~x; printf("%d\n", x); x= 2 > 5; printf("%d\n", x); x= !x; printf("%d\n", x); x= !x; printf("%d\n", x); x= 13; if (x) { printf("%d\n", 13); } x= 0; if (!x) { printf("%d\n", 14); } for (str= 
"Hello world\n"; *str; str++) { printf("%c", *str); } return(0); } ================================================ FILE: 29_Refactoring/tests/input24.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { a= 42; b= 19; printf("%d\n", a & b); printf("%d\n", a | b); printf("%d\n", a ^ b); printf("%d\n", 1 << 3); printf("%d\n", 63 >> 3); return(0); } ================================================ FILE: 29_Refactoring/tests/input25.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { char z; int y; int x; x= 10; y= 20; z= 30; printf("%d\n", x); printf("%d\n", y); printf("%d\n", z); a= 5; b= 15; c= 25; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); return(0); } ================================================ FILE: 29_Refactoring/tests/input26.c ================================================ int printf(char *fmt); int main(int a, char b, long c, int d, int e, int f, int g, int h) { int i; int j; int k; a= 13; printf("%d\n", a); b= 23; printf("%d\n", b); c= 34; printf("%d\n", c); d= 44; printf("%d\n", d); e= 54; printf("%d\n", e); f= 64; printf("%d\n", f); g= 74; printf("%d\n", g); h= 84; printf("%d\n", h); i= 94; printf("%d\n", i); j= 95; printf("%d\n", j); k= 96; printf("%d\n", k); return(0); } ================================================ FILE: 29_Refactoring/tests/input27.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int param5(int a, int b, int c, int d, int e) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param2(int a, int b) { int c; int d; int e; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", 
c); printf("%d\n", d); printf("%d\n", e); return(0); } int param0() { int a; int b; int c; int d; int e; a= 1; b= 2; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int main() { param8(1,2,3,4,5,6,7,8); param5(1,2,3,4,5); param2(1,2); param0(); return(0); } ================================================ FILE: 29_Refactoring/tests/input28.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 29_Refactoring/tests/input29.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h); int fred(int a, int b, int c); int main(); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 29_Refactoring/tests/input30.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input30.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, 
cnt); } close(zin); return (0); } ================================================ FILE: 29_Refactoring/tests/input31.c ================================================ int printf(char *fmt); int main() { int x; x= 2 + + 3 - * / ; } ================================================ FILE: 29_Refactoring/tests/input32.c ================================================ int printf(char *fmt); int main() { pizza cow llama sausage; } ================================================ FILE: 29_Refactoring/tests/input33.c ================================================ int printf(char *fmt); int main() { char *z; return(z); } ================================================ FILE: 29_Refactoring/tests/input34.c ================================================ int printf(char *fmt); int main() { int a[12]; return(0); } ================================================ FILE: 29_Refactoring/tests/input35.c ================================================ int printf(char *fmt); int fred(int a, int b) { int a; return(a); } ================================================ FILE: 29_Refactoring/tests/input36.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c); ================================================ FILE: 29_Refactoring/tests/input37.c ================================================ int printf(char *fmt); int fred(int a, char b +, int z); ================================================ FILE: 29_Refactoring/tests/input38.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c, int g); ================================================ FILE: 29_Refactoring/tests/input39.c ================================================ int printf(char *fmt); int main() { int a; } ================================================ FILE: 29_Refactoring/tests/input40.c ================================================ int printf(char 
*fmt); int main() { int a; a= 5; } ================================================ FILE: 29_Refactoring/tests/input41.c ================================================ int printf(char *fmt); void fred() { return(5); } ================================================ FILE: 29_Refactoring/tests/input42.c ================================================ int printf(char *fmt); int main() { fred(5); } ================================================ FILE: 29_Refactoring/tests/input43.c ================================================ int printf(char *fmt); int main() { int a; a= b[4]; } ================================================ FILE: 29_Refactoring/tests/input44.c ================================================ int printf(char *fmt); int main() { int a; a= z; } ================================================ FILE: 29_Refactoring/tests/input45.c ================================================ int printf(char *fmt); int main() { int a; a= &5; } ================================================ FILE: 29_Refactoring/tests/input46.c ================================================ int printf(char *fmt); int main() { int a; a= *5; } ================================================ FILE: 29_Refactoring/tests/input47.c ================================================ int printf(char *fmt); int main() { int a; a= ++5; } ================================================ FILE: 29_Refactoring/tests/input48.c ================================================ int printf(char *fmt); int main() { int a; a= --5; } ================================================ FILE: 29_Refactoring/tests/input49.c ================================================ int printf(char *fmt); int main() { int x; char y; y= x; } ================================================ FILE: 29_Refactoring/tests/input50.c ================================================ int printf(char *fmt); int main() { char *a; char *b; a= a + b; } ================================================ FILE: 
29_Refactoring/tests/input51.c ================================================ int printf(char *fmt); int main() { char a; a= 'fred'; } ================================================ FILE: 29_Refactoring/tests/input52.c ================================================ int printf(char *fmt); int main() { int a; a= $5.00; } ================================================ FILE: 29_Refactoring/tests/input53.c ================================================ int printf(char *fmt); int main() { printf("Hello world, %d\n", 23); return(0); } ================================================ FILE: 29_Refactoring/tests/input54.c ================================================ int printf(char *fmt); int main() { int i; for (i=0; i < 20; i++) { printf("Hello world, %d\n", i); } return(0); } ================================================ FILE: 29_Refactoring/tests/input55.c ================================================ int printf(char *fmt); int main(int argc, char **argv) { int i; char *argument; printf("Hello world\n"); for (i=0; i < argc; i++) { argument= *argv; argv= argv + 1; printf("Argument %d is %s\n", i, argument); } return(0); } ================================================ FILE: 29_Refactoring/tests/mktests ================================================ #!/bin/sh # Make the output files for each test # Build our compiler if needed if [ ! -f ../cwj ] then (cd ..; make) fi for i in input*c do if [ ! -f "out.$i" -a ! -f "err.$i" ] then ../cwj -o out $i 2> "err.$i" # If the err file is empty if [ ! 
-s "err.$i" ] then rm -f "err.$i" cc -o out $i ../lib/printint.c ./out > "out.$i" fi fi rm -f out out.s done ================================================ FILE: 29_Refactoring/tests/out.input01.c ================================================ 36 10 25 ================================================ FILE: 29_Refactoring/tests/out.input02.c ================================================ 17 ================================================ FILE: 29_Refactoring/tests/out.input03.c ================================================ 1 2 3 4 5 ================================================ FILE: 29_Refactoring/tests/out.input04.c ================================================ 1 1 1 1 1 1 1 1 1 ================================================ FILE: 29_Refactoring/tests/out.input05.c ================================================ 6 ================================================ FILE: 29_Refactoring/tests/out.input06.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 29_Refactoring/tests/out.input07.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 29_Refactoring/tests/out.input08.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 29_Refactoring/tests/out.input09.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 29_Refactoring/tests/out.input10.c ================================================ 20 10 1 2 3 4 5 253 254 255 0 1 ================================================ FILE: 29_Refactoring/tests/out.input11.c ================================================ 10 20 30 1 2 3 4 5 253 254 255 0 1 2 3 1 2 3 4 5 ================================================ FILE: 29_Refactoring/tests/out.input12.c ================================================ 5 
================================================ FILE: 29_Refactoring/tests/out.input13.c ================================================ 23 56 ================================================ FILE: 29_Refactoring/tests/out.input14.c ================================================ 10 20 30 ================================================ FILE: 29_Refactoring/tests/out.input15.c ================================================ 18 18 12 12 ================================================ FILE: 29_Refactoring/tests/out.input16.c ================================================ 12 18 ================================================ FILE: 29_Refactoring/tests/out.input17.c ================================================ 19 12 ================================================ FILE: 29_Refactoring/tests/out.input18.c ================================================ 34 34 ================================================ FILE: 29_Refactoring/tests/out.input18a.c ================================================ 15 16 ================================================ FILE: 29_Refactoring/tests/out.input19.c ================================================ 30 ================================================ FILE: 29_Refactoring/tests/out.input20.c ================================================ 12 ================================================ FILE: 29_Refactoring/tests/out.input21.c ================================================ 10 Hello world ================================================ FILE: 29_Refactoring/tests/out.input22.c ================================================ 12 12 12 13 13 13 13 13 13 35 35 35 ================================================ FILE: 29_Refactoring/tests/out.input23.c ================================================ -23 100 -2 0 1 0 13 14 Hello world ================================================ FILE: 29_Refactoring/tests/out.input24.c ================================================ 2 59 57 8 7 
================================================ FILE: 29_Refactoring/tests/out.input25.c ================================================ 10 20 30 5 15 25 ================================================ FILE: 29_Refactoring/tests/out.input26.c ================================================ 13 23 34 44 54 64 74 84 94 95 96 ================================================ FILE: 29_Refactoring/tests/out.input27.c ================================================ 1 2 3 4 5 6 7 8 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 ================================================ FILE: 29_Refactoring/tests/out.input28.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 29_Refactoring/tests/out.input29.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 29_Refactoring/tests/out.input30.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input30.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 29_Refactoring/tests/out.input53.c ================================================ Hello world, 23 ================================================ FILE: 29_Refactoring/tests/out.input54.c ================================================ Hello world, 0 Hello world, 1 Hello world, 2 Hello world, 3 Hello world, 4 Hello world, 5 Hello world, 6 Hello world, 7 Hello world, 8 Hello world, 9 Hello world, 10 Hello world, 11 Hello world, 12 Hello world, 13 Hello world, 14 Hello world, 15 Hello world, 16 Hello world, 17 Hello world, 18 Hello world, 19 ================================================ FILE: 
29_Refactoring/tests/out.input55.c ================================================ Hello world Argument 0 is ./out ================================================ FILE: 29_Refactoring/tests/runtests ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! -f ../cwj ] then (cd ..; make) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" # Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../cwj -o out $i ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../cwj $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.s "trial.$i" done ================================================ FILE: 29_Refactoring/tests/runtestsn ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! -f ../compn ] then (cd ..; make) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" 
# Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../compn -o out $i ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../compn $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.o out.s "trial.$i" done ================================================ FILE: 29_Refactoring/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->type = type; n->left = left; n->mid = mid; n->right = right; n->intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, int intvalue) { return (mkastnode(op, type, NULL, NULL, NULL, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, int intvalue) { return (mkastnode(op, type, left, NULL, NULL, intvalue)); } // Generate and return a new label number // just for AST dumping purposes 
static int gendumplabel(void) { static int id = 1; return (id++); } // Given an AST tree, print it out and follow the // traversal of the tree that genAST() follows void dumpAST(struct ASTnode *n, int label, int level) { int Lfalse, Lstart, Lend; switch (n->op) { case A_IF: Lfalse = gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_IF"); if (n->right) { Lend = gendumplabel(); fprintf(stdout, ", end L%d", Lend); } fprintf(stdout, "\n"); dumpAST(n->left, Lfalse, level + 2); dumpAST(n->mid, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); return; case A_WHILE: Lstart = gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_WHILE, start L%d\n", Lstart); Lend = gendumplabel(); dumpAST(n->left, Lend, level + 2); dumpAST(n->right, NOLABEL, level + 2); return; } // Reset level to -2 for A_GLUE if (n->op == A_GLUE) level = -2; // General AST node handling if (n->left) dumpAST(n->left, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); for (int i = 0; i < level; i++) fprintf(stdout, " "); switch (n->op) { case A_GLUE: fprintf(stdout, "\n\n"); return; case A_FUNCTION: fprintf(stdout, "A_FUNCTION %s\n", Symtable[n->id].name); return; case A_ADD: fprintf(stdout, "A_ADD\n"); return; case A_SUBTRACT: fprintf(stdout, "A_SUBTRACT\n"); return; case A_MULTIPLY: fprintf(stdout, "A_MULTIPLY\n"); return; case A_DIVIDE: fprintf(stdout, "A_DIVIDE\n"); return; case A_EQ: fprintf(stdout, "A_EQ\n"); return; case A_NE: fprintf(stdout, "A_NE\n"); return; case A_LT: fprintf(stdout, "A_LE\n"); return; case A_GT: fprintf(stdout, "A_GT\n"); return; case A_LE: fprintf(stdout, "A_LE\n"); return; case A_GE: fprintf(stdout, "A_GE\n"); return; case A_INTLIT: fprintf(stdout, "A_INTLIT %d\n", n->intvalue); return; case A_STRLIT: fprintf(stdout, "A_STRLIT rval label L%d\n", n->id); return; case A_IDENT: if (n->rvalue) fprintf(stdout, "A_IDENT rval %s\n", Symtable[n->id].name); else 
fprintf(stdout, "A_IDENT %s\n", Symtable[n->id].name); return; case A_ASSIGN: fprintf(stdout, "A_ASSIGN\n"); return; case A_WIDEN: fprintf(stdout, "A_WIDEN\n"); return; case A_RETURN: fprintf(stdout, "A_RETURN\n"); return; case A_FUNCCALL: fprintf(stdout, "A_FUNCCALL %s\n", Symtable[n->id].name); return; case A_ADDR: fprintf(stdout, "A_ADDR %s\n", Symtable[n->id].name); return; case A_DEREF: if (n->rvalue) fprintf(stdout, "A_DEREF rval\n"); else fprintf(stdout, "A_DEREF\n"); return; case A_SCALE: fprintf(stdout, "A_SCALE %d\n", n->size); return; case A_PREINC: fprintf(stdout, "A_PREINC %s\n", Symtable[n->id].name); return; case A_PREDEC: fprintf(stdout, "A_PREDEC %s\n", Symtable[n->id].name); return; case A_POSTINC: fprintf(stdout, "A_POSTINC\n"); return; case A_POSTDEC: fprintf(stdout, "A_POSTDEC\n"); return; case A_NEGATE: fprintf(stdout, "A_NEGATE\n"); return; default: fatald("Unknown dumpAST operator", n->op); } } ================================================ FILE: 29_Refactoring/types.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Types and type handling // Copyright (c) 2019 Warren Toomey, GPL3 // Return true if a type is an int type // of any size, false otherwise int inttype(int type) { return ((type & 0xf) == 0); } // Return true if a type is of pointer type int ptrtype(int type) { return ((type & 0xf) != 0); } // Given a primitive type, return // the type which is a pointer to it int pointer_to(int type) { if ((type & 0xf) == 0xf) fatald("Unrecognised in pointer_to: type", type); return (type + 1); } // Given a primitive pointer type, return // the type which it points to int value_at(int type) { if ((type & 0xf) == 0x0) fatald("Unrecognised in value_at: type", type); return (type - 1); } // Given an AST tree and a type which we want it to become, // possibly modify the tree by widening or scaling so that // it is compatible with this type. 
Return the original tree // if no changes occurred, a modified tree, or NULL if the // tree is not compatible with the given type. // If this will be part of a binary operation, the AST op is not zero. struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op) { int ltype; int lsize, rsize; ltype = tree->type; // Compare scalar int types if (inttype(ltype) && inttype(rtype)) { // Both types same, nothing to do if (ltype == rtype) return (tree); // Get the sizes for each type lsize = genprimsize(ltype); rsize = genprimsize(rtype); // Tree's size is too big if (lsize > rsize) return (NULL); // Widen to the right if (rsize > lsize) return (mkastunary(A_WIDEN, rtype, tree, 0)); } // For pointers on the left if (ptrtype(ltype)) { // OK is same type on right and not doing a binary op if (op == 0 && ltype == rtype) return (tree); } // We can scale only on A_ADD or A_SUBTRACT operation if (op == A_ADD || op == A_SUBTRACT) { // Left is int type, right is pointer type and the size // of the original type is >1: scale the left if (inttype(ltype) && ptrtype(rtype)) { rsize = genprimsize(value_at(rtype)); if (rsize > 1) return (mkastunary(A_SCALE, rtype, tree, rsize)); else return (tree); // Size 1, no need to scale } } // If we get here, the types are not compatible return (NULL); } ================================================ FILE: 30_Design_Composites/Makefile ================================================ HSRCS= data.h decl.h defs.h SRCS= cg.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c ARMSRCS= cg_arm.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c cwj: $(SRCS) $(HSRCS) cc -o cwj -g -Wall $(SRCS) compn: $(SRCN) $(HSRCS) cc -D__NASM__ -o compn -g -Wall $(SRCN) cwjarm: $(ARMSRCS) $(HSRCS) cc -o cwjarm -g -Wall $(ARMSRCS) cp cwjarm cwj clean: rm -f cwj cwjarm compn *.o *.s out test: cwj tests/runtests (cd tests; chmod +x 
runtests; ./runtests) armtest: cwjarm tests/runtests (cd tests; chmod +x runtests; ./runtests) testn: compn tests/runtestsn (cd tests; chmod +x runtestsn; ./runtestsn) ================================================ FILE: 30_Design_Composites/Readme.md ================================================ # Part 30: Designing Structs, Unions and Enums I'm going to sketch out my design ideas for implementing structs, unions and enums in this part of our compiler writing journey. As with functions, it's going to take a number of following steps to get it all implemented. I've also chosen to rewrite the symbol table from being a single array to being several singly-linked lists. I already mentioned my intention to do this: my ideas on how to implement the composite types made it important to rewrite the symbol table implementation at this point. Before we get into the code changes, let's look at what, exactly, are composite types. ## Composite Types, Enums and Typedefs In C, [structs](https://en.wikipedia.org/wiki/Struct_(C_programming_language)) and [unions](https://en.wikipedia.org/wiki/Union_type#C/C++) are known as *composite types*. A struct or union variable can have many members contained within. The difference is that, in a struct, the members are guaranteed not to overlap in memory whereas, in a union, we desire that all the members share the same memory locations. An example of a struct type is: ```c struct foo { int a; int b; char c; }; struct foo fred; ``` The variable `fred` is of type `struct foo`, and it has three members `a`, `b` and `c`. We can now do these three assignments to `fred`: ```c fred.a= 4; fred.b= 7; fred.c= 'x'; ``` and all three values are stored in the respective members in `fred`. 
On the other hand, here is an example of a union type: ```c union bar { int a; int b; char c; }; union bar jane; ``` If we perform these statements: ```c jane.a= 5; printf("%d\n", jane.b); ``` then the value 5 will be printed as the `a` and `b` members occupy the same memory location in the `jane` union. ### Enums I'll talk about enums here even though they don't define a composite type like the structs and unions. In C, [enums](https://en.wikipedia.org/wiki/Enumerated_type#C) are essentially a way to give names to integer values. An enum represents a list of named integer values. As an example, we can define these new identifiers: ```c enum { apple=1, banana, carrot, pear=10, peach, mango, papaya }; ``` We now have these named integer values: | Name | Value | |:------:|:-----:| | apple | 1 | | banana | 2 | | carrot | 3 | | pear | 10 | | peach | 11 | | mango | 12 | | papaya | 13 | There are some interesting issues with enums that I didn't know, which I'll cover below. ### Typedefs I should also touch on typedefs at this point, even though I won't need to implement them to get our compiler to compile itself. A [typedef](https://en.wikipedia.org/wiki/Typedef) is a way to give an existing type another name. It's often used to make naming structs and unions easier. Using a previous example, we can write: ```c typedef struct foo Husk; Husk kim; ``` `kim` is of type `Husk` which is the same as saying that `kim` is of type `struct foo`. ## Types versus Symbols? So, if structs, unions and typedefs are new types, what have they got to do with the symbol table which holds variable and function definitions? And enums are just names for integer literals, again not variables or functions. The thing is, all of these things have *names*: the name of the struct or union, the name of their members, the types of the members, the names of the enumerated values, and the names of the typedefs. We need to store these names somewhere, and we need to be able to find them. 
For the struct/union members, we need to find their underlying types. For the enumerated names, we need to look up their integer literal values. This is why I'm going to use the symbol table to store all of these things. But, we need to break up the table into several specific lists, so that we can find particular things and avoid finding things that we don't want to find. ## Redesigning the Symbol Table Structure Let's have, to start with: + a singly-linked list for the global variables and functions + a singly-linked list for the variables local to the current function + a singly-linked list for the parameters local to the current function With the old array-based symbol table, we had to skip over the function parameters when we were searching for global variables and functions. So, let's also have a list in a separate direction for the parameters of a function: ```c struct symtable { char *name; // Name of a symbol int stype; // Structural type for the symbol ... struct symtable *next; // Next symbol in one list struct symtable *member; // First parameter of a function }; ``` Let's have a look, graphically, how this will look for the following code fragment: ```c int a; char b; void func1(int x, int y); void main(int argc, char **argv) { int loc1; int loc2; } ``` This will be stored in three symbol table lists like this: ![](Figs/newsymlists.png) Note that we have three list "heads" which point to the three lists. We can now walk the global symbol list and not have to skip over the parameters, as each function keeps its parameters on its own list. When it comes time to parse a function's body, we can point the parameter list at the function's parameter list. Then, as local variables get declared, they are simply appended to the local variable list. Then, once the function's body is parsed and its assembly code generated, we can set the parameter and local lists back to being empty without disturbing the parameter list in the globally-visible function. 
This is where I'm up to with the rewrite of the symbol table. But it doesn't show how we can implement structs, unions and enums. ## Interesting Issues and Considerations Before we do see how to augment the existing symbol table node, plus singly-linked lists, to support structs, unions and enums, we first have to consider some of their more interesting issues. ### Unions We'll start with unions. Firstly, we can put a union into a struct. Secondly, the union doesn't need a name. Thirdly, a variable does not need to be declared in the struct to hold the union. As an example: ```c #include <stdio.h> struct fred { int x; union { int a; int b; }; // No need to declare a variable of this union type }; int main() { struct fred foo; foo.x= 5; foo.a= 12; // a is treated like a struct member foo.b= 13; // b is treated like a struct member printf("%d %d\n", foo.x, foo.a); // Print 5 and 13 } ``` We need to be able to support this. Anonymous unions (and structs) will be easy: we just leave the `name` in the symbol table node set to NULL. But there is no variable name for this union: I think we can implement this by having the struct's member name also set to NULL, i.e. ![](Figs/structunion1.png) ### Enums I've used enums before but I haven't really thought about implementing them that much. So I wrote the following C program to see if I could "break" enums: ```c #include <stdio.h> enum fred { bill, mary, dennis }; int fred; int mary; enum fred { chocolate, spinach, glue }; enum amy { garbage, dennis, flute, amy }; enum fred x; enum { pie, piano, axe, glyph } y; int main() { x= bill; y= pie; y= bill; x= axe; x= y; printf("%d %d %ld\n", x, y, sizeof(x)); } ``` The questions are: + Can we redeclare an enum list with different elements, e.g. `enum fred` and `enum fred`? + Can we declare a variable with the same name as an enum list, e.g. `fred`? + Can we declare a variable with the same name as an enum value, e.g. `mary`? + Can we reuse the name of an enum value from one enum list in another, e.g. 
`dennis` and `dennis`? + Can we assign a value from one enum list to a variable declared to be of a different enum list? + Can we assign between variables declared to be of different enum lists? And here is what `gcc` produces as errors and warnings: ```c z.c:4:5: error: ‘mary’ redeclared as different kind of symbol int mary; ^~~~ z.c:2:19: note: previous definition of ‘mary’ was here enum fred { bill, mary, dennis }; ^~~~ z.c:5:6: error: nested redefinition of ‘enum fred’ enum fred { chocolate, spinach, glue }; ^~~~ z.c:5:6: error: redeclaration of ‘enum fred’ z.c:2:6: note: originally defined here enum fred { bill, mary, dennis }; ^~~~ z.c:6:21: error: redeclaration of enumerator ‘dennis’ enum amy { garbage, dennis, flute, amy }; ^~~~~~ z.c:2:25: note: previous definition of ‘dennis’ was here enum fred { bill, mary, dennis }; ^~~~~~ ``` After modifying and compiling the above program a few times, the answers are: + We can't redeclare `enum fred`. This seems to be the only place where we need to remember the name of an enum list. + We can reuse the enum list identifier `fred` as a variable name. + We can't reuse the enum value identifier `mary` in another enum list, nor as a variable name. + We can assign enum values anywhere: they seem to be treated simply as names for literal integer values. + It also appears that we can replace `enum` and `enum X` as a type with the word `int`. ## Design Considerations OK, so I think we're at the point where we can start listing what we want: + a list of named and unnamed structs, with the names of the members in each struct and the type details for each member. Also, we will need the memory offset for the member from the "base" of the struct. + ditto for named and unnamed unions, although the offset will always be zero. + a list of enumerated list names and the actual enumeration names and their associated values. 
+ in the symbol table, we need the existing `type` information for non-composite types, but we'll also need a pointer to the relevant composite type, if a symbol is a struct or a union. + given that a struct can have a member which is a pointer to itself, we will need to be able to point the member's type back to the same struct. ## Changes to the Symbol Table Node Structure Below, in bold, are my changes to the current singly-linked list symbol table node:
// One node in a singly-linked symbol table list. The same layout is
// used for variables, functions, parameters, struct and union types,
// struct/union members and enum names/values; each kind of symbol
// uses only the fields that apply to it.
struct symtable {
  char *name;                   // Name of a symbol; NULL for an
                                // anonymous struct or union type
  int type;                     // Primitive type for the symbol, plus
                                // the 4-bit indirection level
  struct symtable *ctype;       // If needed, pointer to the composite type
                                // (the struct or union definition)
  int stype;                    // Structural type for the symbol:
                                // S_VARIABLE, S_FUNCTION or S_ARRAY
  int class;                    // Storage class for the symbol:
                                // C_GLOBAL, C_LOCAL or C_PARAM
  union {
    int size;                   // For variables, structs and unions, the
                                // total size in bytes. For arrays, the
                                // number of elements
    int endlabel;               // For functions, the end label
    int intvalue;               // For enum symbols, the associated value
  };
  union {
    int nelems;                 // For functions, # of params. For structs
                                // and unions, # of members
    int posn;                   // For locals and params, the negative offset
                                // from the stack base pointer. For struct and
                                // union members, the positive offset from
                                // the base of the struct/union
  };
  struct symtable *next;        // Next symbol in one list
  struct symtable *member;      // First parameter of a function, or first
};                              // member of a struct or union
Along with this new node structure, we will have six linked lists: + a singly-linked list for the global variables and functions + a singly-linked list for the variables local to the current function + a singly-linked list for the parameters local to the current function + a singly-linked list for the struct types that have been defined + a singly-linked list for the union types that have been defined + a singly-linked list for the enum names and enumerated values that have been defined ## The Use Cases for the New Symbol Table Node Let's look at how each field in the above struct will get used by the six lists I enumerated above. ### New Types We will have two new types, P_STRUCT and P_UNION, which I'll describe below. ### Global Variables and Functions, Parameter Variables, Local Variables + *name*: name of the variable or function. + *type*: type of the variable, or the function's return value, plus the 4-bit indirection level. + *ctype*: if the variable is a P_STRUCT or P_UNION, this field points at the associated struct or union definition in the relevant singly-linked list. + *stype*: structural type of the variable or function: S_VARIABLE, S_FUNCTION or S_ARRAY. + *class*: storage class for the variable: C_GLOBAL, C_LOCAL, or C_PARAM. + *size*: for variables, the total size in bytes. For arrays, the number of elements in the array. We will use this to implement `sizeof()` later. + *endlabel*: for functions, the end label which we can `return` to. + *nelems*: for functions, the number of parameters. + *posn*: for local variables and parameters, the negative offset of the variable from the stack base pointer. + *next*: the next symbol in this list. + *member*: for functions, a pointer to the first parameter's node. NULL for variables. ### Struct Types + *name*: name of the struct type, or NULL if it is anonymous. + *type*: always P_STRUCT, not really required. + *ctype*: unused. + *stype*: unused. + *class*: unused. 
+ *size*: the total size of the struct in bytes, to be used by `sizeof()` later. + *nelems*: the number of members in the struct. + *next*: the next struct type that has been defined. + *member*: a pointer to the first struct member's node. ### Union Types + *name*: name of the union type, or NULL if it is anonymous. + *type*: always P_UNION, not really required. + *ctype*: unused. + *stype*: unused. + *class*: unused. + *size*: the total size of the union in bytes, to be used by `sizeof()` later. + *nelems*: the number of members in the union. + *next*: the next union type that has been defined. + *member*: a pointer to the first union member's node. ### Struct and Union Members Each member is essentially a variable, so there is a strong similarity to normal variables. + *name*: name of the member. + *type*: type of the variable plus the 4-bit indirection level. + *ctype*: if the member is a P_STRUCT or P_UNION, this field points at the associated struct or union definition in the relevant singly-linked list. + *stype*: structural type of the member: S_VARIABLE or S_ARRAY. + *class*: unused. + *size*: for variables, the total size in bytes. For arrays, the number of elements in the array. We will use this to implement `sizeof()` later. + *posn*: the positive offset of the member from the base of the struct/union. + *next*: the next member in the struct/union. + *member*: NULL. ### Enum List Names and Values I want to store all the symbols and implicit values below: ```c enum fred { chocolate, spinach, glue }; enum amy { garbage, dennis, flute, couch }; ``` We could just link `fred` then `amy`, and use the `member` field in `fred` for the `chocolate`, `spinach`, `glue` list. Ditto the `garbage` etc. list. However, we really only care about the `fred` and `amy` names to prevent them being reused as enum list names. What we really care about are the actual enumeration names and their values. 
Therefore I propose a couple of "dummy" type values: P_ENUMLIST and P_ENUMVAL. We then build just a single-dimensional list like this: ```c fred -> chocolate-> spinach -> glue -> amy -> garbage -> dennis -> ... P_ENUMLIST P_ENUMVAL P_ENUMVAL P_ENUMVAL P_ENUMLIST P_ENUMVAL P_ENUMVAL ``` Thus, when we use the word `glue`, we only have to walk the one list. Otherwise, we'd have to find `fred`, walk `fred`'s member list, then the same for `amy`. I think the one list will be easier. ## What Has Been Changed Already Up at the top of this document, I mentioned that I've already rewritten the symbol table from being a single array to being several singly-linked lists, with these new fields in the `struct symtable` node: ```c struct symtable *next; // Next symbol in one list struct symtable *member; // First parameter of a function ``` So, let's have a quick tour of the changes. Firstly, there are no functional changes whatsoever. ### Three Symbol Table Lists We now have three symbol table lists in `data.h`: ```c // Symbol table lists struct symtable *Globhead, *Globtail; // Global variables and functions struct symtable *Loclhead, *Locltail; // Local variables struct symtable *Parmhead, *Parmtail; // Local parameters ``` and all of the functions in `sym.c` have been rewritten to use them. I have written a generic function to append to a list: ```c // Append a node to the singly-linked list pointed to by head or tail void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node) { // Check for valid pointers if (head == NULL || tail == NULL || node == NULL) fatal("Either head, tail or node is NULL in appendsym"); // Append to the list if (*tail) { (*tail)->next = node; *tail = node; } else *head = *tail = node; node->next = NULL; } ``` There is now a function `newsym()` which is given all the field values of a symbol table node. It `malloc()`s a new node, fills it in and returns it. I won't give the code here. 
For each list, there is a function to build and append a node to the list. One example is: ```c // Add a symbol to the global symbol list struct symtable *addglob(char *name, int type, int stype, int class, int size) { struct symtable *sym = newsym(name, type, stype, class, size, 0); appendsym(&Globhead, &Globtail, sym); return (sym); } ``` There is a generic function to find a symbol in a list, where the `list` pointer is the head of the list: ```c // Search for a symbol in a specific list. // Return a pointer to the found node or NULL if not found. static struct symtable *findsyminlist(char *s, struct symtable *list) { for (; list != NULL; list = list->next) if ((list->name != NULL) && !strcmp(s, list->name)) return (list); return (NULL); } ``` and there are three list-specific `findXXX()` functions. There is a function, `findsymbol()`, that tries to find a symbol in a function's parameter list first, then the function's local variables, then finally global variables. There is a function, `findlocl()`, that only searches a function's parameter list and local variables. We use this one when we are declaring local variables and need to prevent a redeclaration. Finally, there is a function, `clear_symtable()`, to reset the head and tail of all three lists to NULL, i.e. to clear all three lists. ### The Parameter and Local Lists The global symbol list is only cleared once each individual source code file is parsed. But we need to a) set up the parameter list, and b) clear the local symbol list, each time we start parsing the body of a new function. So here is how it works. When we are parsing a parameter list in `param_declaration()` in `expr.c`, we call `var_declaration()` for each parameter. This creates a symbol table node and appends it to the parameter list, i.e. `Parmhead` and `Parmtail`. When `param_declaration()` returns, `Parmhead` points at the parameter list. 
Back in `function_declaration()` which is parsing the whole function (its name, parameter list *and* any function body), the parameter list is copied into the function's symbol table node: ```c newfuncsym->nelems = paramcnt; newfuncsym->member = Parmhead; // Clear out the parameter list Parmhead = Parmtail = NULL; ``` We clear the parameter list by `NULL`ing `Parmhead` and `Parmtail`, as shown. This would mean that all these are no longer available to search for via the global parameter list. The solution is to set a global variable, `Functionid`, to the function's symbol table entry: ```c Functionid = newfuncsym; ``` So, when we call `compound_statement()` to parse the function's body, we still have the parameter list available through `Functionid->member` to do things like: + prevent a local variable being declared that matches a parameter name + use a parameter's name as a normal local variable etc. Eventually, `function_declaration()` returns the AST tree which covers the whole function back to `global_declarations()` which then passes it to `genAST()` in `gen.c` to generate the assembly code. And when `genAST()` returns, `global_declarations()` calls `freeloclsyms()` to clear the local and parameter lists and reset `Functionid` back to `NULL`. ### Other Changes of Note Well, actually a heck of a lot of code had to be rewritten due to the change to several linked lists for the symbol table. I'm not going to go through the whole code base. But some things you can spot easily. For example, symbol nodes used to be referenced with code like `Symtable[n->id]`. This is now `n->sym`. Also, a lot of the code in `cg.c` refers to symbol names, so you now see these as `n->sym->name`. Similarly, the code to dump the AST trees in `tree.c` now has a lot of `n->sym->name` in it. ## Conclusion and What's Next This part of our journey was part design and part reimplementation. 
We spent a lot of time working out what issue we will face when implementing structs, unions and enums. Then we redesigned the symbol table to support these new concepts. Finally, we rewrote the symbol table into three linked lists (for now) in preparation for the implementation of these new concepts. In the next part of our compiler writing journey, I'll probably implement the declaration of struct types, but not actually write the code for them to be used. I'll do that in the following part. With both of these done, I'll hopefully be able to implement unions in a third part. Then, enums in the fourth part. We'll see! [Next step](../31_Struct_Declarations/Readme.md) ================================================ FILE: 30_Design_Composites/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\t.text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\t.data\n", Outfile); currSeg = data_seg; } } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. static int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. 
// We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "%r10", "%r11", "%r12", "%r13", "%r9", "%r8", "%rcx", "%rdx", "%rsi", "%rdi" }; static char *breglist[] = { "%r10b", "%r11b", "%r12b", "%r13b", "%r9b", "%r8b", "%cl", "%dl", "%sil", "%dil" }; static char *dreglist[] = { "%r10d", "%r11d", "%r12d", "%r13d", "%r9d", "%r8d", "%ecx", "%edx", "%esi", "%edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(struct symtable *sym) { char *name = sym->name; struct symtable *parm, *locvar; int cnt; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the %rsp and %rsp fprintf(Outfile, "\t.globl\t%s\n" "\t.type\t%s, @function\n" "%s:\n" "\tpushq\t%%rbp\n" "\tmovq\t%%rsp, %%rbp\n", name, name, name); // Copy any in-register parameters to the stack, up to six of them // The remaining parameters are already on the stack for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) { if (cnt > 6) { parm->posn = paramOffset; paramOffset += 8; } else { parm->posn = newlocaloffset(parm->type); cgstorlocal(paramReg--, parm); } } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) { locvar->posn = newlocaloffset(locvar->type); } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\taddq\t$%d,%%rsp\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->endlabel); fprintf(Outfile, "\taddq\t$%d,%%rsp\n", stackOffset); fputs("\tpopq %rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. 
int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadglob(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name); } else // Print out the code to initialise it switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovzbq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovslq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. 
int cgloadlocal(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->posn); fprintf(Outfile, "\tmovq\t%d(%%rbp), %s\n", sym->posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->posn); } else switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->posn); fprintf(Outfile, "\tmovzbq\t%d(%%rbp), %s\n", sym->posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->posn); fprintf(Outfile, "\tmovslq\t%d(%%rbp), %s\n", sym->posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->posn); break; default: fatald("Bad type in cgloadlocal:", sym->type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tleaq\tL%d(%%rip), %s\n", label, reglist[r]); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, 
"\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tandq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\torq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txorq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshlq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshrq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tnegq\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnotq\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); return (r); } // Convert an integer value to a boolean value. 
Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s@PLT\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\taddq\t$%d, %%rsp\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpushq\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmovq\t%s, %s\n", reglist[r], reglist[FIRSTPARAMREG - argposn + 1]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsalq\t$%d, %s\n", val, reglist[r]); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %s(%%rip)\n", reglist[r], sym->name); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %s(%%rip)\n", breglist[r], sym->name); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %s(%%rip)\n", dreglist[r], sym->name); break; default: fatald("Bad type in cgstorglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %d(%%rbp)\n", reglist[r], sym->posn); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %d(%%rbp)\n", breglist[r], sym->posn); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %d(%%rbp)\n", dreglist[r], sym->posn); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int typesize; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the type typesize = cgprimsize(node->type); // Generate the global identity and the label cgdataseg(); fprintf(Outfile, "\t.globl\t%s\n", node->name); fprintf(Outfile, "%s:", node->name); // Generate the space for (int i = 0; i < node->size; i++) { switch (typesize) { case 1: fprintf(Outfile, "\t.byte\t0\n"); break; case 4: fprintf(Outfile, "\t.long\t0\n"); break; case 8: fprintf(Outfile, "\t.quad\t0\n"); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\t.byte\t%d\n", *cptr); } fprintf(Outfile, "\t.byte\t0\n"); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzbl\t%s, %%eax\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %%eax\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } cgjump(sym->endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL) fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", sym->name, reglist[r]); else fprintf(Outfile, "\tleaq\t%d(%%rbp), %s\n", sym->posn, reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzbq\t(%s), %s\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovslq\t(%s), %s\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { // Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmovb\t%s, (%s)\n", breglist[r1], reglist[r2]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 30_Design_Composites/cg_arm.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for ARMv6 on Raspberry Pi // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. static int freereg[4]; static char *reglist[4] = { "r4", "r5", "r6", "r7" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. 
static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // We have to store large integer literal values in memory. // Keep a list of them which will be output in the postamble #define MAXINTS 1024 int Intlist[MAXINTS]; static int Intslot = 0; // Determine the offset of a large integer // literal from the .L3 label. If the integer // isn't in the list, add it. static void set_int_offset(int val) { int offset = -1; // See if it is already there for (int i = 0; i < Intslot; i++) { if (Intlist[i] == val) { offset = 4 * i; break; } } // Not in the list, so add it if (offset == -1) { offset = 4 * Intslot; if (Intslot == MAXINTS) fatal("Out of int slots in set_int_offset()"); Intlist[Intslot++] = val; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L3+%d\n", offset); } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Print out the assembly postamble void cgpostamble() { // Print out the global variables fprintf(Outfile, ".L2:\n"); for (int i = 0; i < Globs; i++) { if (Symtable[i].stype == S_VARIABLE) fprintf(Outfile, "\t.word %s\n", Symtable[i].name); } // Print out the integer literals fprintf(Outfile, ".L3:\n"); for (int i = 0; i < Intslot; i++) { fprintf(Outfile, "\t.word %d\n", Intlist[i]); } } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, \%%function\n" "%s:\n" "\tpush\t{fp, lr}\n" "\tadd\tfp, sp, #4\n" "\tsub\tsp, sp, #8\n" "\tstr\tr0, [fp, #-8]\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { 
cglabel(Symtable[id].endlabel); fputs("\tsub\tsp, fp, #4\n" "\tpop\t{fp, pc}\n" "\t.align\t2\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); // If the literal value is small, do it with one instruction if (value <= 1000) fprintf(Outfile, "\tmov\t%s, #%d\n", reglist[r], value); else { set_int_offset(value); fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); } return (r); } // Determine the offset of a variable from the .L2 // label. Yes, this is inefficient code. static void set_var_offset(int id) { int offset = 0; // Walk the symbol table up to id. // Find S_VARIABLEs and add on 4 until // we get to our variable for (int i = 0; i < id; i++) { if (Symtable[i].stype == S_VARIABLE) offset += 4; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L2+%d\n", offset); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tldrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s, %s\n", reglist[r1], reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register 
with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\tmul\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { // To do a divide: r0 holds the dividend, r1 holds the divisor. // The quotient is in r0. fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r1]); fprintf(Outfile, "\tmov\tr1, %s\n", reglist[r2]); fprintf(Outfile, "\tbl\t__aeabi_idiv\n"); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r1]); free_register(r2); return (r1); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]); fprintf(Outfile, "\tbl\t%s\n", Symtable[id].name); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r]); return (r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tlsl\t%s, %s, #%d\n", reglist[r], reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tstr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgstorglob:", Symtable[id].type); } return (r); } // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { if (ptrtype(type)) return (4); switch (type) { case P_CHAR: return (1); case P_INT: case P_LONG: return (4); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Symtable[id].type); fprintf(Outfile, "\t.data\n" "\t.globl\t%s\n", Symtable[id].name); switch (typesize) { case 1: fprintf(Outfile, "%s:\t.byte\t0\n", Symtable[id].name); break; case 4: fprintf(Outfile, "%s:\t.long\t0\n", Symtable[id].name); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "moveq", "movne", "movlt", "movgt", "movle", "movge" }; // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "movne", "moveq", "movge", "movle", "movgt", "movlt" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #1\n", cmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #0\n", invcmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\tuxtb\t%s, %s\n", reglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tb\tL%d\n", l); } // List of inverted branch instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *brlist[] = { "bne", "beq", "bge", "ble", "bgt", "blt" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", brlist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[reg]); cgjump(Symtable[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. Return a new register int cgaddress(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); fprintf(Outfile, "\tmov\t%s, r3\n", reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tldrb\t%s, [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [%s]\n", reglist[r1], reglist[r2]); break; case P_INT: case P_LONG: fprintf(Outfile, "\tstr\t%s, [%s]\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 30_Design_Composites/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { 
no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\tsection .text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\tsection .data\n", Outfile); currSeg = data_seg; } } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. // We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "r10", "r11", "r12", "r13", "r9", "r8", "rcx", "rdx", "rsi", "rdi" }; static char *breglist[] = { "r10b", "r11b", "r12b", "r13b", "r9b", "r8b", "cl", "dl", "sil", "dil" }; static char *dreglist[] = { "r10d", "r11d", "r12d", "r13d", "r9d", "r8d", "ecx", "edx", "esi", "edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\textern\tprintint\n", Outfile); fputs("\textern\tprintchar\n", Outfile); fputs("\textern\topen\n", Outfile); fputs("\textern\tclose\n", Outfile); fputs("\textern\tread\n", Outfile); fputs("\textern\twrite\n", Outfile); fputs("\textern\tprintf\n", Outfile); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(struct symtable *sym) { char *name = sym->name; struct symtable *parm, *locvar; int cnt; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the rsp and rbp fprintf(Outfile, "\tglobal\t%s\n" "%s:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n", name, name); // Copy any in-register parameters to the stack, up to six of them // The remaining parameters are already on the stack for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) { if (cnt > 6) { parm->posn = paramOffset; paramOffset += 8; } else { parm->posn = newlocaloffset(parm->type); cgstorlocal(paramReg--, parm); } } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. 
for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) { locvar->posn = newlocaloffset(locvar->type); } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\tadd\trsp, %d\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->endlabel); fprintf(Outfile, "\tadd\trsp, %d\n", stackOffset); fputs("\tpop rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadglob(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); } else // Print out the code to initialise it switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, 
"\tdec\tdword [%s]\n", sym->name); fprintf(Outfile, "\tmovsx\t%s, word [%s]\n", dreglist[r], sym->name); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword [%s]\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadlocal(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->posn); fprintf(Outfile, "\tmov\t%s, [rbp+%d]\n", reglist[r], sym->posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->posn); } else switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->posn); fprintf(Outfile, "\tmovzx\t%s, byte [rbp+%d]\n", reglist[r], sym->posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", sym->posn); fprintf(Outfile, "\tmovsx\t%s, word [rbp+%d]\n", reglist[r], sym->posn); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, 
"\tdec\tdword\t[rbp+%d]\n", sym->posn); break; default: fatald("Bad type in cgloadlocal:", sym->type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, L%d\n", reglist[r], label); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tand\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\tor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshl\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshr\t%s, 
cl\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tneg\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnot\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r], breglist[r]); return (r); } // Convert an integer value to a boolean value. Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, byte %s\n", reglist[r], breglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\tadd\trsp, %d\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmov\t%s, rax\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpush\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmov\t%s, %s\n", reglist[FIRSTPARAMREG - argposn + 1], reglist[r]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsal\t%s, %d\n", reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, dreglist[r]); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\tqword\t[rbp+%d], %s\n", sym->posn, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\tbyte\t[rbp+%d], %s\n", sym->posn, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\tdword\t[rbp+%d], %s\n", sym->posn, dreglist[r]); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int typesize; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the type typesize = cgprimsize(node->type); // Generate the global identity and the label cgdataseg(); fprintf(Outfile, "\tsection\t.data\n" "\tglobal\t%s\n", node->name); fprintf(Outfile, "%s:", node->name); // Generate the space // original version for (int i = 0; i < node->size; i++) { switch(typesize) { case 1: fprintf(Outfile, "\tdb\t0\n"); break; case 4: fprintf(Outfile, "\tdd\t0\n"); break; case 8: fprintf(Outfile, "\tdq\t0\n"); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } /* compact version using times instead of loop switch(typesize) { case 1: fprintf(Outfile, "\ttimes\t%d\tdb\t0\n", node->size); break; case 4: fprintf(Outfile, "\ttimes\t%d\tdd\t0\n", node->size); break; case 8: fprintf(Outfile, "\ttimes\t%d\tdq\t0\n", node->size); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } */ } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\tdb\t%d\n", *cptr); } fprintf(Outfile, "\tdb\t0\n"); /* put string in readable format on a single line // probably went overboard with error checking int comma = 0, quote = 0, start = 1; fprintf(Outfile, "\tdb\t"); for (cptr=strvalue; *cptr; cptr++) { if ( ! 
isprint(*cptr) ) if (comma || start) { fprintf(Outfile, "%d, ", *cptr); start = 0; comma = 1; } else if (quote) { fprintf(Outfile, "\', %d, ", *cptr); comma = 1; quote = 0; } else { fprintf(Outfile, "%d, ", *cptr); comma = 1; quote = 0; } else if (start || comma) { fprintf(Outfile, "\'%c", *cptr); start = comma = 0; quote = 1; } else { fprintf(Outfile, "%c", *cptr); comma = 0; quote = 1; } } if (comma || start) fprintf(Outfile, "0\n"); else fprintf(Outfile, "\', 0\n"); */ } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzx\teax, %s\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmov\teax, %s\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } cgjump(sym->endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL) fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r], sym->name); else fprintf(Outfile, "\tlea\t%s, [rbp+%d]\n", reglist[r], sym->posn); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovsx\t%s, dword [%s]\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { //Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmov\t[%s], byte %s\n", reglist[r2], breglist[r1]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 30_Design_Composites/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Putback; // Character put back by scanner extern_ struct symtable *Functionid; // Symbol ptr of the current function extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ char *Outfilename; // Name of file we opened as Outfile extern_ struct token Token; // Last token scanned extern_ char Text[TEXTLEN + 1]; // Last identifier scanned // Symbol table lists extern_ struct symtable *Globhead, *Globtail; // Global variables and 
functions extern_ struct symtable *Loclhead, *Locltail; // Local variables extern_ struct symtable *Parmhead, *Parmtail; // Local parameters extern_ struct symtable *Comphead, *Comptail; // Composite types // Command-line flags extern_ int O_dumpAST; // If true, dump the AST trees extern_ int O_keepasm; // If true, keep any assembly files extern_ int O_assemble; // If true, assemble the assembly files extern_ int O_dolink; // If true, link the object files extern_ int O_verbose; // If true, print info on compilation stages ================================================ FILE: 30_Design_Composites/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 // Parse the current token and return // a primitive type enum value. Also // scan in the next token int parse_type(void) { int type; switch (Token.token) { case T_VOID: type = P_VOID; break; case T_CHAR: type = P_CHAR; break; case T_INT: type = P_INT; break; case T_LONG: type = P_LONG; break; default: fatald("Illegal type, token", Token.token); } // Scan in one or more further '*' tokens // and determine the correct pointer type while (1) { scan(&Token); if (Token.token != T_STAR) break; type = pointer_to(type); } // We leave with the next token already scanned return (type); } // variable_declaration: type identifier ';' // | type identifier '[' INTLIT ']' ';' // ; // // Parse the declaration of a scalar variable or an array // with a given size. // The identifier has been scanned & we have the type. 
// class is the variable's class // Return the pointer to variable's entry in the symbol table struct symtable *var_declaration(int type, int class) { struct symtable *sym = NULL; // See if this has already been declared switch (class) { case C_GLOBAL: if (findglob(Text) != NULL) fatals("Duplicate global variable declaration", Text); case C_LOCAL: case C_PARAM: if (findlocl(Text) != NULL) fatals("Duplicate local variable declaration", Text); } // Text now has the identifier's name. // If the next token is a '[' if (Token.token == T_LBRACKET) { // Skip past the '[' scan(&Token); // Check we have an array size if (Token.token == T_INTLIT) { // Add this as a known array and generate its space in assembly. // We treat the array as a pointer to its elements' type switch (class) { case C_GLOBAL: sym = addglob(Text, pointer_to(type), S_ARRAY, class, Token.intvalue); break; case C_LOCAL: case C_PARAM: fatal("For now, declaration of local arrays is not implemented"); } } // Ensure we have a following ']' scan(&Token); match(T_RBRACKET, "]"); } else { // Add this as a known scalar // and generate its space in assembly switch (class) { case C_GLOBAL: sym = addglob(Text, type, S_VARIABLE, class, 1); break; case C_LOCAL: sym = addlocl(Text, type, S_VARIABLE, class, 1); break; case C_PARAM: sym = addparm(Text, type, S_VARIABLE, class, 1); break; } } return (sym); } // param_declaration: // | variable_declaration // | variable_declaration ',' param_declaration // // Parse the parameters in parentheses after the function name. // Add them as symbols to the symbol table and return the number // of parameters. If funcsym is not NULL, there is an existing function // prototype, and the function has this symbol table pointer. 
static int param_declaration(struct symtable *funcsym) { int type; int paramcnt = 0; struct symtable *protoptr = NULL; // If there is a prototype, get the pointer // to the first prototype parameter if (funcsym != NULL) protoptr = funcsym->member; // Loop until the final right parentheses while (Token.token != T_RPAREN) { // Get the type and identifier // and add it to the symbol table type = parse_type(); ident(); // We have an existing prototype. // Check that this type matches the prototype. if (protoptr != NULL) { if (type != protoptr->type) fatald("Type doesn't match prototype for parameter", paramcnt + 1); protoptr = protoptr->next; } else { // Add a new parameter to the new parameter list var_declaration(type, C_PARAM); } paramcnt++; // Must have a ',' or ')' at this point switch (Token.token) { case T_COMMA: scan(&Token); break; case T_RPAREN: break; default: fatald("Unexpected token in parameter list", Token.token); } } // Check that the number of parameters in this list matches // any existing prototype if ((funcsym != NULL) && (paramcnt != funcsym->nelems)) fatals("Parameter count mismatch for function", funcsym->name); // Return the count of parameters return (paramcnt); } // // function_declaration: type identifier '(' parameter_list ')' ; // | type identifier '(' parameter_list ')' compound_statement ; // // Parse the declaration of function. // The identifier has been scanned & we have the type. struct ASTnode *function_declaration(int type) { struct ASTnode *tree, *finalstmt; struct symtable *oldfuncsym, *newfuncsym = NULL; int endlabel, paramcnt; // Text has the identifier's name. If this exists and is a // function, get the id. Otherwise, set oldfuncsym to NULL. 
if ((oldfuncsym = findsymbol(Text)) != NULL) if (oldfuncsym->stype != S_FUNCTION) oldfuncsym = NULL; // If this is a new function declaration, get a // label-id for the end label, and add the function // to the symbol table, if (oldfuncsym == NULL) { endlabel = genlabel(); newfuncsym = addglob(Text, type, S_FUNCTION, C_GLOBAL, endlabel); } // Scan in the '(', any parameters and the ')'. // Pass in any existing function prototype pointer lparen(); paramcnt = param_declaration(oldfuncsym); rparen(); // If this is a new function declaration, update the // function symbol entry with the number of parameters. // Also copy the parameter list into the function's node. if (newfuncsym) { newfuncsym->nelems = paramcnt; newfuncsym->member = Parmhead; oldfuncsym = newfuncsym; } // Clear out the parameter list Parmhead = Parmtail = NULL; // Declaration ends in a semicolon, only a prototype. if (Token.token == T_SEMI) { scan(&Token); return (NULL); } // This is not just a prototype. // Set the Functionid global to the function's symbol pointer Functionid = oldfuncsym; // Get the AST tree for the compound statement tree = compound_statement(); // If the function type isn't P_VOID .. if (type != P_VOID) { // Error if no statements in the function if (tree == NULL) fatal("No statements in function with non-void type"); // Check that the last AST operation in the // compound statement was a return statement finalstmt = (tree->op == A_GLUE) ? 
tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); } // Return an A_FUNCTION node which has the function's symbol pointer // and the compound statement sub-tree return (mkastunary(A_FUNCTION, type, tree, oldfuncsym, endlabel)); } // Parse one or more global declarations, either // variables or functions void global_declarations(void) { struct ASTnode *tree; int type; while (1) { // We have to read past the type and identifier // to see either a '(' for a function declaration // or a ',' or ';' for a variable declaration. // Text is filled in by the ident() call. type = parse_type(); ident(); if (Token.token == T_LPAREN) { // Parse the function declaration tree = function_declaration(type); // Only a function prototype, no code if (tree == NULL) continue; // A real function, generate the assembly code for it if (O_dumpAST) { dumpAST(tree, NOLABEL, 0); fprintf(stdout, "\n\n"); } genAST(tree, NOLABEL, 0); // Now free the symbols associated // with this function freeloclsyms(); } else { // Parse the global variable declaration // and skip past the trailing semicolon var_declaration(type, C_GLOBAL); semi(); } // Stop when we have reached EOF if (Token.token == T_EOF) break; } } ================================================ FILE: 30_Design_Composites/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c void reject_token(struct token *t); int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue); struct ASTnode *mkastleaf(int op, int type, struct symtable *sym, int intvalue); struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, struct symtable *sym, int intvalue); void dumpAST(struct ASTnode *n, int label, int parentASTop); // gen.c int genlabel(void); int 
genAST(struct ASTnode *n, int reg, int parentASTop); void genpreamble(); void genpostamble(); void genfreeregs(); void genglobsym(struct symtable *node); int genglobstr(char *strvalue); int genprimsize(int type); void genreturn(int reg, int id); // cg.c void cgtextseg(); void cgdataseg(); void freeall_registers(void); void cgpreamble(); void cgpostamble(); void cgfuncpreamble(struct symtable *sym); void cgfuncpostamble(struct symtable *sym); int cgloadint(int value, int type); int cgloadglob(struct symtable *sym, int op); int cgloadlocal(struct symtable *sym, int op); int cgloadglobstr(int label); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdiv(int r1, int r2); int cgshlconst(int r, int val); int cgcall(struct symtable *sym, int numargs); void cgcopyarg(int r, int argposn); int cgstorglob(int r, struct symtable *sym); int cgstorlocal(int r, struct symtable *sym); void cgglobsym(struct symtable *node); void cgglobstr(int l, char *strvalue); int cgcompare_and_set(int ASTop, int r1, int r2); int cgcompare_and_jump(int ASTop, int r1, int r2, int label); void cglabel(int l); void cgjump(int l); int cgwiden(int r, int oldtype, int newtype); int cgprimsize(int type); void cgreturn(int reg, struct symtable *sym); int cgaddress(struct symtable *sym); int cgderef(int r, int type); int cgstorderef(int r1, int r2, int type); int cgnegate(int r); int cginvert(int r); int cglognot(int r); int cgboolean(int r, int op, int label); int cgand(int r1, int r2); int cgor(int r1, int r2); int cgxor(int r1, int r2); int cgshl(int r1, int r2); int cgshr(int r1, int r2); // expr.c struct ASTnode *binexpr(int ptp); // stmt.c struct ASTnode *compound_statement(void); // misc.c void match(int t, char *what); void semi(void); void lbrace(void); void rbrace(void); void lparen(void); void rparen(void); void ident(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c void 
appendsym(struct symtable **head, struct symtable **tail, struct symtable *node); struct symtable *newsym(char *name, int type, int stype, int class, int size, int posn); struct symtable *addglob(char *name, int type, int stype, int class, int size); struct symtable *addlocl(char *name, int type, int stype, int class, int size); struct symtable *addparm(char *name, int type, int stype, int class, int size); struct symtable *findglob(char *s); struct symtable *findlocl(char *s); struct symtable *findsymbol(char *s); struct symtable *findcomposite(char *s); void clear_symtable(void); void freeloclsyms(void); // decl.c struct symtable *var_declaration(int type, int class); struct ASTnode *function_declaration(int type); void global_declarations(void); // types.c int inttype(int type); int ptrtype(int type); int parse_type(void); int pointer_to(int type); int value_at(int type); struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op); ================================================ FILE: 30_Design_Composites/defs.h ================================================ #include #include #include #include // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 enum { TEXTLEN = 512 // Length of identifiers in input }; // Commands and default filenames #define AOUT "a.out" #ifdef __NASM__ #define ASCMD "nasm -f elf64 -o " #define LDCMD "cc -no-pie -fno-plt -Wall -o " #else #define ASCMD "as -o " #define LDCMD "cc -o " #endif // Token types enum { T_EOF, // Binary operators T_ASSIGN, T_LOGOR, T_LOGAND, T_OR, T_XOR, T_AMPER, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, T_LSHIFT, T_RSHIFT, T_PLUS, T_MINUS, T_STAR, T_SLASH, // Other operators T_INC, T_DEC, T_INVERT, T_LOGNOT, // Type keywords T_VOID, T_CHAR, T_INT, T_LONG, // Other keywords T_IF, T_ELSE, T_WHILE, T_FOR, T_RETURN, // Structural tokens T_INTLIT, T_STRLIT, T_SEMI, T_IDENT, T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, T_LBRACKET, T_RBRACKET, T_COMMA }; // Token structure struct token { int token; // 
Token type, from the enum list above int intvalue; // For T_INTLIT, the integer value }; // AST node types. The first few line up // with the related tokens enum { A_ASSIGN = 1, A_LOGOR, A_LOGAND, A_OR, A_XOR, A_AND, A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_LSHIFT, A_RSHIFT, A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_INTLIT, A_STRLIT, A_IDENT, A_GLUE, A_IF, A_WHILE, A_FUNCTION, A_WIDEN, A_RETURN, A_FUNCCALL, A_DEREF, A_ADDR, A_SCALE, A_PREINC, A_PREDEC, A_POSTINC, A_POSTDEC, A_NEGATE, A_INVERT, A_LOGNOT, A_TOBOOL }; // Primitive types. The bottom 4 bits is an integer // value that represents the level of indirection, // e.g. 0= no pointer, 1= pointer, 2= pointer pointer etc. enum { P_NONE, P_VOID = 16, P_CHAR = 32, P_INT = 48, P_LONG = 64 }; // Structural types enum { S_VARIABLE, S_FUNCTION, S_ARRAY }; // Storage classes enum { C_GLOBAL = 1, // Globally visible symbol C_LOCAL, // Locally visible symbol C_PARAM // Locally visible function parameter }; // Symbol table structure // XXX Put some comments here struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol int stype; // Structural type for the symbol int class; // Storage class for the symbol union { int size; // Number of elements in the symbol int endlabel; // For functions, the end label }; union { int nelems; // For functions, # of params int posn; // For locals, the negative offset // from the stack base pointer }; struct symtable *next; // Next symbol in one list struct symtable *member; // First member of a function, struct, }; // union or enum // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates int rvalue; // True if the node is an rvalue struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; struct symtable *sym; // For many AST nodes, the pointer to union { // the symbol in the symbol table int intvalue; // For 
A_INTLIT, the integer value int size; // For A_SCALE, the size to scale by }; }; enum { NOREG = -1, // Use NOREG when the AST generation // functions have no register to return NOLABEL = 0 // Use NOLABEL when we have no label to // pass to genAST() }; ================================================ FILE: 30_Design_Composites/expr.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of expressions // Copyright (c) 2019 Warren Toomey, GPL3 // expression_list: // | expression // | expression ',' expression_list // ; // Parse a list of zero or more comma-separated expressions and // return an AST composed of A_GLUE nodes with the left-hand child // being the sub-tree of previous expressions (or NULL) and the right-hand // child being the next expression. Each A_GLUE node will have size field // set to the number of expressions in the tree at this point. If no // expressions are parsed, NULL is returned static struct ASTnode *expression_list(void) { struct ASTnode *tree = NULL; struct ASTnode *child = NULL; int exprcount = 0; // Loop until the final right parentheses while (Token.token != T_RPAREN) { // Parse the next expression and increment the expression count child = binexpr(0); exprcount++; // Build an A_GLUE AST node with the previous tree as the left child // and the new expression as the right child. Store the expression count. tree = mkastnode(A_GLUE, P_NONE, tree, NULL, child, NULL, exprcount); // Must have a ',' or ')' at this point switch (Token.token) { case T_COMMA: scan(&Token); break; case T_RPAREN: break; default: fatald("Unexpected token in expression list", Token.token); } } // Return the tree of expressions return (tree); } // Parse a function call and return its AST static struct ASTnode *funccall(void) { struct ASTnode *tree; struct symtable *funcptr; // Check that the identifier has been defined as a function, // then make a leaf node for it. 
if ((funcptr = findsymbol(Text)) == NULL || funcptr->stype != S_FUNCTION) { fatals("Undeclared function", Text); } // Get the '(' lparen(); // Parse the argument expression list tree = expression_list(); // XXX Check type of each argument against the function's prototype // Build the function call AST node. Store the // function's return type as this node's type. // Also record the function's symbol-id tree = mkastunary(A_FUNCCALL, funcptr->type, tree, funcptr, 0); // Get the ')' rparen(); return (tree); } // Parse the index into an array and return an AST tree for it static struct ASTnode *array_access(void) { struct ASTnode *left, *right; struct symtable *aryptr; // Check that the identifier has been defined as an array // then make a leaf node for it that points at the base if ((aryptr = findsymbol(Text)) == NULL || aryptr->stype != S_ARRAY) { fatals("Undeclared array", Text); } left = mkastleaf(A_ADDR, aryptr->type, aryptr, 0); // Get the '[' scan(&Token); // Parse the following expression right = binexpr(0); // Get the ']' match(T_RBRACKET, "]"); // Ensure that this is of int type if (!inttype(right->type)) fatal("Array index is not of integer type"); // Scale the index by the size of the element's type right = modify_type(right, left->type, A_ADD); // Return an AST tree where the array's base has the offset // added to it, and dereference the element. Still an lvalue // at this point. left = mkastnode(A_ADD, aryptr->type, left, NULL, right, NULL, 0); left = mkastunary(A_DEREF, value_at(left->type), left, NULL, 0); return (left); } // Parse a postfix expression and return // an AST node representing it. The // identifier is already in Text. static struct ASTnode *postfix(void) { struct ASTnode *n; struct symtable *varptr; // Scan in the next token to see if we have a postfix expression scan(&Token); // Function call if (Token.token == T_LPAREN) return (funccall()); // An array reference if (Token.token == T_LBRACKET) return (array_access()); // A variable. 
Check that the variable exists. if ((varptr = findsymbol(Text)) == NULL || varptr->stype != S_VARIABLE) fatals("Unknown variable", Text); switch (Token.token) { // Post-increment: skip over the token case T_INC: scan(&Token); n = mkastleaf(A_POSTINC, varptr->type, varptr, 0); break; // Post-decrement: skip over the token case T_DEC: scan(&Token); n = mkastleaf(A_POSTDEC, varptr->type, varptr, 0); break; // Just a variable reference default: n = mkastleaf(A_IDENT, varptr->type, varptr, 0); } return (n); } // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; int id; switch (Token.token) { case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. // Make it a P_CHAR if it's within the P_CHAR range if ((Token.intvalue) >= 0 && (Token.intvalue < 256)) n = mkastleaf(A_INTLIT, P_CHAR, NULL, Token.intvalue); else n = mkastleaf(A_INTLIT, P_INT, NULL, Token.intvalue); break; case T_STRLIT: // For a STRLIT token, generate the assembly for it. // Then make a leaf AST node for it. id is the string's label. id = genglobstr(Text); n = mkastleaf(A_STRLIT, pointer_to(P_CHAR), NULL, id); break; case T_IDENT: return (postfix()); case T_LPAREN: // Beginning of a parenthesised expression, skip the '('. // Scan in the expression and the right parenthesis scan(&Token); n = binexpr(0); rparen(); return (n); default: fatald("Expecting a primary expression, got token", Token.token); } // Scan in the next token and return the leaf node scan(&Token); return (n); } // Convert a binary operator token into a binary AST operation. // We rely on a 1:1 mapping from token to AST operation static int binastop(int tokentype) { if (tokentype > T_EOF && tokentype <= T_SLASH) return (tokentype); fatald("Syntax error, token", tokentype); return (0); // Keep -Wall happy } // Return true if a token is right-associative, // false otherwise. 
static int rightassoc(int tokentype) { if (tokentype == T_ASSIGN) return (1); return (0); } // Operator precedence for each token. Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 20, 30, // T_EOF, T_ASSIGN, T_LOGOR, T_LOGAND 40, 50, 60, // T_OR, T_XOR, T_AMPER 70, 70, // T_EQ, T_NE 80, 80, 80, 80, // T_LT, T_GT, T_LE, T_GE 90, 90, // T_LSHIFT, T_RSHIFT 100, 100, // T_PLUS, T_MINUS 110, 110 // T_STAR, T_SLASH }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec; if (tokentype > T_SLASH) fatald("Token with no precedence in op_precedence:", tokentype); prec = OpPrec[tokentype]; if (prec == 0) fatald("Syntax error, token", tokentype); return (prec); } // prefix_expression: primary // | '*' prefix_expression // | '&' prefix_expression // | '-' prefix_expression // | '++' prefix_expression // | '--' prefix_expression // ; // Parse a prefix expression and return // a sub-tree representing it. 
struct ASTnode *prefix(void) { struct ASTnode *tree; switch (Token.token) { case T_AMPER: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Ensure that it's an identifier if (tree->op != A_IDENT) fatal("& operator must be followed by an identifier"); // Now change the operator to A_ADDR and the type to // a pointer to the original type tree->op = A_ADDR; tree->type = pointer_to(tree->type); break; case T_STAR: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's either another deref or an // identifier if (tree->op != A_IDENT && tree->op != A_DEREF) fatal("* operator must be followed by an identifier or *"); // Prepend an A_DEREF operation to the tree tree = mkastunary(A_DEREF, value_at(tree->type), tree, NULL, 0); break; case T_MINUS: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_NEGATE operation to the tree and // make the child an rvalue. Because chars are unsigned, // also widen this to int so that it's signed tree->rvalue = 1; tree = modify_type(tree, P_INT, 0); tree = mkastunary(A_NEGATE, tree->type, tree, NULL, 0); break; case T_INVERT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_INVERT operation to the tree and // make the child an rvalue. tree->rvalue = 1; tree = mkastunary(A_INVERT, tree->type, tree, NULL, 0); break; case T_LOGNOT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_LOGNOT operation to the tree and // make the child an rvalue. 
tree->rvalue = 1; tree = mkastunary(A_LOGNOT, tree->type, tree, NULL, 0); break; case T_INC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("++ operator must be followed by an identifier"); // Prepend an A_PREINC operation to the tree tree = mkastunary(A_PREINC, tree->type, tree, NULL, 0); break; case T_DEC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("-- operator must be followed by an identifier"); // Prepend an A_PREDEC operation to the tree tree = mkastunary(A_PREDEC, tree->type, tree, NULL, 0); break; default: tree = primary(); } return (tree); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; struct ASTnode *ltemp, *rtemp; int ASTop; int tokentype; // Get the tree on the left. // Fetch the next token at the same time. 
left = prefix(); // If we hit one of several terminating tokens, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA) { left->rvalue = 1; return (left); } // While the precedence of this token is more than that of the // previous token precedence, or it's right associative and // equal to the previous token's precedence while ((op_precedence(tokentype) > ptp) || (rightassoc(tokentype) && op_precedence(tokentype) == ptp)) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Determine the operation to be performed on the sub-trees ASTop = binastop(tokentype); if (ASTop == A_ASSIGN) { // Assignment // Make the right tree into an rvalue right->rvalue = 1; // Ensure the right's type matches the left right = modify_type(right, left->type, 0); if (right == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree. However, switch // left and right around, so that the right expression's // code will be generated before the left expression ltemp = left; left = right; right = ltemp; } else { // We are not doing an assignment, so both trees should be rvalues // Convert both trees into rvalue if they are lvalue trees left->rvalue = 1; right->rvalue = 1; // Ensure the two types are compatible by trying // to modify each tree to match the other's type. ltemp = modify_type(left, right->type, ASTop); rtemp = modify_type(right, left->type, ASTop); if (ltemp == NULL && rtemp == NULL) fatal("Incompatible types in binary expression"); if (ltemp != NULL) left = ltemp; if (rtemp != NULL) right = rtemp; } // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. left = mkastnode(binastop(tokentype), left->type, left, NULL, right, NULL, 0); // Update the details of the current token. 
// If we hit a terminating token, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA) { left->rvalue = 1; return (left); } } // Return the tree we have when the precedence // is the same or lower left->rvalue = 1; return (left); } ================================================ FILE: 30_Design_Composites/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number int genlabel(void) { static int id = 1; return (id++); } // Generate the code for an IF statement // and an optional ELSE clause static int genIF(struct ASTnode *n) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. // When there is no ELSE clause, Lfalse _is_ // the ending label! Lfalse = genlabel(); if (n->right) Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. genAST(n->left, Lfalse, n->op); genfreeregs(); // Generate the true compound statement genAST(n->mid, NOLABEL, n->op); genfreeregs(); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOLABEL, n->op); genfreeregs(); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = genlabel(); Lend = genlabel(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. 
genAST(n->left, Lend, n->op); genfreeregs(); // Generate the compound statement for the body genAST(n->right, NOLABEL, n->op); genfreeregs(); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Generate the code to copy the arguments of a // function call to its parameters, then call the // function itself. Return the register that holds // the function's return value. static int gen_funccall(struct ASTnode *n) { struct ASTnode *gluetree = n->left; int reg; int numargs = 0; // If there is a list of arguments, walk this list // from the last argument (right-hand child) to the // first while (gluetree) { // Calculate the expression's value reg = genAST(gluetree->right, NOLABEL, gluetree->op); // Copy this into the n'th function parameter: size is 1, 2, 3, ... cgcopyarg(reg, gluetree->size); // Keep the first (highest) number of arguments if (numargs == 0) numargs = gluetree->size; genfreeregs(); gluetree = gluetree->left; } // Call the function, clean up the stack (based on numargs), // and return its result return (cgcall(n->sym, numargs)); } // Given an AST, an optional label, and the AST op // of the parent, generate assembly code recursively. // Return the register id with the tree's final value. 
int genAST(struct ASTnode *n, int label, int parentASTop) { int leftreg, rightreg; // We have some specific AST node handling at the top // so that we don't evaluate the child sub-trees immediately switch (n->op) { case A_IF: return (genIF(n)); case A_WHILE: return (genWHILE(n)); case A_FUNCCALL: return (gen_funccall(n)); case A_GLUE: // Do each child statement, and free the // registers after each child genAST(n->left, NOLABEL, n->op); genfreeregs(); genAST(n->right, NOLABEL, n->op); genfreeregs(); return (NOREG); case A_FUNCTION: // Generate the function's preamble before the code // in the child sub-tree cgfuncpreamble(n->sym); genAST(n->left, NOLABEL, n->op); cgfuncpostamble(n->sym); return (NOREG); } // General AST node handling below // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, NOLABEL, n->op); if (n->right) rightreg = genAST(n->right, NOLABEL, n->op); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdiv(leftreg, rightreg)); case A_AND: return (cgand(leftreg, rightreg)); case A_OR: return (cgor(leftreg, rightreg)); case A_XOR: return (cgxor(leftreg, rightreg)); case A_LSHIFT: return (cgshl(leftreg, rightreg)); case A_RSHIFT: return (cgshr(leftreg, rightreg)); case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, compare registers // and set one to 1 or 0 based on the comparison. 
if (parentASTop == A_IF || parentASTop == A_WHILE) return (cgcompare_and_jump(n->op, leftreg, rightreg, label)); else return (cgcompare_and_set(n->op, leftreg, rightreg)); case A_INTLIT: return (cgloadint(n->intvalue, n->type)); case A_STRLIT: return (cgloadglobstr(n->intvalue)); case A_IDENT: // Load our value if we are an rvalue // or we are being dereferenced if (n->rvalue || parentASTop == A_DEREF) { if (n->sym->class == C_GLOBAL) { return (cgloadglob(n->sym, n->op)); } else { return (cgloadlocal(n->sym, n->op)); } } else return (NOREG); case A_ASSIGN: // Are we assigning to an identifier or through a pointer? switch (n->right->op) { case A_IDENT: if (n->right->sym->class == C_GLOBAL) return (cgstorglob(leftreg, n->right->sym)); else return (cgstorlocal(leftreg, n->right->sym)); case A_DEREF: return (cgstorderef(leftreg, rightreg, n->right->type)); default: fatald("Can't A_ASSIGN in genAST(), op", n->op); } case A_WIDEN: // Widen the child's type to the parent's type return (cgwiden(leftreg, n->left->type, n->type)); case A_RETURN: cgreturn(leftreg, Functionid); return (NOREG); case A_ADDR: return (cgaddress(n->sym)); case A_DEREF: // If we are an rvalue, dereference to get the value we point at, // otherwise leave it for A_ASSIGN to store through the pointer if (n->rvalue) return (cgderef(leftreg, n->left->type)); else return (leftreg); case A_SCALE: // Small optimisation: use shift if the // scale value is a known power of two switch (n->size) { case 2: return (cgshlconst(leftreg, 1)); case 4: return (cgshlconst(leftreg, 2)); case 8: return (cgshlconst(leftreg, 3)); default: // Load a register with the size and // multiply the leftreg by this size rightreg = cgloadint(n->size, P_INT); return (cgmul(leftreg, rightreg)); } case A_POSTINC: case A_POSTDEC: // Load and decrement the variable's value into a register // and post increment/decrement it if (n->sym->class == C_GLOBAL) return (cgloadglob(n->sym, n->op)); else return (cgloadlocal(n->sym, n->op)); case 
A_PREINC: case A_PREDEC: // Load and decrement the variable's value into a register // and pre increment/decrement it if (n->left->sym->class == C_GLOBAL) return (cgloadglob(n->left->sym, n->op)); else return (cgloadlocal(n->left->sym, n->op)); case A_NEGATE: return (cgnegate(leftreg)); case A_INVERT: return (cginvert(leftreg)); case A_LOGNOT: return (cglognot(leftreg)); case A_TOBOOL: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, set the register // to 0 or 1 based on it's zeroeness or non-zeroeness return (cgboolean(leftreg, parentASTop, label)); default: fatald("Unknown AST operator", n->op); } return (NOREG); // Keep -Wall happy } void genpreamble() { cgpreamble(); } void genpostamble() { cgpostamble(); } void genfreeregs() { freeall_registers(); } void genglobsym(struct symtable *node) { cgglobsym(node); } int genglobstr(char *strvalue) { int l = genlabel(); cgglobstr(l, strvalue); return (l); } int genprimsize(int type) { return (cgprimsize(type)); } ================================================ FILE: 30_Design_Composites/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Given a string with a '.' and at least a 1-character suffix // after the '.', change the suffix to be the given character. // Return the new string or NULL if the original string could // not be modified char *alter_suffix(char *str, char suffix) { char *posn; char *newstr; // Clone the string if ((newstr = strdup(str)) == NULL) return (NULL); // Find the '.' 
if ((posn = strrchr(newstr, '.')) == NULL) return (NULL); // Ensure there is a suffix posn++; if (*posn == '\0') return (NULL); // Change the suffix and NUL-terminate the string *posn++ = suffix; *posn = '\0'; return (newstr); } // Given an input filename, compile that file // down to assembly code. Return the new file's name static char *do_compile(char *filename) { Outfilename = alter_suffix(filename, 's'); if (Outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .c on the end\n", filename); exit(1); } // Open up the input file if ((Infile = fopen(filename, "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", filename, strerror(errno)); exit(1); } // Create the output file if ((Outfile = fopen(Outfilename, "w")) == NULL) { fprintf(stderr, "Unable to create %s: %s\n", Outfilename, strerror(errno)); exit(1); } Line = 1; // Reset the scanner Putback = '\n'; clear_symtable(); // Clear the symbol table if (O_verbose) printf("compiling %s\n", filename); scan(&Token); // Get the first token from the input genpreamble(); // Output the preamble global_declarations(); // Parse the global declarations genpostamble(); // Output the postamble fclose(Outfile); // Close the output file return (Outfilename); } // Given an input filename, assemble that file // down to object code. Return the object filename char *do_assemble(char *filename) { char cmd[TEXTLEN]; int err; char *outfilename = alter_suffix(filename, 'o'); if (outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .s on the end\n", filename); exit(1); } // Build the assembly command and run it snprintf(cmd, TEXTLEN, "%s %s %s", ASCMD, outfilename, filename); if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Assembly of %s failed\n", filename); exit(1); } return (outfilename); } // Given a list of object files and an output filename, // link all of the object filenames together. 
void do_link(char *outfilename, char *objlist[]) { int cnt, size = TEXTLEN; char cmd[TEXTLEN], *cptr; int err; // Start with the linker command and the output file cptr = cmd; cnt = snprintf(cptr, size, "%s %s ", LDCMD, outfilename); cptr += cnt; size -= cnt; // Now append each object file while (*objlist != NULL) { cnt = snprintf(cptr, size, "%s ", *objlist); cptr += cnt; size -= cnt; objlist++; } if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Linking failed\n"); exit(1); } } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s [-vcST] [-o outfile] file [file ...]\n", prog); fprintf(stderr, " -v give verbose output of the compilation stages\n"); fprintf(stderr, " -c generate object files but don't link them\n"); fprintf(stderr, " -S generate assembly files but don't link them\n"); fprintf(stderr, " -T dump the AST trees for each input file\n"); fprintf(stderr, " -o outfile, produce the outfile executable file\n"); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. 
enum { MAXOBJ = 100 }; int main(int argc, char *argv[]) { char *outfilename = AOUT; char *asmfile, *objfile; char *objlist[MAXOBJ]; int i, objcnt = 0; // Initialise our variables O_dumpAST = 0; O_keepasm = 0; O_assemble = 0; O_verbose = 0; O_dolink = 1; // Scan for command-line options for (i = 1; i < argc; i++) { // No leading '-', stop scanning for options if (*argv[i] != '-') break; // For each option in this argument for (int j = 1; (*argv[i] == '-') && argv[i][j]; j++) { switch (argv[i][j]) { case 'o': outfilename = argv[++i]; // Save & skip to next argument break; case 'T': O_dumpAST = 1; break; case 'c': O_assemble = 1; O_keepasm = 0; O_dolink = 0; break; case 'S': O_keepasm = 1; O_assemble = 0; O_dolink = 0; break; case 'v': O_verbose = 1; break; default: usage(argv[0]); } } } // Ensure we have at lease one input file argument if (i >= argc) usage(argv[0]); // Work on each input file in turn while (i < argc) { asmfile = do_compile(argv[i]); // Compile the source file if (O_dolink || O_assemble) { objfile = do_assemble(asmfile); // Assemble it to object forma if (objcnt == (MAXOBJ - 2)) { fprintf(stderr, "Too many object files for the compiler to handle\n"); exit(1); } objlist[objcnt++] = objfile; // Add the object file's name objlist[objcnt] = NULL; // to the list of object files } if (!O_keepasm) // Remove the assembly file if unlink(asmfile); // we don't need to keep it i++; } // Now link all the object files together if (O_dolink) { do_link(outfilename, objlist); // If we don't need to keep the object // files, then remove them if (!O_assemble) { for (i = 0; objlist[i] != NULL; i++) unlink(objlist[i]); } } return (0); } ================================================ FILE: 30_Design_Composites/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" #include // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current token is t, // and fetch the next token. 
Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d\n", s, Line); fclose(Outfile); unlink(Outfilename); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d\n", s1, s2, Line); fclose(Outfile); unlink(Outfilename); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d\n", s, d, Line); fclose(Outfile); unlink(Outfilename); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d\n", s, c, Line); fclose(Outfile); unlink(Outfilename); exit(1); } ================================================ FILE: 30_Design_Composites/scan.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { char *p; p = strchr(s, c); return (p ? p - s : -1); } // Get the next character from the input file. 
static int next(void) { int c; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return (c); } c = fgetc(Infile); // Read from input file if ('\n' == c) Line++; // Increment line count return (c); } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. static int skip(void) { int c; c = next(); while (' ' == c || '\t' == c || '\n' == c || '\r' == c || '\f' == c) { c = next(); } return (c); } // Return the next character from a character // or string literal static int scanch(void) { int c; // Get the next input character and interpret // metacharacters that start with a backslash c = next(); if (c == '\\') { switch (c = next()) { case 'a': return '\a'; case 'b': return '\b'; case 'f': return '\f'; case 'n': return '\n'; case 'r': return '\r'; case 't': return '\t'; case 'v': return '\v'; case '\\': return '\\'; case '"': return '"'; case '\'': return '\''; default: fatalc("unknown escape sequence", c); } } return (c); // Just an ordinary old character! } // Scan and return an integer literal // value from the input file. static int scanint(int c) { int k, val = 0; // Convert each character into an int value while ((k = chrpos("0123456789", c)) >= 0) { val = val * 10 + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan in a string literal from the input file, // and store it in buf[]. Return the length of // the string. 
static int scanstr(char *buf) { int i, c; // Loop while we have enough buffer space for (i = 0; i < TEXTLEN - 1; i++) { // Get the next char and append to buf // Return when we hit the ending double quote if ((c = scanch()) == '"') { buf[i] = 0; return (i); } buf[i] = c; } // Ran out of buf[] space fatal("String literal too long"); return (0); } // Scan an identifier from the input file and // store it in buf[]. Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { fatal("Identifier too long"); } else if (i < lim - 1) { buf[i++] = c; } c = next(); } // We hit a non-valid character, put it back. // NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. static int keyword(char *s) { switch (*s) { case 'c': if (!strcmp(s, "char")) return (T_CHAR); break; case 'e': if (!strcmp(s, "else")) return (T_ELSE); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'l': if (!strcmp(s, "long")) return (T_LONG); break; case 'r': if (!strcmp(s, "return")) return (T_RETURN); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; case 'v': if (!strcmp(s, "void")) return (T_VOID); break; } return (0); } // A pointer to a rejected token static struct token *Rejtoken = NULL; // Reject the token that we just scanned void reject_token(struct token *t) { if (Rejtoken != NULL) fatal("Can't reject token twice"); Rejtoken = t; } // Scan and return the next token found in the input. 
// Return 1 if token valid, 0 if no tokens left. int scan(struct token *t) { int c, tokentype; // If we have any rejected token, return it if (Rejtoken != NULL) { t = Rejtoken; Rejtoken = NULL; return (1); } // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': if ((c = next()) == '+') { t->token = T_INC; } else { putback(c); t->token = T_PLUS; } break; case '-': if ((c = next()) == '-') { t->token = T_DEC; } else { putback(c); t->token = T_MINUS; } break; case '*': t->token = T_STAR; break; case '/': t->token = T_SLASH; break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case '[': t->token = T_LBRACKET; break; case ']': t->token = T_RBRACKET; break; case '~': t->token = T_INVERT; break; case '^': t->token = T_XOR; break; case ',': t->token = T_COMMA; break; case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { putback(c); t->token = T_LOGNOT; } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else if (c == '<') { t->token = T_LSHIFT; } else { putback(c); t->token = T_LT; } break; case '>': if ((c = next()) == '=') { t->token = T_GE; } else if (c == '>') { t->token = T_RSHIFT; } else { putback(c); t->token = T_GT; } break; case '&': if ((c = next()) == '&') { t->token = T_LOGAND; } else { putback(c); t->token = T_AMPER; } break; case '|': if ((c = next()) == '|') { t->token = T_LOGOR; } else { putback(c); t->token = T_OR; } break; case '\'': // If it's a quote, scan in the // literal character value and // the trailing quote t->intvalue = scanch(); t->token = T_INTLIT; if (next() != '\'') fatal("Expected '\\'' at end of char literal"); break; case '"': // Scan in a literal string scanstr(Text); t->token = 
T_STRLIT; break; default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if ((tokentype = keyword(Text)) != 0) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token return (1); } ================================================ FILE: 30_Design_Composites/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // Prototypes static struct ASTnode *single_statement(void); // compound_statement: // empty, i.e. no statement // | statement // | statement statements // ; // // statement: declaration // | expression_statement // | function_call // | if_statement // | while_statement // | for_statement // | return_statement // ; // if_statement: if_head // | if_head 'else' compound_statement // ; // // if_head: 'if' '(' true_false_expression ')' compound_statement ; // // Parse an IF statement including any // optional ELSE clause and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. 
condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); rparen(); // Get the AST for the compound statement trueAST = compound_statement(); // If we have an 'else', skip it // and get the AST for the compound statement if (Token.token == T_ELSE) { scan(&Token); falseAST = compound_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, P_NONE, condAST, trueAST, falseAST, NULL, 0)); } // while_statement: 'while' '(' true_false_expression ')' compound_statement ; // // Parse a WHILE statement and return its AST static struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); rparen(); // Get the AST for the compound statement bodyAST = compound_statement(); // Build and return the AST for this statement return (mkastnode(A_WHILE, P_NONE, condAST, NULL, bodyAST, NULL, 0)); } // for_statement: 'for' '(' preop_statement ';' // true_false_expression ';' // postop_statement ')' compound_statement ; // // preop_statement: statement (for now) // postop_statement: statement (for now) // // Parse a FOR statement and return its AST static struct ASTnode *for_statement(void) { struct ASTnode *condAST, *bodyAST; struct ASTnode *preopAST, *postopAST; struct ASTnode *tree; // Ensure we have 'for' '(' match(T_FOR, "for"); lparen(); // Get the pre_op statement and the ';' preopAST = single_statement(); semi(); // Get the condition and the ';'. // Force a non-comparison to be boolean // the tree's operation is a comparison. 
condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); semi(); // Get the post_op statement and the ')' postopAST = single_statement(); rparen(); // Get the compound statement which is the body bodyAST = compound_statement(); // For now, all four sub-trees have to be non-NULL. // Later on, we'll change the semantics for when some are missing // Glue the compound statement and the postop tree tree = mkastnode(A_GLUE, P_NONE, bodyAST, NULL, postopAST, NULL, 0); // Make a WHILE loop with the condition and this new body tree = mkastnode(A_WHILE, P_NONE, condAST, NULL, tree, NULL, 0); // And glue the preop tree to the A_WHILE tree return (mkastnode(A_GLUE, P_NONE, preopAST, NULL, tree, NULL, 0)); } // return_statement: 'return' '(' expression ')' ; // // Parse a return statement and return its AST static struct ASTnode *return_statement(void) { struct ASTnode *tree; // Can't return a value if function returns P_VOID if (Functionid->type == P_VOID) fatal("Can't return from a void function"); // Ensure we have 'return' '(' match(T_RETURN, "return"); lparen(); // Parse the following expression tree = binexpr(0); // Ensure this is compatible with the function's type tree = modify_type(tree, Functionid->type, 0); if (tree == NULL) fatal("Incompatible type to return"); // Add on the A_RETURN node tree = mkastunary(A_RETURN, P_NONE, tree, NULL, 0); // Get the ')' rparen(); return (tree); } // Parse a single statement and return its AST static struct ASTnode *single_statement(void) { int type; switch (Token.token) { case T_CHAR: case T_INT: case T_LONG: // The beginning of a variable declaration. // Parse the type and get the identifier. 
// Then parse the rest of the declaration // and skip over the semicolon type = parse_type(); ident(); var_declaration(type, C_LOCAL); semi(); return (NULL); // No AST generated here case T_IF: return (if_statement()); case T_WHILE: return (while_statement()); case T_FOR: return (for_statement()); case T_RETURN: return (return_statement()); default: // For now, see if this is an expression. // This catches assignment statements. return (binexpr(0)); } return (NULL); // Keep -Wall happy } // Parse a compound statement // and return its AST struct ASTnode *compound_statement(void) { struct ASTnode *left = NULL; struct ASTnode *tree; // Require a left curly bracket lbrace(); while (1) { // Parse a single statement tree = single_statement(); // Some statements must be followed by a semicolon if (tree != NULL && (tree->op == A_ASSIGN || tree->op == A_RETURN || tree->op == A_FUNCCALL)) semi(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, P_NONE, left, NULL, tree, NULL, 0); } // When we hit a right curly bracket, // skip past it and return the AST if (Token.token == T_RBRACE) { rbrace(); return (left); } } } ================================================ FILE: 30_Design_Composites/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 // Append a node to the singly-linked list pointed to by head or tail void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node) { // Check for valid pointers if (head == NULL || tail == NULL || node == NULL) fatal("Either head, tail or node is NULL in appendsym"); // Append to the list if (*tail) { (*tail)->next = node; *tail = node; } else *head = *tail = node; node->next = NULL; } // Create a symbol node to be added to a symbol table list. 
// Set up the node's: // + type: char, int etc. // + structural type: var, function, array etc. // + size: number of elements, or endlabel: end label for a function // + posn: Position information for local symbols // Return a pointer to the new node struct symtable *newsym(char *name, int type, int stype, int class, int size, int posn) { // Get a new node struct symtable *node = (struct symtable *) malloc(sizeof(struct symtable)); if (node == NULL) fatal("Unable to malloc a symbol table node in newsym"); // Fill in the values node->name = strdup(name); node->type = type; node->stype = stype; node->class = class; node->size = size; node->posn = posn; node->next = NULL; node->member = NULL; // Generate any global space if (class == C_GLOBAL) genglobsym(node); return (node); } // Add a symbol to the global symbol list struct symtable *addglob(char *name, int type, int stype, int class, int size) { struct symtable *sym = newsym(name, type, stype, class, size, 0); appendsym(&Globhead, &Globtail, sym); return (sym); } // Add a symbol to the local symbol list struct symtable *addlocl(char *name, int type, int stype, int class, int size) { struct symtable *sym = newsym(name, type, stype, class, size, 0); appendsym(&Loclhead, &Locltail, sym); return (sym); } // Add a symbol to the parameter list struct symtable *addparm(char *name, int type, int stype, int class, int size) { struct symtable *sym = newsym(name, type, stype, class, size, 0); appendsym(&Parmhead, &Parmtail, sym); return (sym); } // Search for a symbol in a specific list. // Return a pointer to the found node or NULL if not found. static struct symtable *findsyminlist(char *s, struct symtable *list) { for (; list != NULL; list = list->next) if ((list->name != NULL) && !strcmp(s, list->name)) return (list); return (NULL); } // Determine if the symbol s is in the global symbol table. // Return a pointer to the found node or NULL if not found. 
struct symtable *findglob(char *s) { return (findsyminlist(s, Globhead)); } // Determine if the symbol s is in the local symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findlocl(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member); if (node) return (node); } return (findsyminlist(s, Loclhead)); } // Determine if the symbol s is in the symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findsymbol(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member); if (node) return (node); } // Otherwise, try the local and global symbol lists node = findsyminlist(s, Loclhead); if (node) return (node); return (findsyminlist(s, Globhead)); } // Find a composite type. // Return a pointer to the found node or NULL if not found. struct symtable *findcomposite(char *s) { return (findsyminlist(s, Comphead)); } // Reset the contents of the symbol table void clear_symtable(void) { Globhead = Globtail = NULL; Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Comphead = Comptail = NULL; } // Clear all the entries in the local symbol table void freeloclsyms(void) { Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Functionid = NULL; } ================================================ FILE: 30_Design_Composites/tests/err.input31.c ================================================ Expecting a primary expression, got token:15 on line 5 ================================================ FILE: 30_Design_Composites/tests/err.input32.c ================================================ Unknown variable:cow on line 4 ================================================ FILE: 30_Design_Composites/tests/err.input33.c ================================================ Incompatible type to return on line 4 
================================================ FILE: 30_Design_Composites/tests/err.input34.c ================================================ For now, declaration of local arrays is not implemented on line 4 ================================================ FILE: 30_Design_Composites/tests/err.input35.c ================================================ Duplicate local variable declaration:a on line 4 ================================================ FILE: 30_Design_Composites/tests/err.input36.c ================================================ Type doesn't match prototype for parameter:2 on line 4 ================================================ FILE: 30_Design_Composites/tests/err.input37.c ================================================ Unexpected token in parameter list:15 on line 3 ================================================ FILE: 30_Design_Composites/tests/err.input38.c ================================================ Type doesn't match prototype for parameter:2 on line 4 ================================================ FILE: 30_Design_Composites/tests/err.input39.c ================================================ No statements in function with non-void type on line 4 ================================================ FILE: 30_Design_Composites/tests/err.input40.c ================================================ No return for function with non-void type on line 4 ================================================ FILE: 30_Design_Composites/tests/err.input41.c ================================================ Can't return from a void function on line 3 ================================================ FILE: 30_Design_Composites/tests/err.input42.c ================================================ Undeclared function:fred on line 3 ================================================ FILE: 30_Design_Composites/tests/err.input43.c ================================================ Undeclared array:b on line 3 ================================================ FILE: 
30_Design_Composites/tests/err.input44.c ================================================ Unknown variable:z on line 3 ================================================ FILE: 30_Design_Composites/tests/err.input45.c ================================================ & operator must be followed by an identifier on line 3 ================================================ FILE: 30_Design_Composites/tests/err.input46.c ================================================ * operator must be followed by an identifier or * on line 3 ================================================ FILE: 30_Design_Composites/tests/err.input47.c ================================================ ++ operator must be followed by an identifier on line 3 ================================================ FILE: 30_Design_Composites/tests/err.input48.c ================================================ -- operator must be followed by an identifier on line 3 ================================================ FILE: 30_Design_Composites/tests/err.input49.c ================================================ Incompatible expression in assignment on line 6 ================================================ FILE: 30_Design_Composites/tests/err.input50.c ================================================ Incompatible types in binary expression on line 6 ================================================ FILE: 30_Design_Composites/tests/err.input51.c ================================================ Expected '\'' at end of char literal on line 4 ================================================ FILE: 30_Design_Composites/tests/err.input52.c ================================================ Unrecognised character:$ on line 5 ================================================ FILE: 30_Design_Composites/tests/input01.c ================================================ int printf(char *fmt); void main() { printf("%d\n", 12 * 3); printf("%d\n", 18 - 2 * 4); printf("%d\n", 1 + 2 + 9 - 5/2 + 3*5); } 
================================================ FILE: 30_Design_Composites/tests/input02.c ================================================ int printf(char *fmt); void main() { int fred; int jim; fred= 5; jim= 12; printf("%d\n", fred + jim); } ================================================ FILE: 30_Design_Composites/tests/input03.c ================================================ int printf(char *fmt); void main() { int x; x= 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); } ================================================ FILE: 30_Design_Composites/tests/input04.c ================================================ int printf(char *fmt); void main() { int x; x= 7 < 9; printf("%d\n", x); x= 7 <= 9; printf("%d\n", x); x= 7 != 9; printf("%d\n", x); x= 7 == 7; printf("%d\n", x); x= 7 >= 7; printf("%d\n", x); x= 7 <= 7; printf("%d\n", x); x= 9 > 7; printf("%d\n", x); x= 9 >= 7; printf("%d\n", x); x= 9 != 7; printf("%d\n", x); } ================================================ FILE: 30_Design_Composites/tests/input05.c ================================================ int printf(char *fmt); void main() { int i; int j; i=6; j=12; if (i < j) { printf("%d\n", i); } else { printf("%d\n", j); } } ================================================ FILE: 30_Design_Composites/tests/input06.c ================================================ int printf(char *fmt); void main() { int i; i=1; while (i <= 10) { printf("%d\n", i); i= i + 1; } } ================================================ FILE: 30_Design_Composites/tests/input07.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 30_Design_Composites/tests/input08.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { 
printf("%d\n", i); } } ================================================ FILE: 30_Design_Composites/tests/input09.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { printf("%d\n", 2 * b - a); } } ================================================ FILE: 30_Design_Composites/tests/input10.c ================================================ int printf(char *fmt); void main() { int i; char j; j= 20; printf("%d\n", j); i= 10; printf("%d\n", i); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 2; j= j + 1) { printf("%d\n", j); } } ================================================ FILE: 30_Design_Composites/tests/input11.c ================================================ int printf(char *fmt); int main() { int i; char j; long k; i= 10; printf("%d\n", i); j= 20; printf("%d\n", j); k= 30; printf("%d\n", k); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 4; j= j + 1) { printf("%d\n", j); } for (k= 1; k <= 5; k= k + 1) { printf("%d\n", k); } return(i); printf("%d\n", 12345); return(3); } ================================================ FILE: 30_Design_Composites/tests/input12.c ================================================ int printf(char *fmt); int fred() { return(5); } void main() { int x; x= fred(2); printf("%d\n", x); } ================================================ FILE: 30_Design_Composites/tests/input13.c ================================================ int printf(char *fmt); int fred() { return(56); } void main() { int dummy; int result; dummy= printf("%d\n", 23); result= fred(10); dummy= printf("%d\n", result); } ================================================ FILE: 30_Design_Composites/tests/input14.c ================================================ int printf(char *fmt); int fred() { return(20); } int main() { int result; printf("%d\n", 10); result= fred(15); 
printf("%d\n", result); printf("%d\n", fred(15)+10); return(0); } ================================================ FILE: 30_Design_Composites/tests/input15.c ================================================ int printf(char *fmt); int main() { char a; char *b; char c; int d; int *e; int f; a= 18; printf("%d\n", a); b= &a; c= *b; printf("%d\n", c); d= 12; printf("%d\n", d); e= &d; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 30_Design_Composites/tests/input16.c ================================================ int printf(char *fmt); int c; int d; int *e; int f; int main() { c= 12; d=18; printf("%d\n", c); e= &c + 1; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 30_Design_Composites/tests/input17.c ================================================ int printf(char *fmt); int main() { char a; char *b; int d; int *e; b= &a; *b= 19; printf("%d\n", a); e= &d; *e= 12; printf("%d\n", d); return(0); } ================================================ FILE: 30_Design_Composites/tests/input18.c ================================================ int printf(char *fmt); int main() { int a; int b; a= b= 34; printf("%d\n", a); printf("%d\n", b); return(0); } ================================================ FILE: 30_Design_Composites/tests/input18a.c ================================================ int printf(char *fmt); int a; int *b; char c; char *d; int main() { b= &a; *b= 15; printf("%d\n", a); d= &c; *d= 16; printf("%d\n", c); return(0); } ================================================ FILE: 30_Design_Composites/tests/input19.c ================================================ int printf(char *fmt); int a; int b; int c; int d; int e; int main() { a= 2; b= 4; c= 3; d= 2; e= (a+b) * (c+d); printf("%d\n", e); return(0); } ================================================ FILE: 30_Design_Composites/tests/input20.c ================================================ int printf(char *fmt); int a; 
int b[25]; int main() { b[3]= 12; a= b[3]; printf("%d\n", a); return(0); } ================================================ FILE: 30_Design_Composites/tests/input21.c ================================================ int printf(char *fmt); char c; char *str; int main() { c= '\n'; printf("%d\n", c); for (str= "Hello world\n"; *str != 0; str= str + 1) { printf("%c", *str); } return(0); } ================================================ FILE: 30_Design_Composites/tests/input22.c ================================================ int printf(char *fmt); char a; char b; char c; int d; int e; int f; long g; long h; long i; int main() { b= 5; c= 7; a= b + c++; printf("%d\n", a); e= 5; f= 7; d= e + f++; printf("%d\n", d); h= 5; i= 7; g= h + i++; printf("%d\n", g); a= b-- + c; printf("%d\n", a); d= e-- + f; printf("%d\n", d); g= h-- + i; printf("%d\n", g); a= ++b + c; printf("%d\n", a); d= ++e + f; printf("%d\n", d); g= ++h + i; printf("%d\n", g); a= b * --c; printf("%d\n", a); d= e * --f; printf("%d\n", d); g= h * --i; printf("%d\n", g); return(0); } ================================================ FILE: 30_Design_Composites/tests/input23.c ================================================ int printf(char *fmt); char *str; int x; int main() { x= -23; printf("%d\n", x); printf("%d\n", -10 * -10); x= 1; x= ~x; printf("%d\n", x); x= 2 > 5; printf("%d\n", x); x= !x; printf("%d\n", x); x= !x; printf("%d\n", x); x= 13; if (x) { printf("%d\n", 13); } x= 0; if (!x) { printf("%d\n", 14); } for (str= "Hello world\n"; *str; str++) { printf("%c", *str); } return(0); } ================================================ FILE: 30_Design_Composites/tests/input24.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { a= 42; b= 19; printf("%d\n", a & b); printf("%d\n", a | b); printf("%d\n", a ^ b); printf("%d\n", 1 << 3); printf("%d\n", 63 >> 3); return(0); } ================================================ FILE: 
30_Design_Composites/tests/input25.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { char z; int y; int x; x= 10; y= 20; z= 30; printf("%d\n", x); printf("%d\n", y); printf("%d\n", z); a= 5; b= 15; c= 25; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); return(0); } ================================================ FILE: 30_Design_Composites/tests/input26.c ================================================ int printf(char *fmt); int main(int a, char b, long c, int d, int e, int f, int g, int h) { int i; int j; int k; a= 13; printf("%d\n", a); b= 23; printf("%d\n", b); c= 34; printf("%d\n", c); d= 44; printf("%d\n", d); e= 54; printf("%d\n", e); f= 64; printf("%d\n", f); g= 74; printf("%d\n", g); h= 84; printf("%d\n", h); i= 94; printf("%d\n", i); j= 95; printf("%d\n", j); k= 96; printf("%d\n", k); return(0); } ================================================ FILE: 30_Design_Composites/tests/input27.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int param5(int a, int b, int c, int d, int e) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param2(int a, int b) { int c; int d; int e; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param0() { int a; int b; int c; int d; int e; a= 1; b= 2; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int main() { param8(1,2,3,4,5,6,7,8); param5(1,2,3,4,5); param2(1,2); param0(); return(0); } ================================================ FILE: 30_Design_Composites/tests/input28.c 
================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 30_Design_Composites/tests/input29.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h); int fred(int a, int b, int c); int main(); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 30_Design_Composites/tests/input30.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input30.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 30_Design_Composites/tests/input31.c ================================================ int printf(char *fmt); int main() { int x; x= 2 + + 3 - * / ; } ================================================ FILE: 30_Design_Composites/tests/input32.c ================================================ int printf(char *fmt); int 
main() { pizza cow llama sausage; } ================================================ FILE: 30_Design_Composites/tests/input33.c ================================================ int printf(char *fmt); int main() { char *z; return(z); } ================================================ FILE: 30_Design_Composites/tests/input34.c ================================================ int printf(char *fmt); int main() { int a[12]; return(0); } ================================================ FILE: 30_Design_Composites/tests/input35.c ================================================ int printf(char *fmt); int fred(int a, int b) { int a; return(a); } ================================================ FILE: 30_Design_Composites/tests/input36.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c); ================================================ FILE: 30_Design_Composites/tests/input37.c ================================================ int printf(char *fmt); int fred(int a, char b +, int z); ================================================ FILE: 30_Design_Composites/tests/input38.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c, int g); ================================================ FILE: 30_Design_Composites/tests/input39.c ================================================ int printf(char *fmt); int main() { int a; } ================================================ FILE: 30_Design_Composites/tests/input40.c ================================================ int printf(char *fmt); int main() { int a; a= 5; } ================================================ FILE: 30_Design_Composites/tests/input41.c ================================================ int printf(char *fmt); void fred() { return(5); } ================================================ FILE: 30_Design_Composites/tests/input42.c 
================================================ int printf(char *fmt); int main() { fred(5); } ================================================ FILE: 30_Design_Composites/tests/input43.c ================================================ int printf(char *fmt); int main() { int a; a= b[4]; } ================================================ FILE: 30_Design_Composites/tests/input44.c ================================================ int printf(char *fmt); int main() { int a; a= z; } ================================================ FILE: 30_Design_Composites/tests/input45.c ================================================ int printf(char *fmt); int main() { int a; a= &5; } ================================================ FILE: 30_Design_Composites/tests/input46.c ================================================ int printf(char *fmt); int main() { int a; a= *5; } ================================================ FILE: 30_Design_Composites/tests/input47.c ================================================ int printf(char *fmt); int main() { int a; a= ++5; } ================================================ FILE: 30_Design_Composites/tests/input48.c ================================================ int printf(char *fmt); int main() { int a; a= --5; } ================================================ FILE: 30_Design_Composites/tests/input49.c ================================================ int printf(char *fmt); int main() { int x; char y; y= x; } ================================================ FILE: 30_Design_Composites/tests/input50.c ================================================ int printf(char *fmt); int main() { char *a; char *b; a= a + b; } ================================================ FILE: 30_Design_Composites/tests/input51.c ================================================ int printf(char *fmt); int main() { char a; a= 'fred'; } ================================================ FILE: 30_Design_Composites/tests/input52.c 
================================================ int printf(char *fmt); int main() { int a; a= $5.00; } ================================================ FILE: 30_Design_Composites/tests/input53.c ================================================ int printf(char *fmt); int main() { printf("Hello world, %d\n", 23); return(0); } ================================================ FILE: 30_Design_Composites/tests/input54.c ================================================ int printf(char *fmt); int main() { int i; for (i=0; i < 20; i++) { printf("Hello world, %d\n", i); } return(0); } ================================================ FILE: 30_Design_Composites/tests/input55.c ================================================ int printf(char *fmt); int main(int argc, char **argv) { int i; char *argument; printf("Hello world\n"); for (i=0; i < argc; i++) { argument= *argv; argv= argv + 1; printf("Argument %d is %s\n", i, argument); } return(0); } ================================================ FILE: 30_Design_Composites/tests/mktests ================================================ #!/bin/sh # Make the output files for each test # Build our compiler if needed if [ ! -f ../cwj ] then (cd ..; make) fi for i in input*c do if [ ! -f "out.$i" -a ! -f "err.$i" ] then ../cwj -o out $i 2> "err.$i" # If the err file is empty if [ ! 
-s "err.$i" ] then rm -f "err.$i" cc -o out $i ../lib/printint.c ./out > "out.$i" fi fi rm -f out out.s done ================================================ FILE: 30_Design_Composites/tests/out.input01.c ================================================ 36 10 25 ================================================ FILE: 30_Design_Composites/tests/out.input02.c ================================================ 17 ================================================ FILE: 30_Design_Composites/tests/out.input03.c ================================================ 1 2 3 4 5 ================================================ FILE: 30_Design_Composites/tests/out.input04.c ================================================ 1 1 1 1 1 1 1 1 1 ================================================ FILE: 30_Design_Composites/tests/out.input05.c ================================================ 6 ================================================ FILE: 30_Design_Composites/tests/out.input06.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 30_Design_Composites/tests/out.input07.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 30_Design_Composites/tests/out.input08.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 30_Design_Composites/tests/out.input09.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 30_Design_Composites/tests/out.input10.c ================================================ 20 10 1 2 3 4 5 253 254 255 0 1 ================================================ FILE: 30_Design_Composites/tests/out.input11.c ================================================ 10 20 30 1 2 3 4 5 253 254 255 0 1 2 3 1 2 3 4 5 ================================================ FILE: 
30_Design_Composites/tests/out.input12.c ================================================ 5 ================================================ FILE: 30_Design_Composites/tests/out.input13.c ================================================ 23 56 ================================================ FILE: 30_Design_Composites/tests/out.input14.c ================================================ 10 20 30 ================================================ FILE: 30_Design_Composites/tests/out.input15.c ================================================ 18 18 12 12 ================================================ FILE: 30_Design_Composites/tests/out.input16.c ================================================ 12 18 ================================================ FILE: 30_Design_Composites/tests/out.input17.c ================================================ 19 12 ================================================ FILE: 30_Design_Composites/tests/out.input18.c ================================================ 34 34 ================================================ FILE: 30_Design_Composites/tests/out.input18a.c ================================================ 15 16 ================================================ FILE: 30_Design_Composites/tests/out.input19.c ================================================ 30 ================================================ FILE: 30_Design_Composites/tests/out.input20.c ================================================ 12 ================================================ FILE: 30_Design_Composites/tests/out.input21.c ================================================ 10 Hello world ================================================ FILE: 30_Design_Composites/tests/out.input22.c ================================================ 12 12 12 13 13 13 13 13 13 35 35 35 ================================================ FILE: 30_Design_Composites/tests/out.input23.c ================================================ -23 100 -2 0 1 0 13 14 Hello world 
================================================ FILE: 30_Design_Composites/tests/out.input24.c ================================================ 2 59 57 8 7 ================================================ FILE: 30_Design_Composites/tests/out.input25.c ================================================ 10 20 30 5 15 25 ================================================ FILE: 30_Design_Composites/tests/out.input26.c ================================================ 13 23 34 44 54 64 74 84 94 95 96 ================================================ FILE: 30_Design_Composites/tests/out.input27.c ================================================ 1 2 3 4 5 6 7 8 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 ================================================ FILE: 30_Design_Composites/tests/out.input28.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 30_Design_Composites/tests/out.input29.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 30_Design_Composites/tests/out.input30.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input30.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 30_Design_Composites/tests/out.input53.c ================================================ Hello world, 23 ================================================ FILE: 30_Design_Composites/tests/out.input54.c ================================================ Hello world, 0 Hello world, 1 Hello world, 2 Hello world, 3 Hello world, 4 Hello world, 5 Hello world, 6 Hello world, 7 Hello world, 8 Hello world, 9 Hello world, 10 
Hello world, 11 Hello world, 12 Hello world, 13 Hello world, 14 Hello world, 15 Hello world, 16 Hello world, 17 Hello world, 18 Hello world, 19 ================================================ FILE: 30_Design_Composites/tests/out.input55.c ================================================ Hello world Argument 0 is ./out ================================================ FILE: 30_Design_Composites/tests/runtests ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! -f ../cwj ] then (cd ..; make) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" # Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../cwj -o out $i ./out > trial.$i # Compare this against the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../cwj $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.s "trial.$i" done ================================================ FILE: 30_Design_Composites/tests/runtestsn ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! 
-f ../compn ] then (cd ..; make) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" # Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../compn -o out $i ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../compn $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.o out.s "trial.$i" done ================================================ FILE: 30_Design_Composites/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->type = type; n->left = left; n->mid = mid; n->right = right; n->sym = sym; n->intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, struct symtable *sym, int intvalue) { return 
(mkastnode(op, type, NULL, NULL, NULL, sym, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, struct symtable *sym, int intvalue) { return (mkastnode(op, type, left, NULL, NULL, sym, intvalue)); } // Generate and return a new label number // just for AST dumping purposes static int gendumplabel(void) { static int id = 1; return (id++); } // Given an AST tree, print it out and follow the // traversal of the tree that genAST() follows void dumpAST(struct ASTnode *n, int label, int level) { int Lfalse, Lstart, Lend; switch (n->op) { case A_IF: Lfalse = gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_IF"); if (n->right) { Lend = gendumplabel(); fprintf(stdout, ", end L%d", Lend); } fprintf(stdout, "\n"); dumpAST(n->left, Lfalse, level + 2); dumpAST(n->mid, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); return; case A_WHILE: Lstart = gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_WHILE, start L%d\n", Lstart); Lend = gendumplabel(); dumpAST(n->left, Lend, level + 2); dumpAST(n->right, NOLABEL, level + 2); return; } // Reset level to -2 for A_GLUE if (n->op == A_GLUE) level = -2; // General AST node handling if (n->left) dumpAST(n->left, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); for (int i = 0; i < level; i++) fprintf(stdout, " "); switch (n->op) { case A_GLUE: fprintf(stdout, "\n\n"); return; case A_FUNCTION: fprintf(stdout, "A_FUNCTION %s\n", n->sym->name); return; case A_ADD: fprintf(stdout, "A_ADD\n"); return; case A_SUBTRACT: fprintf(stdout, "A_SUBTRACT\n"); return; case A_MULTIPLY: fprintf(stdout, "A_MULTIPLY\n"); return; case A_DIVIDE: fprintf(stdout, "A_DIVIDE\n"); return; case A_EQ: fprintf(stdout, "A_EQ\n"); return; case A_NE: fprintf(stdout, "A_NE\n"); return; case A_LT: fprintf(stdout, "A_LE\n"); return; case A_GT: fprintf(stdout, "A_GT\n"); return; 
case A_LE: fprintf(stdout, "A_LE\n"); return; case A_GE: fprintf(stdout, "A_GE\n"); return; case A_INTLIT: fprintf(stdout, "A_INTLIT %d\n", n->intvalue); return; case A_STRLIT: fprintf(stdout, "A_STRLIT rval label L%d\n", n->intvalue); return; case A_IDENT: if (n->rvalue) fprintf(stdout, "A_IDENT rval %s\n", n->sym->name); else fprintf(stdout, "A_IDENT %s\n", n->sym->name); return; case A_ASSIGN: fprintf(stdout, "A_ASSIGN\n"); return; case A_WIDEN: fprintf(stdout, "A_WIDEN\n"); return; case A_RETURN: fprintf(stdout, "A_RETURN\n"); return; case A_FUNCCALL: fprintf(stdout, "A_FUNCCALL %s\n", n->sym->name); return; case A_ADDR: fprintf(stdout, "A_ADDR %s\n", n->sym->name); return; case A_DEREF: if (n->rvalue) fprintf(stdout, "A_DEREF rval\n"); else fprintf(stdout, "A_DEREF\n"); return; case A_SCALE: fprintf(stdout, "A_SCALE %d\n", n->size); return; case A_PREINC: fprintf(stdout, "A_PREINC %s\n", n->sym->name); return; case A_PREDEC: fprintf(stdout, "A_PREDEC %s\n", n->sym->name); return; case A_POSTINC: fprintf(stdout, "A_POSTINC\n"); return; case A_POSTDEC: fprintf(stdout, "A_POSTDEC\n"); return; case A_NEGATE: fprintf(stdout, "A_NEGATE\n"); return; default: fatald("Unknown dumpAST operator", n->op); } } ================================================ FILE: 30_Design_Composites/types.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Types and type handling // Copyright (c) 2019 Warren Toomey, GPL3 // Return true if a type is an int type // of any size, false otherwise int inttype(int type) { return ((type & 0xf) == 0); } // Return true if a type is of pointer type int ptrtype(int type) { return ((type & 0xf) != 0); } // Given a primitive type, return // the type which is a pointer to it int pointer_to(int type) { if ((type & 0xf) == 0xf) fatald("Unrecognised in pointer_to: type", type); return (type + 1); } // Given a primitive pointer type, return // the type which it points to int value_at(int type) { if 
((type & 0xf) == 0x0) fatald("Unrecognised in value_at: type", type); return (type - 1); } // Given an AST tree and a type which we want it to become, // possibly modify the tree by widening or scaling so that // it is compatible with this type. Return the original tree // if no changes occurred, a modified tree, or NULL if the // tree is not compatible with the given type. // If this will be part of a binary operation, the AST op is not zero. struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op) { int ltype; int lsize, rsize; ltype = tree->type; // Compare scalar int types if (inttype(ltype) && inttype(rtype)) { // Both types same, nothing to do if (ltype == rtype) return (tree); // Get the sizes for each type lsize = genprimsize(ltype); rsize = genprimsize(rtype); // Tree's size is too big if (lsize > rsize) return (NULL); // Widen to the right if (rsize > lsize) return (mkastunary(A_WIDEN, rtype, tree, NULL, 0)); } // For pointers on the left if (ptrtype(ltype)) { // OK is same type on right and not doing a binary op if (op == 0 && ltype == rtype) return (tree); } // We can scale only on A_ADD or A_SUBTRACT operation if (op == A_ADD || op == A_SUBTRACT) { // Left is int type, right is pointer type and the size // of the original type is >1: scale the left if (inttype(ltype) && ptrtype(rtype)) { rsize = genprimsize(value_at(rtype)); if (rsize > 1) return (mkastunary(A_SCALE, rtype, tree, NULL, rsize)); else return (tree); // Size 1, no need to scale } } // If we get here, the types are not compatible return (NULL); } ================================================ FILE: 31_Struct_Declarations/Makefile ================================================ HSRCS= data.h decl.h defs.h SRCS= cg.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c ARMSRCS= cg_arm.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c cwj: $(SRCS) $(HSRCS) 
cc -o cwj -g -Wall $(SRCS) compn: $(SRCN) $(HSRCS) cc -D__NASM__ -o compn -g -Wall $(SRCN) cwjarm: $(ARMSRCS) $(HSRCS) cc -o cwjarm -g -Wall $(ARMSRCS) cp cwjarm cwj clean: rm -f cwj cwjarm compn *.o *.s out test: cwj tests/runtests (cd tests; chmod +x runtests; ./runtests) armtest: cwjarm tests/runtests (cd tests; chmod +x runtests; ./runtests) testn: compn tests/runtestsn (cd tests; chmod +x runtestsn; ./runtestsn) ================================================ FILE: 31_Struct_Declarations/Readme.md ================================================ # Part 31: Implementing Structs, Part 1 In this part of our compiler writing journey, I've begun the process of implementing structs into the language. Even though these are not yet functional, I've made substantial changes to the code just to get to the point where we can declare structs, and global variables of struct type. ## Symbol Table Changes As I mentioned in the last part, we need to change the symbol table structure to include a pointer to a composite type node, when the symbol is of this type. We also added a `next` pointer to support linked lists, and a `member` pointer. The `member` pointer of a function node holds the function's parameter list. We will use the `member` node for structs to hold the struct's member fields. So, we now have: ```c struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol struct symtable *ctype; // If needed, pointer to the composite type ... 
struct symtable *next; // Next symbol in one list struct symtable *member; // First member of a function, struct, }; // union or enum ``` We also have two new lists for symbols in `data.h`: ```c // Symbol table lists struct symtable *Globhead, *Globtail; // Global variables and functions struct symtable *Loclhead, *Locltail; // Local variables struct symtable *Parmhead, *Parmtail; // Local parameters struct symtable *Membhead, *Membtail; // Temp list of struct/union members struct symtable *Structhead, *Structtail; // List of struct types ``` ## Changes to `sym.c` Throughout `sym.c`, and elsewhere in the code, we used to only receive the `int type` argument to determine the type of something. This isn't enough now that we have composite types: the P_STRUCT integer value tells us that something is a struct, not which one. Therefore, many functions now receive an `int type` argument and also a `struct symtable *ctype` argument. When `type` is P_STRUCT, `ctype` points at the node which defines this particular struct type. In `sym.c`, all the `addXX()` functions have been modified to have this extra argument. There is also a new `addmemb()` function and a new `addstruct()` function to add nodes to these two new lists. They function identically to the other `addXX()` functions but just on a different list. I will come back to these functions later. ## A New Token We have our first new token, T_STRUCT, in quite a while. It goes with the matching `struct` keyword. I'll omit the changes to `scan.c` as they are minor. ## Parsing Structs in our Grammar There are a bunch of places where we need to parse the `struct` keyword: + the definition of a named struct + the definition of an unnamed struct followed by a variable of this type + the definition of a struct within another struct or union + the definition of a variable of a previously defined struct type At first, I wasn't sure where to fit in the parsing of structs. 
Should I assume that we are parsing a new struct definition, but bail out when I see a variable identifier, or assume a variable declaration? In the end, I realised that, after seeing `struct <identifier>`, I had to assume that this was just the naming of a type, just as `int` is the naming of the `int` type. We had to parse the next token to determine otherwise. Therefore, I modified `parse_type()` in `decl.c` to parse both scalar types (e.g. `int`) and composite types (e.g. `struct foo`). And now that it can return a composite type, I had to find a way to return the pointer to the node that defines this composite type: ```c // Parse the current token and return // a primitive type enum value and a pointer // to any composite type. // Also scan in the next token int parse_type(struct symtable **ctype) { int type; switch (Token.token) { ... // Existing code for T_VOID, T_CHAR etc. case T_STRUCT: type = P_STRUCT; *ctype = struct_declaration(); break; ... ``` We call `struct_declaration()` to either look up an existing struct type or to parse the declaration of the new struct type. ## Refactoring The Parsing of a Variable List In our old code, there was a function called `param_declaration()` that parsed a list of parameters separated by commas, e.g. ```c int fred(int x, char y, long z); ``` such as you would find as the parameter list for a function declaration. Well, a struct and union declaration also has a list of variables, except that they are separated by semicolons and surrounded by curly brackets, e.g. ```c struct fred { int x; char y; long z; }; ``` It makes sense to refactor the function to parse both lists. It now is passed two tokens: the separating token, e.g. T_SEMI and the ending token, e.g. T_RBRACE. Thus, we can use it to parse both styles of lists. ```c // Parse a list of variables. // Add them as symbols to one of the symbol table lists, and return the // number of variables. 
If funcsym is not NULL, there is an existing function // prototype, so compare each variable's type against this prototype. static int var_declaration_list(struct symtable *funcsym, int class, int separate_token, int end_token) { ... // Get the type and identifier type = parse_type(&ctype); ... // Add a new parameter to the right symbol table list, based on the class var_declaration(type, ctype, class); } ``` When we are parsing function parameter lists, we call: ```c var_declaration_list(oldfuncsym, C_PARAM, T_COMMA, T_RPAREN); ``` When we are parsing struct member lists, we call: ```c var_declaration_list(NULL, C_MEMBER, T_SEMI, T_RBRACE); ``` Also note that the call to `var_declaration()` now is given the type of the variable, the composite type pointer (if it is a struct or union), and the variable's class. Now we can parse the lists of members of a struct. So let's see how we parse the whole struct. ## The `struct_declaration()` Function Let's take this in stages. ```c static struct symtable *struct_declaration(void) { struct symtable *ctype = NULL; struct symtable *m; int offset; // Skip the struct keyword scan(&Token); // See if there is a following struct name if (Token.token == T_IDENT) { // Find any matching composite type ctype = findstruct(Text); scan(&Token); } ``` At this point we have seen `struct` possibly followed by an identifier. If this is an existing struct type, `ctype` now points at the existing type node. Otherwise, `ctype` is NULL. ```c // If the next token isn't an LBRACE , this is // the usage of an existing struct type. // Return the pointer to the type. if (Token.token != T_LBRACE) { if (ctype == NULL) fatals("unknown struct type", Text); return (ctype); } ``` We didn't see a '{', so this has to be just the naming of an existing type. `ctype` cannot be NULL, so we check that first and then simply return the pointer to this existing struct type. 
This is going to go back to `parse_type()` when we did: ```c type = P_STRUCT; *ctype = struct_declaration(); ``` But, assuming we didn't return, we must have found a '{', and this signals the definition of a struct type. Let's go on... ```c // Ensure this struct type hasn't been // previously defined if (ctype) fatals("previously defined struct", Text); // Build the struct node and skip the left brace ctype = addstruct(Text, P_STRUCT, NULL, 0, 0); scan(&Token); ``` We can't declare a struct with the same name twice, so prevent this. Then build the beginnings of the new struct type as a node in the symbol table. All we have so far is its name and that it is of P_STRUCT type. ```c // Scan in the list of members and attach // to the struct type's node var_declaration_list(NULL, C_MEMBER, T_SEMI, T_RBRACE); rbrace(); ``` This parses the list of members. For each one, a new symbol node is appended to the list that `Membhead` and `Membtail` point to. This list is only temporary, because the next lines of code move the member list into the new struct type node: ```c ctype->member = Membhead; Membhead = Membtail = NULL; ``` We now have a struct type node with a name, and the list of members in the struct. What's left to do? Well, we now need to determine: + the overall size of the struct, and + the offset of each member from the base of the struct Some of this is very hardware-specific due to the alignment of scalar values in memory. So I'll give the code as it stands now, and then follow the function call structure later. ```c // Set the offset of the initial member // and find the first free byte after it m = ctype->member; m->posn = 0; offset = typesize(m->type, m->ctype); ``` We now have a new function, `typesize()` to get the size of any type: scalar, pointer or composite. The first member's position is set to zero, and we use its size to determine the first possible byte where the next member could be stored. But now we need to worry about alignment. 
As an example, on a 32-bit architecture where 4-byte scalar values have to be aligned on a 4-byte boundary: ```c struct { char x; // At offset 0 int y; // At offset 4, not 1 }; ``` So here is the code to calculate the offset of each successive member: ```c // Set the position of each successive member in the struct for (m = m->next; m != NULL; m = m->next) { // Set the offset for this member m->posn = genalign(m->type, offset, 1); // Get the offset of the next free byte after this member offset += typesize(m->type, m->ctype); } ``` We have a new function, `genalign()` that takes a current offset and the type that we need to align, and returns the first offset that suits the alignment of this type. For example, `genalign(P_INT, 3, 1)` might return 4 if P_INTs have to be 4-aligned. I'll discuss the final 1 argument soon. So, `genalign()` works out the correct alignment for this member, and then we add on this member's size to get the next free (unaligned) position which is available for the next member. Once we have done the above for all the members in the list, the `offset` is the size in bytes of the overall struct. (Note that, as written, `offset` is advanced by each member's size from its previous unaligned value and not from the member's aligned `posn`, so any padding that `genalign()` inserts is not counted in the total. This is why the test output later in this part shows sizes of 13 and 9 even though the last member of each struct ends beyond that.) So: ```c // Set the overall size of the struct ctype->size = offset; return (ctype); } ``` ## The `typesize()` Function It's time to follow all the new functions to see what they do and how they do it. We'll start with `typesize()` in `types.c`: ```c // Given a type and a composite type pointer, return // the size of this type in bytes int typesize(int type, struct symtable *ctype) { if (type == P_STRUCT) return(ctype->size); return(genprimsize(type)); } ``` If the type is a struct, return the size from the struct's type node. Otherwise it's a scalar or pointer type, so ask `genprimsize()` (which calls the hardware-specific `cgprimsize()`) to get the type's size. Nice and easy. ## The `genalign()` and `cgalign()` Functions Now we get into some not so nice code. 
Given a type and an existing unaligned offset, we need to know which is the next aligned position to place a value of the given type. I also was worried that we might need to do this on the stack, which grows downwards not upwards. So there is a third argument to the function: the *direction* in which we need to find the next aligned position. Also, the knowledge of alignment is hardware specific, so: ```c int genalign(int type, int offset, int direction) { return (cgalign(type, offset, direction)); } ``` and we turn our attention to `cgalign()` in `cg.c`: ```c // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch(type) { case P_CHAR: return (offset); case P_INT: case P_LONG: break; default: fatald("Bad type in calc_aligned_offset:", type); } // Here we have an int or a long. Align it on a 4-byte offset // I put the generic code here so it can be reused elsewhere. alignment= 4; offset = (offset + direction * (alignment-1)) & ~(alignment-1); return (offset); } ``` Firstly, yes I know that we don't have to worry about alignment in the x86-64 architecture. But I thought we should go through the exercise of dealing with alignment, so there is an example of it being done which can be borrowed for other backends that may be written. The code returns the given offset for `char` types, as they can be stored at any alignment. But we enforce a 4-byte alignment on `int`s and `long`s. Let's break down the big offset expression. The first `alignment-1` turns `offset` 0 into 3, 1 into 4, 2 into 5 etc. 
Then, at the end we AND this with the inverse of 3, i.e. ...111111100 to
discard the last two bits and lower the value back down to the correct
alignment.

Thus:

| Offset | Add Value | New Offset |
|:------:|:---------:|:----------:|
|   0    |    3      |     0      |
|   1    |    4      |     4      |
|   2    |    5      |     4      |
|   3    |    6      |     4      |
|   4    |    7      |     4      |
|   5    |    8      |     8      |
|   6    |    9      |     8      |
|   7    |   10      |     8      |

An offset of 0 stays at zero, but values 1 to 3 are pushed up to 4. 4 stays
aligned at 4, but 5 to 7 get pushed up to 8.

Now the magic. A `direction` of 1 does everything that we have seen so far.
A `direction` of -1 sends the offset in the opposite direction to ensure
that the value's "high end" won't hit what's above it:

| Offset | Add Value | New Offset |
|:------:|:---------:|:----------:|
|   0    |   -3      |    -4      |
|  -1    |   -4      |    -4      |
|  -2    |   -5      |    -8      |
|  -3    |   -6      |    -8      |
|  -4    |   -7      |    -8      |
|  -5    |   -8      |    -8      |
|  -6    |   -9      |   -12      |
|  -7    |  -10      |   -12      |

## Creating a Global Struct Variable

So now we can parse a struct type, and declare a global variable of this
type. Now let's modify the code to allocate the memory space for a global
variable:

```c
// Generate a global symbol but not functions
void cgglobsym(struct symtable *node) {
  int size;

  if (node == NULL) return;
  if (node->stype == S_FUNCTION) return;

  // Get the size of the type
  size = typesize(node->type, node->ctype);

  // Generate the global identity and the label
  cgdataseg();
  fprintf(Outfile, "\t.globl\t%s\n", node->name);
  fprintf(Outfile, "%s:", node->name);

  // Generate the space for this type
  switch (size) {
    case 1: fprintf(Outfile, "\t.byte\t0\n"); break;
    case 4: fprintf(Outfile, "\t.long\t0\n"); break;
    case 8: fprintf(Outfile, "\t.quad\t0\n"); break;
    default:
      for (int i=0; i < size; i++)
        fprintf(Outfile, "\t.byte\t0\n");
  }
}
```

## Trying The Changes Out

We don't have any new functionality apart from parsing structs, storing new
nodes in the symbol table and generating storage for global struct variables.
I have this test program, `z.c`:

```c
struct fred { int x; char y; long z; };
struct foo { char y; long z; } var1;
struct { int x; };
struct fred var2;
```

which should create two global variables `var1` and `var2`. We create two
named struct types, `fred` and `foo`, and one unnamed struct. The third
struct should cause an error (or at least a warning) because there is no
variable associated with the struct, so the struct itself is useless.

I added some test code to print out the member offsets and struct sizes for
the above structs, and this is the result:

```
Offset for fred.x is 0
Offset for fred.y is 4
Offset for fred.z is 8
Size of struct fred is 13
Offset for foo.y is 0
Offset for foo.z is 4
Size of struct foo is 9
Offset for struct.x is 0
Size of struct struct is 4
```

Note that each reported size is the running sum of the member sizes: the
padding bytes that `genalign()` skips over are not counted, so `fred.z`
(at offset 8 and 8 bytes long) actually extends past the reported size of 13.

Finally, when I do `./cwj -S z.c`, I get this assembly output:

```
        .globl  var1
var1:   .byte   0               // Nine bytes
        ...
        .globl  var2            // Thirteen bytes
var2:   .byte   0
        ...
```

## Conclusion and What's Next

In this part I've had to change a lot of the existing code from dealing with
just an `int type` to dealing with an `int type; struct symtable *ctype`
pair. I'm sure I'll have to do this in more places.

We've added the parsing of struct definitions and also declarations of
struct variables, and we can generate the space for global struct variables.
At the moment, we can't use the struct variables that we have created. But
it's a good start. I also haven't even tried to deal with local struct
variables, because that involves the stack and I'm sure that will be
complicated.

In the next part of our compiler writing journey, I will try to add the code
to parse the '.' token so that we can access members in a struct variable.
[Next step](../32_Struct_Access_pt1/Readme.md) ================================================ FILE: 31_Struct_Declarations/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\t.text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\t.data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch(type) { case P_CHAR: return (offset); case P_INT: case P_LONG: break; default: fatald("Bad type in calc_aligned_offset:", type); } // Here we have an int or a long. Align it on a 4-byte offset // I put the generic code here so it can be reused elsewhere. alignment= 4; offset = (offset + direction * (alignment-1)) & ~(alignment-1); return (offset); } // Position of next local variable relative to stack base pointer. 
// We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. static int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. // We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "%r10", "%r11", "%r12", "%r13", "%r9", "%r8", "%rcx", "%rdx", "%rsi", "%rdi" }; static char *breglist[] = { "%r10b", "%r11b", "%r12b", "%r13b", "%r9b", "%r8b", "%cl", "%dl", "%sil", "%dil" }; static char *dreglist[] = { "%r10d", "%r11d", "%r12d", "%r13d", "%r9d", "%r8d", "%ecx", "%edx", "%esi", "%edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(struct symtable *sym) { char *name = sym->name; struct symtable *parm, *locvar; int cnt; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the %rsp and %rsp fprintf(Outfile, "\t.globl\t%s\n" "\t.type\t%s, @function\n" "%s:\n" "\tpushq\t%%rbp\n" "\tmovq\t%%rsp, %%rbp\n", name, name, name); // Copy any in-register parameters to the stack, up to six of them // The remaining parameters are already on the stack for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) { if (cnt > 6) { parm->posn = paramOffset; paramOffset += 8; } else { parm->posn = newlocaloffset(parm->type); cgstorlocal(paramReg--, parm); } } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) { locvar->posn = newlocaloffset(locvar->type); } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\taddq\t$%d,%%rsp\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->endlabel); fprintf(Outfile, "\taddq\t$%d,%%rsp\n", stackOffset); fputs("\tpopq %rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. 
int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadglob(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name); } else // Print out the code to initialise it switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovzbq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovslq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. 
int cgloadlocal(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->posn); fprintf(Outfile, "\tmovq\t%d(%%rbp), %s\n", sym->posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->posn); } else switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->posn); fprintf(Outfile, "\tmovzbq\t%d(%%rbp), %s\n", sym->posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->posn); fprintf(Outfile, "\tmovslq\t%d(%%rbp), %s\n", sym->posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->posn); break; default: fatald("Bad type in cgloadlocal:", sym->type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tleaq\tL%d(%%rip), %s\n", label, reglist[r]); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, 
"\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tandq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\torq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txorq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshlq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshrq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tnegq\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnotq\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); return (r); } // Convert an integer value to a boolean value. 
Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s@PLT\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\taddq\t$%d, %%rsp\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpushq\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmovq\t%s, %s\n", reglist[r], reglist[FIRSTPARAMREG - argposn + 1]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsalq\t$%d, %s\n", val, reglist[r]); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %s(%%rip)\n", reglist[r], sym->name); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %s(%%rip)\n", breglist[r], sym->name); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %s(%%rip)\n", dreglist[r], sym->name); break; default: fatald("Bad type in cgstorglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %d(%%rbp)\n", reglist[r], sym->posn); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %d(%%rbp)\n", breglist[r], sym->posn); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %d(%%rbp)\n", dreglist[r], sym->posn); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the type size = typesize(node->type, node->ctype); // Generate the global identity and the label cgdataseg(); fprintf(Outfile, "\t.globl\t%s\n", node->name); fprintf(Outfile, "%s:", node->name); // Generate the space for this type switch (size) { case 1: fprintf(Outfile, "\t.byte\t0\n"); break; case 4: fprintf(Outfile, "\t.long\t0\n"); break; case 8: fprintf(Outfile, "\t.quad\t0\n"); 
break; default: for (int i=0; i < size; i++) fprintf(Outfile, "\t.byte\t0\n"); } } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\t.byte\t%d\n", *cptr); } fprintf(Outfile, "\t.byte\t0\n"); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzbl\t%s, %%eax\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %%eax\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } cgjump(sym->endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL) fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", sym->name, reglist[r]); else fprintf(Outfile, "\tleaq\t%d(%%rbp), %s\n", sym->posn, reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzbq\t(%s), %s\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovslq\t(%s), %s\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { // Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmovb\t%s, (%s)\n", breglist[r1], reglist[r2]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 31_Struct_Declarations/cg_arm.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for ARMv6 on Raspberry Pi // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. static int freereg[4]; static char *reglist[4] = { "r4", "r5", "r6", "r7" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. 
static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // We have to store large integer literal values in memory. // Keep a list of them which will be output in the postamble #define MAXINTS 1024 int Intlist[MAXINTS]; static int Intslot = 0; // Determine the offset of a large integer // literal from the .L3 label. If the integer // isn't in the list, add it. static void set_int_offset(int val) { int offset = -1; // See if it is already there for (int i = 0; i < Intslot; i++) { if (Intlist[i] == val) { offset = 4 * i; break; } } // Not in the list, so add it if (offset == -1) { offset = 4 * Intslot; if (Intslot == MAXINTS) fatal("Out of int slots in set_int_offset()"); Intlist[Intslot++] = val; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L3+%d\n", offset); } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Print out the assembly postamble void cgpostamble() { // Print out the global variables fprintf(Outfile, ".L2:\n"); for (int i = 0; i < Globs; i++) { if (Symtable[i].stype == S_VARIABLE) fprintf(Outfile, "\t.word %s\n", Symtable[i].name); } // Print out the integer literals fprintf(Outfile, ".L3:\n"); for (int i = 0; i < Intslot; i++) { fprintf(Outfile, "\t.word %d\n", Intlist[i]); } } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, \%%function\n" "%s:\n" "\tpush\t{fp, lr}\n" "\tadd\tfp, sp, #4\n" "\tsub\tsp, sp, #8\n" "\tstr\tr0, [fp, #-8]\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { 
cglabel(Symtable[id].endlabel); fputs("\tsub\tsp, fp, #4\n" "\tpop\t{fp, pc}\n" "\t.align\t2\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); // If the literal value is small, do it with one instruction if (value <= 1000) fprintf(Outfile, "\tmov\t%s, #%d\n", reglist[r], value); else { set_int_offset(value); fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); } return (r); } // Determine the offset of a variable from the .L2 // label. Yes, this is inefficient code. static void set_var_offset(int id) { int offset = 0; // Walk the symbol table up to id. // Find S_VARIABLEs and add on 4 until // we get to our variable for (int i = 0; i < id; i++) { if (Symtable[i].stype == S_VARIABLE) offset += 4; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L2+%d\n", offset); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tldrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s, %s\n", reglist[r1], reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register 
with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\tmul\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { // To do a divide: r0 holds the dividend, r1 holds the divisor. // The quotient is in r0. fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r1]); fprintf(Outfile, "\tmov\tr1, %s\n", reglist[r2]); fprintf(Outfile, "\tbl\t__aeabi_idiv\n"); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r1]); free_register(r2); return (r1); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]); fprintf(Outfile, "\tbl\t%s\n", Symtable[id].name); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r]); return (r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tlsl\t%s, %s, #%d\n", reglist[r], reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tstr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgstorglob:", Symtable[id].type); } return (r); } // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { if (ptrtype(type)) return (4); switch (type) { case P_CHAR: return (1); case P_INT: case P_LONG: return (4); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Symtable[id].type); fprintf(Outfile, "\t.data\n" "\t.globl\t%s\n", Symtable[id].name); switch (typesize) { case 1: fprintf(Outfile, "%s:\t.byte\t0\n", Symtable[id].name); break; case 4: fprintf(Outfile, "%s:\t.long\t0\n", Symtable[id].name); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "moveq", "movne", "movlt", "movgt", "movle", "movge" }; // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "movne", "moveq", "movge", "movle", "movgt", "movlt" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #1\n", cmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #0\n", invcmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\tuxtb\t%s, %s\n", reglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tb\tL%d\n", l); } // List of inverted branch instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *brlist[] = { "bne", "beq", "bge", "ble", "bgt", "blt" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", brlist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[reg]); cgjump(Symtable[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. Return a new register int cgaddress(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); fprintf(Outfile, "\tmov\t%s, r3\n", reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tldrb\t%s, [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [%s]\n", reglist[r1], reglist[r2]); break; case P_INT: case P_LONG: fprintf(Outfile, "\tstr\t%s, [%s]\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 31_Struct_Declarations/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { 
no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\tsection .text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\tsection .data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch(type) { case P_CHAR: return (offset); case P_INT: case P_LONG: break; default: fatald("Bad type in calc_aligned_offset:", type); } // Here we have an int or a long. Align it on a 4-byte offset // I put the generic code here so it can be reused elsewhere. alignment= 4; offset = (offset + direction * (alignment-1)) & ~(alignment-1); return (offset); } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. 
// We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "r10", "r11", "r12", "r13", "r9", "r8", "rcx", "rdx", "rsi", "rdi" }; static char *breglist[] = { "r10b", "r11b", "r12b", "r13b", "r9b", "r8b", "cl", "dl", "sil", "dil" }; static char *dreglist[] = { "r10d", "r11d", "r12d", "r13d", "r9d", "r8d", "ecx", "edx", "esi", "edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\textern\tprintint\n", Outfile); fputs("\textern\tprintchar\n", Outfile); fputs("\textern\topen\n", Outfile); fputs("\textern\tclose\n", Outfile); fputs("\textern\tread\n", Outfile); fputs("\textern\twrite\n", Outfile); fputs("\textern\tprintf\n", Outfile); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(struct symtable *sym) { char *name = sym->name; struct symtable *parm, *locvar; int cnt; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the rsp and rbp fprintf(Outfile, "\tglobal\t%s\n" "%s:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n", name, name); // Copy any in-register parameters to the stack, up to six of them // The remaining parameters are already on the stack for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) { if (cnt > 6) { parm->posn = paramOffset; paramOffset += 8; } else { parm->posn = newlocaloffset(parm->type); cgstorlocal(paramReg--, parm); } } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. 
for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) { locvar->posn = newlocaloffset(locvar->type); } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\tadd\trsp, %d\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->endlabel); fprintf(Outfile, "\tadd\trsp, %d\n", stackOffset); fputs("\tpop rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadglob(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); } else // Print out the code to initialise it switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, 
"\tdec\tdword [%s]\n", sym->name); fprintf(Outfile, "\tmovsx\t%s, word [%s]\n", dreglist[r], sym->name); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword [%s]\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadlocal(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->posn); fprintf(Outfile, "\tmov\t%s, [rbp+%d]\n", reglist[r], sym->posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->posn); } else switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->posn); fprintf(Outfile, "\tmovzx\t%s, byte [rbp+%d]\n", reglist[r], sym->posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", sym->posn); fprintf(Outfile, "\tmovsx\t%s, word [rbp+%d]\n", reglist[r], sym->posn); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, 
"\tdec\tdword\t[rbp+%d]\n", sym->posn); break; default: fatald("Bad type in cgloadlocal:", sym->type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, L%d\n", reglist[r], label); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tand\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\tor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshl\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshr\t%s, 
cl\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tneg\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnot\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r], breglist[r]); return (r); } // Convert an integer value to a boolean value. Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, byte %s\n", reglist[r], breglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\tadd\trsp, %d\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmov\t%s, rax\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpush\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmov\t%s, %s\n", reglist[FIRSTPARAMREG - argposn + 1], reglist[r]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsal\t%s, %d\n", reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, dreglist[r]); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\tqword\t[rbp+%d], %s\n", sym->posn, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\tbyte\t[rbp+%d], %s\n", sym->posn, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\tdword\t[rbp+%d], %s\n", sym->posn, dreglist[r]); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the type size = typesize(node->type, node->ctype); // Generate the global identity and the label cgdataseg(); fprintf(Outfile, "\tsection\t.data\n" "\tglobal\t%s\n", node->name); fprintf(Outfile, "%s:", node->name); // Generate the space for this type // original version for (int i = 0; i < node->size; i++) { switch(size) { case 1: fprintf(Outfile, "\tdb\t0\n"); break; case 4: fprintf(Outfile, 
"\tdd\t0\n"); break; case 8: fprintf(Outfile, "\tdq\t0\n"); break; default: for (int i = 0; i < size; i++) fprintf(Outfile, "\tdb\t0\n"); } } /* compact version using times instead of loop switch(size) { case 1: fprintf(Outfile, "\ttimes\t%d\tdb\t0\n", node->size); break; case 4: fprintf(Outfile, "\ttimes\t%d\tdd\t0\n", node->size); break; case 8: fprintf(Outfile, "\ttimes\t%d\tdq\t0\n", node->size); break; default: fprintf(Outfile, "\ttimes\t%d\tdb\t0\n", size); } */ } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\tdb\t%d\n", *cptr); } fprintf(Outfile, "\tdb\t0\n"); /* put string in readable format on a single line // probably went overboard with error checking int comma = 0, quote = 0, start = 1; fprintf(Outfile, "\tdb\t"); for (cptr=strvalue; *cptr; cptr++) { if ( ! isprint(*cptr) ) if (comma || start) { fprintf(Outfile, "%d, ", *cptr); start = 0; comma = 1; } else if (quote) { fprintf(Outfile, "\', %d, ", *cptr); comma = 1; quote = 0; } else { fprintf(Outfile, "%d, ", *cptr); comma = 1; quote = 0; } else if (start || comma) { fprintf(Outfile, "\'%c", *cptr); start = comma = 0; quote = 1; } else { fprintf(Outfile, "%c", *cptr); comma = 0; quote = 1; } } if (comma || start) fprintf(Outfile, "0\n"); else fprintf(Outfile, "\', 0\n"); */ } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzx\teax, %s\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmov\teax, %s\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } cgjump(sym->endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL) fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r], sym->name); else fprintf(Outfile, "\tlea\t%s, [rbp+%d]\n", reglist[r], sym->posn); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovsx\t%s, dword [%s]\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { //Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmov\t[%s], byte %s\n", reglist[r2], breglist[r1]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 31_Struct_Declarations/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Putback; // Character put back by scanner extern_ struct symtable *Functionid; // Symbol ptr of the current function extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ char *Outfilename; // Name of file we opened as Outfile extern_ struct token Token; // Last token scanned extern_ char Text[TEXTLEN + 1]; // Last identifier scanned // Symbol table lists extern_ struct symtable *Globhead, *Globtail; // Global variables 
and functions extern_ struct symtable *Loclhead, *Locltail; // Local variables extern_ struct symtable *Parmhead, *Parmtail; // Local parameters extern_ struct symtable *Membhead, *Membtail; // Temp list of struct/union members extern_ struct symtable *Structhead, *Structtail; // List of struct types // Command-line flags extern_ int O_dumpAST; // If true, dump the AST trees extern_ int O_keepasm; // If true, keep any assembly files extern_ int O_assemble; // If true, assemble the assembly files extern_ int O_dolink; // If true, link the object files extern_ int O_verbose; // If true, print info on compilation stages ================================================ FILE: 31_Struct_Declarations/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 static struct symtable *struct_declaration(void); // Parse the current token and return // a primitive type enum value and a pointer // to any composite type. // Also scan in the next token int parse_type(struct symtable **ctype) { int type; switch (Token.token) { case T_VOID: type = P_VOID; scan(&Token); break; case T_CHAR: type = P_CHAR; scan(&Token); break; case T_INT: type = P_INT; scan(&Token); break; case T_LONG: type = P_LONG; scan(&Token); break; case T_STRUCT: type = P_STRUCT; *ctype = struct_declaration(); break; default: fatald("Illegal type, token", Token.token); } // Scan in one or more further '*' tokens // and determine the correct pointer type while (1) { if (Token.token != T_STAR) break; type = pointer_to(type); scan(&Token); } // We leave with the next token already scanned return (type); } // variable_declaration: type identifier ';' // | type identifier '[' INTLIT ']' ';' // ; // // Parse the declaration of a scalar variable or an array // with a given size. // The identifier has been scanned & we have the type. 
// class is the variable's class // Return the pointer to variable's entry in the symbol table struct symtable *var_declaration(int type, struct symtable *ctype, int class) { struct symtable *sym = NULL; // See if this has already been declared switch (class) { case C_GLOBAL: if (findglob(Text) != NULL) fatals("Duplicate global variable declaration", Text); case C_LOCAL: case C_PARAM: if (findlocl(Text) != NULL) fatals("Duplicate local variable declaration", Text); case C_MEMBER: if (findmember(Text) != NULL) fatals("Duplicate struct/union member declaration", Text); } // Text now has the identifier's name. // If the next token is a '[' if (Token.token == T_LBRACKET) { // Skip past the '[' scan(&Token); // Check we have an array size if (Token.token == T_INTLIT) { // Add this as a known array and generate its space in assembly. // We treat the array as a pointer to its elements' type switch (class) { case C_GLOBAL: sym = addglob(Text, pointer_to(type), ctype, S_ARRAY, Token.intvalue); break; case C_LOCAL: case C_PARAM: case C_MEMBER: fatal ("For now, declaration of non-global arrays is not implemented"); } } // Ensure we have a following ']' scan(&Token); match(T_RBRACKET, "]"); } else { // Add this as a known scalar // and generate its space in assembly switch (class) { case C_GLOBAL: sym = addglob(Text, type, ctype, S_VARIABLE, 1); break; case C_LOCAL: sym = addlocl(Text, type, ctype, S_VARIABLE, 1); break; case C_PARAM: sym = addparm(Text, type, ctype, S_VARIABLE, 1); break; case C_MEMBER: sym = addmemb(Text, type, ctype, S_VARIABLE, 1); break; } } return (sym); } // var_declaration_list: // | variable_declaration // | variable_declaration separate_token var_declaration_list ; // // When called to parse function parameters, separate_token is ','. // When called to parse members of a struct/union, separate_token is ';'. // // Parse a list of variables. // Add them as symbols to one of the symbol table lists, and return the // number of variables. 
If funcsym is not NULL, there is an existing function // prototype, so compare each variable's type against this prototype. static int var_declaration_list(struct symtable *funcsym, int class, int separate_token, int end_token) { int type; int paramcnt = 0; struct symtable *protoptr = NULL; struct symtable *ctype; // If there is a prototype, get the pointer // to the first prototype parameter if (funcsym != NULL) protoptr = funcsym->member; // Loop until the final end token while (Token.token != end_token) { // Get the type and identifier type = parse_type(&ctype); ident(); // Check that this type matches the prototype if there is one if (protoptr != NULL) { if (type != protoptr->type) fatald("Type doesn't match prototype for parameter", paramcnt + 1); protoptr = protoptr->next; } else { // Add a new parameter to the right symbol table list, based on the class var_declaration(type, ctype, class); } paramcnt++; // Must have a separate_token or ')' at this point if ((Token.token != separate_token) && (Token.token != end_token)) fatald("Unexpected token in parameter list", Token.token); if (Token.token == separate_token) scan(&Token); } // Check that the number of parameters in this list matches // any existing prototype if ((funcsym != NULL) && (paramcnt != funcsym->nelems)) fatals("Parameter count mismatch for function", funcsym->name); // Return the count of parameters return (paramcnt); } // // function_declaration: type identifier '(' parameter_list ')' ; // | type identifier '(' parameter_list ')' compound_statement ; // // Parse the declaration of function. // The identifier has been scanned & we have the type. struct ASTnode *function_declaration(int type) { struct ASTnode *tree, *finalstmt; struct symtable *oldfuncsym, *newfuncsym = NULL; int endlabel, paramcnt; // Text has the identifier's name. If this exists and is a // function, get the id. Otherwise, set oldfuncsym to NULL. 
if ((oldfuncsym = findsymbol(Text)) != NULL) if (oldfuncsym->stype != S_FUNCTION) oldfuncsym = NULL; // If this is a new function declaration, get a // label-id for the end label, and add the function // to the symbol table, if (oldfuncsym == NULL) { endlabel = genlabel(); // Assumtion: functions only return scalar types, so NULL below newfuncsym = addglob(Text, type, NULL, S_FUNCTION, endlabel); } // Scan in the '(', any parameters and the ')'. // Pass in any existing function prototype pointer lparen(); paramcnt = var_declaration_list(oldfuncsym, C_PARAM, T_COMMA, T_RPAREN); rparen(); // If this is a new function declaration, update the // function symbol entry with the number of parameters. // Also copy the parameter list into the function's node. if (newfuncsym) { newfuncsym->nelems = paramcnt; newfuncsym->member = Parmhead; oldfuncsym = newfuncsym; } // Clear out the parameter list Parmhead = Parmtail = NULL; // Declaration ends in a semicolon, only a prototype. if (Token.token == T_SEMI) { scan(&Token); return (NULL); } // This is not just a prototype. // Set the Functionid global to the function's symbol pointer Functionid = oldfuncsym; // Get the AST tree for the compound statement tree = compound_statement(); // If the function type isn't P_VOID .. if (type != P_VOID) { // Error if no statements in the function if (tree == NULL) fatal("No statements in function with non-void type"); // Check that the last AST operation in the // compound statement was a return statement finalstmt = (tree->op == A_GLUE) ? tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); } // Return an A_FUNCTION node which has the function's symbol pointer // and the compound statement sub-tree return (mkastunary(A_FUNCTION, type, tree, oldfuncsym, endlabel)); } // Parse struct declarations. Either find an existing // struct declaration, or build a struct symbol table // entry and return its pointer. 
static struct symtable *struct_declaration(void) { struct symtable *ctype = NULL; struct symtable *m; int offset; // Skip the struct keyword scan(&Token); // See if there is a following struct name if (Token.token == T_IDENT) { // Find any matching composite type ctype = findstruct(Text); scan(&Token); } // If the next token isn't an LBRACE , this is // the usage of an existing struct type. // Return the pointer to the type. if (Token.token != T_LBRACE) { if (ctype == NULL) fatals("unknown struct type", Text); return (ctype); } // Ensure this struct type hasn't been // previously defined if (ctype) fatals("previously defined struct", Text); // Build the struct node and skip the left brace ctype = addstruct(Text, P_STRUCT, NULL, 0, 0); scan(&Token); // Scan in the list of members and attach // to the struct type's node var_declaration_list(NULL, C_MEMBER, T_SEMI, T_RBRACE); rbrace(); ctype->member = Membhead; Membhead = Membtail = NULL; // Set the offset of the initial member // and find the first free byte after it m = ctype->member; m->posn = 0; offset = typesize(m->type, m->ctype); // Set the position of each successive member in the struct for (m = m->next; m != NULL; m = m->next) { // Set the offset for this member m->posn = genalign(m->type, offset, 1); // Get the offset of the next free byte after this member offset += typesize(m->type, m->ctype); } // Set the overall size of the struct ctype->size = offset; return (ctype); } // Parse one or more global declarations, either // variables, functions or structs void global_declarations(void) { struct ASTnode *tree; struct symtable *ctype; int type; while (1) { // Stop when we have reached EOF if (Token.token == T_EOF) break; // Get the type type = parse_type(&ctype); // We might have just parsed a struct declaration // with no associated variable. The next token // might be a ';'. Loop back if it is. XXX. 
I'm // not happy with this as it allows "struct fred;" // as an accepted statement if (type == P_STRUCT && Token.token == T_SEMI) { scan(&Token); continue; } // We have to read past the identifier // to see either a '(' for a function declaration // or a ',' or ';' for a variable declaration. // Text is filled in by the ident() call. ident(); if (Token.token == T_LPAREN) { // Parse the function declaration tree = function_declaration(type); // Only a function prototype, no code if (tree == NULL) continue; // A real function, generate the assembly code for it if (O_dumpAST) { dumpAST(tree, NOLABEL, 0); fprintf(stdout, "\n\n"); } genAST(tree, NOLABEL, 0); // Now free the symbols associated // with this function freeloclsyms(); } else { // Parse the global variable declaration // and skip past the trailing semicolon var_declaration(type, ctype, C_GLOBAL); semi(); } } } ================================================ FILE: 31_Struct_Declarations/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c void reject_token(struct token *t); int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue); struct ASTnode *mkastleaf(int op, int type, struct symtable *sym, int intvalue); struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, struct symtable *sym, int intvalue); void dumpAST(struct ASTnode *n, int label, int parentASTop); // gen.c int genlabel(void); int genAST(struct ASTnode *n, int reg, int parentASTop); void genpreamble(); void genpostamble(); void genfreeregs(); void genglobsym(struct symtable *node); int genglobstr(char *strvalue); int genprimsize(int type); int genalign(int type, int offset, int direction); void genreturn(int reg, int id); // cg.c int cgprimsize(int type); int cgalign(int type, int offset, int direction); void 
cgtextseg(); void cgdataseg(); void freeall_registers(void); void cgpreamble(); void cgpostamble(); void cgfuncpreamble(struct symtable *sym); void cgfuncpostamble(struct symtable *sym); int cgloadint(int value, int type); int cgloadglob(struct symtable *sym, int op); int cgloadlocal(struct symtable *sym, int op); int cgloadglobstr(int label); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdiv(int r1, int r2); int cgshlconst(int r, int val); int cgcall(struct symtable *sym, int numargs); void cgcopyarg(int r, int argposn); int cgstorglob(int r, struct symtable *sym); int cgstorlocal(int r, struct symtable *sym); void cgglobsym(struct symtable *node); void cgglobstr(int l, char *strvalue); int cgcompare_and_set(int ASTop, int r1, int r2); int cgcompare_and_jump(int ASTop, int r1, int r2, int label); void cglabel(int l); void cgjump(int l); int cgwiden(int r, int oldtype, int newtype); void cgreturn(int reg, struct symtable *sym); int cgaddress(struct symtable *sym); int cgderef(int r, int type); int cgstorderef(int r1, int r2, int type); int cgnegate(int r); int cginvert(int r); int cglognot(int r); int cgboolean(int r, int op, int label); int cgand(int r1, int r2); int cgor(int r1, int r2); int cgxor(int r1, int r2); int cgshl(int r1, int r2); int cgshr(int r1, int r2); // expr.c struct ASTnode *binexpr(int ptp); // stmt.c struct ASTnode *compound_statement(void); // misc.c void match(int t, char *what); void semi(void); void lbrace(void); void rbrace(void); void lparen(void); void rparen(void); void ident(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node); struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int size, int posn); struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int size); struct 
symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int size); struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype, int size); struct symtable *addstruct(char *name, int type, struct symtable *ctype, int stype, int size); struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int size); struct symtable *findglob(char *s); struct symtable *findlocl(char *s); struct symtable *findsymbol(char *s); struct symtable *findmember(char *s); struct symtable *findstruct(char *s); void clear_symtable(void); void freeloclsyms(void); // decl.c struct symtable *var_declaration(int type, struct symtable *ctype, int class); struct ASTnode *function_declaration(int type); void global_declarations(void); // types.c int inttype(int type); int ptrtype(int type); int parse_type(struct symtable **ctype); int pointer_to(int type); int value_at(int type); int typesize(int type, struct symtable *ctype); struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op); ================================================ FILE: 31_Struct_Declarations/defs.h ================================================ #include #include #include #include // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 enum { TEXTLEN = 512 // Length of identifiers in input }; // Commands and default filenames #define AOUT "a.out" #ifdef __NASM__ #define ASCMD "nasm -f elf64 -o " #define LDCMD "cc -no-pie -fno-plt -Wall -o " #else #define ASCMD "as -o " #define LDCMD "cc -o " #endif // Token types enum { T_EOF, // Binary operators T_ASSIGN, T_LOGOR, T_LOGAND, T_OR, T_XOR, T_AMPER, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, T_LSHIFT, T_RSHIFT, T_PLUS, T_MINUS, T_STAR, T_SLASH, // Other operators T_INC, T_DEC, T_INVERT, T_LOGNOT, // Type keywords T_VOID, T_CHAR, T_INT, T_LONG, // Other keywords T_IF, T_ELSE, T_WHILE, T_FOR, T_RETURN, T_STRUCT, // Structural tokens T_INTLIT, T_STRLIT, T_SEMI, T_IDENT, T_LBRACE, T_RBRACE, 
T_LPAREN, T_RPAREN, T_LBRACKET, T_RBRACKET, T_COMMA }; // Token structure struct token { int token; // Token type, from the enum list above int intvalue; // For T_INTLIT, the integer value }; // AST node types. The first few line up // with the related tokens enum { A_ASSIGN = 1, A_LOGOR, A_LOGAND, A_OR, A_XOR, A_AND, A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_LSHIFT, A_RSHIFT, A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_INTLIT, A_STRLIT, A_IDENT, A_GLUE, A_IF, A_WHILE, A_FUNCTION, A_WIDEN, A_RETURN, A_FUNCCALL, A_DEREF, A_ADDR, A_SCALE, A_PREINC, A_PREDEC, A_POSTINC, A_POSTDEC, A_NEGATE, A_INVERT, A_LOGNOT, A_TOBOOL }; // Primitive types. The bottom 4 bits is an integer // value that represents the level of indirection, // e.g. 0= no pointer, 1= pointer, 2= pointer pointer etc. enum { P_NONE, P_VOID = 16, P_CHAR = 32, P_INT = 48, P_LONG = 64, P_STRUCT=80 }; // Structural types enum { S_VARIABLE, S_FUNCTION, S_ARRAY }; // Storage classes enum { C_GLOBAL = 1, // Globally visible symbol C_LOCAL, // Locally visible symbol C_PARAM, // Locally visible function parameter C_STRUCT, // A struct C_MEMBER // Member of a struct or union }; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol struct symtable *ctype; // If struct/union, ptr to that type int stype; // Structural type for the symbol int class; // Storage class for the symbol union { int size; // Number of elements in the symbol int endlabel; // For functions, the end label }; union { int nelems; // For functions, # of params int posn; // For locals, the negative offset // from the stack base pointer }; struct symtable *next; // Next symbol in one list struct symtable *member; // First member of a function, struct, }; // union or enum // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates int rvalue; // True if the node is an rvalue struct ASTnode 
*left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; struct symtable *sym; // For many AST nodes, the pointer to union { // the symbol in the symbol table int intvalue; // For A_INTLIT, the integer value int size; // For A_SCALE, the size to scale by }; }; enum { NOREG = -1, // Use NOREG when the AST generation // functions have no register to return NOLABEL = 0 // Use NOLABEL when we have no label to // pass to genAST() }; ================================================ FILE: 31_Struct_Declarations/expr.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of expressions // Copyright (c) 2019 Warren Toomey, GPL3 // expression_list: // | expression // | expression ',' expression_list // ; // Parse a list of zero or more comma-separated expressions and // return an AST composed of A_GLUE nodes with the left-hand child // being the sub-tree of previous expressions (or NULL) and the right-hand // child being the next expression. Each A_GLUE node will have size field // set to the number of expressions in the tree at this point. If no // expressions are parsed, NULL is returned static struct ASTnode *expression_list(void) { struct ASTnode *tree = NULL; struct ASTnode *child = NULL; int exprcount = 0; // Loop until the final right parentheses while (Token.token != T_RPAREN) { // Parse the next expression and increment the expression count child = binexpr(0); exprcount++; // Build an A_GLUE AST node with the previous tree as the left child // and the new expression as the right child. Store the expression count. 
tree = mkastnode(A_GLUE, P_NONE, tree, NULL, child, NULL, exprcount); // Must have a ',' or ')' at this point switch (Token.token) { case T_COMMA: scan(&Token); break; case T_RPAREN: break; default: fatald("Unexpected token in expression list", Token.token); } } // Return the tree of expressions return (tree); } // Parse a function call and return its AST static struct ASTnode *funccall(void) { struct ASTnode *tree; struct symtable *funcptr; // Check that the identifier has been defined as a function, // then make a leaf node for it. if ((funcptr = findsymbol(Text)) == NULL || funcptr->stype != S_FUNCTION) { fatals("Undeclared function", Text); } // Get the '(' lparen(); // Parse the argument expression list tree = expression_list(); // XXX Check type of each argument against the function's prototype // Build the function call AST node. Store the // function's return type as this node's type. // Also record the function's symbol-id tree = mkastunary(A_FUNCCALL, funcptr->type, tree, funcptr, 0); // Get the ')' rparen(); return (tree); } // Parse the index into an array and return an AST tree for it static struct ASTnode *array_access(void) { struct ASTnode *left, *right; struct symtable *aryptr; // Check that the identifier has been defined as an array // then make a leaf node for it that points at the base if ((aryptr = findsymbol(Text)) == NULL || aryptr->stype != S_ARRAY) { fatals("Undeclared array", Text); } left = mkastleaf(A_ADDR, aryptr->type, aryptr, 0); // Get the '[' scan(&Token); // Parse the following expression right = binexpr(0); // Get the ']' match(T_RBRACKET, "]"); // Ensure that this is of int type if (!inttype(right->type)) fatal("Array index is not of integer type"); // Scale the index by the size of the element's type right = modify_type(right, left->type, A_ADD); // Return an AST tree where the array's base has the offset // added to it, and dereference the element. Still an lvalue // at this point. 
left = mkastnode(A_ADD, aryptr->type, left, NULL, right, NULL, 0); left = mkastunary(A_DEREF, value_at(left->type), left, NULL, 0); return (left); } // Parse a postfix expression and return // an AST node representing it. The // identifier is already in Text. static struct ASTnode *postfix(void) { struct ASTnode *n; struct symtable *varptr; // Scan in the next token to see if we have a postfix expression scan(&Token); // Function call if (Token.token == T_LPAREN) return (funccall()); // An array reference if (Token.token == T_LBRACKET) return (array_access()); // A variable. Check that the variable exists. if ((varptr = findsymbol(Text)) == NULL || varptr->stype != S_VARIABLE) fatals("Unknown variable", Text); switch (Token.token) { // Post-increment: skip over the token case T_INC: scan(&Token); n = mkastleaf(A_POSTINC, varptr->type, varptr, 0); break; // Post-decrement: skip over the token case T_DEC: scan(&Token); n = mkastleaf(A_POSTDEC, varptr->type, varptr, 0); break; // Just a variable reference default: n = mkastleaf(A_IDENT, varptr->type, varptr, 0); } return (n); } // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; int id; switch (Token.token) { case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. // Make it a P_CHAR if it's within the P_CHAR range if ((Token.intvalue) >= 0 && (Token.intvalue < 256)) n = mkastleaf(A_INTLIT, P_CHAR, NULL, Token.intvalue); else n = mkastleaf(A_INTLIT, P_INT, NULL, Token.intvalue); break; case T_STRLIT: // For a STRLIT token, generate the assembly for it. // Then make a leaf AST node for it. id is the string's label. id = genglobstr(Text); n = mkastleaf(A_STRLIT, pointer_to(P_CHAR), NULL, id); break; case T_IDENT: return (postfix()); case T_LPAREN: // Beginning of a parenthesised expression, skip the '('. 
// Scan in the expression and the right parenthesis scan(&Token); n = binexpr(0); rparen(); return (n); default: fatald("Expecting a primary expression, got token", Token.token); } // Scan in the next token and return the leaf node scan(&Token); return (n); } // Convert a binary operator token into a binary AST operation. // We rely on a 1:1 mapping from token to AST operation static int binastop(int tokentype) { if (tokentype > T_EOF && tokentype <= T_SLASH) return (tokentype); fatald("Syntax error, token", tokentype); return (0); // Keep -Wall happy } // Return true if a token is right-associative, // false otherwise. static int rightassoc(int tokentype) { if (tokentype == T_ASSIGN) return (1); return (0); } // Operator precedence for each token. Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 20, 30, // T_EOF, T_ASSIGN, T_LOGOR, T_LOGAND 40, 50, 60, // T_OR, T_XOR, T_AMPER 70, 70, // T_EQ, T_NE 80, 80, 80, 80, // T_LT, T_GT, T_LE, T_GE 90, 90, // T_LSHIFT, T_RSHIFT 100, 100, // T_PLUS, T_MINUS 110, 110 // T_STAR, T_SLASH }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec; if (tokentype > T_SLASH) fatald("Token with no precedence in op_precedence:", tokentype); prec = OpPrec[tokentype]; if (prec == 0) fatald("Syntax error, token", tokentype); return (prec); } // prefix_expression: primary // | '*' prefix_expression // | '&' prefix_expression // | '-' prefix_expression // | '++' prefix_expression // | '--' prefix_expression // ; // Parse a prefix expression and return // a sub-tree representing it. 
struct ASTnode *prefix(void) { struct ASTnode *tree; switch (Token.token) { case T_AMPER: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Ensure that it's an identifier if (tree->op != A_IDENT) fatal("& operator must be followed by an identifier"); // Now change the operator to A_ADDR and the type to // a pointer to the original type tree->op = A_ADDR; tree->type = pointer_to(tree->type); break; case T_STAR: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's either another deref or an // identifier if (tree->op != A_IDENT && tree->op != A_DEREF) fatal("* operator must be followed by an identifier or *"); // Prepend an A_DEREF operation to the tree tree = mkastunary(A_DEREF, value_at(tree->type), tree, NULL, 0); break; case T_MINUS: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_NEGATE operation to the tree and // make the child an rvalue. Because chars are unsigned, // also widen this to int so that it's signed tree->rvalue = 1; tree = modify_type(tree, P_INT, 0); tree = mkastunary(A_NEGATE, tree->type, tree, NULL, 0); break; case T_INVERT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_INVERT operation to the tree and // make the child an rvalue. tree->rvalue = 1; tree = mkastunary(A_INVERT, tree->type, tree, NULL, 0); break; case T_LOGNOT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_LOGNOT operation to the tree and // make the child an rvalue. 
tree->rvalue = 1; tree = mkastunary(A_LOGNOT, tree->type, tree, NULL, 0); break; case T_INC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("++ operator must be followed by an identifier"); // Prepend an A_PREINC operation to the tree tree = mkastunary(A_PREINC, tree->type, tree, NULL, 0); break; case T_DEC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("-- operator must be followed by an identifier"); // Prepend an A_PREDEC operation to the tree tree = mkastunary(A_PREDEC, tree->type, tree, NULL, 0); break; default: tree = primary(); } return (tree); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; struct ASTnode *ltemp, *rtemp; int ASTop; int tokentype; // Get the tree on the left. // Fetch the next token at the same time. 
left = prefix(); // If we hit one of several terminating tokens, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA) { left->rvalue = 1; return (left); } // While the precedence of this token is more than that of the // previous token precedence, or it's right associative and // equal to the previous token's precedence while ((op_precedence(tokentype) > ptp) || (rightassoc(tokentype) && op_precedence(tokentype) == ptp)) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Determine the operation to be performed on the sub-trees ASTop = binastop(tokentype); if (ASTop == A_ASSIGN) { // Assignment // Make the right tree into an rvalue right->rvalue = 1; // Ensure the right's type matches the left right = modify_type(right, left->type, 0); if (right == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree. However, switch // left and right around, so that the right expression's // code will be generated before the left expression ltemp = left; left = right; right = ltemp; } else { // We are not doing an assignment, so both trees should be rvalues // Convert both trees into rvalue if they are lvalue trees left->rvalue = 1; right->rvalue = 1; // Ensure the two types are compatible by trying // to modify each tree to match the other's type. ltemp = modify_type(left, right->type, ASTop); rtemp = modify_type(right, left->type, ASTop); if (ltemp == NULL && rtemp == NULL) fatal("Incompatible types in binary expression"); if (ltemp != NULL) left = ltemp; if (rtemp != NULL) right = rtemp; } // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. left = mkastnode(binastop(tokentype), left->type, left, NULL, right, NULL, 0); // Update the details of the current token. 
// If we hit a terminating token, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA) { left->rvalue = 1; return (left); } } // Return the tree we have when the precedence // is the same or lower left->rvalue = 1; return (left); } ================================================ FILE: 31_Struct_Declarations/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number int genlabel(void) { static int id = 1; return (id++); } // Generate the code for an IF statement // and an optional ELSE clause static int genIF(struct ASTnode *n) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. // When there is no ELSE clause, Lfalse _is_ // the ending label! Lfalse = genlabel(); if (n->right) Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. genAST(n->left, Lfalse, n->op); genfreeregs(); // Generate the true compound statement genAST(n->mid, NOLABEL, n->op); genfreeregs(); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOLABEL, n->op); genfreeregs(); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = genlabel(); Lend = genlabel(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. 
genAST(n->left, Lend, n->op); genfreeregs(); // Generate the compound statement for the body genAST(n->right, NOLABEL, n->op); genfreeregs(); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Generate the code to copy the arguments of a // function call to its parameters, then call the // function itself. Return the register that holds // the function's return value. static int gen_funccall(struct ASTnode *n) { struct ASTnode *gluetree = n->left; int reg; int numargs = 0; // If there is a list of arguments, walk this list // from the last argument (right-hand child) to the // first while (gluetree) { // Calculate the expression's value reg = genAST(gluetree->right, NOLABEL, gluetree->op); // Copy this into the n'th function parameter: size is 1, 2, 3, ... cgcopyarg(reg, gluetree->size); // Keep the first (highest) number of arguments if (numargs == 0) numargs = gluetree->size; genfreeregs(); gluetree = gluetree->left; } // Call the function, clean up the stack (based on numargs), // and return its result return (cgcall(n->sym, numargs)); } // Given an AST, an optional label, and the AST op // of the parent, generate assembly code recursively. // Return the register id with the tree's final value. 
int genAST(struct ASTnode *n, int label, int parentASTop) { int leftreg, rightreg; // We have some specific AST node handling at the top // so that we don't evaluate the child sub-trees immediately switch (n->op) { case A_IF: return (genIF(n)); case A_WHILE: return (genWHILE(n)); case A_FUNCCALL: return (gen_funccall(n)); case A_GLUE: // Do each child statement, and free the // registers after each child genAST(n->left, NOLABEL, n->op); genfreeregs(); genAST(n->right, NOLABEL, n->op); genfreeregs(); return (NOREG); case A_FUNCTION: // Generate the function's preamble before the code // in the child sub-tree cgfuncpreamble(n->sym); genAST(n->left, NOLABEL, n->op); cgfuncpostamble(n->sym); return (NOREG); } // General AST node handling below // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, NOLABEL, n->op); if (n->right) rightreg = genAST(n->right, NOLABEL, n->op); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdiv(leftreg, rightreg)); case A_AND: return (cgand(leftreg, rightreg)); case A_OR: return (cgor(leftreg, rightreg)); case A_XOR: return (cgxor(leftreg, rightreg)); case A_LSHIFT: return (cgshl(leftreg, rightreg)); case A_RSHIFT: return (cgshr(leftreg, rightreg)); case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, compare registers // and set one to 1 or 0 based on the comparison. 
if (parentASTop == A_IF || parentASTop == A_WHILE) return (cgcompare_and_jump(n->op, leftreg, rightreg, label)); else return (cgcompare_and_set(n->op, leftreg, rightreg)); case A_INTLIT: return (cgloadint(n->intvalue, n->type)); case A_STRLIT: return (cgloadglobstr(n->intvalue)); case A_IDENT: // Load our value if we are an rvalue // or we are being dereferenced if (n->rvalue || parentASTop == A_DEREF) { if (n->sym->class == C_GLOBAL) { return (cgloadglob(n->sym, n->op)); } else { return (cgloadlocal(n->sym, n->op)); } } else return (NOREG); case A_ASSIGN: // Are we assigning to an identifier or through a pointer? switch (n->right->op) { case A_IDENT: if (n->right->sym->class == C_GLOBAL) return (cgstorglob(leftreg, n->right->sym)); else return (cgstorlocal(leftreg, n->right->sym)); case A_DEREF: return (cgstorderef(leftreg, rightreg, n->right->type)); default: fatald("Can't A_ASSIGN in genAST(), op", n->op); } case A_WIDEN: // Widen the child's type to the parent's type return (cgwiden(leftreg, n->left->type, n->type)); case A_RETURN: cgreturn(leftreg, Functionid); return (NOREG); case A_ADDR: return (cgaddress(n->sym)); case A_DEREF: // If we are an rvalue, dereference to get the value we point at, // otherwise leave it for A_ASSIGN to store through the pointer if (n->rvalue) return (cgderef(leftreg, n->left->type)); else return (leftreg); case A_SCALE: // Small optimisation: use shift if the // scale value is a known power of two switch (n->size) { case 2: return (cgshlconst(leftreg, 1)); case 4: return (cgshlconst(leftreg, 2)); case 8: return (cgshlconst(leftreg, 3)); default: // Load a register with the size and // multiply the leftreg by this size rightreg = cgloadint(n->size, P_INT); return (cgmul(leftreg, rightreg)); } case A_POSTINC: case A_POSTDEC: // Load and decrement the variable's value into a register // and post increment/decrement it if (n->sym->class == C_GLOBAL) return (cgloadglob(n->sym, n->op)); else return (cgloadlocal(n->sym, n->op)); case 
A_PREINC: case A_PREDEC: // Load and decrement the variable's value into a register // and pre increment/decrement it if (n->left->sym->class == C_GLOBAL) return (cgloadglob(n->left->sym, n->op)); else return (cgloadlocal(n->left->sym, n->op)); case A_NEGATE: return (cgnegate(leftreg)); case A_INVERT: return (cginvert(leftreg)); case A_LOGNOT: return (cglognot(leftreg)); case A_TOBOOL: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, set the register // to 0 or 1 based on it's zeroeness or non-zeroeness return (cgboolean(leftreg, parentASTop, label)); default: fatald("Unknown AST operator", n->op); } return (NOREG); // Keep -Wall happy } void genpreamble() { cgpreamble(); } void genpostamble() { cgpostamble(); } void genfreeregs() { freeall_registers(); } void genglobsym(struct symtable *node) { cgglobsym(node); } int genglobstr(char *strvalue) { int l = genlabel(); cgglobstr(l, strvalue); return (l); } int genprimsize(int type) { return (cgprimsize(type)); } int genalign(int type, int offset, int direction) { return (cgalign(type, offset, direction)); } ================================================ FILE: 31_Struct_Declarations/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Given a string with a '.' and at least a 1-character suffix // after the '.', change the suffix to be the given character. // Return the new string or NULL if the original string could // not be modified char *alter_suffix(char *str, char suffix) { char *posn; char *newstr; // Clone the string if ((newstr = strdup(str)) == NULL) return (NULL); // Find the '.' 
if ((posn = strrchr(newstr, '.')) == NULL) return (NULL); // Ensure there is a suffix posn++; if (*posn == '\0') return (NULL); // Change the suffix and NUL-terminate the string *posn++ = suffix; *posn = '\0'; return (newstr); } // Given an input filename, compile that file // down to assembly code. Return the new file's name static char *do_compile(char *filename) { Outfilename = alter_suffix(filename, 's'); if (Outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .c on the end\n", filename); exit(1); } // Open up the input file if ((Infile = fopen(filename, "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", filename, strerror(errno)); exit(1); } // Create the output file if ((Outfile = fopen(Outfilename, "w")) == NULL) { fprintf(stderr, "Unable to create %s: %s\n", Outfilename, strerror(errno)); exit(1); } Line = 1; // Reset the scanner Putback = '\n'; clear_symtable(); // Clear the symbol table if (O_verbose) printf("compiling %s\n", filename); scan(&Token); // Get the first token from the input genpreamble(); // Output the preamble global_declarations(); // Parse the global declarations genpostamble(); // Output the postamble fclose(Outfile); // Close the output file return (Outfilename); } // Given an input filename, assemble that file // down to object code. Return the object filename char *do_assemble(char *filename) { char cmd[TEXTLEN]; int err; char *outfilename = alter_suffix(filename, 'o'); if (outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .s on the end\n", filename); exit(1); } // Build the assembly command and run it snprintf(cmd, TEXTLEN, "%s %s %s", ASCMD, outfilename, filename); if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Assembly of %s failed\n", filename); exit(1); } return (outfilename); } // Given a list of object files and an output filename, // link all of the object filenames together. 
void do_link(char *outfilename, char *objlist[]) { int cnt, size = TEXTLEN; char cmd[TEXTLEN], *cptr; int err; // Start with the linker command and the output file cptr = cmd; cnt = snprintf(cptr, size, "%s %s ", LDCMD, outfilename); cptr += cnt; size -= cnt; // Now append each object file while (*objlist != NULL) { cnt = snprintf(cptr, size, "%s ", *objlist); cptr += cnt; size -= cnt; objlist++; } if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Linking failed\n"); exit(1); } } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s [-vcST] [-o outfile] file [file ...]\n", prog); fprintf(stderr, " -v give verbose output of the compilation stages\n"); fprintf(stderr, " -c generate object files but don't link them\n"); fprintf(stderr, " -S generate assembly files but don't link them\n"); fprintf(stderr, " -T dump the AST trees for each input file\n"); fprintf(stderr, " -o outfile, produce the outfile executable file\n"); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. 
enum { MAXOBJ = 100 }; int main(int argc, char *argv[]) { char *outfilename = AOUT; char *asmfile, *objfile; char *objlist[MAXOBJ]; int i, objcnt = 0; // Initialise our variables O_dumpAST = 0; O_keepasm = 0; O_assemble = 0; O_verbose = 0; O_dolink = 1; // Scan for command-line options for (i = 1; i < argc; i++) { // No leading '-', stop scanning for options if (*argv[i] != '-') break; // For each option in this argument for (int j = 1; (*argv[i] == '-') && argv[i][j]; j++) { switch (argv[i][j]) { case 'o': outfilename = argv[++i]; // Save & skip to next argument break; case 'T': O_dumpAST = 1; break; case 'c': O_assemble = 1; O_keepasm = 0; O_dolink = 0; break; case 'S': O_keepasm = 1; O_assemble = 0; O_dolink = 0; break; case 'v': O_verbose = 1; break; default: usage(argv[0]); } } } // Ensure we have at lease one input file argument if (i >= argc) usage(argv[0]); // Work on each input file in turn while (i < argc) { asmfile = do_compile(argv[i]); // Compile the source file if (O_dolink || O_assemble) { objfile = do_assemble(asmfile); // Assemble it to object forma if (objcnt == (MAXOBJ - 2)) { fprintf(stderr, "Too many object files for the compiler to handle\n"); exit(1); } objlist[objcnt++] = objfile; // Add the object file's name objlist[objcnt] = NULL; // to the list of object files } if (!O_keepasm) // Remove the assembly file if unlink(asmfile); // we don't need to keep it i++; } // Now link all the object files together if (O_dolink) { do_link(outfilename, objlist); // If we don't need to keep the object // files, then remove them if (!O_assemble) { for (i = 0; objlist[i] != NULL; i++) unlink(objlist[i]); } } return (0); } ================================================ FILE: 31_Struct_Declarations/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" #include // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current token is t, // and fetch the next token. 
Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d\n", s, Line); fclose(Outfile); unlink(Outfilename); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d\n", s1, s2, Line); fclose(Outfile); unlink(Outfilename); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d\n", s, d, Line); fclose(Outfile); unlink(Outfilename); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d\n", s, c, Line); fclose(Outfile); unlink(Outfilename); exit(1); } ================================================ FILE: 31_Struct_Declarations/scan.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { char *p; p = strchr(s, c); return (p ? p - s : -1); } // Get the next character from the input file. 
static int next(void) { int c; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return (c); } c = fgetc(Infile); // Read from input file if ('\n' == c) Line++; // Increment line count return (c); } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. static int skip(void) { int c; c = next(); while (' ' == c || '\t' == c || '\n' == c || '\r' == c || '\f' == c) { c = next(); } return (c); } // Return the next character from a character // or string literal static int scanch(void) { int c; // Get the next input character and interpret // metacharacters that start with a backslash c = next(); if (c == '\\') { switch (c = next()) { case 'a': return '\a'; case 'b': return '\b'; case 'f': return '\f'; case 'n': return '\n'; case 'r': return '\r'; case 't': return '\t'; case 'v': return '\v'; case '\\': return '\\'; case '"': return '"'; case '\'': return '\''; default: fatalc("unknown escape sequence", c); } } return (c); // Just an ordinary old character! } // Scan and return an integer literal // value from the input file. static int scanint(int c) { int k, val = 0; // Convert each character into an int value while ((k = chrpos("0123456789", c)) >= 0) { val = val * 10 + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan in a string literal from the input file, // and store it in buf[]. Return the length of // the string. 
static int scanstr(char *buf) { int i, c; // Loop while we have enough buffer space for (i = 0; i < TEXTLEN - 1; i++) { // Get the next char and append to buf // Return when we hit the ending double quote if ((c = scanch()) == '"') { buf[i] = 0; return (i); } buf[i] = c; } // Ran out of buf[] space fatal("String literal too long"); return (0); } // Scan an identifier from the input file and // store it in buf[]. Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { fatal("Identifier too long"); } else if (i < lim - 1) { buf[i++] = c; } c = next(); } // We hit a non-valid character, put it back. // NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. 
static int keyword(char *s) { switch (*s) { case 'c': if (!strcmp(s, "char")) return (T_CHAR); break; case 'e': if (!strcmp(s, "else")) return (T_ELSE); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'l': if (!strcmp(s, "long")) return (T_LONG); break; case 'r': if (!strcmp(s, "return")) return (T_RETURN); break; case 's': if (!strcmp(s, "struct")) return (T_STRUCT); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; case 'v': if (!strcmp(s, "void")) return (T_VOID); break; } return (0); } // A pointer to a rejected token static struct token *Rejtoken = NULL; // Reject the token that we just scanned void reject_token(struct token *t) { if (Rejtoken != NULL) fatal("Can't reject token twice"); Rejtoken = t; } // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. int scan(struct token *t) { int c, tokentype; // If we have any rejected token, return it if (Rejtoken != NULL) { t = Rejtoken; Rejtoken = NULL; return (1); } // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': if ((c = next()) == '+') { t->token = T_INC; } else { putback(c); t->token = T_PLUS; } break; case '-': if ((c = next()) == '-') { t->token = T_DEC; } else { putback(c); t->token = T_MINUS; } break; case '*': t->token = T_STAR; break; case '/': t->token = T_SLASH; break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case '[': t->token = T_LBRACKET; break; case ']': t->token = T_RBRACKET; break; case '~': t->token = T_INVERT; break; case '^': t->token = T_XOR; break; case ',': t->token = T_COMMA; break; case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = 
T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { putback(c); t->token = T_LOGNOT; } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else if (c == '<') { t->token = T_LSHIFT; } else { putback(c); t->token = T_LT; } break; case '>': if ((c = next()) == '=') { t->token = T_GE; } else if (c == '>') { t->token = T_RSHIFT; } else { putback(c); t->token = T_GT; } break; case '&': if ((c = next()) == '&') { t->token = T_LOGAND; } else { putback(c); t->token = T_AMPER; } break; case '|': if ((c = next()) == '|') { t->token = T_LOGOR; } else { putback(c); t->token = T_OR; } break; case '\'': // If it's a quote, scan in the // literal character value and // the trailing quote t->intvalue = scanch(); t->token = T_INTLIT; if (next() != '\'') fatal("Expected '\\'' at end of char literal"); break; case '"': // Scan in a literal string scanstr(Text); t->token = T_STRLIT; break; default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if ((tokentype = keyword(Text)) != 0) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token return (1); } ================================================ FILE: 31_Struct_Declarations/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // Prototypes static struct ASTnode *single_statement(void); // compound_statement: // empty, i.e. 
no statement // | statement // | statement statements // ; // // statement: declaration // | expression_statement // | function_call // | if_statement // | while_statement // | for_statement // | return_statement // ; // if_statement: if_head // | if_head 'else' compound_statement // ; // // if_head: 'if' '(' true_false_expression ')' compound_statement ; // // Parse an IF statement including any // optional ELSE clause and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); rparen(); // Get the AST for the compound statement trueAST = compound_statement(); // If we have an 'else', skip it // and get the AST for the compound statement if (Token.token == T_ELSE) { scan(&Token); falseAST = compound_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, P_NONE, condAST, trueAST, falseAST, NULL, 0)); } // while_statement: 'while' '(' true_false_expression ')' compound_statement ; // // Parse a WHILE statement and return its AST static struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. 
condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); rparen(); // Get the AST for the compound statement bodyAST = compound_statement(); // Build and return the AST for this statement return (mkastnode(A_WHILE, P_NONE, condAST, NULL, bodyAST, NULL, 0)); } // for_statement: 'for' '(' preop_statement ';' // true_false_expression ';' // postop_statement ')' compound_statement ; // // preop_statement: statement (for now) // postop_statement: statement (for now) // // Parse a FOR statement and return its AST static struct ASTnode *for_statement(void) { struct ASTnode *condAST, *bodyAST; struct ASTnode *preopAST, *postopAST; struct ASTnode *tree; // Ensure we have 'for' '(' match(T_FOR, "for"); lparen(); // Get the pre_op statement and the ';' preopAST = single_statement(); semi(); // Get the condition and the ';'. // Force a non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); semi(); // Get the post_op statement and the ')' postopAST = single_statement(); rparen(); // Get the compound statement which is the body bodyAST = compound_statement(); // For now, all four sub-trees have to be non-NULL. 
// Later on, we'll change the semantics for when some are missing // Glue the compound statement and the postop tree tree = mkastnode(A_GLUE, P_NONE, bodyAST, NULL, postopAST, NULL, 0); // Make a WHILE loop with the condition and this new body tree = mkastnode(A_WHILE, P_NONE, condAST, NULL, tree, NULL, 0); // And glue the preop tree to the A_WHILE tree return (mkastnode(A_GLUE, P_NONE, preopAST, NULL, tree, NULL, 0)); } // return_statement: 'return' '(' expression ')' ; // // Parse a return statement and return its AST static struct ASTnode *return_statement(void) { struct ASTnode *tree; // Can't return a value if function returns P_VOID if (Functionid->type == P_VOID) fatal("Can't return from a void function"); // Ensure we have 'return' '(' match(T_RETURN, "return"); lparen(); // Parse the following expression tree = binexpr(0); // Ensure this is compatible with the function's type tree = modify_type(tree, Functionid->type, 0); if (tree == NULL) fatal("Incompatible type to return"); // Add on the A_RETURN node tree = mkastunary(A_RETURN, P_NONE, tree, NULL, 0); // Get the ')' rparen(); return (tree); } // Parse a single statement and return its AST static struct ASTnode *single_statement(void) { int type; struct symtable *ctype; switch (Token.token) { case T_CHAR: case T_INT: case T_LONG: // The beginning of a variable declaration. // Parse the type and get the identifier. // Then parse the rest of the declaration // and skip over the semicolon type = parse_type(&ctype); ident(); var_declaration(type, ctype, C_LOCAL); semi(); return (NULL); // No AST generated here case T_IF: return (if_statement()); case T_WHILE: return (while_statement()); case T_FOR: return (for_statement()); case T_RETURN: return (return_statement()); default: // For now, see if this is an expression. // This catches assignment statements. 
return (binexpr(0)); } return (NULL); // Keep -Wall happy } // Parse a compound statement // and return its AST struct ASTnode *compound_statement(void) { struct ASTnode *left = NULL; struct ASTnode *tree; // Require a left curly bracket lbrace(); while (1) { // Parse a single statement tree = single_statement(); // Some statements must be followed by a semicolon if (tree != NULL && (tree->op == A_ASSIGN || tree->op == A_RETURN || tree->op == A_FUNCCALL)) semi(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, P_NONE, left, NULL, tree, NULL, 0); } // When we hit a right curly bracket, // skip past it and return the AST if (Token.token == T_RBRACE) { rbrace(); return (left); } } } ================================================ FILE: 31_Struct_Declarations/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 // Append a node to the singly-linked list pointed to by head or tail void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node) { // Check for valid pointers if (head == NULL || tail == NULL || node == NULL) fatal("Either head, tail or node is NULL in appendsym"); // Append to the list if (*tail) { (*tail)->next = node; *tail = node; } else *head = *tail = node; node->next = NULL; } // Create a symbol node to be added to a symbol table list. // Set up the node's: // + type: char, int etc. // + ctype: composite type pointer for struct/union // + structural type: var, function, array etc. 
// + size: number of elements, or endlabel: end label for a function // + posn: Position information for local symbols // Return a pointer to the new node struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int size, int posn) { // Get a new node struct symtable *node = (struct symtable *) malloc(sizeof(struct symtable)); if (node == NULL) fatal("Unable to malloc a symbol table node in newsym"); // Fill in the values node->name = strdup(name); node->type = type; node->ctype = ctype; node->stype = stype; node->class = class; node->size = size; node->posn = posn; node->next = NULL; node->member = NULL; // Generate any global space if (class == C_GLOBAL) genglobsym(node); return (node); } // Add a symbol to the global symbol list struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_GLOBAL, size, 0); appendsym(&Globhead, &Globtail, sym); return (sym); } // Add a symbol to the local symbol list struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_LOCAL, size, 0); appendsym(&Loclhead, &Locltail, sym); return (sym); } // Add a symbol to the parameter list struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_PARAM, size, 0); appendsym(&Parmhead, &Parmtail, sym); return (sym); } // Add a symbol to the temporary member list struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_MEMBER, size, 0); appendsym(&Membhead, &Membtail, sym); return (sym); } // Add a struct to the struct list struct symtable *addstruct(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_STRUCT, size, 
0); appendsym(&Structhead, &Structtail, sym); return (sym); } // Search for a symbol in a specific list. // Return a pointer to the found node or NULL if not found. static struct symtable *findsyminlist(char *s, struct symtable *list) { for (; list != NULL; list = list->next) if ((list->name != NULL) && !strcmp(s, list->name)) return (list); return (NULL); } // Determine if the symbol s is in the global symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findglob(char *s) { return (findsyminlist(s, Globhead)); } // Determine if the symbol s is in the local symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findlocl(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member); if (node) return (node); } return (findsyminlist(s, Loclhead)); } // Determine if the symbol s is in the symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findsymbol(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member); if (node) return (node); } // Otherwise, try the local and global symbol lists node = findsyminlist(s, Loclhead); if (node) return (node); return (findsyminlist(s, Globhead)); } // Find a member in the member list // Return a pointer to the found node or NULL if not found. struct symtable *findmember(char *s) { return (findsyminlist(s, Membhead)); } // Find a struct in the struct list // Return a pointer to the found node or NULL if not found. 
struct symtable *findstruct(char *s) { return (findsyminlist(s, Structhead)); } // Reset the contents of the symbol table void clear_symtable(void) { Globhead = Globtail = NULL; Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Membhead = Membtail = NULL; Structhead = Structtail = NULL; } // Clear all the entries in the local symbol table void freeloclsyms(void) { Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Functionid = NULL; } ================================================ FILE: 31_Struct_Declarations/tests/err.input31.c ================================================ Expecting a primary expression, got token:15 on line 5 ================================================ FILE: 31_Struct_Declarations/tests/err.input32.c ================================================ Unknown variable:cow on line 4 ================================================ FILE: 31_Struct_Declarations/tests/err.input33.c ================================================ Incompatible type to return on line 4 ================================================ FILE: 31_Struct_Declarations/tests/err.input34.c ================================================ For now, declaration of non-global arrays is not implemented on line 4 ================================================ FILE: 31_Struct_Declarations/tests/err.input35.c ================================================ Duplicate local variable declaration:a on line 4 ================================================ FILE: 31_Struct_Declarations/tests/err.input36.c ================================================ Type doesn't match prototype for parameter:2 on line 4 ================================================ FILE: 31_Struct_Declarations/tests/err.input37.c ================================================ Unexpected token in parameter list:15 on line 3 ================================================ FILE: 31_Struct_Declarations/tests/err.input38.c ================================================ Type doesn't match 
prototype for parameter:2 on line 4 ================================================ FILE: 31_Struct_Declarations/tests/err.input39.c ================================================ No statements in function with non-void type on line 4 ================================================ FILE: 31_Struct_Declarations/tests/err.input40.c ================================================ No return for function with non-void type on line 4 ================================================ FILE: 31_Struct_Declarations/tests/err.input41.c ================================================ Can't return from a void function on line 3 ================================================ FILE: 31_Struct_Declarations/tests/err.input42.c ================================================ Undeclared function:fred on line 3 ================================================ FILE: 31_Struct_Declarations/tests/err.input43.c ================================================ Undeclared array:b on line 3 ================================================ FILE: 31_Struct_Declarations/tests/err.input44.c ================================================ Unknown variable:z on line 3 ================================================ FILE: 31_Struct_Declarations/tests/err.input45.c ================================================ & operator must be followed by an identifier on line 3 ================================================ FILE: 31_Struct_Declarations/tests/err.input46.c ================================================ * operator must be followed by an identifier or * on line 3 ================================================ FILE: 31_Struct_Declarations/tests/err.input47.c ================================================ ++ operator must be followed by an identifier on line 3 ================================================ FILE: 31_Struct_Declarations/tests/err.input48.c ================================================ -- operator must be followed by an identifier on line 3 
================================================ FILE: 31_Struct_Declarations/tests/err.input49.c ================================================ Incompatible expression in assignment on line 6 ================================================ FILE: 31_Struct_Declarations/tests/err.input50.c ================================================ Incompatible types in binary expression on line 6 ================================================ FILE: 31_Struct_Declarations/tests/err.input51.c ================================================ Expected '\'' at end of char literal on line 4 ================================================ FILE: 31_Struct_Declarations/tests/err.input52.c ================================================ Unrecognised character:$ on line 5 ================================================ FILE: 31_Struct_Declarations/tests/err.input56.c ================================================ unknown struct type:var1 on line 2 ================================================ FILE: 31_Struct_Declarations/tests/err.input57.c ================================================ previously defined struct:fred on line 2 ================================================ FILE: 31_Struct_Declarations/tests/input01.c ================================================ int printf(char *fmt); void main() { printf("%d\n", 12 * 3); printf("%d\n", 18 - 2 * 4); printf("%d\n", 1 + 2 + 9 - 5/2 + 3*5); } ================================================ FILE: 31_Struct_Declarations/tests/input02.c ================================================ int printf(char *fmt); void main() { int fred; int jim; fred= 5; jim= 12; printf("%d\n", fred + jim); } ================================================ FILE: 31_Struct_Declarations/tests/input03.c ================================================ int printf(char *fmt); void main() { int x; x= 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); } 
================================================ FILE: 31_Struct_Declarations/tests/input04.c ================================================ int printf(char *fmt); void main() { int x; x= 7 < 9; printf("%d\n", x); x= 7 <= 9; printf("%d\n", x); x= 7 != 9; printf("%d\n", x); x= 7 == 7; printf("%d\n", x); x= 7 >= 7; printf("%d\n", x); x= 7 <= 7; printf("%d\n", x); x= 9 > 7; printf("%d\n", x); x= 9 >= 7; printf("%d\n", x); x= 9 != 7; printf("%d\n", x); } ================================================ FILE: 31_Struct_Declarations/tests/input05.c ================================================ int printf(char *fmt); void main() { int i; int j; i=6; j=12; if (i < j) { printf("%d\n", i); } else { printf("%d\n", j); } } ================================================ FILE: 31_Struct_Declarations/tests/input06.c ================================================ int printf(char *fmt); void main() { int i; i=1; while (i <= 10) { printf("%d\n", i); i= i + 1; } } ================================================ FILE: 31_Struct_Declarations/tests/input07.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 31_Struct_Declarations/tests/input08.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 31_Struct_Declarations/tests/input09.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { printf("%d\n", 2 * b - a); } } ================================================ FILE: 31_Struct_Declarations/tests/input10.c ================================================ int printf(char *fmt); void main() { int i; char j; j= 20; 
printf("%d\n", j); i= 10; printf("%d\n", i); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 2; j= j + 1) { printf("%d\n", j); } } ================================================ FILE: 31_Struct_Declarations/tests/input11.c ================================================ int printf(char *fmt); int main() { int i; char j; long k; i= 10; printf("%d\n", i); j= 20; printf("%d\n", j); k= 30; printf("%d\n", k); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 4; j= j + 1) { printf("%d\n", j); } for (k= 1; k <= 5; k= k + 1) { printf("%d\n", k); } return(i); printf("%d\n", 12345); return(3); } ================================================ FILE: 31_Struct_Declarations/tests/input12.c ================================================ int printf(char *fmt); int fred() { return(5); } void main() { int x; x= fred(2); printf("%d\n", x); } ================================================ FILE: 31_Struct_Declarations/tests/input13.c ================================================ int printf(char *fmt); int fred() { return(56); } void main() { int dummy; int result; dummy= printf("%d\n", 23); result= fred(10); dummy= printf("%d\n", result); } ================================================ FILE: 31_Struct_Declarations/tests/input14.c ================================================ int printf(char *fmt); int fred() { return(20); } int main() { int result; printf("%d\n", 10); result= fred(15); printf("%d\n", result); printf("%d\n", fred(15)+10); return(0); } ================================================ FILE: 31_Struct_Declarations/tests/input15.c ================================================ int printf(char *fmt); int main() { char a; char *b; char c; int d; int *e; int f; a= 18; printf("%d\n", a); b= &a; c= *b; printf("%d\n", c); d= 12; printf("%d\n", d); e= &d; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 31_Struct_Declarations/tests/input16.c 
================================================ int printf(char *fmt); int c; int d; int *e; int f; int main() { c= 12; d=18; printf("%d\n", c); e= &c + 1; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 31_Struct_Declarations/tests/input17.c ================================================ int printf(char *fmt); int main() { char a; char *b; int d; int *e; b= &a; *b= 19; printf("%d\n", a); e= &d; *e= 12; printf("%d\n", d); return(0); } ================================================ FILE: 31_Struct_Declarations/tests/input18.c ================================================ int printf(char *fmt); int main() { int a; int b; a= b= 34; printf("%d\n", a); printf("%d\n", b); return(0); } ================================================ FILE: 31_Struct_Declarations/tests/input18a.c ================================================ int printf(char *fmt); int a; int *b; char c; char *d; int main() { b= &a; *b= 15; printf("%d\n", a); d= &c; *d= 16; printf("%d\n", c); return(0); } ================================================ FILE: 31_Struct_Declarations/tests/input19.c ================================================ int printf(char *fmt); int a; int b; int c; int d; int e; int main() { a= 2; b= 4; c= 3; d= 2; e= (a+b) * (c+d); printf("%d\n", e); return(0); } ================================================ FILE: 31_Struct_Declarations/tests/input20.c ================================================ int printf(char *fmt); int a; int b[25]; int main() { b[3]= 12; a= b[3]; printf("%d\n", a); return(0); } ================================================ FILE: 31_Struct_Declarations/tests/input21.c ================================================ int printf(char *fmt); char c; char *str; int main() { c= '\n'; printf("%d\n", c); for (str= "Hello world\n"; *str != 0; str= str + 1) { printf("%c", *str); } return(0); } ================================================ FILE: 31_Struct_Declarations/tests/input22.c 
================================================ int printf(char *fmt); char a; char b; char c; int d; int e; int f; long g; long h; long i; int main() { b= 5; c= 7; a= b + c++; printf("%d\n", a); e= 5; f= 7; d= e + f++; printf("%d\n", d); h= 5; i= 7; g= h + i++; printf("%d\n", g); a= b-- + c; printf("%d\n", a); d= e-- + f; printf("%d\n", d); g= h-- + i; printf("%d\n", g); a= ++b + c; printf("%d\n", a); d= ++e + f; printf("%d\n", d); g= ++h + i; printf("%d\n", g); a= b * --c; printf("%d\n", a); d= e * --f; printf("%d\n", d); g= h * --i; printf("%d\n", g); return(0); } ================================================ FILE: 31_Struct_Declarations/tests/input23.c ================================================ int printf(char *fmt); char *str; int x; int main() { x= -23; printf("%d\n", x); printf("%d\n", -10 * -10); x= 1; x= ~x; printf("%d\n", x); x= 2 > 5; printf("%d\n", x); x= !x; printf("%d\n", x); x= !x; printf("%d\n", x); x= 13; if (x) { printf("%d\n", 13); } x= 0; if (!x) { printf("%d\n", 14); } for (str= "Hello world\n"; *str; str++) { printf("%c", *str); } return(0); } ================================================ FILE: 31_Struct_Declarations/tests/input24.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { a= 42; b= 19; printf("%d\n", a & b); printf("%d\n", a | b); printf("%d\n", a ^ b); printf("%d\n", 1 << 3); printf("%d\n", 63 >> 3); return(0); } ================================================ FILE: 31_Struct_Declarations/tests/input25.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { char z; int y; int x; x= 10; y= 20; z= 30; printf("%d\n", x); printf("%d\n", y); printf("%d\n", z); a= 5; b= 15; c= 25; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); return(0); } ================================================ FILE: 31_Struct_Declarations/tests/input26.c ================================================ int printf(char *fmt); int 
main(int a, char b, long c, int d, int e, int f, int g, int h) { int i; int j; int k; a= 13; printf("%d\n", a); b= 23; printf("%d\n", b); c= 34; printf("%d\n", c); d= 44; printf("%d\n", d); e= 54; printf("%d\n", e); f= 64; printf("%d\n", f); g= 74; printf("%d\n", g); h= 84; printf("%d\n", h); i= 94; printf("%d\n", i); j= 95; printf("%d\n", j); k= 96; printf("%d\n", k); return(0); } ================================================ FILE: 31_Struct_Declarations/tests/input27.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int param5(int a, int b, int c, int d, int e) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param2(int a, int b) { int c; int d; int e; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param0() { int a; int b; int c; int d; int e; a= 1; b= 2; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int main() { param8(1,2,3,4,5,6,7,8); param5(1,2,3,4,5); param2(1,2); param0(); return(0); } ================================================ FILE: 31_Struct_Declarations/tests/input28.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ 
FILE: 31_Struct_Declarations/tests/input29.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h); int fred(int a, int b, int c); int main(); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 31_Struct_Declarations/tests/input30.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input30.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 31_Struct_Declarations/tests/input31.c ================================================ int printf(char *fmt); int main() { int x; x= 2 + + 3 - * / ; } ================================================ FILE: 31_Struct_Declarations/tests/input32.c ================================================ int printf(char *fmt); int main() { pizza cow llama sausage; } ================================================ FILE: 31_Struct_Declarations/tests/input33.c ================================================ int printf(char *fmt); int main() { char *z; return(z); } ================================================ FILE: 31_Struct_Declarations/tests/input34.c ================================================ int printf(char *fmt); int main() { int a[12]; return(0); } ================================================ FILE: 
31_Struct_Declarations/tests/input35.c ================================================ int printf(char *fmt); int fred(int a, int b) { int a; return(a); } ================================================ FILE: 31_Struct_Declarations/tests/input36.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c); ================================================ FILE: 31_Struct_Declarations/tests/input37.c ================================================ int printf(char *fmt); int fred(int a, char b +, int z); ================================================ FILE: 31_Struct_Declarations/tests/input38.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c, int g); ================================================ FILE: 31_Struct_Declarations/tests/input39.c ================================================ int printf(char *fmt); int main() { int a; } ================================================ FILE: 31_Struct_Declarations/tests/input40.c ================================================ int printf(char *fmt); int main() { int a; a= 5; } ================================================ FILE: 31_Struct_Declarations/tests/input41.c ================================================ int printf(char *fmt); void fred() { return(5); } ================================================ FILE: 31_Struct_Declarations/tests/input42.c ================================================ int printf(char *fmt); int main() { fred(5); } ================================================ FILE: 31_Struct_Declarations/tests/input43.c ================================================ int printf(char *fmt); int main() { int a; a= b[4]; } ================================================ FILE: 31_Struct_Declarations/tests/input44.c ================================================ int printf(char *fmt); int main() { int a; a= z; } 
================================================ FILE: 31_Struct_Declarations/tests/input45.c ================================================ int printf(char *fmt); int main() { int a; a= &5; } ================================================ FILE: 31_Struct_Declarations/tests/input46.c ================================================ int printf(char *fmt); int main() { int a; a= *5; } ================================================ FILE: 31_Struct_Declarations/tests/input47.c ================================================ int printf(char *fmt); int main() { int a; a= ++5; } ================================================ FILE: 31_Struct_Declarations/tests/input48.c ================================================ int printf(char *fmt); int main() { int a; a= --5; } ================================================ FILE: 31_Struct_Declarations/tests/input49.c ================================================ int printf(char *fmt); int main() { int x; char y; y= x; } ================================================ FILE: 31_Struct_Declarations/tests/input50.c ================================================ int printf(char *fmt); int main() { char *a; char *b; a= a + b; } ================================================ FILE: 31_Struct_Declarations/tests/input51.c ================================================ int printf(char *fmt); int main() { char a; a= 'fred'; } ================================================ FILE: 31_Struct_Declarations/tests/input52.c ================================================ int printf(char *fmt); int main() { int a; a= $5.00; } ================================================ FILE: 31_Struct_Declarations/tests/input53.c ================================================ int printf(char *fmt); int main() { printf("Hello world, %d\n", 23); return(0); } ================================================ FILE: 31_Struct_Declarations/tests/input54.c ================================================ int printf(char *fmt); int main() { int 
i; for (i=0; i < 20; i++) { printf("Hello world, %d\n", i); } return(0); } ================================================ FILE: 31_Struct_Declarations/tests/input55.c ================================================ int printf(char *fmt); int main(int argc, char **argv) { int i; char *argument; printf("Hello world\n"); for (i=0; i < argc; i++) { argument= *argv; argv= argv + 1; printf("Argument %d is %s\n", i, argument); } return(0); } ================================================ FILE: 31_Struct_Declarations/tests/input56.c ================================================ struct foo { int x; }; struct mary var1; ================================================ FILE: 31_Struct_Declarations/tests/input57.c ================================================ struct fred { int x; } ; struct fred { char y; } ; ================================================ FILE: 31_Struct_Declarations/tests/mktests ================================================ #!/bin/sh # Make the output files for each test # Build our compiler if needed if [ ! -f ../cwj ] then (cd ..; make) fi for i in input*c do if [ ! -f "out.$i" -a ! -f "err.$i" ] then ../cwj -o out $i 2> "err.$i" # If the err file is empty if [ ! 
-s "err.$i" ] then rm -f "err.$i" cc -o out $i ../lib/printint.c ./out > "out.$i" fi fi rm -f out out.s done ================================================ FILE: 31_Struct_Declarations/tests/out.input01.c ================================================ 36 10 25 ================================================ FILE: 31_Struct_Declarations/tests/out.input02.c ================================================ 17 ================================================ FILE: 31_Struct_Declarations/tests/out.input03.c ================================================ 1 2 3 4 5 ================================================ FILE: 31_Struct_Declarations/tests/out.input04.c ================================================ 1 1 1 1 1 1 1 1 1 ================================================ FILE: 31_Struct_Declarations/tests/out.input05.c ================================================ 6 ================================================ FILE: 31_Struct_Declarations/tests/out.input06.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 31_Struct_Declarations/tests/out.input07.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 31_Struct_Declarations/tests/out.input08.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 31_Struct_Declarations/tests/out.input09.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 31_Struct_Declarations/tests/out.input10.c ================================================ 20 10 1 2 3 4 5 253 254 255 0 1 ================================================ FILE: 31_Struct_Declarations/tests/out.input11.c ================================================ 10 20 30 1 2 3 4 5 253 254 255 0 1 2 3 1 2 3 4 5 ================================================ FILE: 
31_Struct_Declarations/tests/out.input12.c ================================================ 5 ================================================ FILE: 31_Struct_Declarations/tests/out.input13.c ================================================ 23 56 ================================================ FILE: 31_Struct_Declarations/tests/out.input14.c ================================================ 10 20 30 ================================================ FILE: 31_Struct_Declarations/tests/out.input15.c ================================================ 18 18 12 12 ================================================ FILE: 31_Struct_Declarations/tests/out.input16.c ================================================ 12 18 ================================================ FILE: 31_Struct_Declarations/tests/out.input17.c ================================================ 19 12 ================================================ FILE: 31_Struct_Declarations/tests/out.input18.c ================================================ 34 34 ================================================ FILE: 31_Struct_Declarations/tests/out.input18a.c ================================================ 15 16 ================================================ FILE: 31_Struct_Declarations/tests/out.input19.c ================================================ 30 ================================================ FILE: 31_Struct_Declarations/tests/out.input20.c ================================================ 12 ================================================ FILE: 31_Struct_Declarations/tests/out.input21.c ================================================ 10 Hello world ================================================ FILE: 31_Struct_Declarations/tests/out.input22.c ================================================ 12 12 12 13 13 13 13 13 13 35 35 35 ================================================ FILE: 31_Struct_Declarations/tests/out.input23.c ================================================ -23 100 -2 0 1 0 13 14 Hello 
world ================================================ FILE: 31_Struct_Declarations/tests/out.input24.c ================================================ 2 59 57 8 7 ================================================ FILE: 31_Struct_Declarations/tests/out.input25.c ================================================ 10 20 30 5 15 25 ================================================ FILE: 31_Struct_Declarations/tests/out.input26.c ================================================ 13 23 34 44 54 64 74 84 94 95 96 ================================================ FILE: 31_Struct_Declarations/tests/out.input27.c ================================================ 1 2 3 4 5 6 7 8 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 ================================================ FILE: 31_Struct_Declarations/tests/out.input28.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 31_Struct_Declarations/tests/out.input29.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 31_Struct_Declarations/tests/out.input30.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input30.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 31_Struct_Declarations/tests/out.input53.c ================================================ Hello world, 23 ================================================ FILE: 31_Struct_Declarations/tests/out.input54.c ================================================ Hello world, 0 Hello world, 1 Hello world, 2 Hello world, 3 Hello world, 4 Hello world, 5 Hello world, 6 Hello world, 7 Hello world, 8 Hello 
world, 9 Hello world, 10 Hello world, 11 Hello world, 12 Hello world, 13 Hello world, 14 Hello world, 15 Hello world, 16 Hello world, 17 Hello world, 18 Hello world, 19 ================================================ FILE: 31_Struct_Declarations/tests/out.input55.c ================================================ Hello world Argument 0 is ./out ================================================ FILE: 31_Struct_Declarations/tests/runtests ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! -f ../cwj ] then (cd ..; make) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" # Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../cwj -o out $i ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../cwj $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.s "trial.$i" done ================================================ FILE: 31_Struct_Declarations/tests/runtestsn ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! 
-f ../compn ] then (cd ..; make) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" # Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../compn -o out $i ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../compn $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.o out.s "trial.$i" done ================================================ FILE: 31_Struct_Declarations/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->type = type; n->left = left; n->mid = mid; n->right = right; n->sym = sym; n->intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, struct symtable *sym, int intvalue) { return 
(mkastnode(op, type, NULL, NULL, NULL, sym, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, struct symtable *sym, int intvalue) { return (mkastnode(op, type, left, NULL, NULL, sym, intvalue)); } // Generate and return a new label number // just for AST dumping purposes static int gendumplabel(void) { static int id = 1; return (id++); } // Given an AST tree, print it out and follow the // traversal of the tree that genAST() follows void dumpAST(struct ASTnode *n, int label, int level) { int Lfalse, Lstart, Lend; switch (n->op) { case A_IF: Lfalse = gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_IF"); if (n->right) { Lend = gendumplabel(); fprintf(stdout, ", end L%d", Lend); } fprintf(stdout, "\n"); dumpAST(n->left, Lfalse, level + 2); dumpAST(n->mid, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); return; case A_WHILE: Lstart = gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_WHILE, start L%d\n", Lstart); Lend = gendumplabel(); dumpAST(n->left, Lend, level + 2); dumpAST(n->right, NOLABEL, level + 2); return; } // Reset level to -2 for A_GLUE if (n->op == A_GLUE) level = -2; // General AST node handling if (n->left) dumpAST(n->left, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); for (int i = 0; i < level; i++) fprintf(stdout, " "); switch (n->op) { case A_GLUE: fprintf(stdout, "\n\n"); return; case A_FUNCTION: fprintf(stdout, "A_FUNCTION %s\n", n->sym->name); return; case A_ADD: fprintf(stdout, "A_ADD\n"); return; case A_SUBTRACT: fprintf(stdout, "A_SUBTRACT\n"); return; case A_MULTIPLY: fprintf(stdout, "A_MULTIPLY\n"); return; case A_DIVIDE: fprintf(stdout, "A_DIVIDE\n"); return; case A_EQ: fprintf(stdout, "A_EQ\n"); return; case A_NE: fprintf(stdout, "A_NE\n"); return; case A_LT: fprintf(stdout, "A_LE\n"); return; case A_GT: fprintf(stdout, "A_GT\n"); return; 
case A_LE: fprintf(stdout, "A_LE\n"); return; case A_GE: fprintf(stdout, "A_GE\n"); return; case A_INTLIT: fprintf(stdout, "A_INTLIT %d\n", n->intvalue); return; case A_STRLIT: fprintf(stdout, "A_STRLIT rval label L%d\n", n->intvalue); return; case A_IDENT: if (n->rvalue) fprintf(stdout, "A_IDENT rval %s\n", n->sym->name); else fprintf(stdout, "A_IDENT %s\n", n->sym->name); return; case A_ASSIGN: fprintf(stdout, "A_ASSIGN\n"); return; case A_WIDEN: fprintf(stdout, "A_WIDEN\n"); return; case A_RETURN: fprintf(stdout, "A_RETURN\n"); return; case A_FUNCCALL: fprintf(stdout, "A_FUNCCALL %s\n", n->sym->name); return; case A_ADDR: fprintf(stdout, "A_ADDR %s\n", n->sym->name); return; case A_DEREF: if (n->rvalue) fprintf(stdout, "A_DEREF rval\n"); else fprintf(stdout, "A_DEREF\n"); return; case A_SCALE: fprintf(stdout, "A_SCALE %d\n", n->size); return; case A_PREINC: fprintf(stdout, "A_PREINC %s\n", n->sym->name); return; case A_PREDEC: fprintf(stdout, "A_PREDEC %s\n", n->sym->name); return; case A_POSTINC: fprintf(stdout, "A_POSTINC\n"); return; case A_POSTDEC: fprintf(stdout, "A_POSTDEC\n"); return; case A_NEGATE: fprintf(stdout, "A_NEGATE\n"); return; default: fatald("Unknown dumpAST operator", n->op); } } ================================================ FILE: 31_Struct_Declarations/types.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Types and type handling // Copyright (c) 2019 Warren Toomey, GPL3 // Return true if a type is an int type // of any size, false otherwise int inttype(int type) { return ((type & 0xf) == 0); } // Return true if a type is of pointer type int ptrtype(int type) { return ((type & 0xf) != 0); } // Given a primitive type, return // the type which is a pointer to it int pointer_to(int type) { if ((type & 0xf) == 0xf) fatald("Unrecognised in pointer_to: type", type); return (type + 1); } // Given a primitive pointer type, return // the type which it points to int value_at(int type) { if 
((type & 0xf) == 0x0) fatald("Unrecognised in value_at: type", type); return (type - 1); } // Given a type and a composite type pointer, return // the size of this type in bytes int typesize(int type, struct symtable *ctype) { if (type == P_STRUCT) return(ctype->size); return(genprimsize(type)); } // Given an AST tree and a type which we want it to become, // possibly modify the tree by widening or scaling so that // it is compatible with this type. Return the original tree // if no changes occurred, a modified tree, or NULL if the // tree is not compatible with the given type. // If this will be part of a binary operation, the AST op is not zero. struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op) { int ltype; int lsize, rsize; ltype = tree->type; // XXX No idea on these yet if (ltype == P_STRUCT) fatal("Don't know how to do this yet"); if (rtype == P_STRUCT) fatal("Don't know how to do this yet"); // Compare scalar int types if (inttype(ltype) && inttype(rtype)) { // Both types same, nothing to do if (ltype == rtype) return (tree); // Get the sizes for each type lsize = typesize(ltype, NULL); // XXX Fix soon rsize = typesize(rtype, NULL); // XXX Fix soon // Tree's size is too big if (lsize > rsize) return (NULL); // Widen to the right if (rsize > lsize) return (mkastunary(A_WIDEN, rtype, tree, NULL, 0)); } // For pointers on the left if (ptrtype(ltype)) { // OK is same type on right and not doing a binary op if (op == 0 && ltype == rtype) return (tree); } // We can scale only on A_ADD or A_SUBTRACT operation if (op == A_ADD || op == A_SUBTRACT) { // Left is int type, right is pointer type and the size // of the original type is >1: scale the left if (inttype(ltype) && ptrtype(rtype)) { rsize = genprimsize(value_at(rtype)); if (rsize > 1) return (mkastunary(A_SCALE, rtype, tree, NULL, rsize)); else return (tree); // Size 1, no need to scale } } // If we get here, the types are not compatible return (NULL); } 
================================================ FILE: 32_Struct_Access_pt1/Makefile ================================================ HSRCS= data.h decl.h defs.h SRCS= cg.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c ARMSRCS= cg_arm.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c cwj: $(SRCS) $(HSRCS) cc -o cwj -g -Wall $(SRCS) compn: $(SRCN) $(HSRCS) cc -D__NASM__ -o compn -g -Wall $(SRCN) cwjarm: $(ARMSRCS) $(HSRCS) cc -o cwjarm -g -Wall $(ARMSRCS) cp cwjarm cwj clean: rm -f cwj cwjarm compn *.o *.s out test: cwj tests/runtests (cd tests; chmod +x runtests; ./runtests) armtest: cwjarm tests/runtests (cd tests; chmod +x runtests; ./runtests) testn: compn tests/runtestsn (cd tests; chmod +x runtestsn; ./runtestsn) ================================================ FILE: 32_Struct_Access_pt1/Readme.md ================================================ # Part 32: Accessing Members in a Struct This part of our compiler writing journey turned out to be quite simple. I've added the '.' and '->' tokens to our language, and I've implemented one level of member access to global struct variables. 
I'll give our test program, `tests/input58.c`, here so that you can see the language features that I've implemented: ```c int printf(char *fmt); struct fred { // Struct declaration, done last time int x; char y; long z; }; struct fred var2; // Variable declaration, done last time struct fred *varptr; // Pointer variable declaration, done last time int main() { long result; var2.x= 12; printf("%d\n", var2.x); // Member access as lvalue, new var2.y= 'c'; printf("%d\n", var2.y); var2.z= 4005; printf("%d\n", var2.z); result= var2.x + var2.y + var2.z; // Member access as rvalue, new printf("%d\n", result); varptr= &var2; // Old behaviour result= varptr->x + varptr->y + varptr->z; // Member access through pointer, new printf("%d\n", result); return(0); } ``` ## The New Tokens We have two new tokens, T_DOT and T_ARROW, to match the '.' and '->' elements in the input. As always, I won't give the code in `scan.c` to identify these. ## Parsing the Member References This turned out to be very similar to our existing array element accessing code. Let's look at the similarities and the differences. With this code: ```c int x[5]; int y; ... y= x[3]; ``` we get the base address of the `x` array, multiply 3 by the size of the `int` type in bytes (e.g. 3*4 is 12), add that to the base address, and treat this as the address of the `int` that we want to access. Then we dereference this address to get the value at that array position. Accessing a struct member is similar: ```c struct fred { int x; char y; long z; }; struct fred var2; char y; ... y= var2.y; ``` We get the base address of `var2`. We get the offset of the `y` member in the `fred` struct, add this to the base address, and treat this as the address of the `char` that we want to access. Then we dereference this address to get the value there. ## Postfix Operators T_DOT and T_ARROW are postfix operators, like the '[' of an array reference, as they come after an identifier's name.
So it makes sense to add their parsing in the existing `postfix()` function in `expr.c`: ```c static struct ASTnode *postfix(void) { ... // Access into a struct or union if (Token.token == T_DOT) return (member_access(0)); if (Token.token == T_ARROW) return (member_access(1)); ... } ``` The argument to the new `member_access()` function in `expr.c` indicates if we are accessing a member through a pointer or directly. Now let's look at the new `member_access()` in stages. ```c // Parse the member reference of a struct (or union, soon) // and return an AST tree for it. If withpointer is true, // the access is through a pointer to the member. static struct ASTnode *member_access(int withpointer) { struct ASTnode *left, *right; struct symtable *compvar; struct symtable *typeptr; struct symtable *m; // Check that the identifier has been declared as a struct (or a union, later), // or a struct/union pointer if ((compvar = findsymbol(Text)) == NULL) fatals("Undeclared variable", Text); if (withpointer && compvar->type != pointer_to(P_STRUCT)) fatals("Undeclared variable", Text); if (!withpointer && compvar->type != P_STRUCT) fatals("Undeclared variable", Text); ``` First, some error checking. I know I will have to add checking for unions here, so I'm not going to refactor the code just yet. ```c // If a pointer to a struct, get the pointer's value. // Otherwise, make a leaf node that points at the base // Either way, it's an rvalue if (withpointer) { left = mkastleaf(A_IDENT, pointer_to(P_STRUCT), compvar, 0); } else left = mkastleaf(A_ADDR, compvar->type, compvar, 0); left->rvalue = 1; ``` At this point we need to get the base address of the composite variable. If we are given a pointer, we simply load the pointer's value by making an A_IDENT AST node. Otherwise, the identifier *is* the struct or union, so we had better get its address with an A_ADDR AST node. This node can't be an lvalue, i.e. we can't say `var2. = 5`. It has to be an rvalue. 
```c // Get the details of the composite type typeptr = compvar->ctype; // Skip the '.' or '->' token and get the member's name scan(&Token); ident(); ``` We get a pointer to the composite type so that we can walk the list of members in the type, and we get the member's name after the '.' or '->' (and confirm that it is an identifier). ```c // Find the matching member's name in the type // Die if we can't find it for (m = typeptr->member; m != NULL; m = m->next) if (!strcmp(m->name, Text)) break; if (m == NULL) fatals("No member found in struct/union: ", Text); ``` We walk the member's list to find the matching member's name. ```c // Build an A_INTLIT node with the offset right = mkastleaf(A_INTLIT, P_INT, NULL, m->posn); // Add the member's offset to the base of the struct and // dereference it. Still an lvalue at this point left = mkastnode(A_ADD, pointer_to(m->type), left, NULL, right, NULL, 0); left = mkastunary(A_DEREF, m->type, left, NULL, 0); return (left); } ``` The member's offset in bytes is stored in `m->posn` so we make an A_INTLIT node with this value, and A_ADD it to the base address stored in `left`. At this point we have an address of the member, so we dereference it (A_DEREF) to get access to the member's value. At this point, this is still an lvalue; this allows us to do both `5 + var2.x` and `var2.x= 6`. 
### Running Our Test Code The output of `tests/input58.c` is, unsurprisingly: ``` 12 99 4005 4116 4116 ``` Let's have a look at some of the assembly output: ``` # var2.y= 'c'; movq $99, %r10 # Load 'c' into %r10 leaq var2(%rip), %r11 # Get base address of var2 into %r11 movq $4, %r12 addq %r11, %r12 # Add 4 to this base address movb %r10b, (%r12) # Write 'c' into this new address # printf("%d\n", var2.y); leaq var2(%rip), %r10 # Get base address of var2 into %r10 movq $4, %r11 addq %r10, %r11 # Add 4 to this base address movzbq (%r11), %r11 # Load byte value from this address into %r11 movq %r11, %rsi # Copy it into %rsi leaq L4(%rip), %r10 movq %r10, %rdi call printf@PLT # and call printf() ``` ## Conclusion and What's Next Well, this was a nice pleasant surprise to get structs to work this easily! I'm sure the future parts of our journey will make up for it. I also know that our compiler as it stands still is pretty limited. For example, it can't do this: ```c struct foo { int x; struct foo *next; }; struct foo *listhead; struct foo *l; int main() { ... l= listhead->next->next; ``` as this requires following two pointer levels. The existing code can only follow one pointer level. We will have to fix this later. It is probably also a good time to indicate that we will have to spend a lot of time getting the compiler to "do it right". I've been adding functionality, but only enough to get one specific feature to work. At some point these specific features will have to be made more general. So there will be a "mop up" stage in this journey. Now that we have structs mostly working, in the next part of our compiler writing journey, I will try to add unions.
[Next step](../33_Unions/Readme.md) ================================================ FILE: 32_Struct_Access_pt1/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\t.text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\t.data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch(type) { case P_CHAR: return (offset); case P_INT: case P_LONG: break; default: fatald("Bad type in calc_aligned_offset:", type); } // Here we have an int or a long. Align it on a 4-byte offset // I put the generic code here so it can be reused elsewhere. alignment= 4; offset = (offset + direction * (alignment-1)) & ~(alignment-1); return (offset); } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. 
static int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. // We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "%r10", "%r11", "%r12", "%r13", "%r9", "%r8", "%rcx", "%rdx", "%rsi", "%rdi" }; static char *breglist[] = { "%r10b", "%r11b", "%r12b", "%r13b", "%r9b", "%r8b", "%cl", "%dl", "%sil", "%dil" }; static char *dreglist[] = { "%r10d", "%r11d", "%r12d", "%r13d", "%r9d", "%r8d", "%ecx", "%edx", "%esi", "%edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(struct symtable *sym) { char *name = sym->name; struct symtable *parm, *locvar; int cnt; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the %rsp and %rsp fprintf(Outfile, "\t.globl\t%s\n" "\t.type\t%s, @function\n" "%s:\n" "\tpushq\t%%rbp\n" "\tmovq\t%%rsp, %%rbp\n", name, name, name); // Copy any in-register parameters to the stack, up to six of them // The remaining parameters are already on the stack for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) { if (cnt > 6) { parm->posn = paramOffset; paramOffset += 8; } else { parm->posn = newlocaloffset(parm->type); cgstorlocal(paramReg--, parm); } } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) { locvar->posn = newlocaloffset(locvar->type); } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\taddq\t$%d,%%rsp\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->endlabel); fprintf(Outfile, "\taddq\t$%d,%%rsp\n", stackOffset); fputs("\tpopq %rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. 
int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadglob(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name); } else // Print out the code to initialise it switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovzbq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovslq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. 
int cgloadlocal(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->posn); fprintf(Outfile, "\tmovq\t%d(%%rbp), %s\n", sym->posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->posn); } else switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->posn); fprintf(Outfile, "\tmovzbq\t%d(%%rbp), %s\n", sym->posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->posn); fprintf(Outfile, "\tmovslq\t%d(%%rbp), %s\n", sym->posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->posn); break; default: fatald("Bad type in cgloadlocal:", sym->type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tleaq\tL%d(%%rip), %s\n", label, reglist[r]); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, 
"\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tandq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\torq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txorq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshlq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshrq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tnegq\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnotq\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); return (r); } // Convert an integer value to a boolean value. 
Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s@PLT\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\taddq\t$%d, %%rsp\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpushq\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmovq\t%s, %s\n", reglist[r], reglist[FIRSTPARAMREG - argposn + 1]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsalq\t$%d, %s\n", val, reglist[r]); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %s(%%rip)\n", reglist[r], sym->name); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %s(%%rip)\n", breglist[r], sym->name); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %s(%%rip)\n", dreglist[r], sym->name); break; default: fatald("Bad type in cgstorglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %d(%%rbp)\n", reglist[r], sym->posn); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %d(%%rbp)\n", breglist[r], sym->posn); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %d(%%rbp)\n", dreglist[r], sym->posn); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the type size = typesize(node->type, node->ctype); // Generate the global identity and the label cgdataseg(); fprintf(Outfile, "\t.globl\t%s\n", node->name); fprintf(Outfile, "%s:", node->name); // Generate the space for this type switch (size) { case 1: fprintf(Outfile, "\t.byte\t0\n"); break; case 4: fprintf(Outfile, "\t.long\t0\n"); break; case 8: fprintf(Outfile, "\t.quad\t0\n"); 
break; default: for (int i=0; i < size; i++) fprintf(Outfile, "\t.byte\t0\n"); } } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\t.byte\t%d\n", *cptr); } fprintf(Outfile, "\t.byte\t0\n"); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzbl\t%s, %%eax\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %%eax\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } cgjump(sym->endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL) fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", sym->name, reglist[r]); else fprintf(Outfile, "\tleaq\t%d(%%rbp), %s\n", sym->posn, reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzbq\t(%s), %s\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovslq\t(%s), %s\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { // Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmovb\t%s, (%s)\n", breglist[r1], reglist[r2]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 32_Struct_Access_pt1/cg_arm.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for ARMv6 on Raspberry Pi // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. static int freereg[4]; static char *reglist[4] = { "r4", "r5", "r6", "r7" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. 
static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // We have to store large integer literal values in memory. // Keep a list of them which will be output in the postamble #define MAXINTS 1024 int Intlist[MAXINTS]; static int Intslot = 0; // Determine the offset of a large integer // literal from the .L3 label. If the integer // isn't in the list, add it. static void set_int_offset(int val) { int offset = -1; // See if it is already there for (int i = 0; i < Intslot; i++) { if (Intlist[i] == val) { offset = 4 * i; break; } } // Not in the list, so add it if (offset == -1) { offset = 4 * Intslot; if (Intslot == MAXINTS) fatal("Out of int slots in set_int_offset()"); Intlist[Intslot++] = val; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L3+%d\n", offset); } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Print out the assembly postamble void cgpostamble() { // Print out the global variables fprintf(Outfile, ".L2:\n"); for (int i = 0; i < Globs; i++) { if (Symtable[i].stype == S_VARIABLE) fprintf(Outfile, "\t.word %s\n", Symtable[i].name); } // Print out the integer literals fprintf(Outfile, ".L3:\n"); for (int i = 0; i < Intslot; i++) { fprintf(Outfile, "\t.word %d\n", Intlist[i]); } } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, \%%function\n" "%s:\n" "\tpush\t{fp, lr}\n" "\tadd\tfp, sp, #4\n" "\tsub\tsp, sp, #8\n" "\tstr\tr0, [fp, #-8]\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { 
cglabel(Symtable[id].endlabel); fputs("\tsub\tsp, fp, #4\n" "\tpop\t{fp, pc}\n" "\t.align\t2\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); // If the literal value is small, do it with one instruction if (value <= 1000) fprintf(Outfile, "\tmov\t%s, #%d\n", reglist[r], value); else { set_int_offset(value); fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); } return (r); } // Determine the offset of a variable from the .L2 // label. Yes, this is inefficient code. static void set_var_offset(int id) { int offset = 0; // Walk the symbol table up to id. // Find S_VARIABLEs and add on 4 until // we get to our variable for (int i = 0; i < id; i++) { if (Symtable[i].stype == S_VARIABLE) offset += 4; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L2+%d\n", offset); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tldrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s, %s\n", reglist[r1], reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register 
with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\tmul\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { // To do a divide: r0 holds the dividend, r1 holds the divisor. // The quotient is in r0. fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r1]); fprintf(Outfile, "\tmov\tr1, %s\n", reglist[r2]); fprintf(Outfile, "\tbl\t__aeabi_idiv\n"); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r1]); free_register(r2); return (r1); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]); fprintf(Outfile, "\tbl\t%s\n", Symtable[id].name); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r]); return (r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tlsl\t%s, %s, #%d\n", reglist[r], reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tstr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgstorglob:", Symtable[id].type); } return (r); } // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { if (ptrtype(type)) return (4); switch (type) { case P_CHAR: return (1); case P_INT: case P_LONG: return (4); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Symtable[id].type); fprintf(Outfile, "\t.data\n" "\t.globl\t%s\n", Symtable[id].name); switch (typesize) { case 1: fprintf(Outfile, "%s:\t.byte\t0\n", Symtable[id].name); break; case 4: fprintf(Outfile, "%s:\t.long\t0\n", Symtable[id].name); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "moveq", "movne", "movlt", "movgt", "movle", "movge" }; // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "movne", "moveq", "movge", "movle", "movgt", "movlt" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #1\n", cmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #0\n", invcmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\tuxtb\t%s, %s\n", reglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tb\tL%d\n", l); } // List of inverted branch instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *brlist[] = { "bne", "beq", "bge", "ble", "bgt", "blt" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", brlist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[reg]); cgjump(Symtable[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. Return a new register int cgaddress(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); fprintf(Outfile, "\tmov\t%s, r3\n", reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tldrb\t%s, [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [%s]\n", reglist[r1], reglist[r2]); break; case P_INT: case P_LONG: fprintf(Outfile, "\tstr\t%s, [%s]\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 32_Struct_Access_pt1/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { 
no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\tsection .text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\tsection .data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch(type) { case P_CHAR: return (offset); case P_INT: case P_LONG: break; default: fatald("Bad type in calc_aligned_offset:", type); } // Here we have an int or a long. Align it on a 4-byte offset // I put the generic code here so it can be reused elsewhere. alignment= 4; offset = (offset + direction * (alignment-1)) & ~(alignment-1); return (offset); } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. 
// We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "r10", "r11", "r12", "r13", "r9", "r8", "rcx", "rdx", "rsi", "rdi" }; static char *breglist[] = { "r10b", "r11b", "r12b", "r13b", "r9b", "r8b", "cl", "dl", "sil", "dil" }; static char *dreglist[] = { "r10d", "r11d", "r12d", "r13d", "r9d", "r8d", "ecx", "edx", "esi", "edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\textern\tprintint\n", Outfile); fputs("\textern\tprintchar\n", Outfile); fputs("\textern\topen\n", Outfile); fputs("\textern\tclose\n", Outfile); fputs("\textern\tread\n", Outfile); fputs("\textern\twrite\n", Outfile); fputs("\textern\tprintf\n", Outfile); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(struct symtable *sym) { char *name = sym->name; struct symtable *parm, *locvar; int cnt; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the rsp and rbp fprintf(Outfile, "\tglobal\t%s\n" "%s:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n", name, name); // Copy any in-register parameters to the stack, up to six of them // The remaining parameters are already on the stack for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) { if (cnt > 6) { parm->posn = paramOffset; paramOffset += 8; } else { parm->posn = newlocaloffset(parm->type); cgstorlocal(paramReg--, parm); } } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. 
for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) { locvar->posn = newlocaloffset(locvar->type); } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\tadd\trsp, %d\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->endlabel); fprintf(Outfile, "\tadd\trsp, %d\n", stackOffset); fputs("\tpop rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadglob(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); } else // Print out the code to initialise it switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, 
"\tdec\tdword [%s]\n", sym->name); fprintf(Outfile, "\tmovsx\t%s, word [%s]\n", dreglist[r], sym->name); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword [%s]\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadlocal(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->posn); fprintf(Outfile, "\tmov\t%s, [rbp+%d]\n", reglist[r], sym->posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->posn); } else switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->posn); fprintf(Outfile, "\tmovzx\t%s, byte [rbp+%d]\n", reglist[r], sym->posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", sym->posn); fprintf(Outfile, "\tmovsx\t%s, word [rbp+%d]\n", reglist[r], sym->posn); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, 
"\tdec\tdword\t[rbp+%d]\n", sym->posn); break; default: fatald("Bad type in cgloadlocal:", sym->type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, L%d\n", reglist[r], label); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tand\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\tor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshl\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshr\t%s, 
cl\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tneg\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnot\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r], breglist[r]); return (r); } // Convert an integer value to a boolean value. Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, byte %s\n", reglist[r], breglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\tadd\trsp, %d\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmov\t%s, rax\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpush\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmov\t%s, %s\n", reglist[FIRSTPARAMREG - argposn + 1], reglist[r]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsal\t%s, %d\n", reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, dreglist[r]); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\tqword\t[rbp+%d], %s\n", sym->posn, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\tbyte\t[rbp+%d], %s\n", sym->posn, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\tdword\t[rbp+%d], %s\n", sym->posn, dreglist[r]); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the type size = typesize(node->type, node->ctype); // Generate the global identity and the label cgdataseg(); fprintf(Outfile, "\tsection\t.data\n" "\tglobal\t%s\n", node->name); fprintf(Outfile, "%s:", node->name); // Generate the space for this type // original version for (int i = 0; i < node->size; i++) { switch(size) { case 1: fprintf(Outfile, "\tdb\t0\n"); break; case 4: fprintf(Outfile, 
"\tdd\t0\n"); break; case 8: fprintf(Outfile, "\tdq\t0\n"); break; default: for (int i = 0; i < size; i++) fprintf(Outfile, "\tdb\t0\n"); } } /* compact version using times instead of loop switch(size) { case 1: fprintf(Outfile, "\ttimes\t%d\tdb\t0\n", node->size); break; case 4: fprintf(Outfile, "\ttimes\t%d\tdd\t0\n", node->size); break; case 8: fprintf(Outfile, "\ttimes\t%d\tdq\t0\n", node->size); break; default: fprintf(Outfile, "\ttimes\t%d\tdb\t0\n", size); } */ } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\tdb\t%d\n", *cptr); } fprintf(Outfile, "\tdb\t0\n"); /* put string in readable format on a single line // probably went overboard with error checking int comma = 0, quote = 0, start = 1; fprintf(Outfile, "\tdb\t"); for (cptr=strvalue; *cptr; cptr++) { if ( ! isprint(*cptr) ) if (comma || start) { fprintf(Outfile, "%d, ", *cptr); start = 0; comma = 1; } else if (quote) { fprintf(Outfile, "\', %d, ", *cptr); comma = 1; quote = 0; } else { fprintf(Outfile, "%d, ", *cptr); comma = 1; quote = 0; } else if (start || comma) { fprintf(Outfile, "\'%c", *cptr); start = comma = 0; quote = 1; } else { fprintf(Outfile, "%c", *cptr); comma = 0; quote = 1; } } if (comma || start) fprintf(Outfile, "0\n"); else fprintf(Outfile, "\', 0\n"); */ } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzx\teax, %s\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmov\teax, %s\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } cgjump(sym->endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL) fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r], sym->name); else fprintf(Outfile, "\tlea\t%s, [rbp+%d]\n", reglist[r], sym->posn); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovsx\t%s, dword [%s]\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { //Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmov\t[%s], byte %s\n", reglist[r2], breglist[r1]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 32_Struct_Access_pt1/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Putback; // Character put back by scanner extern_ struct symtable *Functionid; // Symbol ptr of the current function extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ char *Outfilename; // Name of file we opened as Outfile extern_ struct token Token; // Last token scanned extern_ char Text[TEXTLEN + 1]; // Last identifier scanned // Symbol table lists extern_ struct symtable *Globhead, *Globtail; // Global variables and 
functions extern_ struct symtable *Loclhead, *Locltail; // Local variables extern_ struct symtable *Parmhead, *Parmtail; // Local parameters extern_ struct symtable *Membhead, *Membtail; // Temp list of struct/union members extern_ struct symtable *Structhead, *Structtail; // List of struct types // Command-line flags extern_ int O_dumpAST; // If true, dump the AST trees extern_ int O_keepasm; // If true, keep any assembly files extern_ int O_assemble; // If true, assemble the assembly files extern_ int O_dolink; // If true, link the object files extern_ int O_verbose; // If true, print info on compilation stages ================================================ FILE: 32_Struct_Access_pt1/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 static struct symtable *struct_declaration(void); // Parse the current token and return // a primitive type enum value and a pointer // to any composite type. // Also scan in the next token int parse_type(struct symtable **ctype) { int type; switch (Token.token) { case T_VOID: type = P_VOID; scan(&Token); break; case T_CHAR: type = P_CHAR; scan(&Token); break; case T_INT: type = P_INT; scan(&Token); break; case T_LONG: type = P_LONG; scan(&Token); break; case T_STRUCT: type = P_STRUCT; *ctype = struct_declaration(); break; default: fatald("Illegal type, token", Token.token); } // Scan in one or more further '*' tokens // and determine the correct pointer type while (1) { if (Token.token != T_STAR) break; type = pointer_to(type); scan(&Token); } // We leave with the next token already scanned return (type); } // variable_declaration: type identifier ';' // | type identifier '[' INTLIT ']' ';' // ; // // Parse the declaration of a scalar variable or an array // with a given size. // The identifier has been scanned & we have the type. 
// class is the variable's class // Return the pointer to variable's entry in the symbol table struct symtable *var_declaration(int type, struct symtable *ctype, int class) { struct symtable *sym = NULL; // See if this has already been declared switch (class) { case C_GLOBAL: if (findglob(Text) != NULL) fatals("Duplicate global variable declaration", Text); case C_LOCAL: case C_PARAM: if (findlocl(Text) != NULL) fatals("Duplicate local variable declaration", Text); case C_MEMBER: if (findmember(Text) != NULL) fatals("Duplicate struct/union member declaration", Text); } // Text now has the identifier's name. // If the next token is a '[' if (Token.token == T_LBRACKET) { // Skip past the '[' scan(&Token); // Check we have an array size if (Token.token == T_INTLIT) { // Add this as a known array and generate its space in assembly. // We treat the array as a pointer to its elements' type switch (class) { case C_GLOBAL: sym = addglob(Text, pointer_to(type), ctype, S_ARRAY, Token.intvalue); break; case C_LOCAL: case C_PARAM: case C_MEMBER: fatal ("For now, declaration of non-global arrays is not implemented"); } } // Ensure we have a following ']' scan(&Token); match(T_RBRACKET, "]"); } else { // Add this as a known scalar // and generate its space in assembly switch (class) { case C_GLOBAL: sym = addglob(Text, type, ctype, S_VARIABLE, 1); break; case C_LOCAL: sym = addlocl(Text, type, ctype, S_VARIABLE, 1); break; case C_PARAM: sym = addparm(Text, type, ctype, S_VARIABLE, 1); break; case C_MEMBER: sym = addmemb(Text, type, ctype, S_VARIABLE, 1); break; } } return (sym); } // var_declaration_list: // | variable_declaration // | variable_declaration separate_token var_declaration_list ; // // When called to parse function parameters, separate_token is ','. // When called to parse members of a struct/union, separate_token is ';'. // // Parse a list of variables. // Add them as symbols to one of the symbol table lists, and return the // number of variables. 
If funcsym is not NULL, there is an existing function // prototype, so compare each variable's type against this prototype. static int var_declaration_list(struct symtable *funcsym, int class, int separate_token, int end_token) { int type; int paramcnt = 0; struct symtable *protoptr = NULL; struct symtable *ctype; // If there is a prototype, get the pointer // to the first prototype parameter if (funcsym != NULL) protoptr = funcsym->member; // Loop until the final end token while (Token.token != end_token) { // Get the type and identifier type = parse_type(&ctype); ident(); // Check that this type matches the prototype if there is one if (protoptr != NULL) { if (type != protoptr->type) fatald("Type doesn't match prototype for parameter", paramcnt + 1); protoptr = protoptr->next; } else { // Add a new parameter to the right symbol table list, based on the class var_declaration(type, ctype, class); } paramcnt++; // Must have a separate_token or ')' at this point if ((Token.token != separate_token) && (Token.token != end_token)) fatald("Unexpected token in parameter list", Token.token); if (Token.token == separate_token) scan(&Token); } // Check that the number of parameters in this list matches // any existing prototype if ((funcsym != NULL) && (paramcnt != funcsym->nelems)) fatals("Parameter count mismatch for function", funcsym->name); // Return the count of parameters return (paramcnt); } // // function_declaration: type identifier '(' parameter_list ')' ; // | type identifier '(' parameter_list ')' compound_statement ; // // Parse the declaration of function. // The identifier has been scanned & we have the type. struct ASTnode *function_declaration(int type) { struct ASTnode *tree, *finalstmt; struct symtable *oldfuncsym, *newfuncsym = NULL; int endlabel, paramcnt; // Text has the identifier's name. If this exists and is a // function, get the id. Otherwise, set oldfuncsym to NULL. 
if ((oldfuncsym = findsymbol(Text)) != NULL) if (oldfuncsym->stype != S_FUNCTION) oldfuncsym = NULL; // If this is a new function declaration, get a // label-id for the end label, and add the function // to the symbol table, if (oldfuncsym == NULL) { endlabel = genlabel(); // Assumtion: functions only return scalar types, so NULL below newfuncsym = addglob(Text, type, NULL, S_FUNCTION, endlabel); } // Scan in the '(', any parameters and the ')'. // Pass in any existing function prototype pointer lparen(); paramcnt = var_declaration_list(oldfuncsym, C_PARAM, T_COMMA, T_RPAREN); rparen(); // If this is a new function declaration, update the // function symbol entry with the number of parameters. // Also copy the parameter list into the function's node. if (newfuncsym) { newfuncsym->nelems = paramcnt; newfuncsym->member = Parmhead; oldfuncsym = newfuncsym; } // Clear out the parameter list Parmhead = Parmtail = NULL; // Declaration ends in a semicolon, only a prototype. if (Token.token == T_SEMI) { scan(&Token); return (NULL); } // This is not just a prototype. // Set the Functionid global to the function's symbol pointer Functionid = oldfuncsym; // Get the AST tree for the compound statement tree = compound_statement(); // If the function type isn't P_VOID .. if (type != P_VOID) { // Error if no statements in the function if (tree == NULL) fatal("No statements in function with non-void type"); // Check that the last AST operation in the // compound statement was a return statement finalstmt = (tree->op == A_GLUE) ? tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); } // Return an A_FUNCTION node which has the function's symbol pointer // and the compound statement sub-tree return (mkastunary(A_FUNCTION, type, tree, oldfuncsym, endlabel)); } // Parse struct declarations. Either find an existing // struct declaration, or build a struct symbol table // entry and return its pointer. 
static struct symtable *struct_declaration(void) { struct symtable *ctype = NULL; struct symtable *m; int offset; // Skip the struct keyword scan(&Token); // See if there is a following struct name if (Token.token == T_IDENT) { // Find any matching composite type ctype = findstruct(Text); scan(&Token); } // If the next token isn't an LBRACE , this is // the usage of an existing struct type. // Return the pointer to the type. if (Token.token != T_LBRACE) { if (ctype == NULL) fatals("unknown struct type", Text); return (ctype); } // Ensure this struct type hasn't been // previously defined if (ctype) fatals("previously defined struct", Text); // Build the struct node and skip the left brace ctype = addstruct(Text, P_STRUCT, NULL, 0, 0); scan(&Token); // Scan in the list of members and attach // to the struct type's node var_declaration_list(NULL, C_MEMBER, T_SEMI, T_RBRACE); rbrace(); ctype->member = Membhead; Membhead = Membtail = NULL; // Set the offset of the initial member // and find the first free byte after it m = ctype->member; m->posn = 0; offset = typesize(m->type, m->ctype); // Set the position of each successive member in the struct for (m = m->next; m != NULL; m = m->next) { // Set the offset for this member m->posn = genalign(m->type, offset, 1); // Get the offset of the next free byte after this member offset += typesize(m->type, m->ctype); } // Set the overall size of the struct ctype->size = offset; return (ctype); } // Parse one or more global declarations, either // variables, functions or structs void global_declarations(void) { struct ASTnode *tree; struct symtable *ctype; int type; while (1) { // Stop when we have reached EOF if (Token.token == T_EOF) break; // Get the type type = parse_type(&ctype); // We might have just parsed a struct declaration // with no associated variable. The next token // might be a ';'. Loop back if it is. XXX. 
I'm // not happy with this as it allows "struct fred;" // as an accepted statement if (type == P_STRUCT && Token.token == T_SEMI) { scan(&Token); continue; } // We have to read past the identifier // to see either a '(' for a function declaration // or a ',' or ';' for a variable declaration. // Text is filled in by the ident() call. ident(); if (Token.token == T_LPAREN) { // Parse the function declaration tree = function_declaration(type); // Only a function prototype, no code if (tree == NULL) continue; // A real function, generate the assembly code for it if (O_dumpAST) { dumpAST(tree, NOLABEL, 0); fprintf(stdout, "\n\n"); } genAST(tree, NOLABEL, 0); // Now free the symbols associated // with this function freeloclsyms(); } else { // Parse the global variable declaration // and skip past the trailing semicolon var_declaration(type, ctype, C_GLOBAL); semi(); } } } ================================================ FILE: 32_Struct_Access_pt1/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c void reject_token(struct token *t); int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue); struct ASTnode *mkastleaf(int op, int type, struct symtable *sym, int intvalue); struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, struct symtable *sym, int intvalue); void dumpAST(struct ASTnode *n, int label, int parentASTop); // gen.c int genlabel(void); int genAST(struct ASTnode *n, int reg, int parentASTop); void genpreamble(); void genpostamble(); void genfreeregs(); void genglobsym(struct symtable *node); int genglobstr(char *strvalue); int genprimsize(int type); int genalign(int type, int offset, int direction); void genreturn(int reg, int id); // cg.c int cgprimsize(int type); int cgalign(int type, int offset, int direction); void 
cgtextseg(); void cgdataseg(); void freeall_registers(void); void cgpreamble(); void cgpostamble(); void cgfuncpreamble(struct symtable *sym); void cgfuncpostamble(struct symtable *sym); int cgloadint(int value, int type); int cgloadglob(struct symtable *sym, int op); int cgloadlocal(struct symtable *sym, int op); int cgloadglobstr(int label); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdiv(int r1, int r2); int cgshlconst(int r, int val); int cgcall(struct symtable *sym, int numargs); void cgcopyarg(int r, int argposn); int cgstorglob(int r, struct symtable *sym); int cgstorlocal(int r, struct symtable *sym); void cgglobsym(struct symtable *node); void cgglobstr(int l, char *strvalue); int cgcompare_and_set(int ASTop, int r1, int r2); int cgcompare_and_jump(int ASTop, int r1, int r2, int label); void cglabel(int l); void cgjump(int l); int cgwiden(int r, int oldtype, int newtype); void cgreturn(int reg, struct symtable *sym); int cgaddress(struct symtable *sym); int cgderef(int r, int type); int cgstorderef(int r1, int r2, int type); int cgnegate(int r); int cginvert(int r); int cglognot(int r); int cgboolean(int r, int op, int label); int cgand(int r1, int r2); int cgor(int r1, int r2); int cgxor(int r1, int r2); int cgshl(int r1, int r2); int cgshr(int r1, int r2); // expr.c struct ASTnode *binexpr(int ptp); // stmt.c struct ASTnode *compound_statement(void); // misc.c void match(int t, char *what); void semi(void); void lbrace(void); void rbrace(void); void lparen(void); void rparen(void); void ident(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node); struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int size, int posn); struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int size); struct 
symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int size); struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype, int size); struct symtable *addstruct(char *name, int type, struct symtable *ctype, int stype, int size); struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int size); struct symtable *findglob(char *s); struct symtable *findlocl(char *s); struct symtable *findsymbol(char *s); struct symtable *findmember(char *s); struct symtable *findstruct(char *s); void clear_symtable(void); void freeloclsyms(void); // decl.c struct symtable *var_declaration(int type, struct symtable *ctype, int class); struct ASTnode *function_declaration(int type); void global_declarations(void); // types.c int inttype(int type); int ptrtype(int type); int parse_type(struct symtable **ctype); int pointer_to(int type); int value_at(int type); int typesize(int type, struct symtable *ctype); struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op); ================================================ FILE: 32_Struct_Access_pt1/defs.h ================================================ #include #include #include #include // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 enum { TEXTLEN = 512 // Length of identifiers in input }; // Commands and default filenames #define AOUT "a.out" #ifdef __NASM__ #define ASCMD "nasm -f elf64 -o " #define LDCMD "cc -no-pie -fno-plt -Wall -o " #else #define ASCMD "as -o " #define LDCMD "cc -o " #endif // Token types enum { T_EOF, // Binary operators T_ASSIGN, T_LOGOR, T_LOGAND, T_OR, T_XOR, T_AMPER, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, T_LSHIFT, T_RSHIFT, T_PLUS, T_MINUS, T_STAR, T_SLASH, // Other operators T_INC, T_DEC, T_INVERT, T_LOGNOT, // Type keywords T_VOID, T_CHAR, T_INT, T_LONG, // Other keywords T_IF, T_ELSE, T_WHILE, T_FOR, T_RETURN, T_STRUCT, // Structural tokens T_INTLIT, T_STRLIT, T_SEMI, T_IDENT, T_LBRACE, T_RBRACE, T_LPAREN, 
T_RPAREN, T_LBRACKET, T_RBRACKET, T_COMMA, T_DOT, T_ARROW }; // Token structure struct token { int token; // Token type, from the enum list above int intvalue; // For T_INTLIT, the integer value }; // AST node types. The first few line up // with the related tokens enum { A_ASSIGN = 1, A_LOGOR, A_LOGAND, A_OR, A_XOR, A_AND, A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_LSHIFT, A_RSHIFT, A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_INTLIT, A_STRLIT, A_IDENT, A_GLUE, A_IF, A_WHILE, A_FUNCTION, A_WIDEN, A_RETURN, A_FUNCCALL, A_DEREF, A_ADDR, A_SCALE, A_PREINC, A_PREDEC, A_POSTINC, A_POSTDEC, A_NEGATE, A_INVERT, A_LOGNOT, A_TOBOOL }; // Primitive types. The bottom 4 bits is an integer // value that represents the level of indirection, // e.g. 0= no pointer, 1= pointer, 2= pointer pointer etc. enum { P_NONE, P_VOID = 16, P_CHAR = 32, P_INT = 48, P_LONG = 64, P_STRUCT=80 }; // Structural types enum { S_VARIABLE, S_FUNCTION, S_ARRAY }; // Storage classes enum { C_GLOBAL = 1, // Globally visible symbol C_LOCAL, // Locally visible symbol C_PARAM, // Locally visible function parameter C_STRUCT, // A struct C_MEMBER // Member of a struct or union }; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol struct symtable *ctype; // If struct/union, ptr to that type int stype; // Structural type for the symbol int class; // Storage class for the symbol union { int size; // Number of elements in the symbol int endlabel; // For functions, the end label }; union { int nelems; // For functions, # of params int posn; // For locals, the negative offset // from the stack base pointer }; struct symtable *next; // Next symbol in one list struct symtable *member; // First member of a function, struct, }; // union or enum // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates int rvalue; // True if the node is an rvalue struct ASTnode 
*left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; struct symtable *sym; // For many AST nodes, the pointer to union { // the symbol in the symbol table int intvalue; // For A_INTLIT, the integer value int size; // For A_SCALE, the size to scale by }; }; enum { NOREG = -1, // Use NOREG when the AST generation // functions have no register to return NOLABEL = 0 // Use NOLABEL when we have no label to // pass to genAST() }; ================================================ FILE: 32_Struct_Access_pt1/expr.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of expressions // Copyright (c) 2019 Warren Toomey, GPL3 // expression_list: // | expression // | expression ',' expression_list // ; // Parse a list of zero or more comma-separated expressions and // return an AST composed of A_GLUE nodes with the left-hand child // being the sub-tree of previous expressions (or NULL) and the right-hand // child being the next expression. Each A_GLUE node will have size field // set to the number of expressions in the tree at this point. If no // expressions are parsed, NULL is returned static struct ASTnode *expression_list(void) { struct ASTnode *tree = NULL; struct ASTnode *child = NULL; int exprcount = 0; // Loop until the final right parentheses while (Token.token != T_RPAREN) { // Parse the next expression and increment the expression count child = binexpr(0); exprcount++; // Build an A_GLUE AST node with the previous tree as the left child // and the new expression as the right child. Store the expression count. 
tree = mkastnode(A_GLUE, P_NONE, tree, NULL, child, NULL, exprcount); // Must have a ',' or ')' at this point switch (Token.token) { case T_COMMA: scan(&Token); break; case T_RPAREN: break; default: fatald("Unexpected token in expression list", Token.token); } } // Return the tree of expressions return (tree); } // Parse a function call and return its AST static struct ASTnode *funccall(void) { struct ASTnode *tree; struct symtable *funcptr; // Check that the identifier has been defined as a function, // then make a leaf node for it. if ((funcptr = findsymbol(Text)) == NULL || funcptr->stype != S_FUNCTION) { fatals("Undeclared function", Text); } // Get the '(' lparen(); // Parse the argument expression list tree = expression_list(); // XXX Check type of each argument against the function's prototype // Build the function call AST node. Store the // function's return type as this node's type. // Also record the function's symbol-id tree = mkastunary(A_FUNCCALL, funcptr->type, tree, funcptr, 0); // Get the ')' rparen(); return (tree); } // Parse the index into an array and return an AST tree for it static struct ASTnode *array_access(void) { struct ASTnode *left, *right; struct symtable *aryptr; // Check that the identifier has been defined as an array // then make a leaf node for it that points at the base if ((aryptr = findsymbol(Text)) == NULL || aryptr->stype != S_ARRAY) { fatals("Undeclared array", Text); } left = mkastleaf(A_ADDR, aryptr->type, aryptr, 0); // Get the '[' scan(&Token); // Parse the following expression right = binexpr(0); // Get the ']' match(T_RBRACKET, "]"); // Ensure that this is of int type if (!inttype(right->type)) fatal("Array index is not of integer type"); // Scale the index by the size of the element's type right = modify_type(right, left->type, A_ADD); // Return an AST tree where the array's base has the offset // added to it, and dereference the element. Still an lvalue // at this point. 
left = mkastnode(A_ADD, aryptr->type, left, NULL, right, NULL, 0); left = mkastunary(A_DEREF, value_at(left->type), left, NULL, 0); return (left); } // Parse the member reference of a struct (or union, soon) // and return an AST tree for it. If withpointer is true, // the access is through a pointer to the member. static struct ASTnode *member_access(int withpointer) { struct ASTnode *left, *right; struct symtable *compvar; struct symtable *typeptr; struct symtable *m; // Check that the identifier has been declared as a struct (or a union, later), // or a struct/union pointer if ((compvar = findsymbol(Text)) == NULL) fatals("Undeclared variable", Text); if (withpointer && compvar->type != pointer_to(P_STRUCT)) fatals("Undeclared variable", Text); if (!withpointer && compvar->type != P_STRUCT) fatals("Undeclared variable", Text); // If a pointer to a struct, get the pointer's value. // Otherwise, make a leaf node that points at the base // Either way, it's an rvalue if (withpointer) { left = mkastleaf(A_IDENT, pointer_to(P_STRUCT), compvar, 0); } else left = mkastleaf(A_ADDR, compvar->type, compvar, 0); left->rvalue = 1; // Get the details of the composite type typeptr = compvar->ctype; // Skip the '.' or '->' token and get the member's name scan(&Token); ident(); // Find the matching member's name in the type // Die if we can't find it for (m = typeptr->member; m != NULL; m = m->next) if (!strcmp(m->name, Text)) break; if (m == NULL) fatals("No member found in struct/union: ", Text); // Build an A_INTLIT node with the offset right = mkastleaf(A_INTLIT, P_INT, NULL, m->posn); // Add the member's offset to the base of the struct and // dereference it. Still an lvalue at this point left = mkastnode(A_ADD, pointer_to(m->type), left, NULL, right, NULL, 0); left = mkastunary(A_DEREF, m->type, left, NULL, 0); return (left); } // Parse a postfix expression and return // an AST node representing it. The // identifier is already in Text. 
static struct ASTnode *postfix(void) { struct ASTnode *n; struct symtable *varptr; // Scan in the next token to see if we have a postfix expression scan(&Token); // Function call if (Token.token == T_LPAREN) return (funccall()); // An array reference if (Token.token == T_LBRACKET) return (array_access()); // Access into a struct or union if (Token.token == T_DOT) return (member_access(0)); if (Token.token == T_ARROW) return (member_access(1)); // A variable. Check that the variable exists. if ((varptr = findsymbol(Text)) == NULL || varptr->stype != S_VARIABLE) fatals("Unknown variable", Text); switch (Token.token) { // Post-increment: skip over the token case T_INC: scan(&Token); n = mkastleaf(A_POSTINC, varptr->type, varptr, 0); break; // Post-decrement: skip over the token case T_DEC: scan(&Token); n = mkastleaf(A_POSTDEC, varptr->type, varptr, 0); break; // Just a variable reference default: n = mkastleaf(A_IDENT, varptr->type, varptr, 0); } return (n); } // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; int id; switch (Token.token) { case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. // Make it a P_CHAR if it's within the P_CHAR range if ((Token.intvalue) >= 0 && (Token.intvalue < 256)) n = mkastleaf(A_INTLIT, P_CHAR, NULL, Token.intvalue); else n = mkastleaf(A_INTLIT, P_INT, NULL, Token.intvalue); break; case T_STRLIT: // For a STRLIT token, generate the assembly for it. // Then make a leaf AST node for it. id is the string's label. id = genglobstr(Text); n = mkastleaf(A_STRLIT, pointer_to(P_CHAR), NULL, id); break; case T_IDENT: return (postfix()); case T_LPAREN: // Beginning of a parenthesised expression, skip the '('. 
// Scan in the expression and the right parenthesis scan(&Token); n = binexpr(0); rparen(); return (n); default: fatald("Expecting a primary expression, got token", Token.token); } // Scan in the next token and return the leaf node scan(&Token); return (n); } // Convert a binary operator token into a binary AST operation. // We rely on a 1:1 mapping from token to AST operation static int binastop(int tokentype) { if (tokentype > T_EOF && tokentype <= T_SLASH) return (tokentype); fatald("Syntax error, token", tokentype); return (0); // Keep -Wall happy } // Return true if a token is right-associative, // false otherwise. static int rightassoc(int tokentype) { if (tokentype == T_ASSIGN) return (1); return (0); } // Operator precedence for each token. Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 20, 30, // T_EOF, T_ASSIGN, T_LOGOR, T_LOGAND 40, 50, 60, // T_OR, T_XOR, T_AMPER 70, 70, // T_EQ, T_NE 80, 80, 80, 80, // T_LT, T_GT, T_LE, T_GE 90, 90, // T_LSHIFT, T_RSHIFT 100, 100, // T_PLUS, T_MINUS 110, 110 // T_STAR, T_SLASH }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec; if (tokentype > T_SLASH) fatald("Token with no precedence in op_precedence:", tokentype); prec = OpPrec[tokentype]; if (prec == 0) fatald("Syntax error, token", tokentype); return (prec); } // prefix_expression: primary // | '*' prefix_expression // | '&' prefix_expression // | '-' prefix_expression // | '++' prefix_expression // | '--' prefix_expression // ; // Parse a prefix expression and return // a sub-tree representing it. 
struct ASTnode *prefix(void) { struct ASTnode *tree; switch (Token.token) { case T_AMPER: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Ensure that it's an identifier if (tree->op != A_IDENT) fatal("& operator must be followed by an identifier"); // Now change the operator to A_ADDR and the type to // a pointer to the original type tree->op = A_ADDR; tree->type = pointer_to(tree->type); break; case T_STAR: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's either another deref or an // identifier if (tree->op != A_IDENT && tree->op != A_DEREF) fatal("* operator must be followed by an identifier or *"); // Prepend an A_DEREF operation to the tree tree = mkastunary(A_DEREF, value_at(tree->type), tree, NULL, 0); break; case T_MINUS: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_NEGATE operation to the tree and // make the child an rvalue. Because chars are unsigned, // also widen this to int so that it's signed tree->rvalue = 1; tree = modify_type(tree, P_INT, 0); tree = mkastunary(A_NEGATE, tree->type, tree, NULL, 0); break; case T_INVERT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_INVERT operation to the tree and // make the child an rvalue. tree->rvalue = 1; tree = mkastunary(A_INVERT, tree->type, tree, NULL, 0); break; case T_LOGNOT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_LOGNOT operation to the tree and // make the child an rvalue. 
tree->rvalue = 1; tree = mkastunary(A_LOGNOT, tree->type, tree, NULL, 0); break; case T_INC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("++ operator must be followed by an identifier"); // Prepend an A_PREINC operation to the tree tree = mkastunary(A_PREINC, tree->type, tree, NULL, 0); break; case T_DEC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("-- operator must be followed by an identifier"); // Prepend an A_PREDEC operation to the tree tree = mkastunary(A_PREDEC, tree->type, tree, NULL, 0); break; default: tree = primary(); } return (tree); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; struct ASTnode *ltemp, *rtemp; int ASTop; int tokentype; // Get the tree on the left. // Fetch the next token at the same time. 
left = prefix(); // If we hit one of several terminating tokens, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA) { left->rvalue = 1; return (left); } // While the precedence of this token is more than that of the // previous token precedence, or it's right associative and // equal to the previous token's precedence while ((op_precedence(tokentype) > ptp) || (rightassoc(tokentype) && op_precedence(tokentype) == ptp)) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Determine the operation to be performed on the sub-trees ASTop = binastop(tokentype); if (ASTop == A_ASSIGN) { // Assignment // Make the right tree into an rvalue right->rvalue = 1; // Ensure the right's type matches the left right = modify_type(right, left->type, 0); if (right == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree. However, switch // left and right around, so that the right expression's // code will be generated before the left expression ltemp = left; left = right; right = ltemp; } else { // We are not doing an assignment, so both trees should be rvalues // Convert both trees into rvalue if they are lvalue trees left->rvalue = 1; right->rvalue = 1; // Ensure the two types are compatible by trying // to modify each tree to match the other's type. ltemp = modify_type(left, right->type, ASTop); rtemp = modify_type(right, left->type, ASTop); if (ltemp == NULL && rtemp == NULL) fatal("Incompatible types in binary expression"); if (ltemp != NULL) left = ltemp; if (rtemp != NULL) right = rtemp; } // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. left = mkastnode(binastop(tokentype), left->type, left, NULL, right, NULL, 0); // Update the details of the current token. 
// If we hit a terminating token, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA) { left->rvalue = 1; return (left); } } // Return the tree we have when the precedence // is the same or lower left->rvalue = 1; return (left); } ================================================ FILE: 32_Struct_Access_pt1/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number int genlabel(void) { static int id = 1; return (id++); } // Generate the code for an IF statement // and an optional ELSE clause static int genIF(struct ASTnode *n) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. // When there is no ELSE clause, Lfalse _is_ // the ending label! Lfalse = genlabel(); if (n->right) Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. genAST(n->left, Lfalse, n->op); genfreeregs(); // Generate the true compound statement genAST(n->mid, NOLABEL, n->op); genfreeregs(); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOLABEL, n->op); genfreeregs(); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = genlabel(); Lend = genlabel(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. 
genAST(n->left, Lend, n->op); genfreeregs(); // Generate the compound statement for the body genAST(n->right, NOLABEL, n->op); genfreeregs(); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Generate the code to copy the arguments of a // function call to its parameters, then call the // function itself. Return the register that holds // the function's return value. static int gen_funccall(struct ASTnode *n) { struct ASTnode *gluetree = n->left; int reg; int numargs = 0; // If there is a list of arguments, walk this list // from the last argument (right-hand child) to the // first while (gluetree) { // Calculate the expression's value reg = genAST(gluetree->right, NOLABEL, gluetree->op); // Copy this into the n'th function parameter: size is 1, 2, 3, ... cgcopyarg(reg, gluetree->size); // Keep the first (highest) number of arguments if (numargs == 0) numargs = gluetree->size; genfreeregs(); gluetree = gluetree->left; } // Call the function, clean up the stack (based on numargs), // and return its result return (cgcall(n->sym, numargs)); } // Given an AST, an optional label, and the AST op // of the parent, generate assembly code recursively. // Return the register id with the tree's final value. 
int genAST(struct ASTnode *n, int label, int parentASTop) { int leftreg, rightreg; // We have some specific AST node handling at the top // so that we don't evaluate the child sub-trees immediately switch (n->op) { case A_IF: return (genIF(n)); case A_WHILE: return (genWHILE(n)); case A_FUNCCALL: return (gen_funccall(n)); case A_GLUE: // Do each child statement, and free the // registers after each child genAST(n->left, NOLABEL, n->op); genfreeregs(); genAST(n->right, NOLABEL, n->op); genfreeregs(); return (NOREG); case A_FUNCTION: // Generate the function's preamble before the code // in the child sub-tree cgfuncpreamble(n->sym); genAST(n->left, NOLABEL, n->op); cgfuncpostamble(n->sym); return (NOREG); } // General AST node handling below // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, NOLABEL, n->op); if (n->right) rightreg = genAST(n->right, NOLABEL, n->op); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdiv(leftreg, rightreg)); case A_AND: return (cgand(leftreg, rightreg)); case A_OR: return (cgor(leftreg, rightreg)); case A_XOR: return (cgxor(leftreg, rightreg)); case A_LSHIFT: return (cgshl(leftreg, rightreg)); case A_RSHIFT: return (cgshr(leftreg, rightreg)); case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, compare registers // and set one to 1 or 0 based on the comparison. 
if (parentASTop == A_IF || parentASTop == A_WHILE) return (cgcompare_and_jump(n->op, leftreg, rightreg, label)); else return (cgcompare_and_set(n->op, leftreg, rightreg)); case A_INTLIT: return (cgloadint(n->intvalue, n->type)); case A_STRLIT: return (cgloadglobstr(n->intvalue)); case A_IDENT: // Load our value if we are an rvalue // or we are being dereferenced if (n->rvalue || parentASTop == A_DEREF) { if (n->sym->class == C_GLOBAL) { return (cgloadglob(n->sym, n->op)); } else { return (cgloadlocal(n->sym, n->op)); } } else return (NOREG); case A_ASSIGN: // Are we assigning to an identifier or through a pointer? switch (n->right->op) { case A_IDENT: if (n->right->sym->class == C_GLOBAL) return (cgstorglob(leftreg, n->right->sym)); else return (cgstorlocal(leftreg, n->right->sym)); case A_DEREF: return (cgstorderef(leftreg, rightreg, n->right->type)); default: fatald("Can't A_ASSIGN in genAST(), op", n->op); } case A_WIDEN: // Widen the child's type to the parent's type return (cgwiden(leftreg, n->left->type, n->type)); case A_RETURN: cgreturn(leftreg, Functionid); return (NOREG); case A_ADDR: return (cgaddress(n->sym)); case A_DEREF: // If we are an rvalue, dereference to get the value we point at, // otherwise leave it for A_ASSIGN to store through the pointer if (n->rvalue) return (cgderef(leftreg, n->left->type)); else return (leftreg); case A_SCALE: // Small optimisation: use shift if the // scale value is a known power of two switch (n->size) { case 2: return (cgshlconst(leftreg, 1)); case 4: return (cgshlconst(leftreg, 2)); case 8: return (cgshlconst(leftreg, 3)); default: // Load a register with the size and // multiply the leftreg by this size rightreg = cgloadint(n->size, P_INT); return (cgmul(leftreg, rightreg)); } case A_POSTINC: case A_POSTDEC: // Load and decrement the variable's value into a register // and post increment/decrement it if (n->sym->class == C_GLOBAL) return (cgloadglob(n->sym, n->op)); else return (cgloadlocal(n->sym, n->op)); case 
A_PREINC: case A_PREDEC: // Load and decrement the variable's value into a register // and pre increment/decrement it if (n->left->sym->class == C_GLOBAL) return (cgloadglob(n->left->sym, n->op)); else return (cgloadlocal(n->left->sym, n->op)); case A_NEGATE: return (cgnegate(leftreg)); case A_INVERT: return (cginvert(leftreg)); case A_LOGNOT: return (cglognot(leftreg)); case A_TOBOOL: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, set the register // to 0 or 1 based on it's zeroeness or non-zeroeness return (cgboolean(leftreg, parentASTop, label)); default: fatald("Unknown AST operator", n->op); } return (NOREG); // Keep -Wall happy } void genpreamble() { cgpreamble(); } void genpostamble() { cgpostamble(); } void genfreeregs() { freeall_registers(); } void genglobsym(struct symtable *node) { cgglobsym(node); } int genglobstr(char *strvalue) { int l = genlabel(); cgglobstr(l, strvalue); return (l); } int genprimsize(int type) { return (cgprimsize(type)); } int genalign(int type, int offset, int direction) { return (cgalign(type, offset, direction)); } ================================================ FILE: 32_Struct_Access_pt1/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Given a string with a '.' and at least a 1-character suffix // after the '.', change the suffix to be the given character. // Return the new string or NULL if the original string could // not be modified char *alter_suffix(char *str, char suffix) { char *posn; char *newstr; // Clone the string if ((newstr = strdup(str)) == NULL) return (NULL); // Find the '.' 
if ((posn = strrchr(newstr, '.')) == NULL) return (NULL); // Ensure there is a suffix posn++; if (*posn == '\0') return (NULL); // Change the suffix and NUL-terminate the string *posn++ = suffix; *posn = '\0'; return (newstr); } // Given an input filename, compile that file // down to assembly code. Return the new file's name static char *do_compile(char *filename) { Outfilename = alter_suffix(filename, 's'); if (Outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .c on the end\n", filename); exit(1); } // Open up the input file if ((Infile = fopen(filename, "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", filename, strerror(errno)); exit(1); } // Create the output file if ((Outfile = fopen(Outfilename, "w")) == NULL) { fprintf(stderr, "Unable to create %s: %s\n", Outfilename, strerror(errno)); exit(1); } Line = 1; // Reset the scanner Putback = '\n'; clear_symtable(); // Clear the symbol table if (O_verbose) printf("compiling %s\n", filename); scan(&Token); // Get the first token from the input genpreamble(); // Output the preamble global_declarations(); // Parse the global declarations genpostamble(); // Output the postamble fclose(Outfile); // Close the output file return (Outfilename); } // Given an input filename, assemble that file // down to object code. Return the object filename char *do_assemble(char *filename) { char cmd[TEXTLEN]; int err; char *outfilename = alter_suffix(filename, 'o'); if (outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .s on the end\n", filename); exit(1); } // Build the assembly command and run it snprintf(cmd, TEXTLEN, "%s %s %s", ASCMD, outfilename, filename); if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Assembly of %s failed\n", filename); exit(1); } return (outfilename); } // Given a list of object files and an output filename, // link all of the object filenames together. 
void do_link(char *outfilename, char *objlist[]) { int cnt, size = TEXTLEN; char cmd[TEXTLEN], *cptr; int err; // Start with the linker command and the output file cptr = cmd; cnt = snprintf(cptr, size, "%s %s ", LDCMD, outfilename); cptr += cnt; size -= cnt; // Now append each object file while (*objlist != NULL) { cnt = snprintf(cptr, size, "%s ", *objlist); cptr += cnt; size -= cnt; objlist++; } if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Linking failed\n"); exit(1); } } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s [-vcST] [-o outfile] file [file ...]\n", prog); fprintf(stderr, " -v give verbose output of the compilation stages\n"); fprintf(stderr, " -c generate object files but don't link them\n"); fprintf(stderr, " -S generate assembly files but don't link them\n"); fprintf(stderr, " -T dump the AST trees for each input file\n"); fprintf(stderr, " -o outfile, produce the outfile executable file\n"); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. 
enum { MAXOBJ = 100 }; int main(int argc, char *argv[]) { char *outfilename = AOUT; char *asmfile, *objfile; char *objlist[MAXOBJ]; int i, objcnt = 0; // Initialise our variables O_dumpAST = 0; O_keepasm = 0; O_assemble = 0; O_verbose = 0; O_dolink = 1; // Scan for command-line options for (i = 1; i < argc; i++) { // No leading '-', stop scanning for options if (*argv[i] != '-') break; // For each option in this argument for (int j = 1; (*argv[i] == '-') && argv[i][j]; j++) { switch (argv[i][j]) { case 'o': outfilename = argv[++i]; // Save & skip to next argument break; case 'T': O_dumpAST = 1; break; case 'c': O_assemble = 1; O_keepasm = 0; O_dolink = 0; break; case 'S': O_keepasm = 1; O_assemble = 0; O_dolink = 0; break; case 'v': O_verbose = 1; break; default: usage(argv[0]); } } } // Ensure we have at lease one input file argument if (i >= argc) usage(argv[0]); // Work on each input file in turn while (i < argc) { asmfile = do_compile(argv[i]); // Compile the source file if (O_dolink || O_assemble) { objfile = do_assemble(asmfile); // Assemble it to object forma if (objcnt == (MAXOBJ - 2)) { fprintf(stderr, "Too many object files for the compiler to handle\n"); exit(1); } objlist[objcnt++] = objfile; // Add the object file's name objlist[objcnt] = NULL; // to the list of object files } if (!O_keepasm) // Remove the assembly file if unlink(asmfile); // we don't need to keep it i++; } // Now link all the object files together if (O_dolink) { do_link(outfilename, objlist); // If we don't need to keep the object // files, then remove them if (!O_assemble) { for (i = 0; objlist[i] != NULL; i++) unlink(objlist[i]); } } return (0); } ================================================ FILE: 32_Struct_Access_pt1/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" #include // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current token is t, // and fetch the next token. 
Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d\n", s, Line); fclose(Outfile); unlink(Outfilename); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d\n", s1, s2, Line); fclose(Outfile); unlink(Outfilename); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d\n", s, d, Line); fclose(Outfile); unlink(Outfilename); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d\n", s, c, Line); fclose(Outfile); unlink(Outfilename); exit(1); } ================================================ FILE: 32_Struct_Access_pt1/scan.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { char *p; p = strchr(s, c); return (p ? p - s : -1); } // Get the next character from the input file. 
static int next(void) { int c; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return (c); } c = fgetc(Infile); // Read from input file if ('\n' == c) Line++; // Increment line count return (c); } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. static int skip(void) { int c; c = next(); while (' ' == c || '\t' == c || '\n' == c || '\r' == c || '\f' == c) { c = next(); } return (c); } // Return the next character from a character // or string literal static int scanch(void) { int c; // Get the next input character and interpret // metacharacters that start with a backslash c = next(); if (c == '\\') { switch (c = next()) { case 'a': return '\a'; case 'b': return '\b'; case 'f': return '\f'; case 'n': return '\n'; case 'r': return '\r'; case 't': return '\t'; case 'v': return '\v'; case '\\': return '\\'; case '"': return '"'; case '\'': return '\''; default: fatalc("unknown escape sequence", c); } } return (c); // Just an ordinary old character! } // Scan and return an integer literal // value from the input file. static int scanint(int c) { int k, val = 0; // Convert each character into an int value while ((k = chrpos("0123456789", c)) >= 0) { val = val * 10 + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan in a string literal from the input file, // and store it in buf[]. Return the length of // the string. 
static int scanstr(char *buf) { int i, c; // Loop while we have enough buffer space for (i = 0; i < TEXTLEN - 1; i++) { // Get the next char and append to buf // Return when we hit the ending double quote if ((c = scanch()) == '"') { buf[i] = 0; return (i); } buf[i] = c; } // Ran out of buf[] space fatal("String literal too long"); return (0); } // Scan an identifier from the input file and // store it in buf[]. Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { fatal("Identifier too long"); } else if (i < lim - 1) { buf[i++] = c; } c = next(); } // We hit a non-valid character, put it back. // NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. 
static int keyword(char *s) { switch (*s) { case 'c': if (!strcmp(s, "char")) return (T_CHAR); break; case 'e': if (!strcmp(s, "else")) return (T_ELSE); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'l': if (!strcmp(s, "long")) return (T_LONG); break; case 'r': if (!strcmp(s, "return")) return (T_RETURN); break; case 's': if (!strcmp(s, "struct")) return (T_STRUCT); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; case 'v': if (!strcmp(s, "void")) return (T_VOID); break; } return (0); } // A pointer to a rejected token static struct token *Rejtoken = NULL; // Reject the token that we just scanned void reject_token(struct token *t) { if (Rejtoken != NULL) fatal("Can't reject token twice"); Rejtoken = t; } // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. int scan(struct token *t) { int c, tokentype; // If we have any rejected token, return it if (Rejtoken != NULL) { t = Rejtoken; Rejtoken = NULL; return (1); } // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': if ((c = next()) == '+') { t->token = T_INC; } else { putback(c); t->token = T_PLUS; } break; case '-': if ((c = next()) == '-') { t->token = T_DEC; } else if (c == '>') { t->token = T_ARROW; } else { putback(c); t->token = T_MINUS; } break; case '*': t->token = T_STAR; break; case '/': t->token = T_SLASH; break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case '[': t->token = T_LBRACKET; break; case ']': t->token = T_RBRACKET; break; case '~': t->token = T_INVERT; break; case '^': t->token = T_XOR; break; case ',': t->token = T_COMMA; break; case '.': t->token = T_DOT; break; case '=': if 
((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { putback(c); t->token = T_LOGNOT; } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else if (c == '<') { t->token = T_LSHIFT; } else { putback(c); t->token = T_LT; } break; case '>': if ((c = next()) == '=') { t->token = T_GE; } else if (c == '>') { t->token = T_RSHIFT; } else { putback(c); t->token = T_GT; } break; case '&': if ((c = next()) == '&') { t->token = T_LOGAND; } else { putback(c); t->token = T_AMPER; } break; case '|': if ((c = next()) == '|') { t->token = T_LOGOR; } else { putback(c); t->token = T_OR; } break; case '\'': // If it's a quote, scan in the // literal character value and // the trailing quote t->intvalue = scanch(); t->token = T_INTLIT; if (next() != '\'') fatal("Expected '\\'' at end of char literal"); break; case '"': // Scan in a literal string scanstr(Text); t->token = T_STRLIT; break; default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if ((tokentype = keyword(Text)) != 0) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token return (1); } ================================================ FILE: 32_Struct_Access_pt1/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // Prototypes static struct ASTnode *single_statement(void); // compound_statement: // empty, i.e. 
no statement // | statement // | statement statements // ; // // statement: declaration // | expression_statement // | function_call // | if_statement // | while_statement // | for_statement // | return_statement // ; // if_statement: if_head // | if_head 'else' compound_statement // ; // // if_head: 'if' '(' true_false_expression ')' compound_statement ; // // Parse an IF statement including any // optional ELSE clause and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); rparen(); // Get the AST for the compound statement trueAST = compound_statement(); // If we have an 'else', skip it // and get the AST for the compound statement if (Token.token == T_ELSE) { scan(&Token); falseAST = compound_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, P_NONE, condAST, trueAST, falseAST, NULL, 0)); } // while_statement: 'while' '(' true_false_expression ')' compound_statement ; // // Parse a WHILE statement and return its AST static struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. 
condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); rparen(); // Get the AST for the compound statement bodyAST = compound_statement(); // Build and return the AST for this statement return (mkastnode(A_WHILE, P_NONE, condAST, NULL, bodyAST, NULL, 0)); } // for_statement: 'for' '(' preop_statement ';' // true_false_expression ';' // postop_statement ')' compound_statement ; // // preop_statement: statement (for now) // postop_statement: statement (for now) // // Parse a FOR statement and return its AST static struct ASTnode *for_statement(void) { struct ASTnode *condAST, *bodyAST; struct ASTnode *preopAST, *postopAST; struct ASTnode *tree; // Ensure we have 'for' '(' match(T_FOR, "for"); lparen(); // Get the pre_op statement and the ';' preopAST = single_statement(); semi(); // Get the condition and the ';'. // Force a non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); semi(); // Get the post_op statement and the ')' postopAST = single_statement(); rparen(); // Get the compound statement which is the body bodyAST = compound_statement(); // For now, all four sub-trees have to be non-NULL. 
// Later on, we'll change the semantics for when some are missing // Glue the compound statement and the postop tree tree = mkastnode(A_GLUE, P_NONE, bodyAST, NULL, postopAST, NULL, 0); // Make a WHILE loop with the condition and this new body tree = mkastnode(A_WHILE, P_NONE, condAST, NULL, tree, NULL, 0); // And glue the preop tree to the A_WHILE tree return (mkastnode(A_GLUE, P_NONE, preopAST, NULL, tree, NULL, 0)); } // return_statement: 'return' '(' expression ')' ; // // Parse a return statement and return its AST static struct ASTnode *return_statement(void) { struct ASTnode *tree; // Can't return a value if function returns P_VOID if (Functionid->type == P_VOID) fatal("Can't return from a void function"); // Ensure we have 'return' '(' match(T_RETURN, "return"); lparen(); // Parse the following expression tree = binexpr(0); // Ensure this is compatible with the function's type tree = modify_type(tree, Functionid->type, 0); if (tree == NULL) fatal("Incompatible type to return"); // Add on the A_RETURN node tree = mkastunary(A_RETURN, P_NONE, tree, NULL, 0); // Get the ')' rparen(); return (tree); } // Parse a single statement and return its AST static struct ASTnode *single_statement(void) { int type; struct symtable *ctype; switch (Token.token) { case T_CHAR: case T_INT: case T_LONG: // The beginning of a variable declaration. // Parse the type and get the identifier. // Then parse the rest of the declaration // and skip over the semicolon type = parse_type(&ctype); ident(); var_declaration(type, ctype, C_LOCAL); semi(); return (NULL); // No AST generated here case T_IF: return (if_statement()); case T_WHILE: return (while_statement()); case T_FOR: return (for_statement()); case T_RETURN: return (return_statement()); default: // For now, see if this is an expression. // This catches assignment statements. 
return (binexpr(0)); } return (NULL); // Keep -Wall happy } // Parse a compound statement // and return its AST struct ASTnode *compound_statement(void) { struct ASTnode *left = NULL; struct ASTnode *tree; // Require a left curly bracket lbrace(); while (1) { // Parse a single statement tree = single_statement(); // Some statements must be followed by a semicolon if (tree != NULL && (tree->op == A_ASSIGN || tree->op == A_RETURN || tree->op == A_FUNCCALL)) semi(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, P_NONE, left, NULL, tree, NULL, 0); } // When we hit a right curly bracket, // skip past it and return the AST if (Token.token == T_RBRACE) { rbrace(); return (left); } } } ================================================ FILE: 32_Struct_Access_pt1/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 // Append a node to the singly-linked list pointed to by head or tail void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node) { // Check for valid pointers if (head == NULL || tail == NULL || node == NULL) fatal("Either head, tail or node is NULL in appendsym"); // Append to the list if (*tail) { (*tail)->next = node; *tail = node; } else *head = *tail = node; node->next = NULL; } // Create a symbol node to be added to a symbol table list. // Set up the node's: // + type: char, int etc. // + ctype: composite type pointer for struct/union // + structural type: var, function, array etc. 
// + size: number of elements, or endlabel: end label for a function // + posn: Position information for local symbols // Return a pointer to the new node struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int size, int posn) { // Get a new node struct symtable *node = (struct symtable *) malloc(sizeof(struct symtable)); if (node == NULL) fatal("Unable to malloc a symbol table node in newsym"); // Fill in the values node->name = strdup(name); node->type = type; node->ctype = ctype; node->stype = stype; node->class = class; node->size = size; node->posn = posn; node->next = NULL; node->member = NULL; // Generate any global space if (class == C_GLOBAL) genglobsym(node); return (node); } // Add a symbol to the global symbol list struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_GLOBAL, size, 0); appendsym(&Globhead, &Globtail, sym); return (sym); } // Add a symbol to the local symbol list struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_LOCAL, size, 0); appendsym(&Loclhead, &Locltail, sym); return (sym); } // Add a symbol to the parameter list struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_PARAM, size, 0); appendsym(&Parmhead, &Parmtail, sym); return (sym); } // Add a symbol to the temporary member list struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_MEMBER, size, 0); appendsym(&Membhead, &Membtail, sym); return (sym); } // Add a struct to the struct list struct symtable *addstruct(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_STRUCT, size, 
0); appendsym(&Structhead, &Structtail, sym); return (sym); } // Search for a symbol in a specific list. // Return a pointer to the found node or NULL if not found. static struct symtable *findsyminlist(char *s, struct symtable *list) { for (; list != NULL; list = list->next) if ((list->name != NULL) && !strcmp(s, list->name)) return (list); return (NULL); } // Determine if the symbol s is in the global symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findglob(char *s) { return (findsyminlist(s, Globhead)); } // Determine if the symbol s is in the local symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findlocl(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member); if (node) return (node); } return (findsyminlist(s, Loclhead)); } // Determine if the symbol s is in the symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findsymbol(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member); if (node) return (node); } // Otherwise, try the local and global symbol lists node = findsyminlist(s, Loclhead); if (node) return (node); return (findsyminlist(s, Globhead)); } // Find a member in the member list // Return a pointer to the found node or NULL if not found. struct symtable *findmember(char *s) { return (findsyminlist(s, Membhead)); } // Find a struct in the struct list // Return a pointer to the found node or NULL if not found. 
struct symtable *findstruct(char *s) { return (findsyminlist(s, Structhead)); } // Reset the contents of the symbol table void clear_symtable(void) { Globhead = Globtail = NULL; Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Membhead = Membtail = NULL; Structhead = Structtail = NULL; } // Clear all the entries in the local symbol table void freeloclsyms(void) { Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Functionid = NULL; } ================================================ FILE: 32_Struct_Access_pt1/tests/err.input31.c ================================================ Expecting a primary expression, got token:15 on line 5 ================================================ FILE: 32_Struct_Access_pt1/tests/err.input32.c ================================================ Unknown variable:cow on line 4 ================================================ FILE: 32_Struct_Access_pt1/tests/err.input33.c ================================================ Incompatible type to return on line 4 ================================================ FILE: 32_Struct_Access_pt1/tests/err.input34.c ================================================ For now, declaration of non-global arrays is not implemented on line 4 ================================================ FILE: 32_Struct_Access_pt1/tests/err.input35.c ================================================ Duplicate local variable declaration:a on line 4 ================================================ FILE: 32_Struct_Access_pt1/tests/err.input36.c ================================================ Type doesn't match prototype for parameter:2 on line 4 ================================================ FILE: 32_Struct_Access_pt1/tests/err.input37.c ================================================ Unexpected token in parameter list:15 on line 3 ================================================ FILE: 32_Struct_Access_pt1/tests/err.input38.c ================================================ Type doesn't match prototype for 
parameter:2 on line 4 ================================================ FILE: 32_Struct_Access_pt1/tests/err.input39.c ================================================ No statements in function with non-void type on line 4 ================================================ FILE: 32_Struct_Access_pt1/tests/err.input40.c ================================================ No return for function with non-void type on line 4 ================================================ FILE: 32_Struct_Access_pt1/tests/err.input41.c ================================================ Can't return from a void function on line 3 ================================================ FILE: 32_Struct_Access_pt1/tests/err.input42.c ================================================ Undeclared function:fred on line 3 ================================================ FILE: 32_Struct_Access_pt1/tests/err.input43.c ================================================ Undeclared array:b on line 3 ================================================ FILE: 32_Struct_Access_pt1/tests/err.input44.c ================================================ Unknown variable:z on line 3 ================================================ FILE: 32_Struct_Access_pt1/tests/err.input45.c ================================================ & operator must be followed by an identifier on line 3 ================================================ FILE: 32_Struct_Access_pt1/tests/err.input46.c ================================================ * operator must be followed by an identifier or * on line 3 ================================================ FILE: 32_Struct_Access_pt1/tests/err.input47.c ================================================ ++ operator must be followed by an identifier on line 3 ================================================ FILE: 32_Struct_Access_pt1/tests/err.input48.c ================================================ -- operator must be followed by an identifier on line 3 ================================================ FILE: 
32_Struct_Access_pt1/tests/err.input49.c ================================================ Incompatible expression in assignment on line 6 ================================================ FILE: 32_Struct_Access_pt1/tests/err.input50.c ================================================ Incompatible types in binary expression on line 6 ================================================ FILE: 32_Struct_Access_pt1/tests/err.input51.c ================================================ Expected '\'' at end of char literal on line 4 ================================================ FILE: 32_Struct_Access_pt1/tests/err.input52.c ================================================ Unrecognised character:$ on line 5 ================================================ FILE: 32_Struct_Access_pt1/tests/err.input56.c ================================================ unknown struct type:var1 on line 2 ================================================ FILE: 32_Struct_Access_pt1/tests/err.input57.c ================================================ previously defined struct:fred on line 2 ================================================ FILE: 32_Struct_Access_pt1/tests/err.input59.c ================================================ Undeclared variable:y on line 3 ================================================ FILE: 32_Struct_Access_pt1/tests/err.input60.c ================================================ Undeclared variable:x on line 3 ================================================ FILE: 32_Struct_Access_pt1/tests/err.input61.c ================================================ Undeclared variable:x on line 3 ================================================ FILE: 32_Struct_Access_pt1/tests/input01.c ================================================ int printf(char *fmt); void main() { printf("%d\n", 12 * 3); printf("%d\n", 18 - 2 * 4); printf("%d\n", 1 + 2 + 9 - 5/2 + 3*5); } ================================================ FILE: 32_Struct_Access_pt1/tests/input02.c 
================================================ int printf(char *fmt); void main() { int fred; int jim; fred= 5; jim= 12; printf("%d\n", fred + jim); } ================================================ FILE: 32_Struct_Access_pt1/tests/input03.c ================================================ int printf(char *fmt); void main() { int x; x= 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); } ================================================ FILE: 32_Struct_Access_pt1/tests/input04.c ================================================ int printf(char *fmt); void main() { int x; x= 7 < 9; printf("%d\n", x); x= 7 <= 9; printf("%d\n", x); x= 7 != 9; printf("%d\n", x); x= 7 == 7; printf("%d\n", x); x= 7 >= 7; printf("%d\n", x); x= 7 <= 7; printf("%d\n", x); x= 9 > 7; printf("%d\n", x); x= 9 >= 7; printf("%d\n", x); x= 9 != 7; printf("%d\n", x); } ================================================ FILE: 32_Struct_Access_pt1/tests/input05.c ================================================ int printf(char *fmt); void main() { int i; int j; i=6; j=12; if (i < j) { printf("%d\n", i); } else { printf("%d\n", j); } } ================================================ FILE: 32_Struct_Access_pt1/tests/input06.c ================================================ int printf(char *fmt); void main() { int i; i=1; while (i <= 10) { printf("%d\n", i); i= i + 1; } } ================================================ FILE: 32_Struct_Access_pt1/tests/input07.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 32_Struct_Access_pt1/tests/input08.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 
32_Struct_Access_pt1/tests/input09.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { printf("%d\n", 2 * b - a); } } ================================================ FILE: 32_Struct_Access_pt1/tests/input10.c ================================================ int printf(char *fmt); void main() { int i; char j; j= 20; printf("%d\n", j); i= 10; printf("%d\n", i); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 2; j= j + 1) { printf("%d\n", j); } } ================================================ FILE: 32_Struct_Access_pt1/tests/input11.c ================================================ int printf(char *fmt); int main() { int i; char j; long k; i= 10; printf("%d\n", i); j= 20; printf("%d\n", j); k= 30; printf("%d\n", k); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 4; j= j + 1) { printf("%d\n", j); } for (k= 1; k <= 5; k= k + 1) { printf("%d\n", k); } return(i); printf("%d\n", 12345); return(3); } ================================================ FILE: 32_Struct_Access_pt1/tests/input12.c ================================================ int printf(char *fmt); int fred() { return(5); } void main() { int x; x= fred(2); printf("%d\n", x); } ================================================ FILE: 32_Struct_Access_pt1/tests/input13.c ================================================ int printf(char *fmt); int fred() { return(56); } void main() { int dummy; int result; dummy= printf("%d\n", 23); result= fred(10); dummy= printf("%d\n", result); } ================================================ FILE: 32_Struct_Access_pt1/tests/input14.c ================================================ int printf(char *fmt); int fred() { return(20); } int main() { int result; printf("%d\n", 10); result= fred(15); printf("%d\n", result); printf("%d\n", fred(15)+10); return(0); } 
================================================ FILE: 32_Struct_Access_pt1/tests/input15.c ================================================ int printf(char *fmt); int main() { char a; char *b; char c; int d; int *e; int f; a= 18; printf("%d\n", a); b= &a; c= *b; printf("%d\n", c); d= 12; printf("%d\n", d); e= &d; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 32_Struct_Access_pt1/tests/input16.c ================================================ int printf(char *fmt); int c; int d; int *e; int f; int main() { c= 12; d=18; printf("%d\n", c); e= &c + 1; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 32_Struct_Access_pt1/tests/input17.c ================================================ int printf(char *fmt); int main() { char a; char *b; int d; int *e; b= &a; *b= 19; printf("%d\n", a); e= &d; *e= 12; printf("%d\n", d); return(0); } ================================================ FILE: 32_Struct_Access_pt1/tests/input18.c ================================================ int printf(char *fmt); int main() { int a; int b; a= b= 34; printf("%d\n", a); printf("%d\n", b); return(0); } ================================================ FILE: 32_Struct_Access_pt1/tests/input18a.c ================================================ int printf(char *fmt); int a; int *b; char c; char *d; int main() { b= &a; *b= 15; printf("%d\n", a); d= &c; *d= 16; printf("%d\n", c); return(0); } ================================================ FILE: 32_Struct_Access_pt1/tests/input19.c ================================================ int printf(char *fmt); int a; int b; int c; int d; int e; int main() { a= 2; b= 4; c= 3; d= 2; e= (a+b) * (c+d); printf("%d\n", e); return(0); } ================================================ FILE: 32_Struct_Access_pt1/tests/input20.c ================================================ int printf(char *fmt); int a; int b[25]; int main() { b[3]= 12; a= b[3]; printf("%d\n", a); 
return(0); } ================================================ FILE: 32_Struct_Access_pt1/tests/input21.c ================================================ int printf(char *fmt); char c; char *str; int main() { c= '\n'; printf("%d\n", c); for (str= "Hello world\n"; *str != 0; str= str + 1) { printf("%c", *str); } return(0); } ================================================ FILE: 32_Struct_Access_pt1/tests/input22.c ================================================ int printf(char *fmt); char a; char b; char c; int d; int e; int f; long g; long h; long i; int main() { b= 5; c= 7; a= b + c++; printf("%d\n", a); e= 5; f= 7; d= e + f++; printf("%d\n", d); h= 5; i= 7; g= h + i++; printf("%d\n", g); a= b-- + c; printf("%d\n", a); d= e-- + f; printf("%d\n", d); g= h-- + i; printf("%d\n", g); a= ++b + c; printf("%d\n", a); d= ++e + f; printf("%d\n", d); g= ++h + i; printf("%d\n", g); a= b * --c; printf("%d\n", a); d= e * --f; printf("%d\n", d); g= h * --i; printf("%d\n", g); return(0); } ================================================ FILE: 32_Struct_Access_pt1/tests/input23.c ================================================ int printf(char *fmt); char *str; int x; int main() { x= -23; printf("%d\n", x); printf("%d\n", -10 * -10); x= 1; x= ~x; printf("%d\n", x); x= 2 > 5; printf("%d\n", x); x= !x; printf("%d\n", x); x= !x; printf("%d\n", x); x= 13; if (x) { printf("%d\n", 13); } x= 0; if (!x) { printf("%d\n", 14); } for (str= "Hello world\n"; *str; str++) { printf("%c", *str); } return(0); } ================================================ FILE: 32_Struct_Access_pt1/tests/input24.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { a= 42; b= 19; printf("%d\n", a & b); printf("%d\n", a | b); printf("%d\n", a ^ b); printf("%d\n", 1 << 3); printf("%d\n", 63 >> 3); return(0); } ================================================ FILE: 32_Struct_Access_pt1/tests/input25.c ================================================ int 
printf(char *fmt); int a; int b; int c; int main() { char z; int y; int x; x= 10; y= 20; z= 30; printf("%d\n", x); printf("%d\n", y); printf("%d\n", z); a= 5; b= 15; c= 25; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); return(0); } ================================================ FILE: 32_Struct_Access_pt1/tests/input26.c ================================================ int printf(char *fmt); int main(int a, char b, long c, int d, int e, int f, int g, int h) { int i; int j; int k; a= 13; printf("%d\n", a); b= 23; printf("%d\n", b); c= 34; printf("%d\n", c); d= 44; printf("%d\n", d); e= 54; printf("%d\n", e); f= 64; printf("%d\n", f); g= 74; printf("%d\n", g); h= 84; printf("%d\n", h); i= 94; printf("%d\n", i); j= 95; printf("%d\n", j); k= 96; printf("%d\n", k); return(0); } ================================================ FILE: 32_Struct_Access_pt1/tests/input27.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int param5(int a, int b, int c, int d, int e) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param2(int a, int b) { int c; int d; int e; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param0() { int a; int b; int c; int d; int e; a= 1; b= 2; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int main() { param8(1,2,3,4,5,6,7,8); param5(1,2,3,4,5); param2(1,2); param0(); return(0); } ================================================ FILE: 32_Struct_Access_pt1/tests/input28.c ================================================ int printf(char *fmt); int param8(int a, int b, int 
c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 32_Struct_Access_pt1/tests/input29.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h); int fred(int a, int b, int c); int main(); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 32_Struct_Access_pt1/tests/input30.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input30.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 32_Struct_Access_pt1/tests/input31.c ================================================ int printf(char *fmt); int main() { int x; x= 2 + + 3 - * / ; } ================================================ FILE: 32_Struct_Access_pt1/tests/input32.c ================================================ int printf(char *fmt); int main() { pizza cow llama sausage; } ================================================ FILE: 
32_Struct_Access_pt1/tests/input33.c ================================================ int printf(char *fmt); int main() { char *z; return(z); } ================================================ FILE: 32_Struct_Access_pt1/tests/input34.c ================================================ int printf(char *fmt); int main() { int a[12]; return(0); } ================================================ FILE: 32_Struct_Access_pt1/tests/input35.c ================================================ int printf(char *fmt); int fred(int a, int b) { int a; return(a); } ================================================ FILE: 32_Struct_Access_pt1/tests/input36.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c); ================================================ FILE: 32_Struct_Access_pt1/tests/input37.c ================================================ int printf(char *fmt); int fred(int a, char b +, int z); ================================================ FILE: 32_Struct_Access_pt1/tests/input38.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c, int g); ================================================ FILE: 32_Struct_Access_pt1/tests/input39.c ================================================ int printf(char *fmt); int main() { int a; } ================================================ FILE: 32_Struct_Access_pt1/tests/input40.c ================================================ int printf(char *fmt); int main() { int a; a= 5; } ================================================ FILE: 32_Struct_Access_pt1/tests/input41.c ================================================ int printf(char *fmt); void fred() { return(5); } ================================================ FILE: 32_Struct_Access_pt1/tests/input42.c ================================================ int printf(char *fmt); int main() { fred(5); } 
================================================ FILE: 32_Struct_Access_pt1/tests/input43.c ================================================ int printf(char *fmt); int main() { int a; a= b[4]; } ================================================ FILE: 32_Struct_Access_pt1/tests/input44.c ================================================ int printf(char *fmt); int main() { int a; a= z; } ================================================ FILE: 32_Struct_Access_pt1/tests/input45.c ================================================ int printf(char *fmt); int main() { int a; a= &5; } ================================================ FILE: 32_Struct_Access_pt1/tests/input46.c ================================================ int printf(char *fmt); int main() { int a; a= *5; } ================================================ FILE: 32_Struct_Access_pt1/tests/input47.c ================================================ int printf(char *fmt); int main() { int a; a= ++5; } ================================================ FILE: 32_Struct_Access_pt1/tests/input48.c ================================================ int printf(char *fmt); int main() { int a; a= --5; } ================================================ FILE: 32_Struct_Access_pt1/tests/input49.c ================================================ int printf(char *fmt); int main() { int x; char y; y= x; } ================================================ FILE: 32_Struct_Access_pt1/tests/input50.c ================================================ int printf(char *fmt); int main() { char *a; char *b; a= a + b; } ================================================ FILE: 32_Struct_Access_pt1/tests/input51.c ================================================ int printf(char *fmt); int main() { char a; a= 'fred'; } ================================================ FILE: 32_Struct_Access_pt1/tests/input52.c ================================================ int printf(char *fmt); int main() { int a; a= $5.00; } 
================================================ FILE: 32_Struct_Access_pt1/tests/input53.c ================================================ int printf(char *fmt); int main() { printf("Hello world, %d\n", 23); return(0); } ================================================ FILE: 32_Struct_Access_pt1/tests/input54.c ================================================ int printf(char *fmt); int main() { int i; for (i=0; i < 20; i++) { printf("Hello world, %d\n", i); } return(0); } ================================================ FILE: 32_Struct_Access_pt1/tests/input55.c ================================================ int printf(char *fmt); int main(int argc, char **argv) { int i; char *argument; printf("Hello world\n"); for (i=0; i < argc; i++) { argument= *argv; argv= argv + 1; printf("Argument %d is %s\n", i, argument); } return(0); } ================================================ FILE: 32_Struct_Access_pt1/tests/input56.c ================================================ struct foo { int x; }; struct mary var1; ================================================ FILE: 32_Struct_Access_pt1/tests/input57.c ================================================ struct fred { int x; } ; struct fred { char y; } ; ================================================ FILE: 32_Struct_Access_pt1/tests/input58.c ================================================ int printf(char *fmt); struct fred { int x; char y; long z; }; struct fred var2; struct fred *varptr; int main() { long result; var2.x= 12; printf("%d\n", var2.x); var2.y= 'c'; printf("%d\n", var2.y); var2.z= 4005; printf("%d\n", var2.z); result= var2.x + var2.y + var2.z; printf("%d\n", result); varptr= &var2; result= varptr->x + varptr->y + varptr->z; printf("%d\n", result); return(0); } ================================================ FILE: 32_Struct_Access_pt1/tests/input59.c ================================================ int main() { int x; x= y.foo; } ================================================ FILE: 
32_Struct_Access_pt1/tests/input60.c ================================================ int main() { int x; x= x.foo; } ================================================ FILE: 32_Struct_Access_pt1/tests/input61.c ================================================ int main() { int x; x= x->foo; } ================================================ FILE: 32_Struct_Access_pt1/tests/mktests ================================================
#!/bin/sh
# Make the output files for each test.
#
# For every input*c file that has neither an "out." nor an "err." file
# yet, run our compiler on it and capture its stderr in "err.<input>".
# If nothing was written to stderr, remove the err file, build the same
# program with the system cc and save its stdout as "out.<input>".

# Build our compiler if needed.
# Only run make when the cd actually succeeded.
if [ ! -f ../cwj ]
then (cd .. && make)
fi

for i in input*c
do if [ ! -f "out.$i" ] && [ ! -f "err.$i" ]
   then
     ../cwj -o out "$i" 2> "err.$i"
     # If the err file is empty
     if [ ! -s "err.$i" ]
     then
        rm -f "err.$i"
        cc -o out "$i" ../lib/printint.c
        ./out > "out.$i"
     fi
   fi
   # Remove the build products before the next test
   rm -f out out.s
done
================================================ FILE: 32_Struct_Access_pt1/tests/out.input01.c ================================================ 36 10 25 ================================================ FILE: 32_Struct_Access_pt1/tests/out.input02.c ================================================ 17 ================================================ FILE: 32_Struct_Access_pt1/tests/out.input03.c ================================================ 1 2 3 4 5 ================================================ FILE: 32_Struct_Access_pt1/tests/out.input04.c ================================================ 1 1 1 1 1 1 1 1 1 ================================================ FILE: 32_Struct_Access_pt1/tests/out.input05.c ================================================ 6 ================================================ FILE: 32_Struct_Access_pt1/tests/out.input06.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 32_Struct_Access_pt1/tests/out.input07.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 32_Struct_Access_pt1/tests/out.input08.c
================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 32_Struct_Access_pt1/tests/out.input09.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 32_Struct_Access_pt1/tests/out.input10.c ================================================ 20 10 1 2 3 4 5 253 254 255 0 1 ================================================ FILE: 32_Struct_Access_pt1/tests/out.input11.c ================================================ 10 20 30 1 2 3 4 5 253 254 255 0 1 2 3 1 2 3 4 5 ================================================ FILE: 32_Struct_Access_pt1/tests/out.input12.c ================================================ 5 ================================================ FILE: 32_Struct_Access_pt1/tests/out.input13.c ================================================ 23 56 ================================================ FILE: 32_Struct_Access_pt1/tests/out.input14.c ================================================ 10 20 30 ================================================ FILE: 32_Struct_Access_pt1/tests/out.input15.c ================================================ 18 18 12 12 ================================================ FILE: 32_Struct_Access_pt1/tests/out.input16.c ================================================ 12 18 ================================================ FILE: 32_Struct_Access_pt1/tests/out.input17.c ================================================ 19 12 ================================================ FILE: 32_Struct_Access_pt1/tests/out.input18.c ================================================ 34 34 ================================================ FILE: 32_Struct_Access_pt1/tests/out.input18a.c ================================================ 15 16 ================================================ FILE: 32_Struct_Access_pt1/tests/out.input19.c ================================================ 30 
================================================ FILE: 32_Struct_Access_pt1/tests/out.input20.c ================================================ 12 ================================================ FILE: 32_Struct_Access_pt1/tests/out.input21.c ================================================ 10 Hello world ================================================ FILE: 32_Struct_Access_pt1/tests/out.input22.c ================================================ 12 12 12 13 13 13 13 13 13 35 35 35 ================================================ FILE: 32_Struct_Access_pt1/tests/out.input23.c ================================================ -23 100 -2 0 1 0 13 14 Hello world ================================================ FILE: 32_Struct_Access_pt1/tests/out.input24.c ================================================ 2 59 57 8 7 ================================================ FILE: 32_Struct_Access_pt1/tests/out.input25.c ================================================ 10 20 30 5 15 25 ================================================ FILE: 32_Struct_Access_pt1/tests/out.input26.c ================================================ 13 23 34 44 54 64 74 84 94 95 96 ================================================ FILE: 32_Struct_Access_pt1/tests/out.input27.c ================================================ 1 2 3 4 5 6 7 8 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 ================================================ FILE: 32_Struct_Access_pt1/tests/out.input28.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 32_Struct_Access_pt1/tests/out.input29.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 32_Struct_Access_pt1/tests/out.input30.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char 
*buf; int main() { int zin; int cnt; buf= " "; zin = open("input30.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 32_Struct_Access_pt1/tests/out.input53.c ================================================ Hello world, 23 ================================================ FILE: 32_Struct_Access_pt1/tests/out.input54.c ================================================ Hello world, 0 Hello world, 1 Hello world, 2 Hello world, 3 Hello world, 4 Hello world, 5 Hello world, 6 Hello world, 7 Hello world, 8 Hello world, 9 Hello world, 10 Hello world, 11 Hello world, 12 Hello world, 13 Hello world, 14 Hello world, 15 Hello world, 16 Hello world, 17 Hello world, 18 Hello world, 19 ================================================ FILE: 32_Struct_Access_pt1/tests/out.input55.c ================================================ Hello world Argument 0 is ./out ================================================ FILE: 32_Struct_Access_pt1/tests/out.input58.c ================================================ 12 99 4005 4116 4116 ================================================ FILE: 32_Struct_Access_pt1/tests/runtests ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! -f ../cwj ] then (cd ..; make) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" 
# Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../cwj -o out $i ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../cwj $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.s "trial.$i" done ================================================ FILE: 32_Struct_Access_pt1/tests/runtestsn ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! -f ../compn ] then (cd ..; make) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" # Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../compn -o out $i ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. 
Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../compn $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.o out.s "trial.$i" done ================================================ FILE: 32_Struct_Access_pt1/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->type = type; n->left = left; n->mid = mid; n->right = right; n->sym = sym; n->intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, struct symtable *sym, int intvalue) { return (mkastnode(op, type, NULL, NULL, NULL, sym, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, struct symtable *sym, int intvalue) { return (mkastnode(op, type, left, NULL, NULL, sym, intvalue)); } // Generate and return a new label number // just for AST dumping purposes static int gendumplabel(void) { static int id = 1; return (id++); } // Given an AST tree, print it out and follow the // traversal of the tree that genAST() follows void dumpAST(struct ASTnode *n, int label, int level) { int Lfalse, Lstart, Lend; switch (n->op) { case A_IF: Lfalse = gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_IF"); if (n->right) { Lend = gendumplabel(); fprintf(stdout, ", end L%d", Lend); } fprintf(stdout, "\n"); dumpAST(n->left, Lfalse, level + 2); 
dumpAST(n->mid, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); return; case A_WHILE: Lstart = gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_WHILE, start L%d\n", Lstart); Lend = gendumplabel(); dumpAST(n->left, Lend, level + 2); dumpAST(n->right, NOLABEL, level + 2); return; } // Reset level to -2 for A_GLUE if (n->op == A_GLUE) level = -2; // General AST node handling if (n->left) dumpAST(n->left, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); for (int i = 0; i < level; i++) fprintf(stdout, " "); switch (n->op) { case A_GLUE: fprintf(stdout, "\n\n"); return; case A_FUNCTION: fprintf(stdout, "A_FUNCTION %s\n", n->sym->name); return; case A_ADD: fprintf(stdout, "A_ADD\n"); return; case A_SUBTRACT: fprintf(stdout, "A_SUBTRACT\n"); return; case A_MULTIPLY: fprintf(stdout, "A_MULTIPLY\n"); return; case A_DIVIDE: fprintf(stdout, "A_DIVIDE\n"); return; case A_EQ: fprintf(stdout, "A_EQ\n"); return; case A_NE: fprintf(stdout, "A_NE\n"); return; case A_LT: fprintf(stdout, "A_LE\n"); return; case A_GT: fprintf(stdout, "A_GT\n"); return; case A_LE: fprintf(stdout, "A_LE\n"); return; case A_GE: fprintf(stdout, "A_GE\n"); return; case A_INTLIT: fprintf(stdout, "A_INTLIT %d\n", n->intvalue); return; case A_STRLIT: fprintf(stdout, "A_STRLIT rval label L%d\n", n->intvalue); return; case A_IDENT: if (n->rvalue) fprintf(stdout, "A_IDENT rval %s\n", n->sym->name); else fprintf(stdout, "A_IDENT %s\n", n->sym->name); return; case A_ASSIGN: fprintf(stdout, "A_ASSIGN\n"); return; case A_WIDEN: fprintf(stdout, "A_WIDEN\n"); return; case A_RETURN: fprintf(stdout, "A_RETURN\n"); return; case A_FUNCCALL: fprintf(stdout, "A_FUNCCALL %s\n", n->sym->name); return; case A_ADDR: fprintf(stdout, "A_ADDR %s\n", n->sym->name); return; case A_DEREF: if (n->rvalue) fprintf(stdout, "A_DEREF rval\n"); else fprintf(stdout, "A_DEREF\n"); return; case A_SCALE: fprintf(stdout, "A_SCALE %d\n", n->size); return; 
case A_PREINC: fprintf(stdout, "A_PREINC %s\n", n->sym->name); return; case A_PREDEC: fprintf(stdout, "A_PREDEC %s\n", n->sym->name); return; case A_POSTINC: fprintf(stdout, "A_POSTINC\n"); return; case A_POSTDEC: fprintf(stdout, "A_POSTDEC\n"); return; case A_NEGATE: fprintf(stdout, "A_NEGATE\n"); return; default: fatald("Unknown dumpAST operator", n->op); } } ================================================ FILE: 32_Struct_Access_pt1/types.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Types and type handling // Copyright (c) 2019 Warren Toomey, GPL3 // Return true if a type is an int type // of any size, false otherwise int inttype(int type) { return ((type & 0xf) == 0); } // Return true if a type is of pointer type int ptrtype(int type) { return ((type & 0xf) != 0); } // Given a primitive type, return // the type which is a pointer to it int pointer_to(int type) { if ((type & 0xf) == 0xf) fatald("Unrecognised in pointer_to: type", type); return (type + 1); } // Given a primitive pointer type, return // the type which it points to int value_at(int type) { if ((type & 0xf) == 0x0) fatald("Unrecognised in value_at: type", type); return (type - 1); } // Given a type and a composite type pointer, return // the size of this type in bytes int typesize(int type, struct symtable *ctype) { if (type == P_STRUCT) return(ctype->size); return(genprimsize(type)); } // Given an AST tree and a type which we want it to become, // possibly modify the tree by widening or scaling so that // it is compatible with this type. Return the original tree // if no changes occurred, a modified tree, or NULL if the // tree is not compatible with the given type. // If this will be part of a binary operation, the AST op is not zero. 
struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op) { int ltype; int lsize, rsize; ltype = tree->type; // XXX No idea on these yet if (ltype == P_STRUCT) fatal("Don't know how to do this yet"); if (rtype == P_STRUCT) fatal("Don't know how to do this yet"); // Compare scalar int types if (inttype(ltype) && inttype(rtype)) { // Both types same, nothing to do if (ltype == rtype) return (tree); // Get the sizes for each type lsize = typesize(ltype, NULL); // XXX Fix soon rsize = typesize(rtype, NULL); // XXX Fix soon // Tree's size is too big if (lsize > rsize) return (NULL); // Widen to the right if (rsize > lsize) return (mkastunary(A_WIDEN, rtype, tree, NULL, 0)); } // For pointers on the left if (ptrtype(ltype)) { // OK is same type on right and not doing a binary op if (op == 0 && ltype == rtype) return (tree); } // We can scale only on A_ADD or A_SUBTRACT operation if (op == A_ADD || op == A_SUBTRACT) { // Left is int type, right is pointer type and the size // of the original type is >1: scale the left if (inttype(ltype) && ptrtype(rtype)) { rsize = genprimsize(value_at(rtype)); if (rsize > 1) return (mkastunary(A_SCALE, rtype, tree, NULL, rsize)); else return (tree); // Size 1, no need to scale } } // If we get here, the types are not compatible return (NULL); } ================================================ FILE: 33_Unions/Makefile ================================================ HSRCS= data.h decl.h defs.h SRCS= cg.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c ARMSRCS= cg_arm.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c cwj: $(SRCS) $(HSRCS) cc -o cwj -g -Wall $(SRCS) compn: $(SRCN) $(HSRCS) cc -D__NASM__ -o compn -g -Wall $(SRCN) cwjarm: $(ARMSRCS) $(HSRCS) cc -o cwjarm -g -Wall $(ARMSRCS) cp cwjarm cwj clean: rm -f cwj cwjarm compn *.o *.s out test: cwj tests/runtests (cd tests; chmod +x runtests; 
./runtests) armtest: cwjarm tests/runtests (cd tests; chmod +x runtests; ./runtests) testn: compn tests/runtestsn (cd tests; chmod +x runtestsn; ./runtestsn) ================================================ FILE: 33_Unions/Readme.md ================================================ # Part 33: Implementing Unions and Member Access Unions also turned out to be easy to implement for one reason: they are like structs except that all members of a union are located at offset zero from the base of the union. Also, the grammar of a union declaration is the same as a struct except for the "union" keyword. This means that we can re-use and modify the existing structs code to deal with unions. ## A New Keyword: "union" I've added the "union" keyword and the T_UNION token to the scanner in `scan.c`. As always, I'll omit the code that does the scanning. ## The Union Symbol List As with structs, there is a singly-linked list to store unions (in `data.h`): ```c extern_ struct symtable *Unionhead, *Uniontail; // List of union types ``` In `sym.c`, I've also written `addunion()` and `findunion()` functions to add a new union type node to the list and to search for a union type with a given name on the list. > I'm considering merging the struct and union lists into a single composite type list, but I haven't done it yet. I'll probably do it when I get around to some more refactoring. ## Parsing Union Declarations We are going to modify the existing struct parsing code in `decl.c` to parse both structs and unions. I'll only give the changes to the functions, not the whole functions. In `parse_type()`, we now scan the T_UNION token and call the function to parse both struct and union types: ```c case T_STRUCT: type = P_STRUCT; *ctype = composite_declaration(P_STRUCT); break; case T_UNION: type = P_UNION; *ctype = composite_declaration(P_UNION); break; ``` This function `composite_declaration()` was called `struct_declaration()` in the last part of our journey. 
It now takes the type that we are parsing. ## The `composite_declaration()` Function Here are the changes: ```c // Parse composite type declarations: structs or unions. // Either find an existing struct/union declaration, or build // a struct/union symbol table entry and return its pointer. static struct symtable *composite_declaration(int type) { ... // Find any matching composite type if (type == P_STRUCT) ctype = findstruct(Text); else ctype = findunion(Text); ... // Build the composite type and skip the left brace if (type == P_STRUCT) ctype = addstruct(Text, P_STRUCT, NULL, 0, 0); else ctype = addunion(Text, P_UNION, NULL, 0, 0); ... // Set the position of each successive member in the composite type // Unions are easy. For structs, align the member and find the next free byte for (m = m->next; m != NULL; m = m->next) { // Set the offset for this member if (type == P_STRUCT) m->posn = genalign(m->type, offset, 1); else m->posn = 0; // Get the offset of the next free byte after this member offset += typesize(m->type, m->ctype); } ... return (ctype); } ``` That's it. We simply change the symbol table list we are working on, and always set the member offset to zero for unions. This is why I think it would be worth merging the struct and union type lists into a single list. ## Parsing Union Expressions As with the union declarations, we can reuse the code that deals with structs in expressions. In fact, there are very few changes to make in `expr.c`. ```c // Parse the member reference of a struct or union // and return an AST tree for it. If withpointer is true, // the access is through a pointer to the member. static struct ASTnode *member_access(int withpointer) { ... if (withpointer && compvar->type != pointer_to(P_STRUCT) && compvar->type != pointer_to(P_UNION)) fatals("Undeclared variable", Text); if (!withpointer && compvar->type != P_STRUCT && compvar->type != P_UNION) fatals("Undeclared variable", Text); ``` Again, that's it. 
The rest of the code was generic enough that we can use it for unions unmodified. And I think there was only one other major change, which was to a function in `types.c`: ```c // Given a type and a composite type pointer, return // the size of this type in bytes int typesize(int type, struct symtable *ctype) { if (type == P_STRUCT || type == P_UNION) return (ctype->size); return (genprimsize(type)); } ``` ## Testing the Union Code Here's our test program, `tests/input62.c`: ```c int printf(char *fmt); union fred { char w; int x; int y; long z; }; union fred var1; union fred *varptr; int main() { var1.x= 65; printf("%d\n", var1.x); var1.x= 66; printf("%d\n", var1.x); printf("%d\n", var1.y); printf("The next two depend on the endian of the platform\n"); printf("%d\n", var1.w); printf("%d\n", var1.z); varptr= &var1; varptr->x= 67; printf("%d\n", varptr->x); printf("%d\n", varptr->y); return(0); } ``` This tests that all four members in the union are at the same location, so that a change to one member is seen as the same change to all members. We also check that pointer access into a union works. ## Conclusion and What's Next This was another nice and easy part of our compiler writing journey. In the next part of our compiler writing journey, we will add enums. [Next step](../34_Enums_and_Typedefs/Readme.md) ================================================ FILE: 33_Unions/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section we are outputting into enum { no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\t.text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\t.data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. 
int cgprimsize(int type) {
  // Every pointer is 8 bytes, whatever it points at
  if (ptrtype(type))
    return (8);
  switch (type) {
    case P_CHAR:
      return (1);
    case P_INT:
      return (4);
    case P_LONG:
      return (8);
    default:
      fatald("Bad type in cgprimsize:", type);
  }
  return (0);                   // Keep -Wall happy
}

// Given a scalar type, an existing memory offset
// (which hasn't been allocated to anything yet)
// and a direction (1 is up, -1 is down), calculate
// and return a suitably aligned memory offset
// for this scalar type. This could be the original
// offset, or it could be above/below the original.
// Chars are returned unchanged; ints, longs and pointers are
// moved to a 4-byte boundary. Any other type dies via fatald().
int cgalign(int type, int offset, int direction) {
  int alignment;

  // We don't need to do this on x86-64, but let's
  // align chars on any offset and align ints/pointers
  // on a 4-byte alignment
  switch (type) {
    case P_CHAR:
      return (offset);
    case P_INT:
    case P_LONG:
      break;
    default:
      // Pointers are scalars too (cgprimsize() gives them 8 bytes)
      // and take the same alignment as ints and longs. Previously
      // they fell into the error below, so a pointer could not be
      // aligned at all.
      if (!ptrtype(type))
        fatald("Bad type in calc_aligned_offset:", type);
  }

  // Here we have an int, a long or a pointer. Align it on a 4-byte offset
  // I put the generic code here so it can be reused elsewhere.
  alignment = 4;
  offset = (offset + direction * (alignment - 1)) & ~(alignment - 1);
  return (offset);
}

// Position of next local variable relative to stack base pointer.
// We store the offset as positive to make aligning the stack pointer easier
static int localOffset;
static int stackOffset;

// Create the position of a new local variable.
// Returns the (negative) offset from the base pointer.
static int newlocaloffset(int type) {
  // Decrement the offset by a minimum of 4 bytes
  // and allocate on the stack
  localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4;
  return (-localOffset);
}

// List of available registers and their names.
// We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "%r10", "%r11", "%r12", "%r13", "%r9", "%r8", "%rcx", "%rdx", "%rsi", "%rdi" }; static char *breglist[] = { "%r10b", "%r11b", "%r12b", "%r13b", "%r9b", "%r8b", "%cl", "%dl", "%sil", "%dil" }; static char *dreglist[] = { "%r10d", "%r11d", "%r12d", "%r13d", "%r9d", "%r8d", "%ecx", "%edx", "%esi", "%edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(struct symtable *sym) { char *name = sym->name; struct symtable *parm, *locvar; int cnt; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the %rsp and %rsp fprintf(Outfile, "\t.globl\t%s\n" "\t.type\t%s, @function\n" "%s:\n" "\tpushq\t%%rbp\n" "\tmovq\t%%rsp, %%rbp\n", name, name, name); // Copy any in-register parameters to the stack, up to six of them // The remaining parameters are already on the stack for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) { if (cnt > 6) { parm->posn = paramOffset; paramOffset += 8; } else { parm->posn = newlocaloffset(parm->type); cgstorlocal(paramReg--, parm); } } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) { locvar->posn = newlocaloffset(locvar->type); } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\taddq\t$%d,%%rsp\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->endlabel); fprintf(Outfile, "\taddq\t$%d,%%rsp\n", stackOffset); fputs("\tpopq %rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. 
int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadglob(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name); } else // Print out the code to initialise it switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovzbq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovslq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. 
int cgloadlocal(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->posn); fprintf(Outfile, "\tmovq\t%d(%%rbp), %s\n", sym->posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->posn); } else switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->posn); fprintf(Outfile, "\tmovzbq\t%d(%%rbp), %s\n", sym->posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->posn); fprintf(Outfile, "\tmovslq\t%d(%%rbp), %s\n", sym->posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->posn); break; default: fatald("Bad type in cgloadlocal:", sym->type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tleaq\tL%d(%%rip), %s\n", label, reglist[r]); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, 
"\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tandq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\torq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txorq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshlq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshrq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tnegq\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnotq\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); return (r); } // Convert an integer value to a boolean value. 
Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s@PLT\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\taddq\t$%d, %%rsp\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpushq\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmovq\t%s, %s\n", reglist[r], reglist[FIRSTPARAMREG - argposn + 1]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsalq\t$%d, %s\n", val, reglist[r]); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %s(%%rip)\n", reglist[r], sym->name); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %s(%%rip)\n", breglist[r], sym->name); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %s(%%rip)\n", dreglist[r], sym->name); break; default: fatald("Bad type in cgstorglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %d(%%rbp)\n", reglist[r], sym->posn); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %d(%%rbp)\n", breglist[r], sym->posn); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %d(%%rbp)\n", dreglist[r], sym->posn); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the type size = typesize(node->type, node->ctype); // Generate the global identity and the label cgdataseg(); fprintf(Outfile, "\t.globl\t%s\n", node->name); fprintf(Outfile, "%s:", node->name); // Generate the space for this type switch (size) { case 1: fprintf(Outfile, "\t.byte\t0\n"); break; case 4: fprintf(Outfile, "\t.long\t0\n"); break; case 8: fprintf(Outfile, "\t.quad\t0\n"); 
break; default: for (int i=0; i < size; i++) fprintf(Outfile, "\t.byte\t0\n"); } } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\t.byte\t%d\n", *cptr); } fprintf(Outfile, "\t.byte\t0\n"); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzbl\t%s, %%eax\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %%eax\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } cgjump(sym->endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL) fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", sym->name, reglist[r]); else fprintf(Outfile, "\tleaq\t%d(%%rbp), %s\n", sym->posn, reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzbq\t(%s), %s\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovslq\t(%s), %s\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { // Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmovb\t%s, (%s)\n", breglist[r1], reglist[r2]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 33_Unions/cg_arm.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for ARMv6 on Raspberry Pi // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. static int freereg[4]; static char *reglist[4] = { "r4", "r5", "r6", "r7" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. 
static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // We have to store large integer literal values in memory. // Keep a list of them which will be output in the postamble #define MAXINTS 1024 int Intlist[MAXINTS]; static int Intslot = 0; // Determine the offset of a large integer // literal from the .L3 label. If the integer // isn't in the list, add it. static void set_int_offset(int val) { int offset = -1; // See if it is already there for (int i = 0; i < Intslot; i++) { if (Intlist[i] == val) { offset = 4 * i; break; } } // Not in the list, so add it if (offset == -1) { offset = 4 * Intslot; if (Intslot == MAXINTS) fatal("Out of int slots in set_int_offset()"); Intlist[Intslot++] = val; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L3+%d\n", offset); } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Print out the assembly postamble void cgpostamble() { // Print out the global variables fprintf(Outfile, ".L2:\n"); for (int i = 0; i < Globs; i++) { if (Symtable[i].stype == S_VARIABLE) fprintf(Outfile, "\t.word %s\n", Symtable[i].name); } // Print out the integer literals fprintf(Outfile, ".L3:\n"); for (int i = 0; i < Intslot; i++) { fprintf(Outfile, "\t.word %d\n", Intlist[i]); } } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, \%%function\n" "%s:\n" "\tpush\t{fp, lr}\n" "\tadd\tfp, sp, #4\n" "\tsub\tsp, sp, #8\n" "\tstr\tr0, [fp, #-8]\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { 
cglabel(Symtable[id].endlabel); fputs("\tsub\tsp, fp, #4\n" "\tpop\t{fp, pc}\n" "\t.align\t2\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); // If the literal value is small, do it with one instruction if (value <= 1000) fprintf(Outfile, "\tmov\t%s, #%d\n", reglist[r], value); else { set_int_offset(value); fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); } return (r); } // Determine the offset of a variable from the .L2 // label. Yes, this is inefficient code. static void set_var_offset(int id) { int offset = 0; // Walk the symbol table up to id. // Find S_VARIABLEs and add on 4 until // we get to our variable for (int i = 0; i < id; i++) { if (Symtable[i].stype == S_VARIABLE) offset += 4; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L2+%d\n", offset); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tldrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s, %s\n", reglist[r1], reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register 
with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\tmul\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { // To do a divide: r0 holds the dividend, r1 holds the divisor. // The quotient is in r0. fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r1]); fprintf(Outfile, "\tmov\tr1, %s\n", reglist[r2]); fprintf(Outfile, "\tbl\t__aeabi_idiv\n"); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r1]); free_register(r2); return (r1); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]); fprintf(Outfile, "\tbl\t%s\n", Symtable[id].name); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r]); return (r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tlsl\t%s, %s, #%d\n", reglist[r], reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tstr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgstorglob:", Symtable[id].type); } return (r); } // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { if (ptrtype(type)) return (4); switch (type) { case P_CHAR: return (1); case P_INT: case P_LONG: return (4); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Symtable[id].type); fprintf(Outfile, "\t.data\n" "\t.globl\t%s\n", Symtable[id].name); switch (typesize) { case 1: fprintf(Outfile, "%s:\t.byte\t0\n", Symtable[id].name); break; case 4: fprintf(Outfile, "%s:\t.long\t0\n", Symtable[id].name); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "moveq", "movne", "movlt", "movgt", "movle", "movge" }; // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "movne", "moveq", "movge", "movle", "movgt", "movlt" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #1\n", cmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #0\n", invcmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\tuxtb\t%s, %s\n", reglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tb\tL%d\n", l); } // List of inverted branch instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *brlist[] = { "bne", "beq", "bge", "ble", "bgt", "blt" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", brlist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[reg]); cgjump(Symtable[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. Return a new register int cgaddress(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); fprintf(Outfile, "\tmov\t%s, r3\n", reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tldrb\t%s, [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [%s]\n", reglist[r1], reglist[r2]); break; case P_INT: case P_LONG: fprintf(Outfile, "\tstr\t%s, [%s]\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 33_Unions/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { no_seg, 
text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\tsection .text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\tsection .data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch(type) { case P_CHAR: return (offset); case P_INT: case P_LONG: break; default: fatald("Bad type in calc_aligned_offset:", type); } // Here we have an int or a long. Align it on a 4-byte offset // I put the generic code here so it can be reused elsewhere. alignment= 4; offset = (offset + direction * (alignment-1)) & ~(alignment-1); return (offset); } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. 
// We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "r10", "r11", "r12", "r13", "r9", "r8", "rcx", "rdx", "rsi", "rdi" }; static char *breglist[] = { "r10b", "r11b", "r12b", "r13b", "r9b", "r8b", "cl", "dl", "sil", "dil" }; static char *dreglist[] = { "r10d", "r11d", "r12d", "r13d", "r9d", "r8d", "ecx", "edx", "esi", "edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\textern\tprintint\n", Outfile); fputs("\textern\tprintchar\n", Outfile); fputs("\textern\topen\n", Outfile); fputs("\textern\tclose\n", Outfile); fputs("\textern\tread\n", Outfile); fputs("\textern\twrite\n", Outfile); fputs("\textern\tprintf\n", Outfile); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(struct symtable *sym) { char *name = sym->name; struct symtable *parm, *locvar; int cnt; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the rsp and rbp fprintf(Outfile, "\tglobal\t%s\n" "%s:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n", name, name); // Copy any in-register parameters to the stack, up to six of them // The remaining parameters are already on the stack for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) { if (cnt > 6) { parm->posn = paramOffset; paramOffset += 8; } else { parm->posn = newlocaloffset(parm->type); cgstorlocal(paramReg--, parm); } } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. 
for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) { locvar->posn = newlocaloffset(locvar->type); } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\tadd\trsp, %d\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->endlabel); fprintf(Outfile, "\tadd\trsp, %d\n", stackOffset); fputs("\tpop rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadglob(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); } else // Print out the code to initialise it switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, 
"\tdec\tdword [%s]\n", sym->name); fprintf(Outfile, "\tmovsx\t%s, word [%s]\n", dreglist[r], sym->name); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword [%s]\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadlocal(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->posn); fprintf(Outfile, "\tmov\t%s, [rbp+%d]\n", reglist[r], sym->posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->posn); } else switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->posn); fprintf(Outfile, "\tmovzx\t%s, byte [rbp+%d]\n", reglist[r], sym->posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", sym->posn); fprintf(Outfile, "\tmovsx\t%s, word [rbp+%d]\n", reglist[r], sym->posn); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, 
"\tdec\tdword\t[rbp+%d]\n", sym->posn); break; default: fatald("Bad type in cgloadlocal:", sym->type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, L%d\n", reglist[r], label); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tand\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\tor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshl\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshr\t%s, 
cl\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tneg\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnot\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r], breglist[r]); return (r); } // Convert an integer value to a boolean value. Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, byte %s\n", reglist[r], breglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\tadd\trsp, %d\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmov\t%s, rax\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpush\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmov\t%s, %s\n", reglist[FIRSTPARAMREG - argposn + 1], reglist[r]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsal\t%s, %d\n", reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, dreglist[r]); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\tqword\t[rbp+%d], %s\n", sym->posn, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\tbyte\t[rbp+%d], %s\n", sym->posn, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\tdword\t[rbp+%d], %s\n", sym->posn, dreglist[r]); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the type size = typesize(node->type, node->ctype); // Generate the global identity and the label cgdataseg(); fprintf(Outfile, "\tsection\t.data\n" "\tglobal\t%s\n", node->name); fprintf(Outfile, "%s:", node->name); // Generate the space for this type // original version for (int i = 0; i < node->size; i++) { switch(size) { case 1: fprintf(Outfile, "\tdb\t0\n"); break; case 4: fprintf(Outfile, 
"\tdd\t0\n"); break; case 8: fprintf(Outfile, "\tdq\t0\n"); break; default: for (int i = 0; i < size; i++) fprintf(Outfile, "\tdb\t0\n"); } } /* compact version using times instead of loop switch(size) { case 1: fprintf(Outfile, "\ttimes\t%d\tdb\t0\n", node->size); break; case 4: fprintf(Outfile, "\ttimes\t%d\tdd\t0\n", node->size); break; case 8: fprintf(Outfile, "\ttimes\t%d\tdq\t0\n", node->size); break; default: fprintf(Outfile, "\ttimes\t%d\tdb\t0\n", size); } */ } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\tdb\t%d\n", *cptr); } fprintf(Outfile, "\tdb\t0\n"); /* put string in readable format on a single line // probably went overboard with error checking int comma = 0, quote = 0, start = 1; fprintf(Outfile, "\tdb\t"); for (cptr=strvalue; *cptr; cptr++) { if ( ! isprint(*cptr) ) if (comma || start) { fprintf(Outfile, "%d, ", *cptr); start = 0; comma = 1; } else if (quote) { fprintf(Outfile, "\', %d, ", *cptr); comma = 1; quote = 0; } else { fprintf(Outfile, "%d, ", *cptr); comma = 1; quote = 0; } else if (start || comma) { fprintf(Outfile, "\'%c", *cptr); start = comma = 0; quote = 1; } else { fprintf(Outfile, "%c", *cptr); comma = 0; quote = 1; } } if (comma || start) fprintf(Outfile, "0\n"); else fprintf(Outfile, "\', 0\n"); */ } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzx\teax, %s\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmov\teax, %s\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } cgjump(sym->endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL) fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r], sym->name); else fprintf(Outfile, "\tlea\t%s, [rbp+%d]\n", reglist[r], sym->posn); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovsx\t%s, dword [%s]\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { //Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmov\t[%s], byte %s\n", reglist[r2], breglist[r1]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 33_Unions/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Putback; // Character put back by scanner extern_ struct symtable *Functionid; // Symbol ptr of the current function extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ char *Outfilename; // Name of file we opened as Outfile extern_ struct token Token; // Last token scanned extern_ char Text[TEXTLEN + 1]; // Last identifier scanned // Symbol table lists extern_ struct symtable *Globhead, *Globtail; // Global variables and functions 
extern_ struct symtable *Loclhead, *Locltail; // Local variables extern_ struct symtable *Parmhead, *Parmtail; // Local parameters extern_ struct symtable *Membhead, *Membtail; // Temp list of struct/union members extern_ struct symtable *Structhead, *Structtail; // List of struct types extern_ struct symtable *Unionhead, *Uniontail; // List of struct types // Command-line flags extern_ int O_dumpAST; // If true, dump the AST trees extern_ int O_keepasm; // If true, keep any assembly files extern_ int O_assemble; // If true, assemble the assembly files extern_ int O_dolink; // If true, link the object files extern_ int O_verbose; // If true, print info on compilation stages ================================================ FILE: 33_Unions/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 static struct symtable *composite_declaration(int type); // Parse the current token and return // a primitive type enum value and a pointer // to any composite type. 
// Also scan in the next token int parse_type(struct symtable **ctype) { int type; switch (Token.token) { case T_VOID: type = P_VOID; scan(&Token); break; case T_CHAR: type = P_CHAR; scan(&Token); break; case T_INT: type = P_INT; scan(&Token); break; case T_LONG: type = P_LONG; scan(&Token); break; case T_STRUCT: type = P_STRUCT; *ctype = composite_declaration(P_STRUCT); break; case T_UNION: type = P_UNION; *ctype = composite_declaration(P_UNION); break; default: fatald("Illegal type, token", Token.token); } // Scan in one or more further '*' tokens // and determine the correct pointer type while (1) { if (Token.token != T_STAR) break; type = pointer_to(type); scan(&Token); } // We leave with the next token already scanned return (type); } // variable_declaration: type identifier ';' // | type identifier '[' INTLIT ']' ';' // ; // // Parse the declaration of a scalar variable or an array // with a given size. // The identifier has been scanned & we have the type. // class is the variable's class // Return the pointer to variable's entry in the symbol table struct symtable *var_declaration(int type, struct symtable *ctype, int class) { struct symtable *sym = NULL; // See if this has already been declared switch (class) { case C_GLOBAL: if (findglob(Text) != NULL) fatals("Duplicate global variable declaration", Text); case C_LOCAL: case C_PARAM: if (findlocl(Text) != NULL) fatals("Duplicate local variable declaration", Text); case C_MEMBER: if (findmember(Text) != NULL) fatals("Duplicate struct/union member declaration", Text); } // Text now has the identifier's name. // If the next token is a '[' if (Token.token == T_LBRACKET) { // Skip past the '[' scan(&Token); // Check we have an array size if (Token.token == T_INTLIT) { // Add this as a known array and generate its space in assembly. 
// We treat the array as a pointer to its elements' type switch (class) { case C_GLOBAL: sym = addglob(Text, pointer_to(type), ctype, S_ARRAY, Token.intvalue); break; case C_LOCAL: case C_PARAM: case C_MEMBER: fatal("For now, declaration of non-global arrays is not implemented"); } } // Ensure we have a following ']' scan(&Token); match(T_RBRACKET, "]"); } else { // Add this as a known scalar // and generate its space in assembly switch (class) { case C_GLOBAL: sym = addglob(Text, type, ctype, S_VARIABLE, 1); break; case C_LOCAL: sym = addlocl(Text, type, ctype, S_VARIABLE, 1); break; case C_PARAM: sym = addparm(Text, type, ctype, S_VARIABLE, 1); break; case C_MEMBER: sym = addmemb(Text, type, ctype, S_VARIABLE, 1); break; } } return (sym); } // var_declaration_list: // | variable_declaration // | variable_declaration separate_token var_declaration_list ; // // When called to parse function parameters, separate_token is ','. // When called to parse members of a struct/union, separate_token is ';'. // // Parse a list of variables. // Add them as symbols to one of the symbol table lists, and return the // number of variables. If funcsym is not NULL, there is an existing function // prototype, so compare each variable's type against this prototype. 
static int var_declaration_list(struct symtable *funcsym, int class, int separate_token, int end_token) { int type; int paramcnt = 0; struct symtable *protoptr = NULL; struct symtable *ctype; // If there is a prototype, get the pointer // to the first prototype parameter if (funcsym != NULL) protoptr = funcsym->member; // Loop until the final end token while (Token.token != end_token) { // Get the type and identifier type = parse_type(&ctype); ident(); // Check that this type matches the prototype if there is one if (protoptr != NULL) { if (type != protoptr->type) fatald("Type doesn't match prototype for parameter", paramcnt + 1); protoptr = protoptr->next; } else { // Add a new parameter to the right symbol table list, based on the class var_declaration(type, ctype, class); } paramcnt++; // Must have a separate_token or ')' at this point if ((Token.token != separate_token) && (Token.token != end_token)) fatald("Unexpected token in parameter list", Token.token); if (Token.token == separate_token) scan(&Token); } // Check that the number of parameters in this list matches // any existing prototype if ((funcsym != NULL) && (paramcnt != funcsym->nelems)) fatals("Parameter count mismatch for function", funcsym->name); // Return the count of parameters return (paramcnt); } // // function_declaration: type identifier '(' parameter_list ')' ; // | type identifier '(' parameter_list ')' compound_statement ; // // Parse the declaration of function. // The identifier has been scanned & we have the type. struct ASTnode *function_declaration(int type) { struct ASTnode *tree, *finalstmt; struct symtable *oldfuncsym, *newfuncsym = NULL; int endlabel, paramcnt; // Text has the identifier's name. If this exists and is a // function, get the id. Otherwise, set oldfuncsym to NULL. 
if ((oldfuncsym = findsymbol(Text)) != NULL) if (oldfuncsym->stype != S_FUNCTION) oldfuncsym = NULL; // If this is a new function declaration, get a // label-id for the end label, and add the function // to the symbol table, if (oldfuncsym == NULL) { endlabel = genlabel(); // Assumtion: functions only return scalar types, so NULL below newfuncsym = addglob(Text, type, NULL, S_FUNCTION, endlabel); } // Scan in the '(', any parameters and the ')'. // Pass in any existing function prototype pointer lparen(); paramcnt = var_declaration_list(oldfuncsym, C_PARAM, T_COMMA, T_RPAREN); rparen(); // If this is a new function declaration, update the // function symbol entry with the number of parameters. // Also copy the parameter list into the function's node. if (newfuncsym) { newfuncsym->nelems = paramcnt; newfuncsym->member = Parmhead; oldfuncsym = newfuncsym; } // Clear out the parameter list Parmhead = Parmtail = NULL; // Declaration ends in a semicolon, only a prototype. if (Token.token == T_SEMI) { scan(&Token); return (NULL); } // This is not just a prototype. // Set the Functionid global to the function's symbol pointer Functionid = oldfuncsym; // Get the AST tree for the compound statement tree = compound_statement(); // If the function type isn't P_VOID .. if (type != P_VOID) { // Error if no statements in the function if (tree == NULL) fatal("No statements in function with non-void type"); // Check that the last AST operation in the // compound statement was a return statement finalstmt = (tree->op == A_GLUE) ? tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); } // Return an A_FUNCTION node which has the function's symbol pointer // and the compound statement sub-tree return (mkastunary(A_FUNCTION, type, tree, oldfuncsym, endlabel)); } // Parse composite type declarations: structs or unions. 
// Either find an existing struct/union declaration, or build // a struct/union symbol table entry and return its pointer. static struct symtable *composite_declaration(int type) { struct symtable *ctype = NULL; struct symtable *m; int offset; // Skip the struct/union keyword scan(&Token); // See if there is a following struct/union name if (Token.token == T_IDENT) { // Find any matching composite type if (type == P_STRUCT) ctype = findstruct(Text); else ctype = findunion(Text); scan(&Token); } // If the next token isn't an LBRACE , this is // the usage of an existing struct/union type. // Return the pointer to the type. if (Token.token != T_LBRACE) { if (ctype == NULL) fatals("unknown struct/union type", Text); return (ctype); } // Ensure this struct/union type hasn't been // previously defined if (ctype) fatals("previously defined struct/union", Text); // Build the composite type and skip the left brace if (type == P_STRUCT) ctype = addstruct(Text, P_STRUCT, NULL, 0, 0); else ctype = addunion(Text, P_UNION, NULL, 0, 0); scan(&Token); // Scan in the list of members and attach // to the struct type's node var_declaration_list(NULL, C_MEMBER, T_SEMI, T_RBRACE); rbrace(); ctype->member = Membhead; Membhead = Membtail = NULL; // Set the offset of the initial member // and find the first free byte after it m = ctype->member; m->posn = 0; offset = typesize(m->type, m->ctype); // Set the position of each successive member in the composite type // Unions are easy. 
For structs, align the member and find the next free byte for (m = m->next; m != NULL; m = m->next) { // Set the offset for this member if (type == P_STRUCT) m->posn = genalign(m->type, offset, 1); else m->posn = 0; // Get the offset of the next free byte after this member offset += typesize(m->type, m->ctype); } // Set the overall size of the composite type ctype->size = offset; return (ctype); } // Parse one or more global declarations, either // variables, functions or structs void global_declarations(void) { struct ASTnode *tree; struct symtable *ctype; int type; while (1) { // Stop when we have reached EOF if (Token.token == T_EOF) break; // Get the type type = parse_type(&ctype); // We might have just parsed a struct or union // declaration with no associated variable. // The next token might be a ';'. Loop back if it is. // XXX. I'm not happy with this as it allows // "struct fred;" as an accepted statement if ((type == P_STRUCT || type == P_UNION) && Token.token == T_SEMI) { scan(&Token); continue; } // We have to read past the identifier // to see either a '(' for a function declaration // or a ',' or ';' for a variable declaration. // Text is filled in by the ident() call. 
ident(); if (Token.token == T_LPAREN) { // Parse the function declaration tree = function_declaration(type); // Only a function prototype, no code if (tree == NULL) continue; // A real function, generate the assembly code for it if (O_dumpAST) { dumpAST(tree, NOLABEL, 0); fprintf(stdout, "\n\n"); } genAST(tree, NOLABEL, 0); // Now free the symbols associated // with this function freeloclsyms(); } else { // Parse the global variable declaration // and skip past the trailing semicolon var_declaration(type, ctype, C_GLOBAL); semi(); } } } ================================================ FILE: 33_Unions/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c void reject_token(struct token *t); int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue); struct ASTnode *mkastleaf(int op, int type, struct symtable *sym, int intvalue); struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, struct symtable *sym, int intvalue); void dumpAST(struct ASTnode *n, int label, int parentASTop); // gen.c int genlabel(void); int genAST(struct ASTnode *n, int reg, int parentASTop); void genpreamble(); void genpostamble(); void genfreeregs(); void genglobsym(struct symtable *node); int genglobstr(char *strvalue); int genprimsize(int type); int genalign(int type, int offset, int direction); void genreturn(int reg, int id); // cg.c int cgprimsize(int type); int cgalign(int type, int offset, int direction); void cgtextseg(); void cgdataseg(); void freeall_registers(void); void cgpreamble(); void cgpostamble(); void cgfuncpreamble(struct symtable *sym); void cgfuncpostamble(struct symtable *sym); int cgloadint(int value, int type); int cgloadglob(struct symtable *sym, int op); int cgloadlocal(struct symtable *sym, int op); int cgloadglobstr(int label); int 
cgadd(int r1, int r2);
int cgsub(int r1, int r2);
int cgmul(int r1, int r2);
int cgdiv(int r1, int r2);
int cgshlconst(int r, int val);
int cgcall(struct symtable *sym, int numargs);
void cgcopyarg(int r, int argposn);
int cgstorglob(int r, struct symtable *sym);
int cgstorlocal(int r, struct symtable *sym);
void cgglobsym(struct symtable *node);
void cgglobstr(int l, char *strvalue);
int cgcompare_and_set(int ASTop, int r1, int r2);
int cgcompare_and_jump(int ASTop, int r1, int r2, int label);
void cglabel(int l);
void cgjump(int l);
int cgwiden(int r, int oldtype, int newtype);
void cgreturn(int reg, struct symtable *sym);
int cgaddress(struct symtable *sym);
int cgderef(int r, int type);
int cgstorderef(int r1, int r2, int type);
int cgnegate(int r);
int cginvert(int r);
int cglognot(int r);
int cgboolean(int r, int op, int label);
int cgand(int r1, int r2);
int cgor(int r1, int r2);
int cgxor(int r1, int r2);
int cgshl(int r1, int r2);
int cgshr(int r1, int r2);

// expr.c
struct ASTnode *binexpr(int ptp);

// stmt.c
struct ASTnode *compound_statement(void);

// misc.c
// Token-matching helpers and the fatal error reporters
void match(int t, char *what);
void semi(void);
void lbrace(void);
void rbrace(void);
void lparen(void);
void rparen(void);
void ident(void);
void fatal(char *s);
void fatals(char *s1, char *s2);
void fatald(char *s, int d);
void fatalc(char *s, int c);

// sym.c
void appendsym(struct symtable **head, struct symtable **tail,
	       struct symtable *node);
struct symtable *newsym(char *name, int type, struct symtable *ctype,
			int stype, int class, int size, int posn);
struct symtable *addglob(char *name, int type, struct symtable *ctype,
			 int stype, int size);
struct symtable *addlocl(char *name, int type, struct symtable *ctype,
			 int stype, int size);
struct symtable *addparm(char *name, int type, struct symtable *ctype,
			 int stype, int size);
struct symtable *addstruct(char *name, int type, struct symtable *ctype,
			   int stype, int size);
struct symtable *addunion(char *name, int type, struct symtable *ctype,
			  int stype, int size);
struct symtable *addmemb(char *name, int type, struct symtable *ctype,
			 int stype, int size);
struct symtable *findglob(char *s);
struct symtable *findlocl(char *s);
struct symtable *findsymbol(char *s);
struct symtable *findmember(char *s);
struct symtable *findstruct(char *s);
struct symtable *findunion(char *s);
void clear_symtable(void);
void freeloclsyms(void);

// decl.c
struct symtable *var_declaration(int type, struct symtable *ctype,
				 int class);
struct ASTnode *function_declaration(int type);
void global_declarations(void);

// types.c
// NOTE(review): parse_type() is listed in this group, but a definition
// of it appears in 33_Unions/decl.c - confirm which file owns it
int inttype(int type);
int ptrtype(int type);
int parse_type(struct symtable **ctype);
int pointer_to(int type);
int value_at(int type);
int typesize(int type, struct symtable *ctype);
struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op);

================================================
FILE: 33_Unions/defs.h
================================================
// NOTE(review): the four header names below are missing; they look
// like they were stripped when this listing was produced. Presumably
// the standard headers for FILE/fprintf, strcmp and friends - restore
// them from the original file.
#include
#include
#include
#include

// Structure and enum definitions
// Copyright (c) 2019 Warren Toomey, GPL3

enum {
  TEXTLEN = 512			// Length of identifiers in input
};

// Commands and default filenames
#define AOUT "a.out"
#ifdef __NASM__
#define ASCMD "nasm -f elf64 -o "
#define LDCMD "cc -no-pie -fno-plt -Wall -o "
#else
#define ASCMD "as -o "
#define LDCMD "cc -o "
#endif

// Token types.
// The binary operators T_ASSIGN .. T_SLASH must stay in this order:
// the OpPrec[] table in expr.c is indexed by token value, and
// binastop() in expr.c returns these token values unchanged as AST
// operations.
enum {
  T_EOF,

  // Binary operators
  T_ASSIGN, T_LOGOR, T_LOGAND,
  T_OR, T_XOR, T_AMPER,
  T_EQ, T_NE,
  T_LT, T_GT, T_LE, T_GE,
  T_LSHIFT, T_RSHIFT,
  T_PLUS, T_MINUS, T_STAR, T_SLASH,

  // Other operators
  T_INC, T_DEC, T_INVERT, T_LOGNOT,

  // Type keywords
  T_VOID, T_CHAR, T_INT, T_LONG,

  // Other keywords
  T_IF, T_ELSE, T_WHILE, T_FOR, T_RETURN,
  T_STRUCT, T_UNION,

  // Structural tokens
  T_INTLIT, T_STRLIT, T_SEMI, T_IDENT,
  T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN,
  T_LBRACKET, T_RBRACKET, T_COMMA, T_DOT,
  T_ARROW
};

// Token structure
struct token {
  int token;			// Token type, from the enum list above
  int intvalue;			// For T_INTLIT, the integer value
};

// AST node types. The first few line up
// with the related tokens
enum {
  A_ASSIGN = 1, A_LOGOR, A_LOGAND, A_OR, A_XOR, A_AND,
  A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_LSHIFT, A_RSHIFT,
  A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE,
  A_INTLIT, A_STRLIT, A_IDENT, A_GLUE,
  A_IF, A_WHILE, A_FUNCTION, A_WIDEN, A_RETURN,
  A_FUNCCALL, A_DEREF, A_ADDR, A_SCALE,
  A_PREINC, A_PREDEC, A_POSTINC, A_POSTDEC,
  A_NEGATE, A_INVERT, A_LOGNOT, A_TOBOOL
};

// Primitive types. The bottom 4 bits is an integer
// value that represents the level of indirection,
// e.g. 0= no pointer, 1= pointer, 2= pointer pointer etc.
// The base types are therefore spaced 16 apart.
enum {
  P_NONE, P_VOID = 16, P_CHAR = 32, P_INT = 48, P_LONG = 64,
  P_STRUCT=80, P_UNION=96
};

// Structural types
enum {
  S_VARIABLE, S_FUNCTION, S_ARRAY
};

// Storage classes
enum {
  C_GLOBAL = 1,			// Globally visible symbol
  C_LOCAL,			// Locally visible symbol
  C_PARAM,			// Locally visible function parameter
  C_STRUCT,			// A struct
  C_UNION,			// A union
  C_MEMBER			// Member of a struct or union
};

// Symbol table structure
struct symtable {
  char *name;			// Name of a symbol
  int type;			// Primitive type for the symbol
  struct symtable *ctype;	// If struct/union, ptr to that type
  int stype;			// Structural type for the symbol
  int class;			// Storage class for the symbol
  union {
    int size;			// Number of elements in the symbol; for a
				// struct/union type, composite_declaration()
				// stores the overall size here
    int endlabel;		// For functions, the end label
  };
  union {
    int nelems;			// For functions, # of params
    int posn;			// For locals, the negative offset
				// from the stack base pointer; for
				// struct/union members, the offset
				// within the composite type
  };
  struct symtable *next;	// Next symbol in one list
  struct symtable *member;	// First member of a function, struct,
				// union or enum
};

// Abstract Syntax Tree structure
struct ASTnode {
  int op;			// "Operation" to be performed on this tree
  int type;			// Type of any expression this tree generates
  int rvalue;			// True if the node is an rvalue
  struct ASTnode *left;		// Left, middle and right child trees
  struct ASTnode *mid;
  struct ASTnode *right;
  struct symtable *sym;		// For many AST nodes, the pointer to
  union {			// the symbol in the symbol
table int intvalue; // For A_INTLIT, the integer value int size; // For A_SCALE, the size to scale by }; }; enum { NOREG = -1, // Use NOREG when the AST generation // functions have no register to return NOLABEL = 0 // Use NOLABEL when we have no label to // pass to genAST() }; ================================================ FILE: 33_Unions/expr.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of expressions // Copyright (c) 2019 Warren Toomey, GPL3 // expression_list: // | expression // | expression ',' expression_list // ; // Parse a list of zero or more comma-separated expressions and // return an AST composed of A_GLUE nodes with the left-hand child // being the sub-tree of previous expressions (or NULL) and the right-hand // child being the next expression. Each A_GLUE node will have size field // set to the number of expressions in the tree at this point. If no // expressions are parsed, NULL is returned static struct ASTnode *expression_list(void) { struct ASTnode *tree = NULL; struct ASTnode *child = NULL; int exprcount = 0; // Loop until the final right parentheses while (Token.token != T_RPAREN) { // Parse the next expression and increment the expression count child = binexpr(0); exprcount++; // Build an A_GLUE AST node with the previous tree as the left child // and the new expression as the right child. Store the expression count. tree = mkastnode(A_GLUE, P_NONE, tree, NULL, child, NULL, exprcount); // Must have a ',' or ')' at this point switch (Token.token) { case T_COMMA: scan(&Token); break; case T_RPAREN: break; default: fatald("Unexpected token in expression list", Token.token); } } // Return the tree of expressions return (tree); } // Parse a function call and return its AST static struct ASTnode *funccall(void) { struct ASTnode *tree; struct symtable *funcptr; // Check that the identifier has been defined as a function, // then make a leaf node for it. 
if ((funcptr = findsymbol(Text)) == NULL || funcptr->stype != S_FUNCTION) { fatals("Undeclared function", Text); } // Get the '(' lparen(); // Parse the argument expression list tree = expression_list(); // XXX Check type of each argument against the function's prototype // Build the function call AST node. Store the // function's return type as this node's type. // Also record the function's symbol-id tree = mkastunary(A_FUNCCALL, funcptr->type, tree, funcptr, 0); // Get the ')' rparen(); return (tree); } // Parse the index into an array and return an AST tree for it static struct ASTnode *array_access(void) { struct ASTnode *left, *right; struct symtable *aryptr; // Check that the identifier has been defined as an array // then make a leaf node for it that points at the base if ((aryptr = findsymbol(Text)) == NULL || aryptr->stype != S_ARRAY) { fatals("Undeclared array", Text); } left = mkastleaf(A_ADDR, aryptr->type, aryptr, 0); // Get the '[' scan(&Token); // Parse the following expression right = binexpr(0); // Get the ']' match(T_RBRACKET, "]"); // Ensure that this is of int type if (!inttype(right->type)) fatal("Array index is not of integer type"); // Scale the index by the size of the element's type right = modify_type(right, left->type, A_ADD); // Return an AST tree where the array's base has the offset // added to it, and dereference the element. Still an lvalue // at this point. left = mkastnode(A_ADD, aryptr->type, left, NULL, right, NULL, 0); left = mkastunary(A_DEREF, value_at(left->type), left, NULL, 0); return (left); } // Parse the member reference of a struct or union // and return an AST tree for it. If withpointer is true, // the access is through a pointer to the member. 
static struct ASTnode *member_access(int withpointer) { struct ASTnode *left, *right; struct symtable *compvar; struct symtable *typeptr; struct symtable *m; // Check that the identifier has been declared as a // struct/union or a struct/union pointer if ((compvar = findsymbol(Text)) == NULL) fatals("Undeclared variable", Text); if (withpointer && compvar->type != pointer_to(P_STRUCT) && compvar->type != pointer_to(P_UNION)) fatals("Undeclared variable", Text); if (!withpointer && compvar->type != P_STRUCT && compvar->type != P_UNION) fatals("Undeclared variable", Text); // If a pointer to a struct/union, get the pointer's value. // Otherwise, make a leaf node that points at the base // Either way, it's an rvalue if (withpointer) { left = mkastleaf(A_IDENT, pointer_to(compvar->type), compvar, 0); } else left = mkastleaf(A_ADDR, compvar->type, compvar, 0); left->rvalue = 1; // Get the details of the composite type typeptr = compvar->ctype; // Skip the '.' or '->' token and get the member's name scan(&Token); ident(); // Find the matching member's name in the type // Die if we can't find it for (m = typeptr->member; m != NULL; m = m->next) if (!strcmp(m->name, Text)) break; if (m == NULL) fatals("No member found in struct/union: ", Text); // Build an A_INTLIT node with the offset right = mkastleaf(A_INTLIT, P_INT, NULL, m->posn); // Add the member's offset to the base of the struct/union // and dereference it. Still an lvalue at this point left = mkastnode(A_ADD, pointer_to(m->type), left, NULL, right, NULL, 0); left = mkastunary(A_DEREF, m->type, left, NULL, 0); return (left); } // Parse a postfix expression and return // an AST node representing it. The // identifier is already in Text. 
static struct ASTnode *postfix(void) { struct ASTnode *n; struct symtable *varptr; // Scan in the next token to see if we have a postfix expression scan(&Token); // Function call if (Token.token == T_LPAREN) return (funccall()); // An array reference if (Token.token == T_LBRACKET) return (array_access()); // Access into a struct or union if (Token.token == T_DOT) return (member_access(0)); if (Token.token == T_ARROW) return (member_access(1)); // A variable. Check that the variable exists. if ((varptr = findsymbol(Text)) == NULL || varptr->stype != S_VARIABLE) fatals("Unknown variable", Text); switch (Token.token) { // Post-increment: skip over the token case T_INC: scan(&Token); n = mkastleaf(A_POSTINC, varptr->type, varptr, 0); break; // Post-decrement: skip over the token case T_DEC: scan(&Token); n = mkastleaf(A_POSTDEC, varptr->type, varptr, 0); break; // Just a variable reference default: n = mkastleaf(A_IDENT, varptr->type, varptr, 0); } return (n); } // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; int id; switch (Token.token) { case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. // Make it a P_CHAR if it's within the P_CHAR range if ((Token.intvalue) >= 0 && (Token.intvalue < 256)) n = mkastleaf(A_INTLIT, P_CHAR, NULL, Token.intvalue); else n = mkastleaf(A_INTLIT, P_INT, NULL, Token.intvalue); break; case T_STRLIT: // For a STRLIT token, generate the assembly for it. // Then make a leaf AST node for it. id is the string's label. id = genglobstr(Text); n = mkastleaf(A_STRLIT, pointer_to(P_CHAR), NULL, id); break; case T_IDENT: return (postfix()); case T_LPAREN: // Beginning of a parenthesised expression, skip the '('. 
// Scan in the expression and the right parenthesis scan(&Token); n = binexpr(0); rparen(); return (n); default: fatald("Expecting a primary expression, got token", Token.token); } // Scan in the next token and return the leaf node scan(&Token); return (n); } // Convert a binary operator token into a binary AST operation. // We rely on a 1:1 mapping from token to AST operation static int binastop(int tokentype) { if (tokentype > T_EOF && tokentype <= T_SLASH) return (tokentype); fatald("Syntax error, token", tokentype); return (0); // Keep -Wall happy } // Return true if a token is right-associative, // false otherwise. static int rightassoc(int tokentype) { if (tokentype == T_ASSIGN) return (1); return (0); } // Operator precedence for each token. Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 20, 30, // T_EOF, T_ASSIGN, T_LOGOR, T_LOGAND 40, 50, 60, // T_OR, T_XOR, T_AMPER 70, 70, // T_EQ, T_NE 80, 80, 80, 80, // T_LT, T_GT, T_LE, T_GE 90, 90, // T_LSHIFT, T_RSHIFT 100, 100, // T_PLUS, T_MINUS 110, 110 // T_STAR, T_SLASH }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec; if (tokentype > T_SLASH) fatald("Token with no precedence in op_precedence:", tokentype); prec = OpPrec[tokentype]; if (prec == 0) fatald("Syntax error, token", tokentype); return (prec); } // prefix_expression: primary // | '*' prefix_expression // | '&' prefix_expression // | '-' prefix_expression // | '++' prefix_expression // | '--' prefix_expression // ; // Parse a prefix expression and return // a sub-tree representing it. 
struct ASTnode *prefix(void) { struct ASTnode *tree; switch (Token.token) { case T_AMPER: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Ensure that it's an identifier if (tree->op != A_IDENT) fatal("& operator must be followed by an identifier"); // Now change the operator to A_ADDR and the type to // a pointer to the original type tree->op = A_ADDR; tree->type = pointer_to(tree->type); break; case T_STAR: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's either another deref or an // identifier if (tree->op != A_IDENT && tree->op != A_DEREF) fatal("* operator must be followed by an identifier or *"); // Prepend an A_DEREF operation to the tree tree = mkastunary(A_DEREF, value_at(tree->type), tree, NULL, 0); break; case T_MINUS: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_NEGATE operation to the tree and // make the child an rvalue. Because chars are unsigned, // also widen this to int so that it's signed tree->rvalue = 1; tree = modify_type(tree, P_INT, 0); tree = mkastunary(A_NEGATE, tree->type, tree, NULL, 0); break; case T_INVERT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_INVERT operation to the tree and // make the child an rvalue. tree->rvalue = 1; tree = mkastunary(A_INVERT, tree->type, tree, NULL, 0); break; case T_LOGNOT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_LOGNOT operation to the tree and // make the child an rvalue. 
tree->rvalue = 1; tree = mkastunary(A_LOGNOT, tree->type, tree, NULL, 0); break; case T_INC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("++ operator must be followed by an identifier"); // Prepend an A_PREINC operation to the tree tree = mkastunary(A_PREINC, tree->type, tree, NULL, 0); break; case T_DEC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("-- operator must be followed by an identifier"); // Prepend an A_PREDEC operation to the tree tree = mkastunary(A_PREDEC, tree->type, tree, NULL, 0); break; default: tree = primary(); } return (tree); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; struct ASTnode *ltemp, *rtemp; int ASTop; int tokentype; // Get the tree on the left. // Fetch the next token at the same time. 
left = prefix(); // If we hit one of several terminating tokens, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA) { left->rvalue = 1; return (left); } // While the precedence of this token is more than that of the // previous token precedence, or it's right associative and // equal to the previous token's precedence while ((op_precedence(tokentype) > ptp) || (rightassoc(tokentype) && op_precedence(tokentype) == ptp)) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Determine the operation to be performed on the sub-trees ASTop = binastop(tokentype); if (ASTop == A_ASSIGN) { // Assignment // Make the right tree into an rvalue right->rvalue = 1; // Ensure the right's type matches the left right = modify_type(right, left->type, 0); if (right == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree. However, switch // left and right around, so that the right expression's // code will be generated before the left expression ltemp = left; left = right; right = ltemp; } else { // We are not doing an assignment, so both trees should be rvalues // Convert both trees into rvalue if they are lvalue trees left->rvalue = 1; right->rvalue = 1; // Ensure the two types are compatible by trying // to modify each tree to match the other's type. ltemp = modify_type(left, right->type, ASTop); rtemp = modify_type(right, left->type, ASTop); if (ltemp == NULL && rtemp == NULL) fatal("Incompatible types in binary expression"); if (ltemp != NULL) left = ltemp; if (rtemp != NULL) right = rtemp; } // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. left = mkastnode(binastop(tokentype), left->type, left, NULL, right, NULL, 0); // Update the details of the current token. 
// If we hit a terminating token, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA) { left->rvalue = 1; return (left); } } // Return the tree we have when the precedence // is the same or lower left->rvalue = 1; return (left); } ================================================ FILE: 33_Unions/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number int genlabel(void) { static int id = 1; return (id++); } // Generate the code for an IF statement // and an optional ELSE clause static int genIF(struct ASTnode *n) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. // When there is no ELSE clause, Lfalse _is_ // the ending label! Lfalse = genlabel(); if (n->right) Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. genAST(n->left, Lfalse, n->op); genfreeregs(); // Generate the true compound statement genAST(n->mid, NOLABEL, n->op); genfreeregs(); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOLABEL, n->op); genfreeregs(); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = genlabel(); Lend = genlabel(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. 
genAST(n->left, Lend, n->op); genfreeregs(); // Generate the compound statement for the body genAST(n->right, NOLABEL, n->op); genfreeregs(); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Generate the code to copy the arguments of a // function call to its parameters, then call the // function itself. Return the register that holds // the function's return value. static int gen_funccall(struct ASTnode *n) { struct ASTnode *gluetree = n->left; int reg; int numargs = 0; // If there is a list of arguments, walk this list // from the last argument (right-hand child) to the // first while (gluetree) { // Calculate the expression's value reg = genAST(gluetree->right, NOLABEL, gluetree->op); // Copy this into the n'th function parameter: size is 1, 2, 3, ... cgcopyarg(reg, gluetree->size); // Keep the first (highest) number of arguments if (numargs == 0) numargs = gluetree->size; genfreeregs(); gluetree = gluetree->left; } // Call the function, clean up the stack (based on numargs), // and return its result return (cgcall(n->sym, numargs)); } // Given an AST, an optional label, and the AST op // of the parent, generate assembly code recursively. // Return the register id with the tree's final value. 
int genAST(struct ASTnode *n, int label, int parentASTop) { int leftreg, rightreg; // We have some specific AST node handling at the top // so that we don't evaluate the child sub-trees immediately switch (n->op) { case A_IF: return (genIF(n)); case A_WHILE: return (genWHILE(n)); case A_FUNCCALL: return (gen_funccall(n)); case A_GLUE: // Do each child statement, and free the // registers after each child genAST(n->left, NOLABEL, n->op); genfreeregs(); genAST(n->right, NOLABEL, n->op); genfreeregs(); return (NOREG); case A_FUNCTION: // Generate the function's preamble before the code // in the child sub-tree cgfuncpreamble(n->sym); genAST(n->left, NOLABEL, n->op); cgfuncpostamble(n->sym); return (NOREG); } // General AST node handling below // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, NOLABEL, n->op); if (n->right) rightreg = genAST(n->right, NOLABEL, n->op); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdiv(leftreg, rightreg)); case A_AND: return (cgand(leftreg, rightreg)); case A_OR: return (cgor(leftreg, rightreg)); case A_XOR: return (cgxor(leftreg, rightreg)); case A_LSHIFT: return (cgshl(leftreg, rightreg)); case A_RSHIFT: return (cgshr(leftreg, rightreg)); case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, compare registers // and set one to 1 or 0 based on the comparison. 
if (parentASTop == A_IF || parentASTop == A_WHILE) return (cgcompare_and_jump(n->op, leftreg, rightreg, label)); else return (cgcompare_and_set(n->op, leftreg, rightreg)); case A_INTLIT: return (cgloadint(n->intvalue, n->type)); case A_STRLIT: return (cgloadglobstr(n->intvalue)); case A_IDENT: // Load our value if we are an rvalue // or we are being dereferenced if (n->rvalue || parentASTop == A_DEREF) { if (n->sym->class == C_GLOBAL) { return (cgloadglob(n->sym, n->op)); } else { return (cgloadlocal(n->sym, n->op)); } } else return (NOREG); case A_ASSIGN: // Are we assigning to an identifier or through a pointer? switch (n->right->op) { case A_IDENT: if (n->right->sym->class == C_GLOBAL) return (cgstorglob(leftreg, n->right->sym)); else return (cgstorlocal(leftreg, n->right->sym)); case A_DEREF: return (cgstorderef(leftreg, rightreg, n->right->type)); default: fatald("Can't A_ASSIGN in genAST(), op", n->op); } case A_WIDEN: // Widen the child's type to the parent's type return (cgwiden(leftreg, n->left->type, n->type)); case A_RETURN: cgreturn(leftreg, Functionid); return (NOREG); case A_ADDR: return (cgaddress(n->sym)); case A_DEREF: // If we are an rvalue, dereference to get the value we point at, // otherwise leave it for A_ASSIGN to store through the pointer if (n->rvalue) return (cgderef(leftreg, n->left->type)); else return (leftreg); case A_SCALE: // Small optimisation: use shift if the // scale value is a known power of two switch (n->size) { case 2: return (cgshlconst(leftreg, 1)); case 4: return (cgshlconst(leftreg, 2)); case 8: return (cgshlconst(leftreg, 3)); default: // Load a register with the size and // multiply the leftreg by this size rightreg = cgloadint(n->size, P_INT); return (cgmul(leftreg, rightreg)); } case A_POSTINC: case A_POSTDEC: // Load and decrement the variable's value into a register // and post increment/decrement it if (n->sym->class == C_GLOBAL) return (cgloadglob(n->sym, n->op)); else return (cgloadlocal(n->sym, n->op)); case 
A_PREINC: case A_PREDEC: // Load and decrement the variable's value into a register // and pre increment/decrement it if (n->left->sym->class == C_GLOBAL) return (cgloadglob(n->left->sym, n->op)); else return (cgloadlocal(n->left->sym, n->op)); case A_NEGATE: return (cgnegate(leftreg)); case A_INVERT: return (cginvert(leftreg)); case A_LOGNOT: return (cglognot(leftreg)); case A_TOBOOL: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, set the register // to 0 or 1 based on it's zeroeness or non-zeroeness return (cgboolean(leftreg, parentASTop, label)); default: fatald("Unknown AST operator", n->op); } return (NOREG); // Keep -Wall happy } void genpreamble() { cgpreamble(); } void genpostamble() { cgpostamble(); } void genfreeregs() { freeall_registers(); } void genglobsym(struct symtable *node) { cgglobsym(node); } int genglobstr(char *strvalue) { int l = genlabel(); cgglobstr(l, strvalue); return (l); } int genprimsize(int type) { return (cgprimsize(type)); } int genalign(int type, int offset, int direction) { return (cgalign(type, offset, direction)); } ================================================ FILE: 33_Unions/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Given a string with a '.' and at least a 1-character suffix // after the '.', change the suffix to be the given character. // Return the new string or NULL if the original string could // not be modified char *alter_suffix(char *str, char suffix) { char *posn; char *newstr; // Clone the string if ((newstr = strdup(str)) == NULL) return (NULL); // Find the '.' 
if ((posn = strrchr(newstr, '.')) == NULL) return (NULL); // Ensure there is a suffix posn++; if (*posn == '\0') return (NULL); // Change the suffix and NUL-terminate the string *posn++ = suffix; *posn = '\0'; return (newstr); } // Given an input filename, compile that file // down to assembly code. Return the new file's name static char *do_compile(char *filename) { Outfilename = alter_suffix(filename, 's'); if (Outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .c on the end\n", filename); exit(1); } // Open up the input file if ((Infile = fopen(filename, "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", filename, strerror(errno)); exit(1); } // Create the output file if ((Outfile = fopen(Outfilename, "w")) == NULL) { fprintf(stderr, "Unable to create %s: %s\n", Outfilename, strerror(errno)); exit(1); } Line = 1; // Reset the scanner Putback = '\n'; clear_symtable(); // Clear the symbol table if (O_verbose) printf("compiling %s\n", filename); scan(&Token); // Get the first token from the input genpreamble(); // Output the preamble global_declarations(); // Parse the global declarations genpostamble(); // Output the postamble fclose(Outfile); // Close the output file return (Outfilename); } // Given an input filename, assemble that file // down to object code. Return the object filename char *do_assemble(char *filename) { char cmd[TEXTLEN]; int err; char *outfilename = alter_suffix(filename, 'o'); if (outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .s on the end\n", filename); exit(1); } // Build the assembly command and run it snprintf(cmd, TEXTLEN, "%s %s %s", ASCMD, outfilename, filename); if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Assembly of %s failed\n", filename); exit(1); } return (outfilename); } // Given a list of object files and an output filename, // link all of the object filenames together. 
void do_link(char *outfilename, char *objlist[]) { int cnt, size = TEXTLEN; char cmd[TEXTLEN], *cptr; int err; // Start with the linker command and the output file cptr = cmd; cnt = snprintf(cptr, size, "%s %s ", LDCMD, outfilename); cptr += cnt; size -= cnt; // Now append each object file while (*objlist != NULL) { cnt = snprintf(cptr, size, "%s ", *objlist); cptr += cnt; size -= cnt; objlist++; } if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Linking failed\n"); exit(1); } } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s [-vcST] [-o outfile] file [file ...]\n", prog); fprintf(stderr, " -v give verbose output of the compilation stages\n"); fprintf(stderr, " -c generate object files but don't link them\n"); fprintf(stderr, " -S generate assembly files but don't link them\n"); fprintf(stderr, " -T dump the AST trees for each input file\n"); fprintf(stderr, " -o outfile, produce the outfile executable file\n"); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. 
enum { MAXOBJ = 100 }; int main(int argc, char *argv[]) { char *outfilename = AOUT; char *asmfile, *objfile; char *objlist[MAXOBJ]; int i, objcnt = 0; // Initialise our variables O_dumpAST = 0; O_keepasm = 0; O_assemble = 0; O_verbose = 0; O_dolink = 1; // Scan for command-line options for (i = 1; i < argc; i++) { // No leading '-', stop scanning for options if (*argv[i] != '-') break; // For each option in this argument for (int j = 1; (*argv[i] == '-') && argv[i][j]; j++) { switch (argv[i][j]) { case 'o': outfilename = argv[++i]; // Save & skip to next argument break; case 'T': O_dumpAST = 1; break; case 'c': O_assemble = 1; O_keepasm = 0; O_dolink = 0; break; case 'S': O_keepasm = 1; O_assemble = 0; O_dolink = 0; break; case 'v': O_verbose = 1; break; default: usage(argv[0]); } } } // Ensure we have at lease one input file argument if (i >= argc) usage(argv[0]); // Work on each input file in turn while (i < argc) { asmfile = do_compile(argv[i]); // Compile the source file if (O_dolink || O_assemble) { objfile = do_assemble(asmfile); // Assemble it to object forma if (objcnt == (MAXOBJ - 2)) { fprintf(stderr, "Too many object files for the compiler to handle\n"); exit(1); } objlist[objcnt++] = objfile; // Add the object file's name objlist[objcnt] = NULL; // to the list of object files } if (!O_keepasm) // Remove the assembly file if unlink(asmfile); // we don't need to keep it i++; } // Now link all the object files together if (O_dolink) { do_link(outfilename, objlist); // If we don't need to keep the object // files, then remove them if (!O_assemble) { for (i = 0; objlist[i] != NULL; i++) unlink(objlist[i]); } } return (0); } ================================================ FILE: 33_Unions/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" #include // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current token is t, // and fetch the next token. 
Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d\n", s, Line); fclose(Outfile); unlink(Outfilename); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d\n", s1, s2, Line); fclose(Outfile); unlink(Outfilename); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d\n", s, d, Line); fclose(Outfile); unlink(Outfilename); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d\n", s, c, Line); fclose(Outfile); unlink(Outfilename); exit(1); } ================================================ FILE: 33_Unions/scan.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { char *p; p = strchr(s, c); return (p ? p - s : -1); } // Get the next character from the input file. 
static int next(void) { int c; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return (c); } c = fgetc(Infile); // Read from input file if ('\n' == c) Line++; // Increment line count return (c); } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. static int skip(void) { int c; c = next(); while (' ' == c || '\t' == c || '\n' == c || '\r' == c || '\f' == c) { c = next(); } return (c); } // Return the next character from a character // or string literal static int scanch(void) { int c; // Get the next input character and interpret // metacharacters that start with a backslash c = next(); if (c == '\\') { switch (c = next()) { case 'a': return '\a'; case 'b': return '\b'; case 'f': return '\f'; case 'n': return '\n'; case 'r': return '\r'; case 't': return '\t'; case 'v': return '\v'; case '\\': return '\\'; case '"': return '"'; case '\'': return '\''; default: fatalc("unknown escape sequence", c); } } return (c); // Just an ordinary old character! } // Scan and return an integer literal // value from the input file. static int scanint(int c) { int k, val = 0; // Convert each character into an int value while ((k = chrpos("0123456789", c)) >= 0) { val = val * 10 + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan in a string literal from the input file, // and store it in buf[]. Return the length of // the string. 
static int scanstr(char *buf) { int i, c; // Loop while we have enough buffer space for (i = 0; i < TEXTLEN - 1; i++) { // Get the next char and append to buf // Return when we hit the ending double quote if ((c = scanch()) == '"') { buf[i] = 0; return (i); } buf[i] = c; } // Ran out of buf[] space fatal("String literal too long"); return (0); } // Scan an identifier from the input file and // store it in buf[]. Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { fatal("Identifier too long"); } else if (i < lim - 1) { buf[i++] = c; } c = next(); } // We hit a non-valid character, put it back. // NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. 
static int keyword(char *s) { switch (*s) { case 'c': if (!strcmp(s, "char")) return (T_CHAR); break; case 'e': if (!strcmp(s, "else")) return (T_ELSE); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'l': if (!strcmp(s, "long")) return (T_LONG); break; case 'r': if (!strcmp(s, "return")) return (T_RETURN); break; case 's': if (!strcmp(s, "struct")) return (T_STRUCT); break; case 'u': if (!strcmp(s, "union")) return (T_UNION); break; case 'v': if (!strcmp(s, "void")) return (T_VOID); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; } return (0); } // A pointer to a rejected token static struct token *Rejtoken = NULL; // Reject the token that we just scanned void reject_token(struct token *t) { if (Rejtoken != NULL) fatal("Can't reject token twice"); Rejtoken = t; } // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. 
int scan(struct token *t) { int c, tokentype; // If we have any rejected token, return it if (Rejtoken != NULL) { t = Rejtoken; Rejtoken = NULL; return (1); } // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': if ((c = next()) == '+') { t->token = T_INC; } else { putback(c); t->token = T_PLUS; } break; case '-': if ((c = next()) == '-') { t->token = T_DEC; } else if (c == '>') { t->token = T_ARROW; } else { putback(c); t->token = T_MINUS; } break; case '*': t->token = T_STAR; break; case '/': t->token = T_SLASH; break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case '[': t->token = T_LBRACKET; break; case ']': t->token = T_RBRACKET; break; case '~': t->token = T_INVERT; break; case '^': t->token = T_XOR; break; case ',': t->token = T_COMMA; break; case '.': t->token = T_DOT; break; case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { putback(c); t->token = T_LOGNOT; } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else if (c == '<') { t->token = T_LSHIFT; } else { putback(c); t->token = T_LT; } break; case '>': if ((c = next()) == '=') { t->token = T_GE; } else if (c == '>') { t->token = T_RSHIFT; } else { putback(c); t->token = T_GT; } break; case '&': if ((c = next()) == '&') { t->token = T_LOGAND; } else { putback(c); t->token = T_AMPER; } break; case '|': if ((c = next()) == '|') { t->token = T_LOGOR; } else { putback(c); t->token = T_OR; } break; case '\'': // If it's a quote, scan in the // literal character value and // the trailing quote t->intvalue = scanch(); t->token = T_INTLIT; if (next() != '\'') fatal("Expected '\\'' at end of char literal"); break; case '"': // Scan in a literal 
string scanstr(Text); t->token = T_STRLIT; break; default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if ((tokentype = keyword(Text)) != 0) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token return (1); } ================================================ FILE: 33_Unions/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // Prototypes static struct ASTnode *single_statement(void); // compound_statement: // empty, i.e. no statement // | statement // | statement statements // ; // // statement: declaration // | expression_statement // | function_call // | if_statement // | while_statement // | for_statement // | return_statement // ; // if_statement: if_head // | if_head 'else' compound_statement // ; // // if_head: 'if' '(' true_false_expression ')' compound_statement ; // // Parse an IF statement including any // optional ELSE clause and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. 
condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); rparen(); // Get the AST for the compound statement trueAST = compound_statement(); // If we have an 'else', skip it // and get the AST for the compound statement if (Token.token == T_ELSE) { scan(&Token); falseAST = compound_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, P_NONE, condAST, trueAST, falseAST, NULL, 0)); } // while_statement: 'while' '(' true_false_expression ')' compound_statement ; // // Parse a WHILE statement and return its AST static struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); rparen(); // Get the AST for the compound statement bodyAST = compound_statement(); // Build and return the AST for this statement return (mkastnode(A_WHILE, P_NONE, condAST, NULL, bodyAST, NULL, 0)); } // for_statement: 'for' '(' preop_statement ';' // true_false_expression ';' // postop_statement ')' compound_statement ; // // preop_statement: statement (for now) // postop_statement: statement (for now) // // Parse a FOR statement and return its AST static struct ASTnode *for_statement(void) { struct ASTnode *condAST, *bodyAST; struct ASTnode *preopAST, *postopAST; struct ASTnode *tree; // Ensure we have 'for' '(' match(T_FOR, "for"); lparen(); // Get the pre_op statement and the ';' preopAST = single_statement(); semi(); // Get the condition and the ';'. // Force a non-comparison to be boolean // the tree's operation is a comparison. 
condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); semi(); // Get the post_op statement and the ')' postopAST = single_statement(); rparen(); // Get the compound statement which is the body bodyAST = compound_statement(); // For now, all four sub-trees have to be non-NULL. // Later on, we'll change the semantics for when some are missing // Glue the compound statement and the postop tree tree = mkastnode(A_GLUE, P_NONE, bodyAST, NULL, postopAST, NULL, 0); // Make a WHILE loop with the condition and this new body tree = mkastnode(A_WHILE, P_NONE, condAST, NULL, tree, NULL, 0); // And glue the preop tree to the A_WHILE tree return (mkastnode(A_GLUE, P_NONE, preopAST, NULL, tree, NULL, 0)); } // return_statement: 'return' '(' expression ')' ; // // Parse a return statement and return its AST static struct ASTnode *return_statement(void) { struct ASTnode *tree; // Can't return a value if function returns P_VOID if (Functionid->type == P_VOID) fatal("Can't return from a void function"); // Ensure we have 'return' '(' match(T_RETURN, "return"); lparen(); // Parse the following expression tree = binexpr(0); // Ensure this is compatible with the function's type tree = modify_type(tree, Functionid->type, 0); if (tree == NULL) fatal("Incompatible type to return"); // Add on the A_RETURN node tree = mkastunary(A_RETURN, P_NONE, tree, NULL, 0); // Get the ')' rparen(); return (tree); } // Parse a single statement and return its AST static struct ASTnode *single_statement(void) { int type; struct symtable *ctype; switch (Token.token) { case T_CHAR: case T_INT: case T_LONG: // The beginning of a variable declaration. // Parse the type and get the identifier. 
// Then parse the rest of the declaration // and skip over the semicolon type = parse_type(&ctype); ident(); var_declaration(type, ctype, C_LOCAL); semi(); return (NULL); // No AST generated here case T_IF: return (if_statement()); case T_WHILE: return (while_statement()); case T_FOR: return (for_statement()); case T_RETURN: return (return_statement()); default: // For now, see if this is an expression. // This catches assignment statements. return (binexpr(0)); } return (NULL); // Keep -Wall happy } // Parse a compound statement // and return its AST struct ASTnode *compound_statement(void) { struct ASTnode *left = NULL; struct ASTnode *tree; // Require a left curly bracket lbrace(); while (1) { // Parse a single statement tree = single_statement(); // Some statements must be followed by a semicolon if (tree != NULL && (tree->op == A_ASSIGN || tree->op == A_RETURN || tree->op == A_FUNCCALL)) semi(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, P_NONE, left, NULL, tree, NULL, 0); } // When we hit a right curly bracket, // skip past it and return the AST if (Token.token == T_RBRACE) { rbrace(); return (left); } } } ================================================ FILE: 33_Unions/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 // Append a node to the singly-linked list pointed to by head or tail void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node) { // Check for valid pointers if (head == NULL || tail == NULL || node == NULL) fatal("Either head, tail or node is NULL in appendsym"); // Append to the list if (*tail) { (*tail)->next = node; *tail = node; } else *head = *tail = node; node->next = NULL; } // Create a symbol node to be added to a symbol table list. 
// Set up the node's: // + type: char, int etc. // + ctype: composite type pointer for struct/union // + structural type: var, function, array etc. // + size: number of elements, or endlabel: end label for a function // + posn: Position information for local symbols // Return a pointer to the new node struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int size, int posn) { // Get a new node struct symtable *node = (struct symtable *) malloc(sizeof(struct symtable)); if (node == NULL) fatal("Unable to malloc a symbol table node in newsym"); // Fill in the values node->name = strdup(name); node->type = type; node->ctype = ctype; node->stype = stype; node->class = class; node->size = size; node->posn = posn; node->next = NULL; node->member = NULL; // Generate any global space if (class == C_GLOBAL) genglobsym(node); return (node); } // Add a symbol to the global symbol list struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_GLOBAL, size, 0); appendsym(&Globhead, &Globtail, sym); return (sym); } // Add a symbol to the local symbol list struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_LOCAL, size, 0); appendsym(&Loclhead, &Locltail, sym); return (sym); } // Add a symbol to the parameter list struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_PARAM, size, 0); appendsym(&Parmhead, &Parmtail, sym); return (sym); } // Add a symbol to the temporary member list struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_MEMBER, size, 0); appendsym(&Membhead, &Membtail, sym); return (sym); } // Add a struct to the struct list struct symtable 
*addstruct(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_STRUCT, size, 0); appendsym(&Structhead, &Structtail, sym); return (sym); } // Add a struct to the union list struct symtable *addunion(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_UNION, size, 0); appendsym(&Unionhead, &Uniontail, sym); return (sym); } // Search for a symbol in a specific list. // Return a pointer to the found node or NULL if not found. static struct symtable *findsyminlist(char *s, struct symtable *list) { for (; list != NULL; list = list->next) if ((list->name != NULL) && !strcmp(s, list->name)) return (list); return (NULL); } // Determine if the symbol s is in the global symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findglob(char *s) { return (findsyminlist(s, Globhead)); } // Determine if the symbol s is in the local symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findlocl(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member); if (node) return (node); } return (findsyminlist(s, Loclhead)); } // Determine if the symbol s is in the symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findsymbol(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member); if (node) return (node); } // Otherwise, try the local and global symbol lists node = findsyminlist(s, Loclhead); if (node) return (node); return (findsyminlist(s, Globhead)); } // Find a member in the member list // Return a pointer to the found node or NULL if not found. 
struct symtable *findmember(char *s) { return (findsyminlist(s, Membhead)); } // Find a struct in the struct list // Return a pointer to the found node or NULL if not found. struct symtable *findstruct(char *s) { return (findsyminlist(s, Structhead)); } // Find a struct in the union list // Return a pointer to the found node or NULL if not found. struct symtable *findunion(char *s) { return (findsyminlist(s, Unionhead)); } // Reset the contents of the symbol table void clear_symtable(void) { Globhead = Globtail = NULL; Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Membhead = Membtail = NULL; Structhead = Structtail = NULL; Unionhead = Uniontail = NULL; } // Clear all the entries in the local symbol table void freeloclsyms(void) { Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Functionid = NULL; } ================================================ FILE: 33_Unions/tests/err.input31.c ================================================ Expecting a primary expression, got token:15 on line 5 ================================================ FILE: 33_Unions/tests/err.input32.c ================================================ Unknown variable:cow on line 4 ================================================ FILE: 33_Unions/tests/err.input33.c ================================================ Incompatible type to return on line 4 ================================================ FILE: 33_Unions/tests/err.input34.c ================================================ For now, declaration of non-global arrays is not implemented on line 4 ================================================ FILE: 33_Unions/tests/err.input35.c ================================================ Duplicate local variable declaration:a on line 4 ================================================ FILE: 33_Unions/tests/err.input36.c ================================================ Type doesn't match prototype for parameter:2 on line 4 ================================================ FILE: 
33_Unions/tests/err.input37.c ================================================ Unexpected token in parameter list:15 on line 3 ================================================ FILE: 33_Unions/tests/err.input38.c ================================================ Type doesn't match prototype for parameter:2 on line 4 ================================================ FILE: 33_Unions/tests/err.input39.c ================================================ No statements in function with non-void type on line 4 ================================================ FILE: 33_Unions/tests/err.input40.c ================================================ No return for function with non-void type on line 4 ================================================ FILE: 33_Unions/tests/err.input41.c ================================================ Can't return from a void function on line 3 ================================================ FILE: 33_Unions/tests/err.input42.c ================================================ Undeclared function:fred on line 3 ================================================ FILE: 33_Unions/tests/err.input43.c ================================================ Undeclared array:b on line 3 ================================================ FILE: 33_Unions/tests/err.input44.c ================================================ Unknown variable:z on line 3 ================================================ FILE: 33_Unions/tests/err.input45.c ================================================ & operator must be followed by an identifier on line 3 ================================================ FILE: 33_Unions/tests/err.input46.c ================================================ * operator must be followed by an identifier or * on line 3 ================================================ FILE: 33_Unions/tests/err.input47.c ================================================ ++ operator must be followed by an identifier on line 3 ================================================ FILE: 
33_Unions/tests/err.input48.c ================================================ -- operator must be followed by an identifier on line 3 ================================================ FILE: 33_Unions/tests/err.input49.c ================================================ Incompatible expression in assignment on line 6 ================================================ FILE: 33_Unions/tests/err.input50.c ================================================ Incompatible types in binary expression on line 6 ================================================ FILE: 33_Unions/tests/err.input51.c ================================================ Expected '\'' at end of char literal on line 4 ================================================ FILE: 33_Unions/tests/err.input52.c ================================================ Unrecognised character:$ on line 5 ================================================ FILE: 33_Unions/tests/err.input56.c ================================================ unknown struct/union type:var1 on line 2 ================================================ FILE: 33_Unions/tests/err.input57.c ================================================ previously defined struct/union:fred on line 2 ================================================ FILE: 33_Unions/tests/err.input59.c ================================================ Undeclared variable:y on line 3 ================================================ FILE: 33_Unions/tests/err.input60.c ================================================ Undeclared variable:x on line 3 ================================================ FILE: 33_Unions/tests/err.input61.c ================================================ Undeclared variable:x on line 3 ================================================ FILE: 33_Unions/tests/input01.c ================================================ int printf(char *fmt); void main() { printf("%d\n", 12 * 3); printf("%d\n", 18 - 2 * 4); printf("%d\n", 1 + 2 + 9 - 5/2 + 3*5); } 
================================================ FILE: 33_Unions/tests/input02.c ================================================ int printf(char *fmt); void main() { int fred; int jim; fred= 5; jim= 12; printf("%d\n", fred + jim); } ================================================ FILE: 33_Unions/tests/input03.c ================================================ int printf(char *fmt); void main() { int x; x= 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); } ================================================ FILE: 33_Unions/tests/input04.c ================================================ int printf(char *fmt); void main() { int x; x= 7 < 9; printf("%d\n", x); x= 7 <= 9; printf("%d\n", x); x= 7 != 9; printf("%d\n", x); x= 7 == 7; printf("%d\n", x); x= 7 >= 7; printf("%d\n", x); x= 7 <= 7; printf("%d\n", x); x= 9 > 7; printf("%d\n", x); x= 9 >= 7; printf("%d\n", x); x= 9 != 7; printf("%d\n", x); } ================================================ FILE: 33_Unions/tests/input05.c ================================================ int printf(char *fmt); void main() { int i; int j; i=6; j=12; if (i < j) { printf("%d\n", i); } else { printf("%d\n", j); } } ================================================ FILE: 33_Unions/tests/input06.c ================================================ int printf(char *fmt); void main() { int i; i=1; while (i <= 10) { printf("%d\n", i); i= i + 1; } } ================================================ FILE: 33_Unions/tests/input07.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 33_Unions/tests/input08.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 
33_Unions/tests/input09.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { printf("%d\n", 2 * b - a); } } ================================================ FILE: 33_Unions/tests/input10.c ================================================ int printf(char *fmt); void main() { int i; char j; j= 20; printf("%d\n", j); i= 10; printf("%d\n", i); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 2; j= j + 1) { printf("%d\n", j); } } ================================================ FILE: 33_Unions/tests/input11.c ================================================ int printf(char *fmt); int main() { int i; char j; long k; i= 10; printf("%d\n", i); j= 20; printf("%d\n", j); k= 30; printf("%d\n", k); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 4; j= j + 1) { printf("%d\n", j); } for (k= 1; k <= 5; k= k + 1) { printf("%d\n", k); } return(i); printf("%d\n", 12345); return(3); } ================================================ FILE: 33_Unions/tests/input12.c ================================================ int printf(char *fmt); int fred() { return(5); } void main() { int x; x= fred(2); printf("%d\n", x); } ================================================ FILE: 33_Unions/tests/input13.c ================================================ int printf(char *fmt); int fred() { return(56); } void main() { int dummy; int result; dummy= printf("%d\n", 23); result= fred(10); dummy= printf("%d\n", result); } ================================================ FILE: 33_Unions/tests/input14.c ================================================ int printf(char *fmt); int fred() { return(20); } int main() { int result; printf("%d\n", 10); result= fred(15); printf("%d\n", result); printf("%d\n", fred(15)+10); return(0); } ================================================ FILE: 33_Unions/tests/input15.c 
================================================ int printf(char *fmt); int main() { char a; char *b; char c; int d; int *e; int f; a= 18; printf("%d\n", a); b= &a; c= *b; printf("%d\n", c); d= 12; printf("%d\n", d); e= &d; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 33_Unions/tests/input16.c ================================================ int printf(char *fmt); int c; int d; int *e; int f; int main() { c= 12; d=18; printf("%d\n", c); e= &c + 1; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 33_Unions/tests/input17.c ================================================ int printf(char *fmt); int main() { char a; char *b; int d; int *e; b= &a; *b= 19; printf("%d\n", a); e= &d; *e= 12; printf("%d\n", d); return(0); } ================================================ FILE: 33_Unions/tests/input18.c ================================================ int printf(char *fmt); int main() { int a; int b; a= b= 34; printf("%d\n", a); printf("%d\n", b); return(0); } ================================================ FILE: 33_Unions/tests/input18a.c ================================================ int printf(char *fmt); int a; int *b; char c; char *d; int main() { b= &a; *b= 15; printf("%d\n", a); d= &c; *d= 16; printf("%d\n", c); return(0); } ================================================ FILE: 33_Unions/tests/input19.c ================================================ int printf(char *fmt); int a; int b; int c; int d; int e; int main() { a= 2; b= 4; c= 3; d= 2; e= (a+b) * (c+d); printf("%d\n", e); return(0); } ================================================ FILE: 33_Unions/tests/input20.c ================================================ int printf(char *fmt); int a; int b[25]; int main() { b[3]= 12; a= b[3]; printf("%d\n", a); return(0); } ================================================ FILE: 33_Unions/tests/input21.c ================================================ int printf(char 
*fmt); char c; char *str; int main() { c= '\n'; printf("%d\n", c); for (str= "Hello world\n"; *str != 0; str= str + 1) { printf("%c", *str); } return(0); } ================================================ FILE: 33_Unions/tests/input22.c ================================================ int printf(char *fmt); char a; char b; char c; int d; int e; int f; long g; long h; long i; int main() { b= 5; c= 7; a= b + c++; printf("%d\n", a); e= 5; f= 7; d= e + f++; printf("%d\n", d); h= 5; i= 7; g= h + i++; printf("%d\n", g); a= b-- + c; printf("%d\n", a); d= e-- + f; printf("%d\n", d); g= h-- + i; printf("%d\n", g); a= ++b + c; printf("%d\n", a); d= ++e + f; printf("%d\n", d); g= ++h + i; printf("%d\n", g); a= b * --c; printf("%d\n", a); d= e * --f; printf("%d\n", d); g= h * --i; printf("%d\n", g); return(0); } ================================================ FILE: 33_Unions/tests/input23.c ================================================ int printf(char *fmt); char *str; int x; int main() { x= -23; printf("%d\n", x); printf("%d\n", -10 * -10); x= 1; x= ~x; printf("%d\n", x); x= 2 > 5; printf("%d\n", x); x= !x; printf("%d\n", x); x= !x; printf("%d\n", x); x= 13; if (x) { printf("%d\n", 13); } x= 0; if (!x) { printf("%d\n", 14); } for (str= "Hello world\n"; *str; str++) { printf("%c", *str); } return(0); } ================================================ FILE: 33_Unions/tests/input24.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { a= 42; b= 19; printf("%d\n", a & b); printf("%d\n", a | b); printf("%d\n", a ^ b); printf("%d\n", 1 << 3); printf("%d\n", 63 >> 3); return(0); } ================================================ FILE: 33_Unions/tests/input25.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { char z; int y; int x; x= 10; y= 20; z= 30; printf("%d\n", x); printf("%d\n", y); printf("%d\n", z); a= 5; b= 15; c= 25; printf("%d\n", a); printf("%d\n", b); 
printf("%d\n", c); return(0); } ================================================ FILE: 33_Unions/tests/input26.c ================================================ int printf(char *fmt); int main(int a, char b, long c, int d, int e, int f, int g, int h) { int i; int j; int k; a= 13; printf("%d\n", a); b= 23; printf("%d\n", b); c= 34; printf("%d\n", c); d= 44; printf("%d\n", d); e= 54; printf("%d\n", e); f= 64; printf("%d\n", f); g= 74; printf("%d\n", g); h= 84; printf("%d\n", h); i= 94; printf("%d\n", i); j= 95; printf("%d\n", j); k= 96; printf("%d\n", k); return(0); } ================================================ FILE: 33_Unions/tests/input27.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int param5(int a, int b, int c, int d, int e) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param2(int a, int b) { int c; int d; int e; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param0() { int a; int b; int c; int d; int e; a= 1; b= 2; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int main() { param8(1,2,3,4,5,6,7,8); param5(1,2,3,4,5); param2(1,2); param0(); return(0); } ================================================ FILE: 33_Unions/tests/input28.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { 
return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 33_Unions/tests/input29.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h); int fred(int a, int b, int c); int main(); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 33_Unions/tests/input30.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input30.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 33_Unions/tests/input31.c ================================================ int printf(char *fmt); int main() { int x; x= 2 + + 3 - * / ; } ================================================ FILE: 33_Unions/tests/input32.c ================================================ int printf(char *fmt); int main() { pizza cow llama sausage; } ================================================ FILE: 33_Unions/tests/input33.c ================================================ int printf(char *fmt); int main() { char *z; return(z); } ================================================ FILE: 33_Unions/tests/input34.c ================================================ int printf(char *fmt); int 
main() { int a[12]; return(0); } ================================================ FILE: 33_Unions/tests/input35.c ================================================ int printf(char *fmt); int fred(int a, int b) { int a; return(a); } ================================================ FILE: 33_Unions/tests/input36.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c); ================================================ FILE: 33_Unions/tests/input37.c ================================================ int printf(char *fmt); int fred(int a, char b +, int z); ================================================ FILE: 33_Unions/tests/input38.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c, int g); ================================================ FILE: 33_Unions/tests/input39.c ================================================ int printf(char *fmt); int main() { int a; } ================================================ FILE: 33_Unions/tests/input40.c ================================================ int printf(char *fmt); int main() { int a; a= 5; } ================================================ FILE: 33_Unions/tests/input41.c ================================================ int printf(char *fmt); void fred() { return(5); } ================================================ FILE: 33_Unions/tests/input42.c ================================================ int printf(char *fmt); int main() { fred(5); } ================================================ FILE: 33_Unions/tests/input43.c ================================================ int printf(char *fmt); int main() { int a; a= b[4]; } ================================================ FILE: 33_Unions/tests/input44.c ================================================ int printf(char *fmt); int main() { int a; a= z; } ================================================ FILE: 
33_Unions/tests/input45.c ================================================ int printf(char *fmt); int main() { int a; a= &5; } ================================================ FILE: 33_Unions/tests/input46.c ================================================ int printf(char *fmt); int main() { int a; a= *5; } ================================================ FILE: 33_Unions/tests/input47.c ================================================ int printf(char *fmt); int main() { int a; a= ++5; } ================================================ FILE: 33_Unions/tests/input48.c ================================================ int printf(char *fmt); int main() { int a; a= --5; } ================================================ FILE: 33_Unions/tests/input49.c ================================================ int printf(char *fmt); int main() { int x; char y; y= x; } ================================================ FILE: 33_Unions/tests/input50.c ================================================ int printf(char *fmt); int main() { char *a; char *b; a= a + b; } ================================================ FILE: 33_Unions/tests/input51.c ================================================ int printf(char *fmt); int main() { char a; a= 'fred'; } ================================================ FILE: 33_Unions/tests/input52.c ================================================ int printf(char *fmt); int main() { int a; a= $5.00; } ================================================ FILE: 33_Unions/tests/input53.c ================================================ int printf(char *fmt); int main() { printf("Hello world, %d\n", 23); return(0); } ================================================ FILE: 33_Unions/tests/input54.c ================================================ int printf(char *fmt); int main() { int i; for (i=0; i < 20; i++) { printf("Hello world, %d\n", i); } return(0); } ================================================ FILE: 33_Unions/tests/input55.c 
================================================ int printf(char *fmt); int main(int argc, char **argv) { int i; char *argument; printf("Hello world\n"); for (i=0; i < argc; i++) { argument= *argv; argv= argv + 1; printf("Argument %d is %s\n", i, argument); } return(0); } ================================================ FILE: 33_Unions/tests/input56.c ================================================ struct foo { int x; }; struct mary var1; ================================================ FILE: 33_Unions/tests/input57.c ================================================ struct fred { int x; } ; struct fred { char y; } ; ================================================ FILE: 33_Unions/tests/input58.c ================================================ int printf(char *fmt); struct fred { int x; char y; long z; }; struct fred var2; struct fred *varptr; int main() { long result; var2.x= 12; printf("%d\n", var2.x); var2.y= 'c'; printf("%d\n", var2.y); var2.z= 4005; printf("%d\n", var2.z); result= var2.x + var2.y + var2.z; printf("%d\n", result); varptr= &var2; result= varptr->x + varptr->y + varptr->z; printf("%d\n", result); return(0); } ================================================ FILE: 33_Unions/tests/input59.c ================================================ int main() { int x; x= y.foo; } ================================================ FILE: 33_Unions/tests/input60.c ================================================ int main() { int x; x= x.foo; } ================================================ FILE: 33_Unions/tests/input61.c ================================================ int main() { int x; x= x->foo; } ================================================ FILE: 33_Unions/tests/input62.c ================================================ int printf(char *fmt); union fred { char w; int x; int y; long z; }; union fred var1; union fred *varptr; int main() { var1.x= 65; printf("%d\n", var1.x); var1.x= 66; printf("%d\n", var1.x); printf("%d\n", var1.y); printf("The next two 
depend on the endian of the platform\n"); printf("%d\n", var1.w); printf("%d\n", var1.z); varptr= &var1; varptr->x= 67; printf("%d\n", varptr->x); printf("%d\n", varptr->y); return(0); } ================================================ FILE: 33_Unions/tests/mktests ================================================ #!/bin/sh # Make the output files for each test # Build our compiler if needed if [ ! -f ../cwj ] then (cd ..; make) fi for i in input*c do if [ ! -f "out.$i" -a ! -f "err.$i" ] then ../cwj -o out $i 2> "err.$i" # If the err file is empty if [ ! -s "err.$i" ] then rm -f "err.$i" cc -o out $i ../lib/printint.c ./out > "out.$i" fi fi rm -f out out.s done ================================================ FILE: 33_Unions/tests/out.input01.c ================================================ 36 10 25 ================================================ FILE: 33_Unions/tests/out.input02.c ================================================ 17 ================================================ FILE: 33_Unions/tests/out.input03.c ================================================ 1 2 3 4 5 ================================================ FILE: 33_Unions/tests/out.input04.c ================================================ 1 1 1 1 1 1 1 1 1 ================================================ FILE: 33_Unions/tests/out.input05.c ================================================ 6 ================================================ FILE: 33_Unions/tests/out.input06.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 33_Unions/tests/out.input07.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 33_Unions/tests/out.input08.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 33_Unions/tests/out.input09.c ================================================ 1 2 3 4 5 6 7 8 9 
10 ================================================ FILE: 33_Unions/tests/out.input10.c ================================================ 20 10 1 2 3 4 5 253 254 255 0 1 ================================================ FILE: 33_Unions/tests/out.input11.c ================================================ 10 20 30 1 2 3 4 5 253 254 255 0 1 2 3 1 2 3 4 5 ================================================ FILE: 33_Unions/tests/out.input12.c ================================================ 5 ================================================ FILE: 33_Unions/tests/out.input13.c ================================================ 23 56 ================================================ FILE: 33_Unions/tests/out.input14.c ================================================ 10 20 30 ================================================ FILE: 33_Unions/tests/out.input15.c ================================================ 18 18 12 12 ================================================ FILE: 33_Unions/tests/out.input16.c ================================================ 12 18 ================================================ FILE: 33_Unions/tests/out.input17.c ================================================ 19 12 ================================================ FILE: 33_Unions/tests/out.input18.c ================================================ 34 34 ================================================ FILE: 33_Unions/tests/out.input18a.c ================================================ 15 16 ================================================ FILE: 33_Unions/tests/out.input19.c ================================================ 30 ================================================ FILE: 33_Unions/tests/out.input20.c ================================================ 12 ================================================ FILE: 33_Unions/tests/out.input21.c ================================================ 10 Hello world ================================================ FILE: 33_Unions/tests/out.input22.c 
================================================ 12 12 12 13 13 13 13 13 13 35 35 35 ================================================ FILE: 33_Unions/tests/out.input23.c ================================================ -23 100 -2 0 1 0 13 14 Hello world ================================================ FILE: 33_Unions/tests/out.input24.c ================================================ 2 59 57 8 7 ================================================ FILE: 33_Unions/tests/out.input25.c ================================================ 10 20 30 5 15 25 ================================================ FILE: 33_Unions/tests/out.input26.c ================================================ 13 23 34 44 54 64 74 84 94 95 96 ================================================ FILE: 33_Unions/tests/out.input27.c ================================================ 1 2 3 4 5 6 7 8 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 ================================================ FILE: 33_Unions/tests/out.input28.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 33_Unions/tests/out.input29.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 33_Unions/tests/out.input30.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input30.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 33_Unions/tests/out.input53.c ================================================ Hello world, 23 ================================================ FILE: 33_Unions/tests/out.input54.c ================================================ Hello world, 
0 Hello world, 1 Hello world, 2 Hello world, 3 Hello world, 4 Hello world, 5 Hello world, 6 Hello world, 7 Hello world, 8 Hello world, 9 Hello world, 10 Hello world, 11 Hello world, 12 Hello world, 13 Hello world, 14 Hello world, 15 Hello world, 16 Hello world, 17 Hello world, 18 Hello world, 19 ================================================ FILE: 33_Unions/tests/out.input55.c ================================================ Hello world Argument 0 is ./out ================================================ FILE: 33_Unions/tests/out.input58.c ================================================ 12 99 4005 4116 4116 ================================================ FILE: 33_Unions/tests/out.input62.c ================================================ 65 66 66 The next two depend on the endian of the platform 66 66 67 67 ================================================ FILE: 33_Unions/tests/runtests ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! -f ../cwj ] then (cd ..; make) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" # Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../cwj -o out $i ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. 
Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../cwj $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.s "trial.$i" done ================================================ FILE: 33_Unions/tests/runtestsn ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! -f ../compn ] then (cd ..; make) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" # Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../compn -o out $i ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../compn $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" 
-eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.o out.s "trial.$i" done ================================================ FILE: 33_Unions/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->type = type; n->left = left; n->mid = mid; n->right = right; n->sym = sym; n->intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, struct symtable *sym, int intvalue) { return (mkastnode(op, type, NULL, NULL, NULL, sym, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, struct symtable *sym, int intvalue) { return (mkastnode(op, type, left, NULL, NULL, sym, intvalue)); } // Generate and return a new label number // just for AST dumping purposes static int gendumplabel(void) { static int id = 1; return (id++); } // Given an AST tree, print it out and follow the // traversal of the tree that genAST() follows void dumpAST(struct ASTnode *n, int label, int level) { int Lfalse, Lstart, Lend; switch (n->op) { case A_IF: Lfalse = gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_IF"); if (n->right) { Lend = gendumplabel(); fprintf(stdout, ", end L%d", Lend); } fprintf(stdout, "\n"); dumpAST(n->left, Lfalse, level + 2); dumpAST(n->mid, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); return; case A_WHILE: Lstart = gendumplabel(); for 
(int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_WHILE, start L%d\n", Lstart); Lend = gendumplabel(); dumpAST(n->left, Lend, level + 2); dumpAST(n->right, NOLABEL, level + 2); return; } // Reset level to -2 for A_GLUE if (n->op == A_GLUE) level = -2; // General AST node handling if (n->left) dumpAST(n->left, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); for (int i = 0; i < level; i++) fprintf(stdout, " "); switch (n->op) { case A_GLUE: fprintf(stdout, "\n\n"); return; case A_FUNCTION: fprintf(stdout, "A_FUNCTION %s\n", n->sym->name); return; case A_ADD: fprintf(stdout, "A_ADD\n"); return; case A_SUBTRACT: fprintf(stdout, "A_SUBTRACT\n"); return; case A_MULTIPLY: fprintf(stdout, "A_MULTIPLY\n"); return; case A_DIVIDE: fprintf(stdout, "A_DIVIDE\n"); return; case A_EQ: fprintf(stdout, "A_EQ\n"); return; case A_NE: fprintf(stdout, "A_NE\n"); return; case A_LT: fprintf(stdout, "A_LE\n"); return; case A_GT: fprintf(stdout, "A_GT\n"); return; case A_LE: fprintf(stdout, "A_LE\n"); return; case A_GE: fprintf(stdout, "A_GE\n"); return; case A_INTLIT: fprintf(stdout, "A_INTLIT %d\n", n->intvalue); return; case A_STRLIT: fprintf(stdout, "A_STRLIT rval label L%d\n", n->intvalue); return; case A_IDENT: if (n->rvalue) fprintf(stdout, "A_IDENT rval %s\n", n->sym->name); else fprintf(stdout, "A_IDENT %s\n", n->sym->name); return; case A_ASSIGN: fprintf(stdout, "A_ASSIGN\n"); return; case A_WIDEN: fprintf(stdout, "A_WIDEN\n"); return; case A_RETURN: fprintf(stdout, "A_RETURN\n"); return; case A_FUNCCALL: fprintf(stdout, "A_FUNCCALL %s\n", n->sym->name); return; case A_ADDR: fprintf(stdout, "A_ADDR %s\n", n->sym->name); return; case A_DEREF: if (n->rvalue) fprintf(stdout, "A_DEREF rval\n"); else fprintf(stdout, "A_DEREF\n"); return; case A_SCALE: fprintf(stdout, "A_SCALE %d\n", n->size); return; case A_PREINC: fprintf(stdout, "A_PREINC %s\n", n->sym->name); return; case A_PREDEC: fprintf(stdout, "A_PREDEC %s\n", n->sym->name); return; 
case A_POSTINC:
      fprintf(stdout, "A_POSTINC\n");
      return;
    case A_POSTDEC:
      fprintf(stdout, "A_POSTDEC\n");
      return;
    case A_NEGATE:
      fprintf(stdout, "A_NEGATE\n");
      return;
    default:
      // Any operator without a case above is reported as an error
      fatald("Unknown dumpAST operator", n->op);
  }
}

================================================
FILE: 33_Unions/types.c
================================================
#include "defs.h"
#include "data.h"
#include "decl.h"

// Types and type handling
// Copyright (c) 2019 Warren Toomey, GPL3

// Return true if a type is an int type
// of any size, false otherwise.
// A type counts as an int type when its low four bits are all zero.
int inttype(int type) {
  return ((type & 0xf) == 0);
}

// Return true if a type is of pointer type,
// i.e. when any of its low four bits is set
int ptrtype(int type) {
  return ((type & 0xf) != 0);
}

// Given a primitive type, return
// the type which is a pointer to it.
// Adding one to the type adds a level of indirection; a type
// whose low four bits are already 0xf cannot be extended.
int pointer_to(int type) {
  if ((type & 0xf) == 0xf)
    fatald("Unrecognised in pointer_to: type", type);
  return (type + 1);
}

// Given a primitive pointer type, return
// the type which it points to.
// A type whose low four bits are zero is not a pointer,
// so there is nothing to dereference.
int value_at(int type) {
  if ((type & 0xf) == 0x0)
    fatald("Unrecognised in value_at: type", type);
  return (type - 1);
}

// Given a type and a composite type pointer, return
// the size of this type in bytes.
// ctype is only read for P_STRUCT and P_UNION; every
// other type is sized by genprimsize().
int typesize(int type, struct symtable *ctype) {
  if (type == P_STRUCT || type == P_UNION)
    return (ctype->size);
  return (genprimsize(type));
}

// Given an AST tree and a type which we want it to become,
// possibly modify the tree by widening or scaling so that
// it is compatible with this type. Return the original tree
// if no changes occurred, a modified tree, or NULL if the
// tree is not compatible with the given type.
// If this will be part of a binary operation, the AST op is not zero.
struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op) { int ltype; int lsize, rsize; ltype = tree->type; // XXX No idea on these yet if (ltype == P_STRUCT || ltype == P_UNION) fatal("Don't know how to do this yet"); if (rtype == P_STRUCT || rtype == P_UNION) fatal("Don't know how to do this yet"); // Compare scalar int types if (inttype(ltype) && inttype(rtype)) { // Both types same, nothing to do if (ltype == rtype) return (tree); // Get the sizes for each type lsize = typesize(ltype, NULL); // XXX Fix soon rsize = typesize(rtype, NULL); // XXX Fix soon // Tree's size is too big if (lsize > rsize) return (NULL); // Widen to the right if (rsize > lsize) return (mkastunary(A_WIDEN, rtype, tree, NULL, 0)); } // For pointers on the left if (ptrtype(ltype)) { // OK is same type on right and not doing a binary op if (op == 0 && ltype == rtype) return (tree); } // We can scale only on A_ADD or A_SUBTRACT operation if (op == A_ADD || op == A_SUBTRACT) { // Left is int type, right is pointer type and the size // of the original type is >1: scale the left if (inttype(ltype) && ptrtype(rtype)) { rsize = genprimsize(value_at(rtype)); if (rsize > 1) return (mkastunary(A_SCALE, rtype, tree, NULL, rsize)); else return (tree); // Size 1, no need to scale } } // If we get here, the types are not compatible return (NULL); } ================================================ FILE: 34_Enums_and_Typedefs/Makefile ================================================ HSRCS= data.h decl.h defs.h SRCS= cg.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c ARMSRCS= cg_arm.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c cwj: $(SRCS) $(HSRCS) cc -o cwj -g -Wall $(SRCS) compn: $(SRCN) $(HSRCS) cc -D__NASM__ -o compn -g -Wall $(SRCN) cwjarm: $(ARMSRCS) $(HSRCS) cc -o cwjarm -g -Wall $(ARMSRCS) cp cwjarm cwj clean: rm -f cwj cwjarm compn *.o *.s out test: 
cwj tests/runtests (cd tests; chmod +x runtests; ./runtests) armtest: cwjarm tests/runtests (cd tests; chmod +x runtests; ./runtests) testn: compn tests/runtestsn (cd tests; chmod +x runtestsn; ./runtestsn) ================================================ FILE: 34_Enums_and_Typedefs/Readme.md ================================================ # Part 34: Enums and Typedefs I decided to implement both enums and typedefs in this part of our compiler writing journey, as each one was quite small. We already covered the design aspects of enums back in part 30. To revise briefly, enums are just named integer literals. There were two issues to deal with: + we cannot redefine an enum type name, and + we cannot redefine a named enum value As examples of the above: ```c enum fred { x, y, z }; enum fred { a, b }; // fred is redefined enum jane { x, y }; // x and y are redefined ``` As you can see above, a list of enumerated values only has identifier names and no types: it means we can't reuse our existing variable declaration parsing code. We will have to write our own parsing code here. ## New Keywords and Tokens I've added two new keywords, 'enum' and 'typedef' to the grammar along with two tokens, T_ENUM and T_TYPEDEF. Browse through the code in `scan.c` for details. ## Symbol Table Lists for Enums and Typedefs We need to record the details of the declared enums and typedefs, so there are two new symbol table lists in `data.h`: ```c extern_ struct symtable *Enumhead, *Enumtail; // List of enum types and values extern_ struct symtable *Typehead, *Typetail; // List of typedefs ``` and in `sym.c` there are associated functions to add entries to each list and to search each list for specific names. Nodes in these lists are marked as being one of (from `defs.h`): ```c C_ENUMTYPE, // A named enumeration type C_ENUMVAL, // A named enumeration value C_TYPEDEF // A named typedef ``` OK, so two lists but three node classes, what's going on? 
It turns out that enum values (like `x` and `y` in the examples at the top) don't belong to any specific enum type. Also, enum type names (like `fred` and `jane` in the examples at the top) don't really do anything, but we do have to prevent redefinitions of them. I'm using the one enum symbol table list to hold both the C_ENUMTYPE and the C_ENUMVAL nodes in the same lists. Using the examples near the top, we would have: ``` fred x y z C_ENUMTYPE -> C_ENUMVAL -> C_ENUMVAL -> C_ENUMVAL 0 1 2 ``` This also means that, when we are searching the enum symbol table list, we need the ability to search for C_ENUMTYPEs or for C_ENUMVALs. ## Parsing Enum Declarations Before I give the code to do this, let's just look at some examples of what we need to parse: ```c enum fred { a, b, c }; // a is 0, b is 1, c is 2 enum foo { d=2, e=6, f }; // d is 2, e is 6, f is 7 enum bar { g=2, h=6, i } var1; // var1 is really an int enum { j, k, l } var2; // var2 is really an int ``` Firstly, where does enum parsing get attached to our existing parsing code? As with structs and unions, in the code that parses types (in `decl.c`): ```c // Parse the current token and return // a primitive type enum value and a pointer // to any composite type. // Also scan in the next token int parse_type(struct symtable **ctype) { int type; switch (Token.token) { // For the following, if we have a ';' after the // parsing then there is no type, so return -1 ... case T_ENUM: type = P_INT; // Enums are really ints enum_declaration(); if (Token.token == T_SEMI) type = -1; break; } ... } ``` I've changed the return value of `parse_type()` to help identify when it was a declaration of a struct, union, enum or typedef and not an actual type (followed by an identifier). Let's now look at the `enum_declaration()` code in stages. ```c // Parse an enum declaration static void enum_declaration(void) { struct symtable *etype = NULL; char *name; int intval = 0; // Skip the enum keyword. 
scan(&Token); // If there's a following enum type name, get a // pointer to any existing enum type node. if (Token.token == T_IDENT) { etype = findenumtype(Text); name = strdup(Text); // As it gets tromped soon scan(&Token); } ``` We only have one global variable, `Text`, to hold a scanned-in word, and we have to be able to parse `enum foo var1`. If we scan in the token after the `foo`, we will lose the `foo` string. So we need to `strdup()` this. ```c // If the next token isn't a LBRACE, check // that we have an enum type name, then return if (Token.token != T_LBRACE) { if (etype == NULL) fatals("undeclared enum type:", name); return; } ``` We've hit a declaration like `enum foo var1` and not `enum foo { ...`. Therefore `foo` must already exist as a known enum type. We can return with no value, as the type of every enum is P_INT, which is set in the code that calls `enum_declaration()`. ```c // We do have an LBRACE. Skip it scan(&Token); // If we have an enum type name, ensure that it // hasn't been declared before. if (etype != NULL) fatals("enum type redeclared:", etype->name); else // Build an enum type node for this identifier etype = addenum(name, C_ENUMTYPE, 0); ``` Now we are parsing something like `enum foo { ...`, so we must check that `foo` has not already been declared as an enum type. ```c // Loop to get all the enum values while (1) { // Ensure we have an identifier // Copy it in case there's an int literal coming up ident(); name = strdup(Text); // Ensure this enum value hasn't been declared before etype = findenumval(name); if (etype != NULL) fatals("enum value redeclared:", Text); ``` Again, we `strdup()` the enum value identifier. We also check that this enum value identifier hasn't already been defined. 
```c // If the next token is an '=', skip it and // get the following int literal if (Token.token == T_ASSIGN) { scan(&Token); if (Token.token != T_INTLIT) fatal("Expected int literal after '='"); intval = Token.intvalue; scan(&Token); } ``` This is why we had to `strdup()` as the scanning of an integer literal will walk over the `Text` global variable. We scan in the '=' and integer literal tokens here and set the `intval` variable to be the integer literal value. ```c // Build an enum value node for this identifier. // Increment the value for the next enum identifier. etype = addenum(name, C_ENUMVAL, intval++); // Bail out on a right curly bracket, else get a comma if (Token.token == T_RBRACE) break; comma(); } scan(&Token); // Skip over the right curly bracket } ``` We now have the enum value's name and its value in `intval`. We can add this to the enum symbol table list with `addenum()`. We also increment `intval` to be ready for the next enum value identifier. ## Accessing Enum Names We now have the code to parse the list of enum value names and store their integer literal values in the symbol table. How and when do we search for them and use them? We have to do this at the point where we could be using a variable name in an expression. If we find an enum name, we convert it into an A_INTLIT AST node with a specific value. The location to do this is `postfix()` in `expr.c` ```c // Parse a postfix expression and return // an AST node representing it. The // identifier is already in Text. static struct ASTnode *postfix(void) { struct symtable *enumptr; // If the identifier matches an enum value, // return an A_INTLIT node if ((enumptr = findenumval(Text)) != NULL) { scan(&Token); return (mkastleaf(A_INTLIT, P_INT, NULL, enumptr->posn)); } ... } ``` ## Testing the Functionality All done! 
There are several test programs that confirm we are spotting redefined
enum types and names, but the `tests/input63.c` code demonstrates enums working:

```c
int printf(char *fmt);

enum fred { apple=1, banana, carrot, pear=10, peach, mango, papaya };
enum jane { aple=1, bnana, crrot, par=10, pech, mago, paaya };

enum fred var1;
enum jane var2;
enum fred var3;

int main() {
  var1= carrot + pear + mango;
  printf("%d\n", var1);
  return(0);
}
```

which adds `carrot + pear + mango` (i.e. 3+10+12) and prints out 25.

## Typedefs

That's enums done. Now we look at typedefs. The basic grammar of a
typedef declaration is:

```
typedef_declaration: 'typedef' existing_type identifier
                   | 'typedef' existing_type identifier variable_name
                   ;
```

Thus, once we parse the `typedef` keyword, we can parse the following
type and build a C_TYPEDEF symbol node with the name. We can store the
`type` and `ctype` of the actual type in this symbol node.

The parsing code is nice and simple. We hook into `parse_type()` in `decl.c`:

```c
    case T_TYPEDEF:
      type = typedef_declaration(ctype);
      if (Token.token == T_SEMI)
        type = -1;
      break;
```

Here is the `typedef_declaration()` code. Note that it returns the
actual `type` and `ctype` in case the declaration is followed by
a variable name.

```c
// Parse a typedef declaration and return the type
// and ctype that it represents
int typedef_declaration(struct symtable **ctype) {
  int type;

  // Skip the typedef keyword.
  scan(&Token);

  // Get the actual type following the keyword
  type = parse_type(ctype);

  // See if the typedef identifier already exists
  if (findtypedef(Text) != NULL)
    fatals("redefinition of typedef", Text);

  // It doesn't exist so add it to the typedef list
  addtypedef(Text, type, *ctype, 0, 0);
  scan(&Token);
  return (type);
}
```

The code should be straight-forward but note the recursive call back
to `parse_type()`: we already have the code to parse the existing type
that follows the `typedef` keyword.
## Searching and Using Typedef Definitions

We now have a list of typedef definitions in a symbol table list. How
do we use these definitions? We effectively have added new type keywords
to our grammar, e.g.

```c
FILE    *zin;
int32_t cost;
```

It just means that when we are parsing a type and we hit a keyword that
we don't recognise, we can look that word up in the typedef list. So,
we get to modify `parse_type()` again:

```c
    case T_IDENT:
      type = type_of_typedef(Text, ctype);
      break;
```

Both `type` and `ctype` are returned by `type_of_typedef()`:

```c
// Given a typedef name, return the type it represents
int type_of_typedef(char *name, struct symtable **ctype) {
  struct symtable *t;

  // Look up the typedef in the list
  t = findtypedef(name);
  if (t == NULL)
    fatals("unknown type", name);
  scan(&Token);
  *ctype = t->ctype;
  return (t->type);
}
```

Note that, as yet, I haven't written the code to be "recursive". For example,
the current code won't parse this example:

```c
typedef int FOO;
typedef FOO BAR;
BAR x;            // x is of type BAR -> type FOO -> type int
```

But it does compile `tests/input68.c`:

```c
int printf(char *fmt);

typedef int FOO;
FOO var1;

struct bar { int x; int y} ;
typedef struct bar BAR;
BAR var2;

int main() {
  var1= 5; printf("%d\n", var1);
  var2.x= 7; var2.y= 10; printf("%d\n", var2.x + var2.y);
  return(0);
}
```

with both `int` redefined as type `FOO` and a struct redefined as type `BAR`.

## Conclusion and What's Next

In this part of our compiler writing journey, we added support for both
enums and typedefs. Both were relatively easy to do, even though we did
have to write a fair bit of parsing code for the enums. I guess I was
spoiled when I could reuse the same parsing code for variable lists,
struct member lists and union member lists!

The code to add typedefs was really nice and simple. I do need to add
the code to follow typedefs of typedefs: that also should be simple.
In the next part of our compiler writing journey, I think it's time we bring in the C pre-processor. Now that we have structs, unions, enums and typedefs, we should be able to write a bunch of *header files* with definitions of some of the common Unix/Linux library functions. Then we will be able to include them in our source files and write some really useful programs. [Next step](../35_Preprocessor/Readme.md) ================================================ FILE: 34_Enums_and_Typedefs/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\t.text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\t.data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch(type) { case P_CHAR: return (offset); case P_INT: case P_LONG: break; default: fatald("Bad type in calc_aligned_offset:", type); } // Here we have an int or a long. 
Align it on a 4-byte offset // I put the generic code here so it can be reused elsewhere. alignment= 4; offset = (offset + direction * (alignment-1)) & ~(alignment-1); return (offset); } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. static int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. // We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "%r10", "%r11", "%r12", "%r13", "%r9", "%r8", "%rcx", "%rdx", "%rsi", "%rdi" }; static char *breglist[] = { "%r10b", "%r11b", "%r12b", "%r13b", "%r9b", "%r8b", "%cl", "%dl", "%sil", "%dil" }; static char *dreglist[] = { "%r10d", "%r11d", "%r12d", "%r13d", "%r9d", "%r8d", "%ecx", "%edx", "%esi", "%edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) {
  // A register that is already marked free must not be freed again
  if (freereg[reg] != 0)
    fatald("Error trying to free register", reg);
  freereg[reg] = 1;
}

// Print out the assembly preamble.
// Nothing is written to the output here: the only
// action is to mark all the registers as free.
void cgpreamble() {
  freeall_registers();
}

// Nothing to do
void cgpostamble() {
}

// Print out a function preamble: the function's label and stack
// frame set-up. Also assign a stack position to each parameter
// and local variable, and lower the stack pointer to make room.
void cgfuncpreamble(struct symtable *sym) {
  char *name = sym->name;
  struct symtable *parm, *locvar;
  int cnt;
  int paramOffset = 16;		// Any pushed params start at this stack offset
  int paramReg = FIRSTPARAMREG;	// Index to the first param register in above reg lists

  // Output in the text segment, reset local offset
  cgtextseg();
  localOffset = 0;

  // Output the function start, save the old %rbp
  // and make %rbp point at the new stack frame
  fprintf(Outfile,
	  "\t.globl\t%s\n"
	  "\t.type\t%s, @function\n"
	  "%s:\n" "\tpushq\t%%rbp\n"
	  "\tmovq\t%%rsp, %%rbp\n", name, name, name);

  // Copy any in-register parameters to the stack, up to six of them
  // The remaining parameters are already on the stack
  for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) {
    if (cnt > 6) {
      // Seventh and later parameters: record where the caller
      // left them, 8 bytes apart above the frame pointer
      parm->posn = paramOffset;
      paramOffset += 8;
    } else {
      // First six parameters: give each one a local slot and store
      // its register there. paramReg walks down the register lists
      // from FIRSTPARAMREG.
      parm->posn = newlocaloffset(parm->type);
      cgstorlocal(paramReg--, parm);
    }
  }

  // For the remainder, if they are a parameter then they are
  // already on the stack. If only a local, make a stack position.
  for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) {
    locvar->posn = newlocaloffset(locvar->type);
  }

  // Align the stack pointer to be a multiple of 16
  // less than its previous value
  stackOffset = (localOffset + 15) & ~15;
  fprintf(Outfile, "\taddq\t$%d,%%rsp\n", -stackOffset);
}

// Print out a function postamble: the function's end label,
// then undo the stack adjustment made by the preamble,
// restore %rbp and return
void cgfuncpostamble(struct symtable *sym) {
  cglabel(sym->endlabel);
  fprintf(Outfile, "\taddq\t$%d,%%rsp\n", stackOffset);
  fputs("\tpopq	%rbp\n" "\tret\n", Outfile);
}

// Load an integer literal value into a register.
// Return the number of the register.
// For x86-64, we don't need to worry about the type.
int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadglob(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name); } else // Print out the code to initialise it switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovzbq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovslq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. 
int cgloadlocal(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->posn); fprintf(Outfile, "\tmovq\t%d(%%rbp), %s\n", sym->posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->posn); } else switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->posn); fprintf(Outfile, "\tmovzbq\t%d(%%rbp), %s\n", sym->posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->posn); fprintf(Outfile, "\tmovslq\t%d(%%rbp), %s\n", sym->posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->posn); break; default: fatald("Bad type in cgloadlocal:", sym->type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tleaq\tL%d(%%rip), %s\n", label, reglist[r]); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, 
"\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tandq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\torq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txorq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshlq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshrq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tnegq\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnotq\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); return (r); } // Convert an integer value to a boolean value. 
Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s@PLT\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\taddq\t$%d, %%rsp\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpushq\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmovq\t%s, %s\n", reglist[r], reglist[FIRSTPARAMREG - argposn + 1]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsalq\t$%d, %s\n", val, reglist[r]); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %s(%%rip)\n", reglist[r], sym->name); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %s(%%rip)\n", breglist[r], sym->name); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %s(%%rip)\n", dreglist[r], sym->name); break; default: fatald("Bad type in cgstorglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %d(%%rbp)\n", reglist[r], sym->posn); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %d(%%rbp)\n", breglist[r], sym->posn); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %d(%%rbp)\n", dreglist[r], sym->posn); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the type size = typesize(node->type, node->ctype); // Generate the global identity and the label cgdataseg(); fprintf(Outfile, "\t.globl\t%s\n", node->name); fprintf(Outfile, "%s:", node->name); // Generate the space for this type switch (size) { case 1: fprintf(Outfile, "\t.byte\t0\n"); break; case 4: fprintf(Outfile, "\t.long\t0\n"); break; case 8: fprintf(Outfile, "\t.quad\t0\n"); 
break; default: for (int i=0; i < size; i++) fprintf(Outfile, "\t.byte\t0\n"); } } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\t.byte\t%d\n", *cptr); } fprintf(Outfile, "\t.byte\t0\n"); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzbl\t%s, %%eax\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %%eax\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } cgjump(sym->endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL) fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", sym->name, reglist[r]); else fprintf(Outfile, "\tleaq\t%d(%%rbp), %s\n", sym->posn, reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzbq\t(%s), %s\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovslq\t(%s), %s\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { // Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmovb\t%s, (%s)\n", breglist[r1], reglist[r2]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 34_Enums_and_Typedefs/cg_arm.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for ARMv6 on Raspberry Pi // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. static int freereg[4]; static char *reglist[4] = { "r4", "r5", "r6", "r7" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. 
static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // We have to store large integer literal values in memory. // Keep a list of them which will be output in the postamble #define MAXINTS 1024 int Intlist[MAXINTS]; static int Intslot = 0; // Determine the offset of a large integer // literal from the .L3 label. If the integer // isn't in the list, add it. static void set_int_offset(int val) { int offset = -1; // See if it is already there for (int i = 0; i < Intslot; i++) { if (Intlist[i] == val) { offset = 4 * i; break; } } // Not in the list, so add it if (offset == -1) { offset = 4 * Intslot; if (Intslot == MAXINTS) fatal("Out of int slots in set_int_offset()"); Intlist[Intslot++] = val; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L3+%d\n", offset); } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Print out the assembly postamble void cgpostamble() { // Print out the global variables fprintf(Outfile, ".L2:\n"); for (int i = 0; i < Globs; i++) { if (Symtable[i].stype == S_VARIABLE) fprintf(Outfile, "\t.word %s\n", Symtable[i].name); } // Print out the integer literals fprintf(Outfile, ".L3:\n"); for (int i = 0; i < Intslot; i++) { fprintf(Outfile, "\t.word %d\n", Intlist[i]); } } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, \%%function\n" "%s:\n" "\tpush\t{fp, lr}\n" "\tadd\tfp, sp, #4\n" "\tsub\tsp, sp, #8\n" "\tstr\tr0, [fp, #-8]\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { 
cglabel(Symtable[id].endlabel); fputs("\tsub\tsp, fp, #4\n" "\tpop\t{fp, pc}\n" "\t.align\t2\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); // If the literal value is small, do it with one instruction if (value <= 1000) fprintf(Outfile, "\tmov\t%s, #%d\n", reglist[r], value); else { set_int_offset(value); fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); } return (r); } // Determine the offset of a variable from the .L2 // label. Yes, this is inefficient code. static void set_var_offset(int id) { int offset = 0; // Walk the symbol table up to id. // Find S_VARIABLEs and add on 4 until // we get to our variable for (int i = 0; i < id; i++) { if (Symtable[i].stype == S_VARIABLE) offset += 4; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L2+%d\n", offset); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tldrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s, %s\n", reglist[r1], reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register 
with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\tmul\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { // To do a divide: r0 holds the dividend, r1 holds the divisor. // The quotient is in r0. fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r1]); fprintf(Outfile, "\tmov\tr1, %s\n", reglist[r2]); fprintf(Outfile, "\tbl\t__aeabi_idiv\n"); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r1]); free_register(r2); return (r1); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]); fprintf(Outfile, "\tbl\t%s\n", Symtable[id].name); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r]); return (r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tlsl\t%s, %s, #%d\n", reglist[r], reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tstr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgstorglob:", Symtable[id].type); } return (r); } // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { if (ptrtype(type)) return (4); switch (type) { case P_CHAR: return (1); case P_INT: case P_LONG: return (4); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Symtable[id].type); fprintf(Outfile, "\t.data\n" "\t.globl\t%s\n", Symtable[id].name); switch (typesize) { case 1: fprintf(Outfile, "%s:\t.byte\t0\n", Symtable[id].name); break; case 4: fprintf(Outfile, "%s:\t.long\t0\n", Symtable[id].name); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "moveq", "movne", "movlt", "movgt", "movle", "movge" }; // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "movne", "moveq", "movge", "movle", "movgt", "movlt" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #1\n", cmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #0\n", invcmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\tuxtb\t%s, %s\n", reglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tb\tL%d\n", l); } // List of inverted branch instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *brlist[] = { "bne", "beq", "bge", "ble", "bgt", "blt" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", brlist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[reg]); cgjump(Symtable[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. Return a new register int cgaddress(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); fprintf(Outfile, "\tmov\t%s, r3\n", reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tldrb\t%s, [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [%s]\n", reglist[r1], reglist[r2]); break; case P_INT: case P_LONG: fprintf(Outfile, "\tstr\t%s, [%s]\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 34_Enums_and_Typedefs/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { 
no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\tsection .text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\tsection .data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch(type) { case P_CHAR: return (offset); case P_INT: case P_LONG: break; default: fatald("Bad type in calc_aligned_offset:", type); } // Here we have an int or a long. Align it on a 4-byte offset // I put the generic code here so it can be reused elsewhere. alignment= 4; offset = (offset + direction * (alignment-1)) & ~(alignment-1); return (offset); } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. 
// We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "r10", "r11", "r12", "r13", "r9", "r8", "rcx", "rdx", "rsi", "rdi" }; static char *breglist[] = { "r10b", "r11b", "r12b", "r13b", "r9b", "r8b", "cl", "dl", "sil", "dil" }; static char *dreglist[] = { "r10d", "r11d", "r12d", "r13d", "r9d", "r8d", "ecx", "edx", "esi", "edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\textern\tprintint\n", Outfile); fputs("\textern\tprintchar\n", Outfile); fputs("\textern\topen\n", Outfile); fputs("\textern\tclose\n", Outfile); fputs("\textern\tread\n", Outfile); fputs("\textern\twrite\n", Outfile); fputs("\textern\tprintf\n", Outfile); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(struct symtable *sym) { char *name = sym->name; struct symtable *parm, *locvar; int cnt; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the rsp and rbp fprintf(Outfile, "\tglobal\t%s\n" "%s:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n", name, name); // Copy any in-register parameters to the stack, up to six of them // The remaining parameters are already on the stack for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) { if (cnt > 6) { parm->posn = paramOffset; paramOffset += 8; } else { parm->posn = newlocaloffset(parm->type); cgstorlocal(paramReg--, parm); } } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. 
for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) { locvar->posn = newlocaloffset(locvar->type); } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\tadd\trsp, %d\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->endlabel); fprintf(Outfile, "\tadd\trsp, %d\n", stackOffset); fputs("\tpop rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadglob(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); } else // Print out the code to initialise it switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, 
"\tdec\tdword [%s]\n", sym->name); fprintf(Outfile, "\tmovsx\t%s, word [%s]\n", dreglist[r], sym->name); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword [%s]\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadlocal(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->posn); fprintf(Outfile, "\tmov\t%s, [rbp+%d]\n", reglist[r], sym->posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->posn); } else switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->posn); fprintf(Outfile, "\tmovzx\t%s, byte [rbp+%d]\n", reglist[r], sym->posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", sym->posn); fprintf(Outfile, "\tmovsx\t%s, word [rbp+%d]\n", reglist[r], sym->posn); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, 
"\tdec\tdword\t[rbp+%d]\n", sym->posn); break; default: fatald("Bad type in cgloadlocal:", sym->type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, L%d\n", reglist[r], label); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tand\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\tor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshl\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshr\t%s, 
cl\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tneg\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnot\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r], breglist[r]); return (r); } // Convert an integer value to a boolean value. Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, byte %s\n", reglist[r], breglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\tadd\trsp, %d\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmov\t%s, rax\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpush\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmov\t%s, %s\n", reglist[FIRSTPARAMREG - argposn + 1], reglist[r]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsal\t%s, %d\n", reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, dreglist[r]); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\tqword\t[rbp+%d], %s\n", sym->posn, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\tbyte\t[rbp+%d], %s\n", sym->posn, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\tdword\t[rbp+%d], %s\n", sym->posn, dreglist[r]); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the type size = typesize(node->type, node->ctype); // Generate the global identity and the label cgdataseg(); fprintf(Outfile, "\tsection\t.data\n" "\tglobal\t%s\n", node->name); fprintf(Outfile, "%s:", node->name); // Generate the space for this type // original version for (int i = 0; i < node->size; i++) { switch(size) { case 1: fprintf(Outfile, "\tdb\t0\n"); break; case 4: fprintf(Outfile, 
"\tdd\t0\n"); break; case 8: fprintf(Outfile, "\tdq\t0\n"); break; default: for (int i = 0; i < size; i++) fprintf(Outfile, "\tdb\t0\n"); } } /* compact version using times instead of loop switch(size) { case 1: fprintf(Outfile, "\ttimes\t%d\tdb\t0\n", node->size); break; case 4: fprintf(Outfile, "\ttimes\t%d\tdd\t0\n", node->size); break; case 8: fprintf(Outfile, "\ttimes\t%d\tdq\t0\n", node->size); break; default: fprintf(Outfile, "\ttimes\t%d\tdb\t0\n", size); } */ } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\tdb\t%d\n", *cptr); } fprintf(Outfile, "\tdb\t0\n"); /* put string in readable format on a single line // probably went overboard with error checking int comma = 0, quote = 0, start = 1; fprintf(Outfile, "\tdb\t"); for (cptr=strvalue; *cptr; cptr++) { if ( ! isprint(*cptr) ) if (comma || start) { fprintf(Outfile, "%d, ", *cptr); start = 0; comma = 1; } else if (quote) { fprintf(Outfile, "\', %d, ", *cptr); comma = 1; quote = 0; } else { fprintf(Outfile, "%d, ", *cptr); comma = 1; quote = 0; } else if (start || comma) { fprintf(Outfile, "\'%c", *cptr); start = comma = 0; quote = 1; } else { fprintf(Outfile, "%c", *cptr); comma = 0; quote = 1; } } if (comma || start) fprintf(Outfile, "0\n"); else fprintf(Outfile, "\', 0\n"); */ } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzx\teax, %s\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmov\teax, %s\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } cgjump(sym->endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL) fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r], sym->name); else fprintf(Outfile, "\tlea\t%s, [rbp+%d]\n", reglist[r], sym->posn); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovsx\t%s, dword [%s]\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { //Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmov\t[%s], byte %s\n", reglist[r2], breglist[r1]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 34_Enums_and_Typedefs/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Putback; // Character put back by scanner extern_ struct symtable *Functionid; // Symbol ptr of the current function extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ char *Outfilename; // Name of file we opened as Outfile extern_ struct token Token; // Last token scanned extern_ char Text[TEXTLEN + 1]; // Last identifier scanned // Symbol table lists extern_ struct symtable *Globhead, *Globtail; // Global variables and 
functions extern_ struct symtable *Loclhead, *Locltail; // Local variables extern_ struct symtable *Parmhead, *Parmtail; // Local parameters extern_ struct symtable *Membhead, *Membtail; // Temp list of struct/union members extern_ struct symtable *Structhead, *Structtail; // List of struct types extern_ struct symtable *Unionhead, *Uniontail; // List of union types extern_ struct symtable *Enumhead, *Enumtail; // List of enum types and values extern_ struct symtable *Typehead, *Typetail; // List of typedefs // Command-line flags extern_ int O_dumpAST; // If true, dump the AST trees extern_ int O_keepasm; // If true, keep any assembly files extern_ int O_assemble; // If true, assemble the assembly files extern_ int O_dolink; // If true, link the object files extern_ int O_verbose; // If true, print info on compilation stages ================================================ FILE: 34_Enums_and_Typedefs/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 static struct symtable *composite_declaration(int type); static void enum_declaration(void); int typedef_declaration(struct symtable **ctype); int type_of_typedef(char *name, struct symtable **ctype); // Parse the current token and return // a primitive type enum value and a pointer // to any composite type. 
// Also scan in the next token int parse_type(struct symtable **ctype) { int type; switch (Token.token) { case T_VOID: type = P_VOID; scan(&Token); break; case T_CHAR: type = P_CHAR; scan(&Token); break; case T_INT: type = P_INT; scan(&Token); break; case T_LONG: type = P_LONG; scan(&Token); break; // For the following, if we have a ';' after the // parsing then there is no type, so return -1 case T_STRUCT: type = P_STRUCT; *ctype = composite_declaration(P_STRUCT); if (Token.token == T_SEMI) type = -1; break; case T_UNION: type = P_UNION; *ctype = composite_declaration(P_UNION); if (Token.token == T_SEMI) type = -1; break; case T_ENUM: type = P_INT; // Enums are really ints enum_declaration(); if (Token.token == T_SEMI) type = -1; break; case T_TYPEDEF: type = typedef_declaration(ctype); if (Token.token == T_SEMI) type = -1; break; case T_IDENT: type = type_of_typedef(Text, ctype); break; default: fatald("Illegal type, token", Token.token); } // Scan in one or more further '*' tokens // and determine the correct pointer type while (1) { if (Token.token != T_STAR) break; type = pointer_to(type); scan(&Token); } // We leave with the next token already scanned return (type); } // variable_declaration: type identifier ';' // | type identifier '[' INTLIT ']' ';' // ; // // Parse the declaration of a scalar variable or an array // with a given size. // The identifier has been scanned & we have the type. 
// class is the variable's class // Return the pointer to variable's entry in the symbol table struct symtable *var_declaration(int type, struct symtable *ctype, int class) { struct symtable *sym = NULL; // See if this has already been declared switch (class) { case C_GLOBAL: if (findglob(Text) != NULL) fatals("Duplicate global variable declaration", Text); case C_LOCAL: case C_PARAM: if (findlocl(Text) != NULL) fatals("Duplicate local variable declaration", Text); case C_MEMBER: if (findmember(Text) != NULL) fatals("Duplicate struct/union member declaration", Text); } // Text now has the identifier's name. // If the next token is a '[' if (Token.token == T_LBRACKET) { // Skip past the '[' scan(&Token); // Check we have an array size if (Token.token == T_INTLIT) { // Add this as a known array and generate its space in assembly. // We treat the array as a pointer to its elements' type switch (class) { case C_GLOBAL: sym = addglob(Text, pointer_to(type), ctype, S_ARRAY, Token.intvalue); break; case C_LOCAL: case C_PARAM: case C_MEMBER: fatal ("For now, declaration of non-global arrays is not implemented"); } } // Ensure we have a following ']' scan(&Token); match(T_RBRACKET, "]"); } else { // Add this as a known scalar // and generate its space in assembly switch (class) { case C_GLOBAL: sym = addglob(Text, type, ctype, S_VARIABLE, 1); break; case C_LOCAL: sym = addlocl(Text, type, ctype, S_VARIABLE, 1); break; case C_PARAM: sym = addparm(Text, type, ctype, S_VARIABLE, 1); break; case C_MEMBER: sym = addmemb(Text, type, ctype, S_VARIABLE, 1); break; } } return (sym); } // var_declaration_list: // | variable_declaration // | variable_declaration separate_token var_declaration_list ; // // When called to parse function parameters, separate_token is ','. // When called to parse members of a struct/union, separate_token is ';'. // // Parse a list of variables. // Add them as symbols to one of the symbol table lists, and return the // number of variables. 
If funcsym is not NULL, there is an existing function // prototype, so compare each variable's type against this prototype. static int var_declaration_list(struct symtable *funcsym, int class, int separate_token, int end_token) { int type; int paramcnt = 0; struct symtable *protoptr = NULL; struct symtable *ctype; // If there is a prototype, get the pointer // to the first prototype parameter if (funcsym != NULL) protoptr = funcsym->member; // Loop until the final end token while (Token.token != end_token) { // Get the type and identifier type = parse_type(&ctype); ident(); // Check that this type matches the prototype if there is one if (protoptr != NULL) { if (type != protoptr->type) fatald("Type doesn't match prototype for parameter", paramcnt + 1); protoptr = protoptr->next; } else { // Add a new parameter to the right symbol table list, based on the class var_declaration(type, ctype, class); } paramcnt++; // Must have a separate_token or ')' at this point if ((Token.token != separate_token) && (Token.token != end_token)) fatald("Unexpected token in parameter list", Token.token); if (Token.token == separate_token) scan(&Token); } // Check that the number of parameters in this list matches // any existing prototype if ((funcsym != NULL) && (paramcnt != funcsym->nelems)) fatals("Parameter count mismatch for function", funcsym->name); // Return the count of parameters return (paramcnt); } // // function_declaration: type identifier '(' parameter_list ')' ; // | type identifier '(' parameter_list ')' compound_statement ; // // Parse the declaration of function. // The identifier has been scanned & we have the type. struct ASTnode *function_declaration(int type) { struct ASTnode *tree, *finalstmt; struct symtable *oldfuncsym, *newfuncsym = NULL; int endlabel, paramcnt; // Text has the identifier's name. If this exists and is a // function, get the id. Otherwise, set oldfuncsym to NULL. 
if ((oldfuncsym = findsymbol(Text)) != NULL) if (oldfuncsym->stype != S_FUNCTION) oldfuncsym = NULL; // If this is a new function declaration, get a // label-id for the end label, and add the function // to the symbol table, if (oldfuncsym == NULL) { endlabel = genlabel(); // Assumtion: functions only return scalar types, so NULL below newfuncsym = addglob(Text, type, NULL, S_FUNCTION, endlabel); } // Scan in the '(', any parameters and the ')'. // Pass in any existing function prototype pointer lparen(); paramcnt = var_declaration_list(oldfuncsym, C_PARAM, T_COMMA, T_RPAREN); rparen(); // If this is a new function declaration, update the // function symbol entry with the number of parameters. // Also copy the parameter list into the function's node. if (newfuncsym) { newfuncsym->nelems = paramcnt; newfuncsym->member = Parmhead; oldfuncsym = newfuncsym; } // Clear out the parameter list Parmhead = Parmtail = NULL; // Declaration ends in a semicolon, only a prototype. if (Token.token == T_SEMI) { scan(&Token); return (NULL); } // This is not just a prototype. // Set the Functionid global to the function's symbol pointer Functionid = oldfuncsym; // Get the AST tree for the compound statement tree = compound_statement(); // If the function type isn't P_VOID .. if (type != P_VOID) { // Error if no statements in the function if (tree == NULL) fatal("No statements in function with non-void type"); // Check that the last AST operation in the // compound statement was a return statement finalstmt = (tree->op == A_GLUE) ? tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); } // Return an A_FUNCTION node which has the function's symbol pointer // and the compound statement sub-tree return (mkastunary(A_FUNCTION, type, tree, oldfuncsym, endlabel)); } // Parse composite type declarations: structs or unions. 
// Either find an existing struct/union declaration, or build // a struct/union symbol table entry and return its pointer. static struct symtable *composite_declaration(int type) { struct symtable *ctype = NULL; struct symtable *m; int offset; // Skip the struct/union keyword scan(&Token); // See if there is a following struct/union name if (Token.token == T_IDENT) { // Find any matching composite type if (type == P_STRUCT) ctype = findstruct(Text); else ctype = findunion(Text); scan(&Token); } // If the next token isn't an LBRACE , this is // the usage of an existing struct/union type. // Return the pointer to the type. if (Token.token != T_LBRACE) { if (ctype == NULL) fatals("unknown struct/union type", Text); return (ctype); } // Ensure this struct/union type hasn't been // previously defined if (ctype) fatals("previously defined struct/union", Text); // Build the composite type and skip the left brace if (type == P_STRUCT) ctype = addstruct(Text, P_STRUCT, NULL, 0, 0); else ctype = addunion(Text, P_UNION, NULL, 0, 0); scan(&Token); // Scan in the list of members and attach // to the struct type's node var_declaration_list(NULL, C_MEMBER, T_SEMI, T_RBRACE); rbrace(); ctype->member = Membhead; Membhead = Membtail = NULL; // Set the offset of the initial member // and find the first free byte after it m = ctype->member; m->posn = 0; offset = typesize(m->type, m->ctype); // Set the position of each successive member in the composite type // Unions are easy. 
For structs, align the member and find the next free byte for (m = m->next; m != NULL; m = m->next) { // Set the offset for this member if (type == P_STRUCT) m->posn = genalign(m->type, offset, 1); else m->posn = 0; // Get the offset of the next free byte after this member offset += typesize(m->type, m->ctype); } // Set the overall size of the composite type ctype->size = offset; return (ctype); } // Parse an enum declaration static void enum_declaration(void) { struct symtable *etype = NULL; char *name; int intval = 0; // Skip the enum keyword. scan(&Token); // If there's a following enum type name, get a // pointer to any existing enum type node. if (Token.token == T_IDENT) { etype = findenumtype(Text); name = strdup(Text); // As it gets tromped soon scan(&Token); } // If the next token isn't a LBRACE, check // that we have an enum type name, then return if (Token.token != T_LBRACE) { if (etype == NULL) fatals("undeclared enum type:", name); return; } // We do have an LBRACE. Skip it scan(&Token); // If we have an enum type name, ensure that it // hasn't been declared before. if (etype != NULL) fatals("enum type redeclared:", etype->name); else // Build an enum type node for this identifier etype = addenum(name, C_ENUMTYPE, 0); // Loop to get all the enum values while (1) { // Ensure we have an identifier // Copy it in case there's an int literal coming up ident(); name = strdup(Text); // Ensure this enum value hasn't been declared before etype = findenumval(name); if (etype != NULL) fatals("enum value redeclared:", Text); // If the next token is an '=', skip it and // get the following int literal if (Token.token == T_ASSIGN) { scan(&Token); if (Token.token != T_INTLIT) fatal("Expected int literal after '='"); intval = Token.intvalue; scan(&Token); } // Build an enum value node for this identifier. // Increment the value for the next enum identifier. 
etype = addenum(name, C_ENUMVAL, intval++); // Bail out on a right curly bracket, else get a comma if (Token.token == T_RBRACE) break; comma(); } scan(&Token); // Skip over the right curly bracket } // Parse a typedef declaration and return the type // and ctype that it represents int typedef_declaration(struct symtable **ctype) { int type; // Skip the typedef keyword. scan(&Token); // Get the actual type following the keyword type = parse_type(ctype); // See if the typedef identifier already exists if (findtypedef(Text) != NULL) fatals("redefinition of typedef", Text); // It doesn't exist so add it to the typedef list addtypedef(Text, type, *ctype, 0, 0); scan(&Token); return (type); } // Given a typedef name, return the type it represents int type_of_typedef(char *name, struct symtable **ctype) { struct symtable *t; // Look up the typedef in the list t = findtypedef(name); if (t == NULL) fatals("unknown type", name); scan(&Token); *ctype = t->ctype; return (t->type); } // Parse one or more global declarations, either // variables, functions or structs void global_declarations(void) { struct ASTnode *tree; struct symtable *ctype; int type; while (1) { // Stop when we have reached EOF if (Token.token == T_EOF) break; // Get the type type = parse_type(&ctype); // We might have just parsed a struct, union or enum // declaration with no associated variable. // The next token might be a ';'. Loop back if it is. // XXX: I'm not happy with this as it allows // "struct fred;" as an accepted statement if (type == -1) { semi(); continue; } // We have to read past the identifier // to see either a '(' for a function declaration // or a ',' or ';' for a variable declaration. // Text is filled in by the ident() call. 
ident(); if (Token.token == T_LPAREN) { // Parse the function declaration tree = function_declaration(type); // Only a function prototype, no code if (tree == NULL) continue; // A real function, generate the assembly code for it if (O_dumpAST) { dumpAST(tree, NOLABEL, 0); fprintf(stdout, "\n\n"); } genAST(tree, NOLABEL, 0); // Now free the symbols associated // with this function freeloclsyms(); } else { // Parse the global variable declaration // and skip past the trailing semicolon var_declaration(type, ctype, C_GLOBAL); semi(); } } } ================================================ FILE: 34_Enums_and_Typedefs/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c void reject_token(struct token *t); int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue); struct ASTnode *mkastleaf(int op, int type, struct symtable *sym, int intvalue); struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, struct symtable *sym, int intvalue); void dumpAST(struct ASTnode *n, int label, int parentASTop); // gen.c int genlabel(void); int genAST(struct ASTnode *n, int reg, int parentASTop); void genpreamble(); void genpostamble(); void genfreeregs(); void genglobsym(struct symtable *node); int genglobstr(char *strvalue); int genprimsize(int type); int genalign(int type, int offset, int direction); void genreturn(int reg, int id); // cg.c int cgprimsize(int type); int cgalign(int type, int offset, int direction); void cgtextseg(); void cgdataseg(); void freeall_registers(void); void cgpreamble(); void cgpostamble(); void cgfuncpreamble(struct symtable *sym); void cgfuncpostamble(struct symtable *sym); int cgloadint(int value, int type); int cgloadglob(struct symtable *sym, int op); int cgloadlocal(struct symtable *sym, int op); int cgloadglobstr(int 
label); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdiv(int r1, int r2); int cgshlconst(int r, int val); int cgcall(struct symtable *sym, int numargs); void cgcopyarg(int r, int argposn); int cgstorglob(int r, struct symtable *sym); int cgstorlocal(int r, struct symtable *sym); void cgglobsym(struct symtable *node); void cgglobstr(int l, char *strvalue); int cgcompare_and_set(int ASTop, int r1, int r2); int cgcompare_and_jump(int ASTop, int r1, int r2, int label); void cglabel(int l); void cgjump(int l); int cgwiden(int r, int oldtype, int newtype); void cgreturn(int reg, struct symtable *sym); int cgaddress(struct symtable *sym); int cgderef(int r, int type); int cgstorderef(int r1, int r2, int type); int cgnegate(int r); int cginvert(int r); int cglognot(int r); int cgboolean(int r, int op, int label); int cgand(int r1, int r2); int cgor(int r1, int r2); int cgxor(int r1, int r2); int cgshl(int r1, int r2); int cgshr(int r1, int r2); // expr.c struct ASTnode *binexpr(int ptp); // stmt.c struct ASTnode *compound_statement(void); // misc.c void match(int t, char *what); void semi(void); void lbrace(void); void rbrace(void); void lparen(void); void rparen(void); void ident(void); void comma(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node); struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int size, int posn); struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int size); struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int size); struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype, int size); struct symtable *addstruct(char *name, int type, struct symtable *ctype, int stype, int size); struct symtable *addunion(char *name, int 
type, struct symtable *ctype, int stype, int size); struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int size); struct symtable *addenum(char *name, int class, int value); struct symtable *addtypedef(char *name, int type, struct symtable *ctype, int stype, int size); struct symtable *findglob(char *s); struct symtable *findlocl(char *s); struct symtable *findsymbol(char *s); struct symtable *findmember(char *s); struct symtable *findstruct(char *s); struct symtable *findunion(char *s); struct symtable *findenumtype(char *s); struct symtable *findenumval(char *s); struct symtable *findtypedef(char *s); void clear_symtable(void); void freeloclsyms(void); // decl.c struct symtable *var_declaration(int type, struct symtable *ctype, int class); struct ASTnode *function_declaration(int type); void global_declarations(void); // types.c int inttype(int type); int ptrtype(int type); int parse_type(struct symtable **ctype); int pointer_to(int type); int value_at(int type); int typesize(int type, struct symtable *ctype); struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op); ================================================ FILE: 34_Enums_and_Typedefs/defs.h ================================================ #include #include #include #include // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 enum { TEXTLEN = 512 // Length of identifiers in input }; // Commands and default filenames #define AOUT "a.out" #ifdef __NASM__ #define ASCMD "nasm -f elf64 -o " #define LDCMD "cc -no-pie -fno-plt -Wall -o " #else #define ASCMD "as -o " #define LDCMD "cc -o " #endif // Token types enum { T_EOF, // Binary operators T_ASSIGN, T_LOGOR, T_LOGAND, T_OR, T_XOR, T_AMPER, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, T_LSHIFT, T_RSHIFT, T_PLUS, T_MINUS, T_STAR, T_SLASH, // Other operators T_INC, T_DEC, T_INVERT, T_LOGNOT, // Type keywords T_VOID, T_CHAR, T_INT, T_LONG, // Other keywords T_IF, T_ELSE, T_WHILE, T_FOR, T_RETURN, 
T_STRUCT, T_UNION, T_ENUM, T_TYPEDEF, // Structural tokens T_INTLIT, T_STRLIT, T_SEMI, T_IDENT, T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, T_LBRACKET, T_RBRACKET, T_COMMA, T_DOT, T_ARROW }; // Token structure struct token { int token; // Token type, from the enum list above int intvalue; // For T_INTLIT, the integer value }; // AST node types. The first few line up // with the related tokens enum { A_ASSIGN = 1, A_LOGOR, A_LOGAND, A_OR, A_XOR, A_AND, A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_LSHIFT, A_RSHIFT, A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_INTLIT, A_STRLIT, A_IDENT, A_GLUE, A_IF, A_WHILE, A_FUNCTION, A_WIDEN, A_RETURN, A_FUNCCALL, A_DEREF, A_ADDR, A_SCALE, A_PREINC, A_PREDEC, A_POSTINC, A_POSTDEC, A_NEGATE, A_INVERT, A_LOGNOT, A_TOBOOL }; // Primitive types. The bottom 4 bits is an integer // value that represents the level of indirection, // e.g. 0= no pointer, 1= pointer, 2= pointer pointer etc. enum { P_NONE, P_VOID = 16, P_CHAR = 32, P_INT = 48, P_LONG = 64, P_STRUCT=80, P_UNION=96 }; // Structural types enum { S_VARIABLE, S_FUNCTION, S_ARRAY }; // Storage classes enum { C_GLOBAL = 1, // Globally visible symbol C_LOCAL, // Locally visible symbol C_PARAM, // Locally visible function parameter C_STRUCT, // A struct C_UNION, // A union C_MEMBER, // Member of a struct or union C_ENUMTYPE, // A named enumeration type C_ENUMVAL, // A named enumeration value C_TYPEDEF // A named typedef }; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol struct symtable *ctype; // If struct/union, ptr to that type int stype; // Structural type for the symbol int class; // Storage class for the symbol union { int size; // Number of elements in the symbol int endlabel; // For functions, the end label }; union { int nelems; // For functions, # of params int posn; // For locals, the negative offset // from the stack base pointer }; struct symtable *next; // Next symbol in one list struct symtable *member; // First 
				// member of a function, struct,
};				// union or enum

// Abstract Syntax Tree structure
struct ASTnode {
  int op;			// "Operation" to be performed on this tree
  int type;			// Type of any expression this tree generates
  int rvalue;			// True if the node is an rvalue
  struct ASTnode *left;		// Left, middle and right child trees
  struct ASTnode *mid;
  struct ASTnode *right;
  struct symtable *sym;		// For many AST nodes, the pointer to
  union {			// the symbol in the symbol table
    int intvalue;		// For A_INTLIT, the integer value
    int size;			// For A_SCALE, the size to scale by
  };
};

enum {
  NOREG = -1,			// Use NOREG when the AST generation
				// functions have no register to return
  NOLABEL = 0			// Use NOLABEL when we have no label to
				// pass to genAST()
};

================================================
FILE: 34_Enums_and_Typedefs/expr.c
================================================
#include "defs.h"
#include "data.h"
#include "decl.h"

// Parsing of expressions
// Copyright (c) 2019 Warren Toomey, GPL3

// expression_list: (empty)
//        | expression
//        | expression ',' expression_list
//        ;

// Parse a list of zero or more comma-separated expressions and
// return an AST composed of A_GLUE nodes with the left-hand child
// being the sub-tree of previous expressions (or NULL) and the right-hand
// child being the next expression. Each A_GLUE node will have size field
// set to the number of expressions in the tree at this point. If no
// expressions are parsed, NULL is returned
static struct ASTnode *expression_list(void) {
  struct ASTnode *tree = NULL;
  struct ASTnode *child = NULL;
  int exprcount = 0;

  // Loop until the final right parentheses.
  // The ')' itself is not consumed here.
  while (Token.token != T_RPAREN) {

    // Parse the next expression and increment the expression count
    child = binexpr(0);
    exprcount++;

    // Build an A_GLUE AST node with the previous tree as the left child
    // and the new expression as the right child. Store the expression count.
    tree = mkastnode(A_GLUE, P_NONE, tree, NULL, child, NULL, exprcount);

    // Must have a ',' or ')' at this point
    switch (Token.token) {
      case T_COMMA:
	scan(&Token);
	break;
      case T_RPAREN:
	break;
      default:
	fatald("Unexpected token in expression list", Token.token);
    }
  }

  // Return the tree of expressions
  return (tree);
}

// Parse a function call and return its AST.
// The function's name is in Text and the current token is the '('.
static struct ASTnode *funccall(void) {
  struct ASTnode *tree;
  struct symtable *funcptr;

  // Check that the identifier has been defined as a function,
  // then make a leaf node for it.
  if ((funcptr = findsymbol(Text)) == NULL || funcptr->stype != S_FUNCTION) {
    fatals("Undeclared function", Text);
  }
  // Get the '('
  lparen();

  // Parse the argument expression list
  tree = expression_list();

  // XXX Check type of each argument against the function's prototype

  // Build the function call AST node. Store the
  // function's return type as this node's type.
  // Also record the function's symbol-id
  tree = mkastunary(A_FUNCCALL, funcptr->type, tree, funcptr, 0);

  // Get the ')'
  rparen();
  return (tree);
}

// Parse the index into an array and return an AST tree for it.
// The array's name is in Text and the current token is the '['.
static struct ASTnode *array_access(void) {
  struct ASTnode *left, *right;
  struct symtable *aryptr;

  // Check that the identifier has been defined as an array
  // then make a leaf node for it that points at the base
  if ((aryptr = findsymbol(Text)) == NULL || aryptr->stype != S_ARRAY) {
    fatals("Undeclared array", Text);
  }
  left = mkastleaf(A_ADDR, aryptr->type, aryptr, 0);

  // Get the '['
  scan(&Token);

  // Parse the following expression
  right = binexpr(0);

  // Get the ']'
  match(T_RBRACKET, "]");

  // Ensure that this is of int type
  if (!inttype(right->type))
    fatal("Array index is not of integer type");

  // Scale the index by the size of the element's type
  right = modify_type(right, left->type, A_ADD);

  // Return an AST tree where the array's base has the offset
  // added to it, and dereference the element. Still an lvalue
  // at this point.
  left = mkastnode(A_ADD, aryptr->type, left, NULL, right, NULL, 0);
  left = mkastunary(A_DEREF, value_at(left->type), left, NULL, 0);
  return (left);
}

// Parse the member reference of a struct or union
// and return an AST tree for it. If withpointer is true,
// the access is through a pointer to the member.
// The variable's name is in Text and the current token
// is the '.' or '->'.
static struct ASTnode *member_access(int withpointer) {
  struct ASTnode *left, *right;
  struct symtable *compvar;
  struct symtable *typeptr;
  struct symtable *m;

  // Check that the identifier has been declared as a
  // struct/union or a struct/union pointer
  if ((compvar = findsymbol(Text)) == NULL)
    fatals("Undeclared variable", Text);
  if (withpointer && compvar->type != pointer_to(P_STRUCT)
      && compvar->type != pointer_to(P_UNION))
    fatals("Undeclared variable", Text);
  if (!withpointer && compvar->type != P_STRUCT && compvar->type != P_UNION)
    fatals("Undeclared variable", Text);

  // If a pointer to a struct/union, get the pointer's value.
  // Otherwise, make a leaf node that points at the base
  // Either way, it's an rvalue
  if (withpointer) {
    left = mkastleaf(A_IDENT, pointer_to(compvar->type), compvar, 0);
  } else
    left = mkastleaf(A_ADDR, compvar->type, compvar, 0);
  left->rvalue = 1;

  // Get the details of the composite type
  typeptr = compvar->ctype;

  // Skip the '.' or '->' token and get the member's name
  scan(&Token);
  ident();

  // Find the matching member's name in the type
  // Die if we can't find it
  for (m = typeptr->member; m != NULL; m = m->next)
    if (!strcmp(m->name, Text))
      break;

  if (m == NULL)
    fatals("No member found in struct/union: ", Text);

  // Build an A_INTLIT node with the offset
  right = mkastleaf(A_INTLIT, P_INT, NULL, m->posn);

  // Add the member's offset to the base of the struct/union
  // and dereference it. Still an lvalue at this point
  left = mkastnode(A_ADD, pointer_to(m->type), left, NULL, right, NULL, 0);
  left = mkastunary(A_DEREF, m->type, left, NULL, 0);
  return (left);
}

// Parse a postfix expression and return
// an AST node representing it.
The // identifier is already in Text. static struct ASTnode *postfix(void) { struct ASTnode *n; struct symtable *varptr; struct symtable *enumptr; // If the identifier matches an enum value, // return an A_INTLIT node if ((enumptr = findenumval(Text)) != NULL) { scan(&Token); return (mkastleaf(A_INTLIT, P_INT, NULL, enumptr->posn)); } // Scan in the next token to see if we have a postfix expression scan(&Token); // Function call if (Token.token == T_LPAREN) return (funccall()); // An array reference if (Token.token == T_LBRACKET) return (array_access()); // Access into a struct or union if (Token.token == T_DOT) return (member_access(0)); if (Token.token == T_ARROW) return (member_access(1)); // A variable. Check that the variable exists. if ((varptr = findsymbol(Text)) == NULL || varptr->stype != S_VARIABLE) fatals("Unknown variable", Text); switch (Token.token) { // Post-increment: skip over the token case T_INC: scan(&Token); n = mkastleaf(A_POSTINC, varptr->type, varptr, 0); break; // Post-decrement: skip over the token case T_DEC: scan(&Token); n = mkastleaf(A_POSTDEC, varptr->type, varptr, 0); break; // Just a variable reference default: n = mkastleaf(A_IDENT, varptr->type, varptr, 0); } return (n); } // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; int id; switch (Token.token) { case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. // Make it a P_CHAR if it's within the P_CHAR range if ((Token.intvalue) >= 0 && (Token.intvalue < 256)) n = mkastleaf(A_INTLIT, P_CHAR, NULL, Token.intvalue); else n = mkastleaf(A_INTLIT, P_INT, NULL, Token.intvalue); break; case T_STRLIT: // For a STRLIT token, generate the assembly for it. // Then make a leaf AST node for it. id is the string's label. 
id = genglobstr(Text); n = mkastleaf(A_STRLIT, pointer_to(P_CHAR), NULL, id); break; case T_IDENT: return (postfix()); case T_LPAREN: // Beginning of a parenthesised expression, skip the '('. // Scan in the expression and the right parenthesis scan(&Token); n = binexpr(0); rparen(); return (n); default: fatald("Expecting a primary expression, got token", Token.token); } // Scan in the next token and return the leaf node scan(&Token); return (n); } // Convert a binary operator token into a binary AST operation. // We rely on a 1:1 mapping from token to AST operation static int binastop(int tokentype) { if (tokentype > T_EOF && tokentype <= T_SLASH) return (tokentype); fatald("Syntax error, token", tokentype); return (0); // Keep -Wall happy } // Return true if a token is right-associative, // false otherwise. static int rightassoc(int tokentype) { if (tokentype == T_ASSIGN) return (1); return (0); } // Operator precedence for each token. Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 20, 30, // T_EOF, T_ASSIGN, T_LOGOR, T_LOGAND 40, 50, 60, // T_OR, T_XOR, T_AMPER 70, 70, // T_EQ, T_NE 80, 80, 80, 80, // T_LT, T_GT, T_LE, T_GE 90, 90, // T_LSHIFT, T_RSHIFT 100, 100, // T_PLUS, T_MINUS 110, 110 // T_STAR, T_SLASH }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec; if (tokentype > T_SLASH) fatald("Token with no precedence in op_precedence:", tokentype); prec = OpPrec[tokentype]; if (prec == 0) fatald("Syntax error, token", tokentype); return (prec); } // prefix_expression: primary // | '*' prefix_expression // | '&' prefix_expression // | '-' prefix_expression // | '++' prefix_expression // | '--' prefix_expression // ; // Parse a prefix expression and return // a sub-tree representing it. 
struct ASTnode *prefix(void) { struct ASTnode *tree; switch (Token.token) { case T_AMPER: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Ensure that it's an identifier if (tree->op != A_IDENT) fatal("& operator must be followed by an identifier"); // Now change the operator to A_ADDR and the type to // a pointer to the original type tree->op = A_ADDR; tree->type = pointer_to(tree->type); break; case T_STAR: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's either another deref or an // identifier if (tree->op != A_IDENT && tree->op != A_DEREF) fatal("* operator must be followed by an identifier or *"); // Prepend an A_DEREF operation to the tree tree = mkastunary(A_DEREF, value_at(tree->type), tree, NULL, 0); break; case T_MINUS: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_NEGATE operation to the tree and // make the child an rvalue. Because chars are unsigned, // also widen this to int so that it's signed tree->rvalue = 1; tree = modify_type(tree, P_INT, 0); tree = mkastunary(A_NEGATE, tree->type, tree, NULL, 0); break; case T_INVERT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_INVERT operation to the tree and // make the child an rvalue. tree->rvalue = 1; tree = mkastunary(A_INVERT, tree->type, tree, NULL, 0); break; case T_LOGNOT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_LOGNOT operation to the tree and // make the child an rvalue. 
tree->rvalue = 1; tree = mkastunary(A_LOGNOT, tree->type, tree, NULL, 0); break; case T_INC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("++ operator must be followed by an identifier"); // Prepend an A_PREINC operation to the tree tree = mkastunary(A_PREINC, tree->type, tree, NULL, 0); break; case T_DEC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("-- operator must be followed by an identifier"); // Prepend an A_PREDEC operation to the tree tree = mkastunary(A_PREDEC, tree->type, tree, NULL, 0); break; default: tree = primary(); } return (tree); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; struct ASTnode *ltemp, *rtemp; int ASTop; int tokentype; // Get the tree on the left. // Fetch the next token at the same time. 
left = prefix(); // If we hit one of several terminating tokens, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA) { left->rvalue = 1; return (left); } // While the precedence of this token is more than that of the // previous token precedence, or it's right associative and // equal to the previous token's precedence while ((op_precedence(tokentype) > ptp) || (rightassoc(tokentype) && op_precedence(tokentype) == ptp)) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Determine the operation to be performed on the sub-trees ASTop = binastop(tokentype); if (ASTop == A_ASSIGN) { // Assignment // Make the right tree into an rvalue right->rvalue = 1; // Ensure the right's type matches the left right = modify_type(right, left->type, 0); if (right == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree. However, switch // left and right around, so that the right expression's // code will be generated before the left expression ltemp = left; left = right; right = ltemp; } else { // We are not doing an assignment, so both trees should be rvalues // Convert both trees into rvalue if they are lvalue trees left->rvalue = 1; right->rvalue = 1; // Ensure the two types are compatible by trying // to modify each tree to match the other's type. ltemp = modify_type(left, right->type, ASTop); rtemp = modify_type(right, left->type, ASTop); if (ltemp == NULL && rtemp == NULL) fatal("Incompatible types in binary expression"); if (ltemp != NULL) left = ltemp; if (rtemp != NULL) right = rtemp; } // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. left = mkastnode(binastop(tokentype), left->type, left, NULL, right, NULL, 0); // Update the details of the current token. 
// If we hit a terminating token, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA) { left->rvalue = 1; return (left); } } // Return the tree we have when the precedence // is the same or lower left->rvalue = 1; return (left); } ================================================ FILE: 34_Enums_and_Typedefs/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number int genlabel(void) { static int id = 1; return (id++); } // Generate the code for an IF statement // and an optional ELSE clause static int genIF(struct ASTnode *n) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. // When there is no ELSE clause, Lfalse _is_ // the ending label! Lfalse = genlabel(); if (n->right) Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. genAST(n->left, Lfalse, n->op); genfreeregs(); // Generate the true compound statement genAST(n->mid, NOLABEL, n->op); genfreeregs(); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOLABEL, n->op); genfreeregs(); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = genlabel(); Lend = genlabel(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. 
genAST(n->left, Lend, n->op); genfreeregs(); // Generate the compound statement for the body genAST(n->right, NOLABEL, n->op); genfreeregs(); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Generate the code to copy the arguments of a // function call to its parameters, then call the // function itself. Return the register that holds // the function's return value. static int gen_funccall(struct ASTnode *n) { struct ASTnode *gluetree = n->left; int reg; int numargs = 0; // If there is a list of arguments, walk this list // from the last argument (right-hand child) to the // first while (gluetree) { // Calculate the expression's value reg = genAST(gluetree->right, NOLABEL, gluetree->op); // Copy this into the n'th function parameter: size is 1, 2, 3, ... cgcopyarg(reg, gluetree->size); // Keep the first (highest) number of arguments if (numargs == 0) numargs = gluetree->size; genfreeregs(); gluetree = gluetree->left; } // Call the function, clean up the stack (based on numargs), // and return its result return (cgcall(n->sym, numargs)); } // Given an AST, an optional label, and the AST op // of the parent, generate assembly code recursively. // Return the register id with the tree's final value. 
int genAST(struct ASTnode *n, int label, int parentASTop) { int leftreg, rightreg; // We have some specific AST node handling at the top // so that we don't evaluate the child sub-trees immediately switch (n->op) { case A_IF: return (genIF(n)); case A_WHILE: return (genWHILE(n)); case A_FUNCCALL: return (gen_funccall(n)); case A_GLUE: // Do each child statement, and free the // registers after each child genAST(n->left, NOLABEL, n->op); genfreeregs(); genAST(n->right, NOLABEL, n->op); genfreeregs(); return (NOREG); case A_FUNCTION: // Generate the function's preamble before the code // in the child sub-tree cgfuncpreamble(n->sym); genAST(n->left, NOLABEL, n->op); cgfuncpostamble(n->sym); return (NOREG); } // General AST node handling below // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, NOLABEL, n->op); if (n->right) rightreg = genAST(n->right, NOLABEL, n->op); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdiv(leftreg, rightreg)); case A_AND: return (cgand(leftreg, rightreg)); case A_OR: return (cgor(leftreg, rightreg)); case A_XOR: return (cgxor(leftreg, rightreg)); case A_LSHIFT: return (cgshl(leftreg, rightreg)); case A_RSHIFT: return (cgshr(leftreg, rightreg)); case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, compare registers // and set one to 1 or 0 based on the comparison. 
if (parentASTop == A_IF || parentASTop == A_WHILE) return (cgcompare_and_jump(n->op, leftreg, rightreg, label)); else return (cgcompare_and_set(n->op, leftreg, rightreg)); case A_INTLIT: return (cgloadint(n->intvalue, n->type)); case A_STRLIT: return (cgloadglobstr(n->intvalue)); case A_IDENT: // Load our value if we are an rvalue // or we are being dereferenced if (n->rvalue || parentASTop == A_DEREF) { if (n->sym->class == C_GLOBAL) { return (cgloadglob(n->sym, n->op)); } else { return (cgloadlocal(n->sym, n->op)); } } else return (NOREG); case A_ASSIGN: // Are we assigning to an identifier or through a pointer? switch (n->right->op) { case A_IDENT: if (n->right->sym->class == C_GLOBAL) return (cgstorglob(leftreg, n->right->sym)); else return (cgstorlocal(leftreg, n->right->sym)); case A_DEREF: return (cgstorderef(leftreg, rightreg, n->right->type)); default: fatald("Can't A_ASSIGN in genAST(), op", n->op); } case A_WIDEN: // Widen the child's type to the parent's type return (cgwiden(leftreg, n->left->type, n->type)); case A_RETURN: cgreturn(leftreg, Functionid); return (NOREG); case A_ADDR: return (cgaddress(n->sym)); case A_DEREF: // If we are an rvalue, dereference to get the value we point at, // otherwise leave it for A_ASSIGN to store through the pointer if (n->rvalue) return (cgderef(leftreg, n->left->type)); else return (leftreg); case A_SCALE: // Small optimisation: use shift if the // scale value is a known power of two switch (n->size) { case 2: return (cgshlconst(leftreg, 1)); case 4: return (cgshlconst(leftreg, 2)); case 8: return (cgshlconst(leftreg, 3)); default: // Load a register with the size and // multiply the leftreg by this size rightreg = cgloadint(n->size, P_INT); return (cgmul(leftreg, rightreg)); } case A_POSTINC: case A_POSTDEC: // Load and decrement the variable's value into a register // and post increment/decrement it if (n->sym->class == C_GLOBAL) return (cgloadglob(n->sym, n->op)); else return (cgloadlocal(n->sym, n->op)); case 
A_PREINC: case A_PREDEC: // Load and decrement the variable's value into a register // and pre increment/decrement it if (n->left->sym->class == C_GLOBAL) return (cgloadglob(n->left->sym, n->op)); else return (cgloadlocal(n->left->sym, n->op)); case A_NEGATE: return (cgnegate(leftreg)); case A_INVERT: return (cginvert(leftreg)); case A_LOGNOT: return (cglognot(leftreg)); case A_TOBOOL: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, set the register // to 0 or 1 based on it's zeroeness or non-zeroeness return (cgboolean(leftreg, parentASTop, label)); default: fatald("Unknown AST operator", n->op); } return (NOREG); // Keep -Wall happy } void genpreamble() { cgpreamble(); } void genpostamble() { cgpostamble(); } void genfreeregs() { freeall_registers(); } void genglobsym(struct symtable *node) { cgglobsym(node); } int genglobstr(char *strvalue) { int l = genlabel(); cgglobstr(l, strvalue); return (l); } int genprimsize(int type) { return (cgprimsize(type)); } int genalign(int type, int offset, int direction) { return (cgalign(type, offset, direction)); } ================================================ FILE: 34_Enums_and_Typedefs/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Given a string with a '.' and at least a 1-character suffix // after the '.', change the suffix to be the given character. // Return the new string or NULL if the original string could // not be modified char *alter_suffix(char *str, char suffix) { char *posn; char *newstr; // Clone the string if ((newstr = strdup(str)) == NULL) return (NULL); // Find the '.' 
if ((posn = strrchr(newstr, '.')) == NULL) return (NULL); // Ensure there is a suffix posn++; if (*posn == '\0') return (NULL); // Change the suffix and NUL-terminate the string *posn++ = suffix; *posn = '\0'; return (newstr); } // Given an input filename, compile that file // down to assembly code. Return the new file's name static char *do_compile(char *filename) { Outfilename = alter_suffix(filename, 's'); if (Outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .c on the end\n", filename); exit(1); } // Open up the input file if ((Infile = fopen(filename, "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", filename, strerror(errno)); exit(1); } // Create the output file if ((Outfile = fopen(Outfilename, "w")) == NULL) { fprintf(stderr, "Unable to create %s: %s\n", Outfilename, strerror(errno)); exit(1); } Line = 1; // Reset the scanner Putback = '\n'; clear_symtable(); // Clear the symbol table if (O_verbose) printf("compiling %s\n", filename); scan(&Token); // Get the first token from the input genpreamble(); // Output the preamble global_declarations(); // Parse the global declarations genpostamble(); // Output the postamble fclose(Outfile); // Close the output file return (Outfilename); } // Given an input filename, assemble that file // down to object code. Return the object filename char *do_assemble(char *filename) { char cmd[TEXTLEN]; int err; char *outfilename = alter_suffix(filename, 'o'); if (outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .s on the end\n", filename); exit(1); } // Build the assembly command and run it snprintf(cmd, TEXTLEN, "%s %s %s", ASCMD, outfilename, filename); if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Assembly of %s failed\n", filename); exit(1); } return (outfilename); } // Given a list of object files and an output filename, // link all of the object filenames together. 
void do_link(char *outfilename, char *objlist[]) { int cnt, size = TEXTLEN; char cmd[TEXTLEN], *cptr; int err; // Start with the linker command and the output file cptr = cmd; cnt = snprintf(cptr, size, "%s %s ", LDCMD, outfilename); cptr += cnt; size -= cnt; // Now append each object file while (*objlist != NULL) { cnt = snprintf(cptr, size, "%s ", *objlist); cptr += cnt; size -= cnt; objlist++; } if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Linking failed\n"); exit(1); } } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s [-vcST] [-o outfile] file [file ...]\n", prog); fprintf(stderr, " -v give verbose output of the compilation stages\n"); fprintf(stderr, " -c generate object files but don't link them\n"); fprintf(stderr, " -S generate assembly files but don't link them\n"); fprintf(stderr, " -T dump the AST trees for each input file\n"); fprintf(stderr, " -o outfile, produce the outfile executable file\n"); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. 
enum { MAXOBJ = 100 }; int main(int argc, char *argv[]) { char *outfilename = AOUT; char *asmfile, *objfile; char *objlist[MAXOBJ]; int i, objcnt = 0; // Initialise our variables O_dumpAST = 0; O_keepasm = 0; O_assemble = 0; O_verbose = 0; O_dolink = 1; // Scan for command-line options for (i = 1; i < argc; i++) { // No leading '-', stop scanning for options if (*argv[i] != '-') break; // For each option in this argument for (int j = 1; (*argv[i] == '-') && argv[i][j]; j++) { switch (argv[i][j]) { case 'o': outfilename = argv[++i]; // Save & skip to next argument break; case 'T': O_dumpAST = 1; break; case 'c': O_assemble = 1; O_keepasm = 0; O_dolink = 0; break; case 'S': O_keepasm = 1; O_assemble = 0; O_dolink = 0; break; case 'v': O_verbose = 1; break; default: usage(argv[0]); } } } // Ensure we have at lease one input file argument if (i >= argc) usage(argv[0]); // Work on each input file in turn while (i < argc) { asmfile = do_compile(argv[i]); // Compile the source file if (O_dolink || O_assemble) { objfile = do_assemble(asmfile); // Assemble it to object forma if (objcnt == (MAXOBJ - 2)) { fprintf(stderr, "Too many object files for the compiler to handle\n"); exit(1); } objlist[objcnt++] = objfile; // Add the object file's name objlist[objcnt] = NULL; // to the list of object files } if (!O_keepasm) // Remove the assembly file if unlink(asmfile); // we don't need to keep it i++; } // Now link all the object files together if (O_dolink) { do_link(outfilename, objlist); // If we don't need to keep the object // files, then remove them if (!O_assemble) { for (i = 0; objlist[i] != NULL; i++) unlink(objlist[i]); } } return (0); } ================================================ FILE: 34_Enums_and_Typedefs/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" #include // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current token is t, // and fetch the next token. 
Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Match a comma and fetch the next token void comma(void) { match(T_COMMA, "comma"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d\n", s, Line); fclose(Outfile); unlink(Outfilename); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d\n", s1, s2, Line); fclose(Outfile); unlink(Outfilename); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d\n", s, d, Line); fclose(Outfile); unlink(Outfilename); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d\n", s, c, Line); fclose(Outfile); unlink(Outfilename); exit(1); } ================================================ FILE: 34_Enums_and_Typedefs/scan.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { char *p; p = strchr(s, c); return (p ? p - s : -1); } // Get the next character from the input file. 
static int next(void) { int c; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return (c); } c = fgetc(Infile); // Read from input file if ('\n' == c) Line++; // Increment line count return (c); } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. static int skip(void) { int c; c = next(); while (' ' == c || '\t' == c || '\n' == c || '\r' == c || '\f' == c) { c = next(); } return (c); } // Return the next character from a character // or string literal static int scanch(void) { int c; // Get the next input character and interpret // metacharacters that start with a backslash c = next(); if (c == '\\') { switch (c = next()) { case 'a': return '\a'; case 'b': return '\b'; case 'f': return '\f'; case 'n': return '\n'; case 'r': return '\r'; case 't': return '\t'; case 'v': return '\v'; case '\\': return '\\'; case '"': return '"'; case '\'': return '\''; default: fatalc("unknown escape sequence", c); } } return (c); // Just an ordinary old character! } // Scan and return an integer literal // value from the input file. static int scanint(int c) { int k, val = 0; // Convert each character into an int value while ((k = chrpos("0123456789", c)) >= 0) { val = val * 10 + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan in a string literal from the input file, // and store it in buf[]. Return the length of // the string. 
static int scanstr(char *buf) { int i, c; // Loop while we have enough buffer space for (i = 0; i < TEXTLEN - 1; i++) { // Get the next char and append to buf // Return when we hit the ending double quote if ((c = scanch()) == '"') { buf[i] = 0; return (i); } buf[i] = c; } // Ran out of buf[] space fatal("String literal too long"); return (0); } // Scan an identifier from the input file and // store it in buf[]. Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { fatal("Identifier too long"); } else if (i < lim - 1) { buf[i++] = c; } c = next(); } // We hit a non-valid character, put it back. // NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. 
static int keyword(char *s) { switch (*s) { case 'c': if (!strcmp(s, "char")) return (T_CHAR); break; case 'e': if (!strcmp(s, "else")) return (T_ELSE); if (!strcmp(s, "enum")) return (T_ENUM); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'l': if (!strcmp(s, "long")) return (T_LONG); break; case 'r': if (!strcmp(s, "return")) return (T_RETURN); break; case 's': if (!strcmp(s, "struct")) return (T_STRUCT); break; case 't': if (!strcmp(s, "typedef")) return (T_TYPEDEF); break; case 'u': if (!strcmp(s, "union")) return (T_UNION); break; case 'v': if (!strcmp(s, "void")) return (T_VOID); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; } return (0); } // A pointer to a rejected token static struct token *Rejtoken = NULL; // Reject the token that we just scanned void reject_token(struct token *t) { if (Rejtoken != NULL) fatal("Can't reject token twice"); Rejtoken = t; } // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. 
int scan(struct token *t) { int c, tokentype; // If we have any rejected token, return it if (Rejtoken != NULL) { t = Rejtoken; Rejtoken = NULL; return (1); } // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': if ((c = next()) == '+') { t->token = T_INC; } else { putback(c); t->token = T_PLUS; } break; case '-': if ((c = next()) == '-') { t->token = T_DEC; } else if (c == '>') { t->token = T_ARROW; } else { putback(c); t->token = T_MINUS; } break; case '*': t->token = T_STAR; break; case '/': t->token = T_SLASH; break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case '[': t->token = T_LBRACKET; break; case ']': t->token = T_RBRACKET; break; case '~': t->token = T_INVERT; break; case '^': t->token = T_XOR; break; case ',': t->token = T_COMMA; break; case '.': t->token = T_DOT; break; case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { putback(c); t->token = T_LOGNOT; } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else if (c == '<') { t->token = T_LSHIFT; } else { putback(c); t->token = T_LT; } break; case '>': if ((c = next()) == '=') { t->token = T_GE; } else if (c == '>') { t->token = T_RSHIFT; } else { putback(c); t->token = T_GT; } break; case '&': if ((c = next()) == '&') { t->token = T_LOGAND; } else { putback(c); t->token = T_AMPER; } break; case '|': if ((c = next()) == '|') { t->token = T_LOGOR; } else { putback(c); t->token = T_OR; } break; case '\'': // If it's a quote, scan in the // literal character value and // the trailing quote t->intvalue = scanch(); t->token = T_INTLIT; if (next() != '\'') fatal("Expected '\\'' at end of char literal"); break; case '"': // Scan in a literal 
string scanstr(Text); t->token = T_STRLIT; break; default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if ((tokentype = keyword(Text)) != 0) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token return (1); } ================================================ FILE: 34_Enums_and_Typedefs/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // Prototypes static struct ASTnode *single_statement(void); // compound_statement: // empty, i.e. no statement // | statement // | statement statements // ; // // statement: declaration // | expression_statement // | function_call // | if_statement // | while_statement // | for_statement // | return_statement // ; // if_statement: if_head // | if_head 'else' compound_statement // ; // // if_head: 'if' '(' true_false_expression ')' compound_statement ; // // Parse an IF statement including any // optional ELSE clause and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. 
condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); rparen(); // Get the AST for the compound statement trueAST = compound_statement(); // If we have an 'else', skip it // and get the AST for the compound statement if (Token.token == T_ELSE) { scan(&Token); falseAST = compound_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, P_NONE, condAST, trueAST, falseAST, NULL, 0)); } // while_statement: 'while' '(' true_false_expression ')' compound_statement ; // // Parse a WHILE statement and return its AST static struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); rparen(); // Get the AST for the compound statement bodyAST = compound_statement(); // Build and return the AST for this statement return (mkastnode(A_WHILE, P_NONE, condAST, NULL, bodyAST, NULL, 0)); } // for_statement: 'for' '(' preop_statement ';' // true_false_expression ';' // postop_statement ')' compound_statement ; // // preop_statement: statement (for now) // postop_statement: statement (for now) // // Parse a FOR statement and return its AST static struct ASTnode *for_statement(void) { struct ASTnode *condAST, *bodyAST; struct ASTnode *preopAST, *postopAST; struct ASTnode *tree; // Ensure we have 'for' '(' match(T_FOR, "for"); lparen(); // Get the pre_op statement and the ';' preopAST = single_statement(); semi(); // Get the condition and the ';'. // Force a non-comparison to be boolean // the tree's operation is a comparison. 
condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); semi(); // Get the post_op statement and the ')' postopAST = single_statement(); rparen(); // Get the compound statement which is the body bodyAST = compound_statement(); // For now, all four sub-trees have to be non-NULL. // Later on, we'll change the semantics for when some are missing // Glue the compound statement and the postop tree tree = mkastnode(A_GLUE, P_NONE, bodyAST, NULL, postopAST, NULL, 0); // Make a WHILE loop with the condition and this new body tree = mkastnode(A_WHILE, P_NONE, condAST, NULL, tree, NULL, 0); // And glue the preop tree to the A_WHILE tree return (mkastnode(A_GLUE, P_NONE, preopAST, NULL, tree, NULL, 0)); } // return_statement: 'return' '(' expression ')' ; // // Parse a return statement and return its AST static struct ASTnode *return_statement(void) { struct ASTnode *tree; // Can't return a value if function returns P_VOID if (Functionid->type == P_VOID) fatal("Can't return from a void function"); // Ensure we have 'return' '(' match(T_RETURN, "return"); lparen(); // Parse the following expression tree = binexpr(0); // Ensure this is compatible with the function's type tree = modify_type(tree, Functionid->type, 0); if (tree == NULL) fatal("Incompatible type to return"); // Add on the A_RETURN node tree = mkastunary(A_RETURN, P_NONE, tree, NULL, 0); // Get the ')' rparen(); return (tree); } // Parse a single statement and return its AST static struct ASTnode *single_statement(void) { int type; struct symtable *ctype; switch (Token.token) { case T_CHAR: case T_INT: case T_LONG: // The beginning of a variable declaration. // Parse the type and get the identifier. 
// Then parse the rest of the declaration // and skip over the semicolon type = parse_type(&ctype); ident(); var_declaration(type, ctype, C_LOCAL); semi(); return (NULL); // No AST generated here case T_IF: return (if_statement()); case T_WHILE: return (while_statement()); case T_FOR: return (for_statement()); case T_RETURN: return (return_statement()); default: // For now, see if this is an expression. // This catches assignment statements. return (binexpr(0)); } return (NULL); // Keep -Wall happy } // Parse a compound statement // and return its AST struct ASTnode *compound_statement(void) { struct ASTnode *left = NULL; struct ASTnode *tree; // Require a left curly bracket lbrace(); while (1) { // Parse a single statement tree = single_statement(); // Some statements must be followed by a semicolon if (tree != NULL && (tree->op == A_ASSIGN || tree->op == A_RETURN || tree->op == A_FUNCCALL)) semi(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, P_NONE, left, NULL, tree, NULL, 0); } // When we hit a right curly bracket, // skip past it and return the AST if (Token.token == T_RBRACE) { rbrace(); return (left); } } } ================================================ FILE: 34_Enums_and_Typedefs/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 // Append a node to the singly-linked list pointed to by head or tail void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node) { // Check for valid pointers if (head == NULL || tail == NULL || node == NULL) fatal("Either head, tail or node is NULL in appendsym"); // Append to the list if (*tail) { (*tail)->next = node; *tail = node; } else *head = *tail = node; node->next = NULL; } // Create a symbol node to be added to a symbol 
table list. // Set up the node's: // + type: char, int etc. // + ctype: composite type pointer for struct/union // + structural type: var, function, array etc. // + size: number of elements, or endlabel: end label for a function // + posn: Position information for local symbols // Return a pointer to the new node struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int size, int posn) { // Get a new node struct symtable *node = (struct symtable *) malloc(sizeof(struct symtable)); if (node == NULL) fatal("Unable to malloc a symbol table node in newsym"); // Fill in the values node->name = strdup(name); node->type = type; node->ctype = ctype; node->stype = stype; node->class = class; node->size = size; node->posn = posn; node->next = NULL; node->member = NULL; // Generate any global space if (class == C_GLOBAL) genglobsym(node); return (node); } // Add a symbol to the global symbol list struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_GLOBAL, size, 0); appendsym(&Globhead, &Globtail, sym); return (sym); } // Add a symbol to the local symbol list struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_LOCAL, size, 0); appendsym(&Loclhead, &Locltail, sym); return (sym); } // Add a symbol to the parameter list struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_PARAM, size, 0); appendsym(&Parmhead, &Parmtail, sym); return (sym); } // Add a symbol to the temporary member list struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_MEMBER, size, 0); appendsym(&Membhead, &Membtail, sym); return (sym); } // Add a struct to the struct list struct 
symtable *addstruct(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_STRUCT, size, 0); appendsym(&Structhead, &Structtail, sym); return (sym); } // Add a struct to the union list struct symtable *addunion(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_UNION, size, 0); appendsym(&Unionhead, &Uniontail, sym); return (sym); } // Add an enum type or value to the enum list. // Class is C_ENUMTYPE or C_ENUMVAL. // Use posn to store the int value. struct symtable *addenum(char *name, int class, int value) { struct symtable *sym = newsym(name, P_INT, NULL, 0, class, 0, value); appendsym(&Enumhead, &Enumtail, sym); return (sym); } // Add a typedef to the typedef list struct symtable *addtypedef(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_TYPEDEF, size, 0); appendsym(&Typehead, &Typetail, sym); return (sym); } // Search for a symbol in a specific list. // Return a pointer to the found node or NULL if not found. // If class is not zero, also match on the given class static struct symtable *findsyminlist(char *s, struct symtable *list, int class) { for (; list != NULL; list = list->next) if ((list->name != NULL) && !strcmp(s, list->name)) if (class==0 || class== list->class) return (list); return (NULL); } // Determine if the symbol s is in the global symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findglob(char *s) { return (findsyminlist(s, Globhead, 0)); } // Determine if the symbol s is in the local symbol table. // Return a pointer to the found node or NULL if not found. 
struct symtable *findlocl(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } return (findsyminlist(s, Loclhead, 0)); } // Determine if the symbol s is in the symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findsymbol(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } // Otherwise, try the local and global symbol lists node = findsyminlist(s, Loclhead, 0); if (node) return (node); return (findsyminlist(s, Globhead, 0)); } // Find a member in the member list // Return a pointer to the found node or NULL if not found. struct symtable *findmember(char *s) { return (findsyminlist(s, Membhead, 0)); } // Find a struct in the struct list // Return a pointer to the found node or NULL if not found. struct symtable *findstruct(char *s) { return (findsyminlist(s, Structhead, 0)); } // Find a struct in the union list // Return a pointer to the found node or NULL if not found. struct symtable *findunion(char *s) { return (findsyminlist(s, Unionhead, 0)); } // Find an enum type in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumtype(char *s) { return (findsyminlist(s, Enumhead, C_ENUMTYPE)); } // Find an enum value in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumval(char *s) { return (findsyminlist(s, Enumhead, C_ENUMVAL)); } // Find a type in the tyedef list // Return a pointer to the found node or NULL if not found. 
struct symtable *findtypedef(char *s) { return (findsyminlist(s, Typehead, 0)); } // Reset the contents of the symbol table void clear_symtable(void) { Globhead = Globtail = NULL; Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Membhead = Membtail = NULL; Structhead = Structtail = NULL; Unionhead = Uniontail = NULL; Enumhead = Enumtail = NULL; Typehead = Typetail = NULL; } // Clear all the entries in the local symbol table void freeloclsyms(void) { Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Functionid = NULL; } ================================================ FILE: 34_Enums_and_Typedefs/tests/err.input31.c ================================================ Expecting a primary expression, got token:15 on line 5 ================================================ FILE: 34_Enums_and_Typedefs/tests/err.input32.c ================================================ Unknown variable:cow on line 4 ================================================ FILE: 34_Enums_and_Typedefs/tests/err.input33.c ================================================ Incompatible type to return on line 4 ================================================ FILE: 34_Enums_and_Typedefs/tests/err.input34.c ================================================ For now, declaration of non-global arrays is not implemented on line 4 ================================================ FILE: 34_Enums_and_Typedefs/tests/err.input35.c ================================================ Duplicate local variable declaration:a on line 4 ================================================ FILE: 34_Enums_and_Typedefs/tests/err.input36.c ================================================ Type doesn't match prototype for parameter:2 on line 4 ================================================ FILE: 34_Enums_and_Typedefs/tests/err.input37.c ================================================ Unexpected token in parameter list:15 on line 3 ================================================ FILE: 
34_Enums_and_Typedefs/tests/err.input38.c ================================================ Type doesn't match prototype for parameter:2 on line 4 ================================================ FILE: 34_Enums_and_Typedefs/tests/err.input39.c ================================================ No statements in function with non-void type on line 4 ================================================ FILE: 34_Enums_and_Typedefs/tests/err.input40.c ================================================ No return for function with non-void type on line 4 ================================================ FILE: 34_Enums_and_Typedefs/tests/err.input41.c ================================================ Can't return from a void function on line 3 ================================================ FILE: 34_Enums_and_Typedefs/tests/err.input42.c ================================================ Undeclared function:fred on line 3 ================================================ FILE: 34_Enums_and_Typedefs/tests/err.input43.c ================================================ Undeclared array:b on line 3 ================================================ FILE: 34_Enums_and_Typedefs/tests/err.input44.c ================================================ Unknown variable:z on line 3 ================================================ FILE: 34_Enums_and_Typedefs/tests/err.input45.c ================================================ & operator must be followed by an identifier on line 3 ================================================ FILE: 34_Enums_and_Typedefs/tests/err.input46.c ================================================ * operator must be followed by an identifier or * on line 3 ================================================ FILE: 34_Enums_and_Typedefs/tests/err.input47.c ================================================ ++ operator must be followed by an identifier on line 3 ================================================ FILE: 34_Enums_and_Typedefs/tests/err.input48.c 
================================================ -- operator must be followed by an identifier on line 3 ================================================ FILE: 34_Enums_and_Typedefs/tests/err.input49.c ================================================ Incompatible expression in assignment on line 6 ================================================ FILE: 34_Enums_and_Typedefs/tests/err.input50.c ================================================ Incompatible types in binary expression on line 6 ================================================ FILE: 34_Enums_and_Typedefs/tests/err.input51.c ================================================ Expected '\'' at end of char literal on line 4 ================================================ FILE: 34_Enums_and_Typedefs/tests/err.input52.c ================================================ Unrecognised character:$ on line 5 ================================================ FILE: 34_Enums_and_Typedefs/tests/err.input56.c ================================================ unknown struct/union type:var1 on line 2 ================================================ FILE: 34_Enums_and_Typedefs/tests/err.input57.c ================================================ previously defined struct/union:fred on line 2 ================================================ FILE: 34_Enums_and_Typedefs/tests/err.input59.c ================================================ Undeclared variable:y on line 3 ================================================ FILE: 34_Enums_and_Typedefs/tests/err.input60.c ================================================ Undeclared variable:x on line 3 ================================================ FILE: 34_Enums_and_Typedefs/tests/err.input61.c ================================================ Undeclared variable:x on line 3 ================================================ FILE: 34_Enums_and_Typedefs/tests/err.input64.c ================================================ undeclared enum type::fred on line 1 
================================================ FILE: 34_Enums_and_Typedefs/tests/err.input65.c ================================================ enum type redeclared::fred on line 2 ================================================ FILE: 34_Enums_and_Typedefs/tests/err.input66.c ================================================ enum value redeclared::z on line 2 ================================================ FILE: 34_Enums_and_Typedefs/tests/err.input68.c ================================================ redefinition of typedef:FOO on line 2 ================================================ FILE: 34_Enums_and_Typedefs/tests/err.input69.c ================================================ unknown type:FLOO on line 2 ================================================ FILE: 34_Enums_and_Typedefs/tests/input01.c ================================================ int printf(char *fmt); void main() { printf("%d\n", 12 * 3); printf("%d\n", 18 - 2 * 4); printf("%d\n", 1 + 2 + 9 - 5/2 + 3*5); } ================================================ FILE: 34_Enums_and_Typedefs/tests/input02.c ================================================ int printf(char *fmt); void main() { int fred; int jim; fred= 5; jim= 12; printf("%d\n", fred + jim); } ================================================ FILE: 34_Enums_and_Typedefs/tests/input03.c ================================================ int printf(char *fmt); void main() { int x; x= 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); } ================================================ FILE: 34_Enums_and_Typedefs/tests/input04.c ================================================ int printf(char *fmt); void main() { int x; x= 7 < 9; printf("%d\n", x); x= 7 <= 9; printf("%d\n", x); x= 7 != 9; printf("%d\n", x); x= 7 == 7; printf("%d\n", x); x= 7 >= 7; printf("%d\n", x); x= 7 <= 7; printf("%d\n", x); x= 9 > 7; printf("%d\n", x); x= 9 >= 7; printf("%d\n", x); x= 9 
!= 7; printf("%d\n", x); } ================================================ FILE: 34_Enums_and_Typedefs/tests/input05.c ================================================ int printf(char *fmt); void main() { int i; int j; i=6; j=12; if (i < j) { printf("%d\n", i); } else { printf("%d\n", j); } } ================================================ FILE: 34_Enums_and_Typedefs/tests/input06.c ================================================ int printf(char *fmt); void main() { int i; i=1; while (i <= 10) { printf("%d\n", i); i= i + 1; } } ================================================ FILE: 34_Enums_and_Typedefs/tests/input07.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 34_Enums_and_Typedefs/tests/input08.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 34_Enums_and_Typedefs/tests/input09.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { printf("%d\n", 2 * b - a); } } ================================================ FILE: 34_Enums_and_Typedefs/tests/input10.c ================================================ int printf(char *fmt); void main() { int i; char j; j= 20; printf("%d\n", j); i= 10; printf("%d\n", i); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 2; j= j + 1) { printf("%d\n", j); } } ================================================ FILE: 34_Enums_and_Typedefs/tests/input11.c ================================================ int printf(char *fmt); int main() { int i; char j; long k; i= 10; printf("%d\n", i); j= 20; printf("%d\n", j); k= 30; printf("%d\n", k); for (i= 1; i <= 
5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 4; j= j + 1) { printf("%d\n", j); } for (k= 1; k <= 5; k= k + 1) { printf("%d\n", k); } return(i); printf("%d\n", 12345); return(3); } ================================================ FILE: 34_Enums_and_Typedefs/tests/input12.c ================================================ int printf(char *fmt); int fred() { return(5); } void main() { int x; x= fred(2); printf("%d\n", x); } ================================================ FILE: 34_Enums_and_Typedefs/tests/input13.c ================================================ int printf(char *fmt); int fred() { return(56); } void main() { int dummy; int result; dummy= printf("%d\n", 23); result= fred(10); dummy= printf("%d\n", result); } ================================================ FILE: 34_Enums_and_Typedefs/tests/input14.c ================================================ int printf(char *fmt); int fred() { return(20); } int main() { int result; printf("%d\n", 10); result= fred(15); printf("%d\n", result); printf("%d\n", fred(15)+10); return(0); } ================================================ FILE: 34_Enums_and_Typedefs/tests/input15.c ================================================ int printf(char *fmt); int main() { char a; char *b; char c; int d; int *e; int f; a= 18; printf("%d\n", a); b= &a; c= *b; printf("%d\n", c); d= 12; printf("%d\n", d); e= &d; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 34_Enums_and_Typedefs/tests/input16.c ================================================ int printf(char *fmt); int c; int d; int *e; int f; int main() { c= 12; d=18; printf("%d\n", c); e= &c + 1; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 34_Enums_and_Typedefs/tests/input17.c ================================================ int printf(char *fmt); int main() { char a; char *b; int d; int *e; b= &a; *b= 19; printf("%d\n", a); e= &d; *e= 12; printf("%d\n", d); return(0); } 
================================================ FILE: 34_Enums_and_Typedefs/tests/input18.c ================================================ int printf(char *fmt); int main() { int a; int b; a= b= 34; printf("%d\n", a); printf("%d\n", b); return(0); } ================================================ FILE: 34_Enums_and_Typedefs/tests/input18a.c ================================================ int printf(char *fmt); int a; int *b; char c; char *d; int main() { b= &a; *b= 15; printf("%d\n", a); d= &c; *d= 16; printf("%d\n", c); return(0); } ================================================ FILE: 34_Enums_and_Typedefs/tests/input19.c ================================================ int printf(char *fmt); int a; int b; int c; int d; int e; int main() { a= 2; b= 4; c= 3; d= 2; e= (a+b) * (c+d); printf("%d\n", e); return(0); } ================================================ FILE: 34_Enums_and_Typedefs/tests/input20.c ================================================ int printf(char *fmt); int a; int b[25]; int main() { b[3]= 12; a= b[3]; printf("%d\n", a); return(0); } ================================================ FILE: 34_Enums_and_Typedefs/tests/input21.c ================================================ int printf(char *fmt); char c; char *str; int main() { c= '\n'; printf("%d\n", c); for (str= "Hello world\n"; *str != 0; str= str + 1) { printf("%c", *str); } return(0); } ================================================ FILE: 34_Enums_and_Typedefs/tests/input22.c ================================================ int printf(char *fmt); char a; char b; char c; int d; int e; int f; long g; long h; long i; int main() { b= 5; c= 7; a= b + c++; printf("%d\n", a); e= 5; f= 7; d= e + f++; printf("%d\n", d); h= 5; i= 7; g= h + i++; printf("%d\n", g); a= b-- + c; printf("%d\n", a); d= e-- + f; printf("%d\n", d); g= h-- + i; printf("%d\n", g); a= ++b + c; printf("%d\n", a); d= ++e + f; printf("%d\n", d); g= ++h + i; printf("%d\n", g); a= b * --c; printf("%d\n", a); d= e * --f; 
printf("%d\n", d); g= h * --i; printf("%d\n", g); return(0); } ================================================ FILE: 34_Enums_and_Typedefs/tests/input23.c ================================================ int printf(char *fmt); char *str; int x; int main() { x= -23; printf("%d\n", x); printf("%d\n", -10 * -10); x= 1; x= ~x; printf("%d\n", x); x= 2 > 5; printf("%d\n", x); x= !x; printf("%d\n", x); x= !x; printf("%d\n", x); x= 13; if (x) { printf("%d\n", 13); } x= 0; if (!x) { printf("%d\n", 14); } for (str= "Hello world\n"; *str; str++) { printf("%c", *str); } return(0); } ================================================ FILE: 34_Enums_and_Typedefs/tests/input24.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { a= 42; b= 19; printf("%d\n", a & b); printf("%d\n", a | b); printf("%d\n", a ^ b); printf("%d\n", 1 << 3); printf("%d\n", 63 >> 3); return(0); } ================================================ FILE: 34_Enums_and_Typedefs/tests/input25.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { char z; int y; int x; x= 10; y= 20; z= 30; printf("%d\n", x); printf("%d\n", y); printf("%d\n", z); a= 5; b= 15; c= 25; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); return(0); } ================================================ FILE: 34_Enums_and_Typedefs/tests/input26.c ================================================ int printf(char *fmt); int main(int a, char b, long c, int d, int e, int f, int g, int h) { int i; int j; int k; a= 13; printf("%d\n", a); b= 23; printf("%d\n", b); c= 34; printf("%d\n", c); d= 44; printf("%d\n", d); e= 54; printf("%d\n", e); f= 64; printf("%d\n", f); g= 74; printf("%d\n", g); h= 84; printf("%d\n", h); i= 94; printf("%d\n", i); j= 95; printf("%d\n", j); k= 96; printf("%d\n", k); return(0); } ================================================ FILE: 34_Enums_and_Typedefs/tests/input27.c 
================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int param5(int a, int b, int c, int d, int e) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param2(int a, int b) { int c; int d; int e; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param0() { int a; int b; int c; int d; int e; a= 1; b= 2; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int main() { param8(1,2,3,4,5,6,7,8); param5(1,2,3,4,5); param2(1,2); param0(); return(0); } ================================================ FILE: 34_Enums_and_Typedefs/tests/input28.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 34_Enums_and_Typedefs/tests/input29.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h); int fred(int a, int b, int c); int main(); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int 
b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 34_Enums_and_Typedefs/tests/input30.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input30.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 34_Enums_and_Typedefs/tests/input31.c ================================================ int printf(char *fmt); int main() { int x; x= 2 + + 3 - * / ; } ================================================ FILE: 34_Enums_and_Typedefs/tests/input32.c ================================================ int printf(char *fmt); int main() { pizza cow llama sausage; } ================================================ FILE: 34_Enums_and_Typedefs/tests/input33.c ================================================ int printf(char *fmt); int main() { char *z; return(z); } ================================================ FILE: 34_Enums_and_Typedefs/tests/input34.c ================================================ int printf(char *fmt); int main() { int a[12]; return(0); } ================================================ FILE: 34_Enums_and_Typedefs/tests/input35.c ================================================ int printf(char *fmt); int fred(int a, int b) { int a; return(a); } ================================================ FILE: 34_Enums_and_Typedefs/tests/input36.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c); ================================================ FILE: 34_Enums_and_Typedefs/tests/input37.c 
================================================ int printf(char *fmt); int fred(int a, char b +, int z); ================================================ FILE: 34_Enums_and_Typedefs/tests/input38.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c, int g); ================================================ FILE: 34_Enums_and_Typedefs/tests/input39.c ================================================ int printf(char *fmt); int main() { int a; } ================================================ FILE: 34_Enums_and_Typedefs/tests/input40.c ================================================ int printf(char *fmt); int main() { int a; a= 5; } ================================================ FILE: 34_Enums_and_Typedefs/tests/input41.c ================================================ int printf(char *fmt); void fred() { return(5); } ================================================ FILE: 34_Enums_and_Typedefs/tests/input42.c ================================================ int printf(char *fmt); int main() { fred(5); } ================================================ FILE: 34_Enums_and_Typedefs/tests/input43.c ================================================ int printf(char *fmt); int main() { int a; a= b[4]; } ================================================ FILE: 34_Enums_and_Typedefs/tests/input44.c ================================================ int printf(char *fmt); int main() { int a; a= z; } ================================================ FILE: 34_Enums_and_Typedefs/tests/input45.c ================================================ int printf(char *fmt); int main() { int a; a= &5; } ================================================ FILE: 34_Enums_and_Typedefs/tests/input46.c ================================================ int printf(char *fmt); int main() { int a; a= *5; } ================================================ FILE: 34_Enums_and_Typedefs/tests/input47.c 
================================================ int printf(char *fmt); int main() { int a; a= ++5; } ================================================ FILE: 34_Enums_and_Typedefs/tests/input48.c ================================================ int printf(char *fmt); int main() { int a; a= --5; } ================================================ FILE: 34_Enums_and_Typedefs/tests/input49.c ================================================ int printf(char *fmt); int main() { int x; char y; y= x; } ================================================ FILE: 34_Enums_and_Typedefs/tests/input50.c ================================================ int printf(char *fmt); int main() { char *a; char *b; a= a + b; } ================================================ FILE: 34_Enums_and_Typedefs/tests/input51.c ================================================ int printf(char *fmt); int main() { char a; a= 'fred'; } ================================================ FILE: 34_Enums_and_Typedefs/tests/input52.c ================================================ int printf(char *fmt); int main() { int a; a= $5.00; } ================================================ FILE: 34_Enums_and_Typedefs/tests/input53.c ================================================ int printf(char *fmt); int main() { printf("Hello world, %d\n", 23); return(0); } ================================================ FILE: 34_Enums_and_Typedefs/tests/input54.c ================================================ int printf(char *fmt); int main() { int i; for (i=0; i < 20; i++) { printf("Hello world, %d\n", i); } return(0); } ================================================ FILE: 34_Enums_and_Typedefs/tests/input55.c ================================================ int printf(char *fmt); int main(int argc, char **argv) { int i; char *argument; printf("Hello world\n"); for (i=0; i < argc; i++) { argument= *argv; argv= argv + 1; printf("Argument %d is %s\n", i, argument); } return(0); } ================================================ 
FILE: 34_Enums_and_Typedefs/tests/input56.c ================================================ struct foo { int x; }; struct mary var1; ================================================ FILE: 34_Enums_and_Typedefs/tests/input57.c ================================================ struct fred { int x; } ; struct fred { char y; } ; ================================================ FILE: 34_Enums_and_Typedefs/tests/input58.c ================================================ int printf(char *fmt); struct fred { int x; char y; long z; }; struct fred var2; struct fred *varptr; int main() { long result; var2.x= 12; printf("%d\n", var2.x); var2.y= 'c'; printf("%d\n", var2.y); var2.z= 4005; printf("%d\n", var2.z); result= var2.x + var2.y + var2.z; printf("%d\n", result); varptr= &var2; result= varptr->x + varptr->y + varptr->z; printf("%d\n", result); return(0); } ================================================ FILE: 34_Enums_and_Typedefs/tests/input59.c ================================================ int main() { int x; x= y.foo; } ================================================ FILE: 34_Enums_and_Typedefs/tests/input60.c ================================================ int main() { int x; x= x.foo; } ================================================ FILE: 34_Enums_and_Typedefs/tests/input61.c ================================================ int main() { int x; x= x->foo; } ================================================ FILE: 34_Enums_and_Typedefs/tests/input62.c ================================================ int printf(char *fmt); union fred { char w; int x; int y; long z; }; union fred var1; union fred *varptr; int main() { var1.x= 65; printf("%d\n", var1.x); var1.x= 66; printf("%d\n", var1.x); printf("%d\n", var1.y); printf("The next two depend on the endian of the platform\n"); printf("%d\n", var1.w); printf("%d\n", var1.z); varptr= &var1; varptr->x= 67; printf("%d\n", varptr->x); printf("%d\n", varptr->y); return(0); } ================================================ 
FILE: 34_Enums_and_Typedefs/tests/input63.c ================================================ int printf(char *fmt); enum fred { apple=1, banana, carrot, pear=10, peach, mango, papaya }; enum jane { aple=1, bnana, crrot, par=10, pech, mago, paaya }; enum fred var1; enum jane var2; enum fred var3; int main() { var1= carrot + pear + mango; printf("%d\n", var1); return(0); } ================================================ FILE: 34_Enums_and_Typedefs/tests/input64.c ================================================ enum fred var3; ================================================ FILE: 34_Enums_and_Typedefs/tests/input65.c ================================================ enum fred { x, y, z }; enum fred { a, b }; ================================================ FILE: 34_Enums_and_Typedefs/tests/input66.c ================================================ enum fred { x, y, z }; enum mary { a, b, z }; ================================================ FILE: 34_Enums_and_Typedefs/tests/input67.c ================================================ int printf(char *fmt); typedef int FOO; FOO var1; struct bar { int x; int y} ; typedef struct bar BAR; BAR var2; int main() { var1= 5; printf("%d\n", var1); var2.x= 7; var2.y= 10; printf("%d\n", var2.x + var2.y); return(0); } ================================================ FILE: 34_Enums_and_Typedefs/tests/input68.c ================================================ typedef int FOO; typedef char FOO; ================================================ FILE: 34_Enums_and_Typedefs/tests/input69.c ================================================ typedef int FOO; FLOO y; ================================================ FILE: 34_Enums_and_Typedefs/tests/mktests ================================================
#!/bin/sh
# Make the output files for each test.
#
# For every input*c file that has neither an "out." nor an "err." file,
# compile it with our compiler. If our compiler wrote anything to stderr,
# keep that as the expected error file. Otherwise build the program with
# the system compiler and record what it prints as the expected output.

# Build our compiler if needed
if [ ! -f ../cwj ]
then (cd ..; make)
fi

for i in input*c
do
  # If the glob matched nothing, $i is the literal pattern: skip it
  # rather than creating a bogus "err.input*c" file
  [ -f "$i" ] || continue

  # Only generate files for tests that have no expected result yet
  if [ ! -f "out.$i" ] && [ ! -f "err.$i" ]
  then
    ../cwj -o out "$i" 2> "err.$i"

    # If the err file is empty, the compile was clean: replace
    # it with the known-good output from the system compiler
    if [ ! -s "err.$i" ]
    then
      rm -f "err.$i"
      cc -o out "$i" ../lib/printint.c
      ./out > "out.$i"
    fi
  fi
  rm -f out out.s
done
================================================ FILE: 34_Enums_and_Typedefs/tests/out.input01.c ================================================ 36 10 25 ================================================ FILE: 34_Enums_and_Typedefs/tests/out.input02.c ================================================ 17 ================================================ FILE: 34_Enums_and_Typedefs/tests/out.input03.c ================================================ 1 2 3 4 5 ================================================ FILE: 34_Enums_and_Typedefs/tests/out.input04.c ================================================ 1 1 1 1 1 1 1 1 1 ================================================ FILE: 34_Enums_and_Typedefs/tests/out.input05.c ================================================ 6 ================================================ FILE: 34_Enums_and_Typedefs/tests/out.input06.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 34_Enums_and_Typedefs/tests/out.input07.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 34_Enums_and_Typedefs/tests/out.input08.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 34_Enums_and_Typedefs/tests/out.input09.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 34_Enums_and_Typedefs/tests/out.input10.c ================================================ 20 10 1 2 3 4 5 253 254 255 0 1 ================================================ FILE: 34_Enums_and_Typedefs/tests/out.input11.c ================================================ 10 20 30 1 2 3 4 5 253 254 255 0 1 2 3 1 2 3
4 5 ================================================ FILE: 34_Enums_and_Typedefs/tests/out.input12.c ================================================ 5 ================================================ FILE: 34_Enums_and_Typedefs/tests/out.input13.c ================================================ 23 56 ================================================ FILE: 34_Enums_and_Typedefs/tests/out.input14.c ================================================ 10 20 30 ================================================ FILE: 34_Enums_and_Typedefs/tests/out.input15.c ================================================ 18 18 12 12 ================================================ FILE: 34_Enums_and_Typedefs/tests/out.input16.c ================================================ 12 18 ================================================ FILE: 34_Enums_and_Typedefs/tests/out.input17.c ================================================ 19 12 ================================================ FILE: 34_Enums_and_Typedefs/tests/out.input18.c ================================================ 34 34 ================================================ FILE: 34_Enums_and_Typedefs/tests/out.input18a.c ================================================ 15 16 ================================================ FILE: 34_Enums_and_Typedefs/tests/out.input19.c ================================================ 30 ================================================ FILE: 34_Enums_and_Typedefs/tests/out.input20.c ================================================ 12 ================================================ FILE: 34_Enums_and_Typedefs/tests/out.input21.c ================================================ 10 Hello world ================================================ FILE: 34_Enums_and_Typedefs/tests/out.input22.c ================================================ 12 12 12 13 13 13 13 13 13 35 35 35 ================================================ FILE: 34_Enums_and_Typedefs/tests/out.input23.c 
================================================ -23 100 -2 0 1 0 13 14 Hello world ================================================ FILE: 34_Enums_and_Typedefs/tests/out.input24.c ================================================ 2 59 57 8 7 ================================================ FILE: 34_Enums_and_Typedefs/tests/out.input25.c ================================================ 10 20 30 5 15 25 ================================================ FILE: 34_Enums_and_Typedefs/tests/out.input26.c ================================================ 13 23 34 44 54 64 74 84 94 95 96 ================================================ FILE: 34_Enums_and_Typedefs/tests/out.input27.c ================================================ 1 2 3 4 5 6 7 8 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 ================================================ FILE: 34_Enums_and_Typedefs/tests/out.input28.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 34_Enums_and_Typedefs/tests/out.input29.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 34_Enums_and_Typedefs/tests/out.input30.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input30.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 34_Enums_and_Typedefs/tests/out.input53.c ================================================ Hello world, 23 ================================================ FILE: 34_Enums_and_Typedefs/tests/out.input54.c ================================================ Hello world, 0 Hello world, 1 Hello world, 2 Hello world, 3 Hello world, 4 
Hello world, 5 Hello world, 6 Hello world, 7 Hello world, 8 Hello world, 9 Hello world, 10 Hello world, 11 Hello world, 12 Hello world, 13 Hello world, 14 Hello world, 15 Hello world, 16 Hello world, 17 Hello world, 18 Hello world, 19 ================================================ FILE: 34_Enums_and_Typedefs/tests/out.input55.c ================================================ Hello world Argument 0 is ./out ================================================ FILE: 34_Enums_and_Typedefs/tests/out.input58.c ================================================ 12 99 4005 4116 4116 ================================================ FILE: 34_Enums_and_Typedefs/tests/out.input62.c ================================================ 65 66 66 The next two depend on the endian of the platform 66 66 67 67 ================================================ FILE: 34_Enums_and_Typedefs/tests/out.input63.c ================================================ 25 ================================================ FILE: 34_Enums_and_Typedefs/tests/out.input67.c ================================================ 5 17 ================================================ FILE: 34_Enums_and_Typedefs/tests/runtests ================================================
#!/bin/sh
# Run each test and compare
# against known good output

# Compare the expected file $1 against the trial file $2 and
# announce the result for the test whose name was just printed.
# Any non-zero status from cmp counts as a failure: status 1 means
# the files differ, a higher status means cmp could not read one
# of them, and neither case must be reported as a pass.
check_result() {
  if cmp -s "$1" "$2"
  then echo ": OK"
  else
    # Announce failure and print out the difference
    echo ": failed"
    diff -c "$1" "$2"
    echo
  fi
}

# Build our compiler if needed
if [ ! -f ../cwj ]
then (cd ..; make)
fi

# Try to use each input source file
for i in input*c
do
  # Output file: compile the source, run it and
  # capture the output, and compare it against
  # the known-good output
  if [ -f "out.$i" ]
  then
    # Print the test name (no newline), compile it
    # with our compiler
    printf '%s' "$i"
    ../cwj -o out "$i"
    ./out > "trial.$i"

    # Compare this against the correct output
    check_result "out.$i" "trial.$i"

  # Error file: compile the source and
  # capture the error messages. Compare
  # against the known-bad output. Same
  # mechanism as before
  elif [ -f "err.$i" ]
  then
    printf '%s' "$i"
    ../cwj "$i" 2> "trial.$i"
    check_result "err.$i" "trial.$i"

  # We can't do anything if there's no file to test against
  else
    echo "Can't run test on $i, no output file!"
  fi
  rm -f out out.s "trial.$i"
done
================================================ FILE: 34_Enums_and_Typedefs/tests/runtestsn ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! -f ../compn ] then (cd ..; make) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" # Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../compn -o out $i ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../compn $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?"
-eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.o out.s "trial.$i" done ================================================ FILE: 34_Enums_and_Typedefs/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->type = type; n->left = left; n->mid = mid; n->right = right; n->sym = sym; n->intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, struct symtable *sym, int intvalue) { return (mkastnode(op, type, NULL, NULL, NULL, sym, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, struct symtable *sym, int intvalue) { return (mkastnode(op, type, left, NULL, NULL, sym, intvalue)); } // Generate and return a new label number // just for AST dumping purposes static int gendumplabel(void) { static int id = 1; return (id++); } // Given an AST tree, print it out and follow the // traversal of the tree that genAST() follows void dumpAST(struct ASTnode *n, int label, int level) { int Lfalse, Lstart, Lend; switch (n->op) { case A_IF: Lfalse = gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_IF"); if (n->right) { Lend = gendumplabel(); fprintf(stdout, ", end L%d", Lend); } fprintf(stdout, "\n"); dumpAST(n->left, Lfalse, level + 2); dumpAST(n->mid, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); return; case A_WHILE: Lstart = 
gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_WHILE, start L%d\n", Lstart); Lend = gendumplabel(); dumpAST(n->left, Lend, level + 2); dumpAST(n->right, NOLABEL, level + 2); return; } // Reset level to -2 for A_GLUE if (n->op == A_GLUE) level = -2; // General AST node handling if (n->left) dumpAST(n->left, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); for (int i = 0; i < level; i++) fprintf(stdout, " "); switch (n->op) { case A_GLUE: fprintf(stdout, "\n\n"); return; case A_FUNCTION: fprintf(stdout, "A_FUNCTION %s\n", n->sym->name); return; case A_ADD: fprintf(stdout, "A_ADD\n"); return; case A_SUBTRACT: fprintf(stdout, "A_SUBTRACT\n"); return; case A_MULTIPLY: fprintf(stdout, "A_MULTIPLY\n"); return; case A_DIVIDE: fprintf(stdout, "A_DIVIDE\n"); return; case A_EQ: fprintf(stdout, "A_EQ\n"); return; case A_NE: fprintf(stdout, "A_NE\n"); return; case A_LT: fprintf(stdout, "A_LE\n"); return; case A_GT: fprintf(stdout, "A_GT\n"); return; case A_LE: fprintf(stdout, "A_LE\n"); return; case A_GE: fprintf(stdout, "A_GE\n"); return; case A_INTLIT: fprintf(stdout, "A_INTLIT %d\n", n->intvalue); return; case A_STRLIT: fprintf(stdout, "A_STRLIT rval label L%d\n", n->intvalue); return; case A_IDENT: if (n->rvalue) fprintf(stdout, "A_IDENT rval %s\n", n->sym->name); else fprintf(stdout, "A_IDENT %s\n", n->sym->name); return; case A_ASSIGN: fprintf(stdout, "A_ASSIGN\n"); return; case A_WIDEN: fprintf(stdout, "A_WIDEN\n"); return; case A_RETURN: fprintf(stdout, "A_RETURN\n"); return; case A_FUNCCALL: fprintf(stdout, "A_FUNCCALL %s\n", n->sym->name); return; case A_ADDR: fprintf(stdout, "A_ADDR %s\n", n->sym->name); return; case A_DEREF: if (n->rvalue) fprintf(stdout, "A_DEREF rval\n"); else fprintf(stdout, "A_DEREF\n"); return; case A_SCALE: fprintf(stdout, "A_SCALE %d\n", n->size); return; case A_PREINC: fprintf(stdout, "A_PREINC %s\n", n->sym->name); return; case A_PREDEC: fprintf(stdout, "A_PREDEC %s\n", 
n->sym->name); return; case A_POSTINC: fprintf(stdout, "A_POSTINC\n"); return; case A_POSTDEC: fprintf(stdout, "A_POSTDEC\n"); return; case A_NEGATE: fprintf(stdout, "A_NEGATE\n"); return; default: fatald("Unknown dumpAST operator", n->op); } } ================================================ FILE: 34_Enums_and_Typedefs/types.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Types and type handling // Copyright (c) 2019 Warren Toomey, GPL3 // Return true if a type is an int type // of any size, false otherwise int inttype(int type) { return ((type & 0xf) == 0); } // Return true if a type is of pointer type int ptrtype(int type) { return ((type & 0xf) != 0); } // Given a primitive type, return // the type which is a pointer to it int pointer_to(int type) { if ((type & 0xf) == 0xf) fatald("Unrecognised in pointer_to: type", type); return (type + 1); } // Given a primitive pointer type, return // the type which it points to int value_at(int type) { if ((type & 0xf) == 0x0) fatald("Unrecognised in value_at: type", type); return (type - 1); } // Given a type and a composite type pointer, return // the size of this type in bytes int typesize(int type, struct symtable *ctype) { if (type == P_STRUCT || type == P_UNION) return (ctype->size); return (genprimsize(type)); } // Given an AST tree and a type which we want it to become, // possibly modify the tree by widening or scaling so that // it is compatible with this type. Return the original tree // if no changes occurred, a modified tree, or NULL if the // tree is not compatible with the given type. // If this will be part of a binary operation, the AST op is not zero. 
struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op) { int ltype; int lsize, rsize; ltype = tree->type; // XXX No idea on these yet if (ltype == P_STRUCT || ltype == P_UNION) fatal("Don't know how to do this yet"); if (rtype == P_STRUCT || rtype == P_UNION) fatal("Don't know how to do this yet"); // Compare scalar int types if (inttype(ltype) && inttype(rtype)) { // Both types same, nothing to do if (ltype == rtype) return (tree); // Get the sizes for each type lsize = typesize(ltype, NULL); // XXX Fix soon rsize = typesize(rtype, NULL); // XXX Fix soon // Tree's size is too big if (lsize > rsize) return (NULL); // Widen to the right if (rsize > lsize) return (mkastunary(A_WIDEN, rtype, tree, NULL, 0)); } // For pointers on the left if (ptrtype(ltype)) { // OK is same type on right and not doing a binary op if (op == 0 && ltype == rtype) return (tree); } // We can scale only on A_ADD or A_SUBTRACT operation if (op == A_ADD || op == A_SUBTRACT) { // Left is int type, right is pointer type and the size // of the original type is >1: scale the left if (inttype(ltype) && ptrtype(rtype)) { rsize = genprimsize(value_at(rtype)); if (rsize > 1) return (mkastunary(A_SCALE, rtype, tree, NULL, rsize)); else return (tree); // Size 1, no need to scale } } // If we get here, the types are not compatible return (NULL); } ================================================ FILE: 35_Preprocessor/Makefile ================================================ # Define the location of the include directory # and the location to install the compiler binary INCDIR=/tmp/include BINDIR=/tmp HSRCS= data.h decl.h defs.h SRCS= cg.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c ARMSRCS= cg_arm.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c cwj: $(SRCS) $(HSRCS) cc -o cwj -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCS) compn: $(SRCN) $(HSRCS) cc -D__NASM__ -o 
compn -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCN) cwjarm: $(ARMSRCS) $(HSRCS) cc -o cwjarm -g -Wall $(ARMSRCS) cp cwjarm cwj install: cwj mkdir -p $(INCDIR) rsync -a include/. $(INCDIR) cp cwj $(BINDIR) chmod +x $(BINDIR)/cwj installn: compn mkdir -p $(INCDIR) rsync -a include/. $(INCDIR) cp compn $(BINDIR) chmod +x $(BINDIR)/compn clean: rm -f cwj cwjarm compn *.o *.s out a.out test: install tests/runtests (cd tests; chmod +x runtests; ./runtests) armtest: cwjarm tests/runtests (cd tests; chmod +x runtests; ./runtests) testn: installn tests/runtestsn (cd tests; chmod +x runtestsn; ./runtestsn) ================================================ FILE: 35_Preprocessor/Readme.md ================================================ # Part 35: The C Pre-Processor In this part of our compiler writing journey, I've added support for an external C pre-processor, and I also added the `extern` keyword to our language. We've reached the point where we can write [header files](https://www.tutorialspoint.com/cprogramming/c_header_files.htm) for our programs, and also put comments in them. I must admit, this feels good. ## The C Pre-Processor I don't want to write about the C pre-processor itself, even though it is a very important part of any C environment. Instead, I'll point you at these two articles to read: + [C Preprocessor](https://en.wikipedia.org/wiki/C_preprocessor) at *Wikipedia* + [C Preprocessor and Macros](https://www.programiz.com/c-programming/c-preprocessor-macros) at *www.programiz.com* ## Integrating the C Pre-Processor In other compilers like [SubC](http://www.t3x.org/subc/), the pre-processor is built right into the language. Here I've decided to use the external system C pre-processor which is usually the [GNU C pre-processor](https://gcc.gnu.org/onlinedocs/cpp/). Before I show you how I've done this, firstly we need to look at the lines that the pre-processor inserts as part of its operation.
Consider this short program (with lines numbered): ```c 1 #include <stdio.h> 2 3 int main() { 4 printf("Hello world\n"); 5 return(0); 6 } ``` Here is what we (the compiler) might receive from the pre-processor after it processes this file: ```c # 1 "z.c" # 1 "<built-in>" # 1 "<command-line>" # 1 "z.c" # 1 "include/stdio.h" 1 # 1 "include/stddef.h" 1 typedef long size_t; # 5 "include/stdio.h" 2 typedef char * FILE; FILE *fopen(char *pathname, char *mode); ... # 2 "z.c" 2 int main() { printf("Hello world\n"); return(0); } ``` Each pre-processor line starts with a '#', then the number of the following line, then the name of the file that this line comes from. The numbers at the end of some of the lines I don't really know what they are. I suspect, when one file includes another, they represent the line number of the file that did the including. (In fact, the GNU cpp documentation describes them as flags: `1` marks the start of a new file, and `2` marks the return to a file after an included file has ended.) Here is how I'm going to integrate the pre-processor with our compiler. I'm going to use `popen()` to open up a pipe from a process which is the pre-processor, and we will tell the pre-processor to work on our input file. Then we will modify the lexical scanner to identify the pre-processor lines and set the current line number and name of the file being processed. ## Modifications to `main.c` We have a new global variable, `char *Infilename`, defined in `data.h`. In the `do_compile()` function in `main.c` we now do this: ```c // Given an input filename, compile that file // down to assembly code. Return the new file's name static char *do_compile(char *filename) { char cmd[TEXTLEN]; ... // Generate the pre-processor command snprintf(cmd, TEXTLEN, "%s %s %s", CPPCMD, INCDIR, filename); // Open up the pre-processor pipe if ((Infile = popen(cmd, "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", filename, strerror(errno)); exit(1); } Infilename = filename; ``` which I think is a straight-forward piece of code, except that I haven't explained where `CPPCMD` and `INCDIR` come from.
`CPPCMD` is defined as the name of the pre-processor command in `defs.h`: ```c #define CPPCMD "cpp -nostdinc -isystem " ``` This tells the Gnu pre-processor to not use the standard include directory `/usr/include`: instead, `-isystem` tells the pre-processor to use the next thing on the command line which is `INCDIR`. `INCDIR` is actually defined in the `Makefile`, as this is a common place to put things that can be changed at configuration time: ```make # Define the location of the include directory # and the location to install the compiler binary INCDIR=/tmp/include BINDIR=/tmp ``` The compiler binary is now compiled with this `Makefile` rule: ```make cwj: $(SRCS) $(HSRCS) cc -o cwj -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCS) ``` and this passes the `/tmp/include` value in to the compilation as `INCDIR`. Now, when does `/tmp/include` get created, and what gets put there? ## Our First Set of Header Files In the `include/` directory in this area, I've made a start on some header files that are plain enough for our compiler to digest. We can't use the real system header files, as they contain lines like: ```c extern int _IO_feof (_IO_FILE *__fp) __attribute__ ((__nothrow__ , __leaf__)); extern int _IO_ferror (_IO_FILE *__fp) __attribute__ ((__nothrow__ , __leaf__)); ``` which would cause our compiler to have a fit! There is now a rule in the `Makefile` to copy our own header files to the `INCDIR` directory: ```make install: cwj mkdir -p $(INCDIR) rsync -a include/. $(INCDIR) cp cwj $(BINDIR) chmod +x $(BINDIR)/cwj ``` ## Scanning the Pre-Processor Input So now we are reading the pre-processor output from working on the input file, and not reading from the file directly. We now need to recognise pre-processor lines and set the number of the next line and the file's name where the line came from. I've modified the scanner to do this, as this already deals with incrementing the line number. 
So in `scan.c`, I've made this change to the `next()` function: ```c // Get the next character from the input file. static int next(void) { int c, l; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return (c); } c = fgetc(Infile); // Read from input file while (c == '#') { // We've hit a pre-processor statement scan(&Token); // Get the line number into l if (Token.token != T_INTLIT) fatals("Expecting pre-processor line number, got:", Text); l = Token.intvalue; scan(&Token); // Get the filename in Text if (Token.token != T_STRLIT) fatals("Expecting pre-processor file name, got:", Text); if (Text[0] != '<') { // If this is a real filename if (strcmp(Text, Infilename)) // and not the one we have now Infilename = strdup(Text); // save it. Then update the line num Line = l; } while ((c = fgetc(Infile)) != '\n'); // Skip to the end of the line c = fgetc(Infile); // and get the next character } if ('\n' == c) Line++; // Increment line count return (c); } ``` We use a 'while' loop because there can be successive pre-processor lines. We are fortunate that we can call `scan()` recursively to scan in both the line number as a T_INTLIT and the file's name as a T_STRLIT. The code ignores filenames that are enclosed in '<' ... '>', as these don't represent real filenames. We do have to `strdup()` the file's name as it is in the global `Text` variable which will get overwritten. However, if the name in `Text` is already what's in `Infilename`, we don't need to duplicate it. Once we have the line number and filename, we read up to and one character past the end of the line, then go back to our original character scanning code. And that turned out to be all that was needed to integrate the C pre-processor with our compiler. I had worried that it would be complex to do this, but it wasn't. 
## Preventing Unwanted Function/Variable Redeclarations Many header files include other header files, so there is a strong chance that one header file might get included multiple times. This would cause redeclarations of the same function and/or global variable. To prevent this, I'm using the normal header mechanism of defining a header-specific macro the first time a header file is included. This then prevents the contents of the header file being included a second time. As an example, here is what is currently in `include/stdio.h`: ```c #ifndef _STDIO_H_ # define _STDIO_H_ #include <stddef.h> // This FILE definition will do for now typedef char * FILE; FILE *fopen(char *pathname, char *mode); size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream); size_t fwrite(void *ptr, size_t size, size_t nmemb, FILE *stream); int fclose(FILE *stream); int printf(char *format); int fprintf(FILE *stream, char *format); #endif // _STDIO_H_ ``` Once `_STDIO_H_` is defined, it prevents this file's contents from being included a second time. ## The `extern` Keyword Now that we have a working pre-processor, I thought it would be time to add the `extern` keyword to the language. This allows us to declare a global variable without generating any storage for it: the assumption is that the variable has been defined as a global in another source file. The addition of `extern` actually has an impact across several files. Not a big impact, but a widespread impact. Let's see this. ### A New Token and Keyword So, we have a new keyword `extern` and a new token T_EXTERN in `scan.c`. As always, the code is there for you to read. ### A New Class In `defs.h` we have a new storage class: ```c // Storage classes enum { C_GLOBAL = 1, // Globally visible symbol ... C_EXTERN, // External globally visible symbol ... }; ``` The reason I put this in is because we already have this code for global symbols in `sym.c`: ```c // Create a symbol node to be added to a symbol table list. 
struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int size, int posn) { // Get a new node struct symtable *node = (struct symtable *) malloc(sizeof(struct symtable)); // Fill in the values ... // Generate any global space if (class == C_GLOBAL) genglobsym(node); ``` We want `extern` symbols added to the global list, but we don't want to call `genglobsym()` to create the storage for them. So, we need to call `newsym()` with a class that isn't C_GLOBAL. ### Changes to `sym.c` To this end, I've modified `addglob()` to take a `class` argument which is passed to `newsym()`: ```c // Add a symbol to the global symbol list struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int size) { struct symtable *sym = newsym(name, type, ctype, stype, class, size, 0); appendsym(&Globhead, &Globtail, sym); return (sym); } ``` This means that, everywhere that we call `addglob()` in the compiler, we now must pass in a `class` value. Before, `addglob()` would explicitly pass C_GLOBAL to `newsym()`. Now, we must pass the `class` value we want to `addglob()`. ### The `extern` Keyword and Our Grammar In terms of the grammar of our language, I'm going to enforce the rule that the `extern` keyword must come before any other words in a type description. Later on, I'll add `static` to the list of words. 
The [BNF Grammar for C](https://www.lysator.liu.se/c/ANSI-C-grammar-y.html) that we saw in past parts has these production rules: ``` storage_class_specifier : TYPEDEF | EXTERN | STATIC | AUTO | REGISTER ; type_specifier : VOID | CHAR | SHORT | INT | LONG | FLOAT | DOUBLE | SIGNED | UNSIGNED | struct_or_union_specifier | enum_specifier | TYPE_NAME ; declaration_specifiers : storage_class_specifier | storage_class_specifier declaration_specifiers | type_specifier | type_specifier declaration_specifiers | type_qualifier | type_qualifier declaration_specifiers ; ``` which I think allows `extern` to come anywhere in the type specification. Oh well, we are building a subset of the C language here! ### Parsing the `extern` Keyword As with the last five or six parts of this journey, I've made changes to `parse_type()` in `decl.c` again: ```c int parse_type(struct symtable **ctype, int *class) { int type, exstatic=1; // See if the class has been changed to extern (later, static) while (exstatic) { switch (Token.token) { case T_EXTERN: *class= C_EXTERN; scan(&Token); break; default: exstatic= 0; } } ... } ``` Note now that `parse_type()` has a second parameter, `int *class`. This allows the caller to pass in the initial storage class for the type (probably C_GLOBAL, C_LOCAL or C_PARAM). If we see the `extern` keyword (the T_EXTERN token) in `parse_type()`, we change the class to C_EXTERN. Also apologies, I couldn't think of a good name for the boolean flag that controls the 'while' loop. ### The `parse_type()` and `addglob()` Callers So we've modified the arguments to both `parse_type()` and `addglob()`. Now we have to find everywhere in the compiler where both functions are called, and ensure we pass a suitable `class` value to both of them. 
In `var_declaration_list()` in `decl.c` where we are parsing a list of variables or parameters, we already get the storage class for these variables: ```c static int var_declaration_list(struct symtable *funcsym, int class, int separate_token, int end_token); ``` So we can pass the `class` to `parse_type()` which may change it, then call `var_declaration()` with the actual class: ```c ... // Get the type and identifier type = parse_type(&ctype, &class); ident(); ... // Add a new parameter to the right symbol table list, based on the class var_declaration(type, ctype, class); ``` And in `var_declaration()`: ```c switch (class) { case C_EXTERN: case C_GLOBAL: sym = addglob(Text, type, ctype, S_VARIABLE, class, 1); ... } ``` For local variables, we need to turn our attention to `single_statement()` in `stmt.c`. I also should, at this point, say that I'd previously forgot to add the cases for structs, unions, enums and typedefs here. ```c // Parse a single statement and return its AST static struct ASTnode *single_statement(void) { int type, class= C_LOCAL; struct symtable *ctype; switch (Token.token) { case T_IDENT: // We have to see if the identifier matches a typedef. // If not do the default code in this switch statement. // Otherwise, fall down to the parse_type() call. if (findtypedef(Text) == NULL) return (binexpr(0)); case T_CHAR: case T_INT: case T_LONG: case T_STRUCT: case T_UNION: case T_ENUM: case T_TYPEDEF: // The beginning of a variable declaration. // Parse the type and get the identifier. // Then parse the rest of the declaration // and skip over the semicolon type = parse_type(&ctype, &class); ident(); var_declaration(type, ctype, class); semi(); return (NULL); // No AST generated here ... } ... } ``` Note that we start with `class= C_LOCAL`, but it might get modified by `parse_type()` before being passed to `var_declaration()`. This allows us to write code that looks like: ```c int main() { extern int foo; ... 
} ``` ## Testing the Code I've got one test program, `test/input70.c` which uses one of our new header files to confirm that the pre-processor works: ```c #include <stdio.h> typedef int FOO; int main() { FOO x; x= 56; printf("%d\n", x); return(0); } ``` I was hoping that `errno` was still an ordinary integer so that I could declare `extern int errno;` in `include/errno.h`. But, apparently, `errno` is now a function and not a global integer variable. I think this tells you a) how old I am and b) how long it is since I've written C code. ## Conclusion and What's Next I feel like we have hit another milestone here. We now have external variables and header files. This also means that, *finally*, we can put comments into our source files. That really makes me happy. We are up to just over 4,100 lines of code, of which about 2,800 lines are not comments and not whitespace. I have no idea exactly how many more lines of code we'll need to make the compiler self-compiling, but I'm going to hazard a guess of between 7,000 to 9,000 lines. We'll see! In the next part of our compiler writing journey, we will add the `break` and `continue` keywords to our loop constructs. [Next step](../36_Break_Continue/Readme.md) ================================================ FILE: 35_Preprocessor/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section we are outputting to enum { no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\t.text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\t.data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. 
int cgprimsize(int type) {
  // Every pointer type is 8 bytes wide here
  if (ptrtype(type))
    return (8);
  switch (type) {
    case P_CHAR:
      return (1);
    case P_INT:
      return (4);
    case P_LONG:
      return (8);
    default:
      fatald("Bad type in cgprimsize:", type);
  }
  return (0);                   // Keep -Wall happy
}

// Given a scalar type, an existing memory offset
// (which hasn't been allocated to anything yet)
// and a direction (1 is up, -1 is down), calculate
// and return a suitably aligned memory offset
// for this scalar type. This could be the original
// offset, or it could be above/below the original
int cgalign(int type, int offset, int direction) {
  int alignment;

  // We don't need to do this on x86-64, but let's
  // align chars on any offset and align ints/pointers
  // on a 4-byte alignment
  switch (type) {
    case P_CHAR:
      return (offset);
    case P_INT:
    case P_LONG:
      break;
    default:
      // Pointer types cannot be listed as case labels, so test
      // for them here. Previously every pointer was rejected,
      // contradicting the comment above. Anything that is
      // neither a known scalar nor a pointer is still an error.
      if (!ptrtype(type))
        fatald("Bad type in cgalign:", type);
  }

  // Here we have an int, a long or a pointer. Align it on a
  // 4-byte offset: round up when direction is 1, down when -1.
  // I put the generic code here so it can be reused elsewhere.
  alignment = 4;
  offset = (offset + direction * (alignment - 1)) & ~(alignment - 1);
  return (offset);
}

// Position of next local variable relative to stack base pointer.
// We store the offset as positive to make aligning the stack pointer easier
static int localOffset;
static int stackOffset;

// Create the position of a new local variable.
static int newlocaloffset(int type) {
  // Grow the frame by the size of the type, but never by less
  // than 4 bytes, and return the negative offset from %rbp
  int size = cgprimsize(type);

  localOffset += (size > 4) ? size : 4;
  return (-localOffset);
}

// List of available registers and their names.
// We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "%r10", "%r11", "%r12", "%r13", "%r9", "%r8", "%rcx", "%rdx", "%rsi", "%rdi" }; static char *breglist[] = { "%r10b", "%r11b", "%r12b", "%r13b", "%r9b", "%r8b", "%cl", "%dl", "%sil", "%dil" }; static char *dreglist[] = { "%r10d", "%r11d", "%r12d", "%r13d", "%r9d", "%r8d", "%ecx", "%edx", "%esi", "%edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) {
  // A non-zero entry means the register is already marked free
  if (freereg[reg] != 0)
    fatald("Error trying to free register", reg);
  freereg[reg] = 1;
}

// Print out the assembly preamble.
// Nothing is emitted: we only reset the register allocator
void cgpreamble() {
  freeall_registers();
}

// Nothing to do
void cgpostamble() {
}

// Print out a function preamble: the function's label, the
// frame set-up, the copying of register parameters into the
// frame, and the stack adjustment for parameters and locals
void cgfuncpreamble(struct symtable *sym) {
  char *name = sym->name;
  struct symtable *parm, *locvar;
  int cnt;
  int paramOffset = 16;         // Any pushed params start at this stack offset
  int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists

  // Output in the text segment, reset local offset
  cgtextseg();
  localOffset = 0;

  // Output the function start, save the caller's %rbp
  // and make %rbp the base of the new frame
  fprintf(Outfile,
          "\t.globl\t%s\n"
          "\t.type\t%s, @function\n"
          "%s:\n" "\tpushq\t%%rbp\n"
          "\tmovq\t%%rsp, %%rbp\n", name, name, name);

  // Copy any in-register parameters to the stack, up to six of them
  // The remaining parameters are already on the stack
  for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) {
    if (cnt > 6) {
      // Seventh and later parameters: record a positive offset from %rbp
      parm->posn = paramOffset;
      paramOffset += 8;
    } else {
      // First six: give each a frame slot and store the register there.
      // paramReg walks backwards through the register lists
      parm->posn = newlocaloffset(parm->type);
      cgstorlocal(paramReg--, parm);
    }
  }

  // Give every symbol on the Loclhead list a position in the frame
  for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) {
    locvar->posn = newlocaloffset(locvar->type);
  }

  // Align the stack pointer to be a multiple of 16
  // less than its previous value
  stackOffset = (localOffset + 15) & ~15;
  fprintf(Outfile, "\taddq\t$%d,%%rsp\n", -stackOffset);
}

// Print out a function postamble: the function's end label,
// undo the stack adjustment made in the preamble, restore
// the saved %rbp and return
void cgfuncpostamble(struct symtable *sym) {
  cglabel(sym->endlabel);
  fprintf(Outfile, "\taddq\t$%d,%%rsp\n", stackOffset);
  fputs("\tpopq	%rbp\n" "\tret\n", Outfile);
}

// Load an integer literal value into a register.
// Return the number of the register.
// For x86-64, we don't need to worry about the type.
int cgloadint(int value, int type) {
  // Get a new register
  int r = alloc_register();

  // The type argument is not used: a movq is always emitted
  fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]);
  return (r);
}

// Load a value from a variable into a register.
// Return the number of the register. If the
// operation is pre- or post-increment/decrement,
// also perform this action.
int cgloadglob(struct symtable *sym, int op) {
  // Get a new register
  int r = alloc_register();

  // In each case below: a pre-inc/dec is applied to the variable
  // in memory before the load, a post-inc/dec after the load
  if (cgprimsize(sym->type) == 8) {
    // 8-byte values (longs and pointers): plain 64-bit load
    if (op == A_PREINC)
      fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name);
    if (op == A_PREDEC)
      fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name);
    fprintf(Outfile, "\tmovq\t%s(%%rip), %s\n", sym->name, reglist[r]);
    if (op == A_POSTINC)
      fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name);
    if (op == A_POSTDEC)
      fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name);
  } else
    // Print out the code to load the narrower types
    switch (sym->type) {
      case P_CHAR:
        // Chars are zero-extended to 64 bits
        if (op == A_PREINC)
          fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name);
        if (op == A_PREDEC)
          fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name);
        fprintf(Outfile, "\tmovzbq\t%s(%%rip), %s\n", sym->name, reglist[r]);
        if (op == A_POSTINC)
          fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name);
        if (op == A_POSTDEC)
          fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name);
        break;
      case P_INT:
        // Ints are sign-extended to 64 bits
        if (op == A_PREINC)
          fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name);
        if (op == A_PREDEC)
          fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name);
        fprintf(Outfile, "\tmovslq\t%s(%%rip), %s\n", sym->name, reglist[r]);
        if (op == A_POSTINC)
          fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name);
        if (op == A_POSTDEC)
          fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name);
        break;
      default:
        fatald("Bad type in cgloadglob:", sym->type);
    }
  return (r);
}

// Load a value from a local variable into a register.
// Return the number of the register. If the
// operation is pre- or post-increment/decrement,
// also perform this action.
int cgloadlocal(struct symtable *sym, int op) {
  // Get a new register
  int r = alloc_register();

  // Print out the code to load it. The variable is addressed
  // as sym->posn(%rbp). A pre-inc/dec is applied to the variable
  // in memory before the load, a post-inc/dec after the load
  if (cgprimsize(sym->type) == 8) {
    if (op == A_PREINC)
      fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->posn);
    if (op == A_PREDEC)
      fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->posn);
    fprintf(Outfile, "\tmovq\t%d(%%rbp), %s\n", sym->posn, reglist[r]);
    if (op == A_POSTINC)
      fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->posn);
    if (op == A_POSTDEC)
      fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->posn);
  } else
    switch (sym->type) {
      case P_CHAR:
        // Chars are zero-extended to 64 bits
        if (op == A_PREINC)
          fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->posn);
        if (op == A_PREDEC)
          fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->posn);
        fprintf(Outfile, "\tmovzbq\t%d(%%rbp), %s\n", sym->posn, reglist[r]);
        if (op == A_POSTINC)
          fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->posn);
        if (op == A_POSTDEC)
          fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->posn);
        break;
      case P_INT:
        // Ints are sign-extended to 64 bits
        if (op == A_PREINC)
          fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->posn);
        if (op == A_PREDEC)
          fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->posn);
        fprintf(Outfile, "\tmovslq\t%d(%%rbp), %s\n", sym->posn, reglist[r]);
        if (op == A_POSTINC)
          fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->posn);
        if (op == A_POSTDEC)
          fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->posn);
        break;
      default:
        fatald("Bad type in cgloadlocal:", sym->type);
    }
  return (r);
}

// Given the label number of a global string,
// load its address into a new register
int cgloadglobstr(int label) {
  // Get a new register
  int r = alloc_register();
  fprintf(Outfile, "\tleaq\tL%d(%%rip), %s\n", label, reglist[r]);
  return (r);
}

// Add two registers together and return
// the number of the register with the result.
// The result is left in r2 and r1 is freed
int cgadd(int r1, int r2) {
  fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r1], reglist[r2]);
  free_register(r1);
  return (r2);
}

// Subtract the second register from the first and
// return the number of the register with the result.
// The result is left in r1 and r2 is freed
int cgsub(int r1, int r2) {
  fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]);
  free_register(r2);
  return (r1);
}

// Multiply two registers together and return
// the number of the register with the result.
// The result is left in r2 and r1 is freed
int cgmul(int r1, int r2) {
  fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]);
  free_register(r1);
  return (r2);
}

// Divide the first register by the second and
// return the number of the register with the result
int cgdiv(int r1, int r2) {
  // Move the dividend into %rax, sign-extend it with cqo,
  // divide by r2, then copy the quotient from %rax back to r1
  fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]);
  fprintf(Outfile, "\tcqo\n");
  fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]);
  fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]);
  free_register(r2);
  return (r1);
}

// Bitwise AND two registers. The result is left in r2
int cgand(int r1, int r2) {
  fprintf(Outfile, "\tandq\t%s, %s\n", reglist[r1], reglist[r2]);
  free_register(r1);
  return (r2);
}

// Bitwise OR two registers. The result is left in r2
int cgor(int r1, int r2) {
  fprintf(Outfile, "\torq\t%s, %s\n", reglist[r1], reglist[r2]);
  free_register(r1);
  return (r2);
}

// Bitwise XOR two registers. The result is left in r2
int cgxor(int r1, int r2) {
  fprintf(Outfile, "\txorq\t%s, %s\n", reglist[r1], reglist[r2]);
  free_register(r1);
  return (r2);
}

// Shift r1 left by the amount in r2. The shift count
// goes through %cl. The result is left in r1
int cgshl(int r1, int r2) {
  fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]);
  fprintf(Outfile, "\tshlq\t%%cl, %s\n", reglist[r1]);
  free_register(r2);
  return (r1);
}

// Shift r1 right by the amount in r2. The shift count
// goes through %cl. The result is left in r1
int cgshr(int r1, int r2) {
  fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]);
  fprintf(Outfile, "\tshrq\t%%cl, %s\n", reglist[r1]);
  free_register(r2);
  return (r1);
}

// Negate a register's value
int cgnegate(int r) {
  fprintf(Outfile, "\tnegq\t%s\n", reglist[r]);
  return (r);
}

// Invert a register's value
int cginvert(int r) {
  fprintf(Outfile, "\tnotq\t%s\n", reglist[r]);
  return (r);
}

// Logically negate a register's value
int cglognot(int r) {
  // Set the low byte to 1 if the register was zero,
  // else to 0, then widen that byte to the full register
  fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]);
  fprintf(Outfile, "\tsete\t%s\n", breglist[r]);
  fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]);
  return (r);
}

// Convert an integer value to a boolean value.
// Jump if it's an IF or WHILE operation
int cgboolean(int r, int op, int label) {
  fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]);
  if (op == A_IF || op == A_WHILE)
    // Jump to the label when the value is zero
    fprintf(Outfile, "\tje\tL%d\n", label);
  else {
    // Otherwise leave 1 in the register if it was non-zero, else 0
    fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]);
    fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]);
  }
  return (r);
}

// Call a function with the given symbol id
// Pop off any arguments pushed on the stack
// Return the register with the result
int cgcall(struct symtable *sym, int numargs) {
  // Get a new register
  int outr = alloc_register();
  // Call the function
  fprintf(Outfile, "\tcall\t%s@PLT\n", sym->name);
  // Remove any arguments pushed on the stack
  if (numargs > 6)
    fprintf(Outfile, "\taddq\t$%d, %%rsp\n", 8 * (numargs - 6));
  // and copy the return value into our register
  fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]);
  return (outr);
}

// Given a register with an argument value,
// copy this argument into the argposn'th
// parameter in preparation for a future function
// call. Note that argposn is 1, 2, 3, 4, ..., never zero.
void cgcopyarg(int r, int argposn) {

  // If this is above the sixth argument, simply push the
  // register on the stack. We rely on being called with
  // successive arguments in the correct order for x86-64
  if (argposn > 6) {
    fprintf(Outfile, "\tpushq\t%s\n", reglist[r]);
  } else {
    // Otherwise, copy the value into one of the six registers
    // used to hold parameter values. Argument 1 maps to the
    // entry at FIRSTPARAMREG, argument 2 to the one before it, etc.
    fprintf(Outfile, "\tmovq\t%s, %s\n", reglist[r],
            reglist[FIRSTPARAMREG - argposn + 1]);
  }
}

// Shift a register left by a constant
int cgshlconst(int r, int val) {
  fprintf(Outfile, "\tsalq\t$%d, %s\n", val, reglist[r]);
  return (r);
}

// Store a register's value into a variable.
// The width of the store follows the variable's type
int cgstorglob(int r, struct symtable *sym) {

  if (cgprimsize(sym->type) == 8) {
    fprintf(Outfile, "\tmovq\t%s, %s(%%rip)\n", reglist[r], sym->name);
  } else
    switch (sym->type) {
      case P_CHAR:
        fprintf(Outfile, "\tmovb\t%s, %s(%%rip)\n", breglist[r], sym->name);
        break;
      case P_INT:
        fprintf(Outfile, "\tmovl\t%s, %s(%%rip)\n", dreglist[r], sym->name);
        break;
      default:
        fatald("Bad type in cgstorglob:", sym->type);
    }
  return (r);
}

// Store a register's value into a local variable.
// The width of the store follows the variable's type
int cgstorlocal(int r, struct symtable *sym) {

  if (cgprimsize(sym->type) == 8) {
    fprintf(Outfile, "\tmovq\t%s, %d(%%rbp)\n", reglist[r], sym->posn);
  } else
    switch (sym->type) {
      case P_CHAR:
        fprintf(Outfile, "\tmovb\t%s, %d(%%rbp)\n", breglist[r], sym->posn);
        break;
      case P_INT:
        fprintf(Outfile, "\tmovl\t%s, %d(%%rbp)\n", dreglist[r], sym->posn);
        break;
      default:
        fatald("Bad type in cgstorlocal:", sym->type);
    }
  return (r);
}

// Generate a global symbol but not functions
void cgglobsym(struct symtable *node) {
  int size;

  if (node == NULL)
    return;
  if (node->stype == S_FUNCTION)
    return;

  // Get the size of the type
  size = typesize(node->type, node->ctype);

  // Generate the global identity and the label
  cgdataseg();
  fprintf(Outfile, "\t.globl\t%s\n", node->name);
  fprintf(Outfile, "%s:", node->name);

  // Generate the space for this type
  switch (size) {
    case 1:
      fprintf(Outfile, "\t.byte\t0\n");
      break;
    case 4:
      fprintf(Outfile, "\t.long\t0\n");
      break;
    case 8:
      fprintf(Outfile, "\t.quad\t0\n");
      break;
    default:
      // Any other size: emit that many zero bytes
      for (int i = 0; i < size; i++)
        fprintf(Outfile, "\t.byte\t0\n");
  }
}

// Generate a global string and its start label.
// Each character is output as a .byte, followed by a zero byte
void cgglobstr(int l, char *strvalue) {
  char *cptr;
  cglabel(l);
  for (cptr = strvalue; *cptr; cptr++) {
    fprintf(Outfile, "\t.byte\t%d\n", *cptr);
  }
  fprintf(Outfile, "\t.byte\t0\n");
}

// List of comparison instructions,
// in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE
static char *cmplist[] =
  { "sete", "setne", "setl", "setg", "setle", "setge" };

// Compare two registers and set if true.
// The 0/1 result is left in r2 and r1 is freed
int cgcompare_and_set(int ASTop, int r1, int r2) {

  // Check the range of the AST operation
  if (ASTop < A_EQ || ASTop > A_GE)
    fatal("Bad ASTop in cgcompare_and_set()");

  fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]);
  fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]);
  fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]);
  free_register(r1);
  return (r2);
}

// Generate a label
void cglabel(int l) {
  fprintf(Outfile, "L%d:\n", l);
}

// Generate a jump to a label
void cgjump(int l) {
  fprintf(Outfile, "\tjmp\tL%d\n", l);
}

// List of inverted jump instructions,
// in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE
static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" };

// Compare two registers and jump if false.
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzbl\t%s, %%eax\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %%eax\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } cgjump(sym->endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL) fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", sym->name, reglist[r]); else fprintf(Outfile, "\tleaq\t%d(%%rbp), %s\n", sym->posn, reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzbq\t(%s), %s\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovslq\t(%s), %s\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { // Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmovb\t%s, (%s)\n", breglist[r1], reglist[r2]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 35_Preprocessor/cg_arm.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for ARMv6 on Raspberry Pi // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. static int freereg[4]; static char *reglist[4] = { "r4", "r5", "r6", "r7" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. 
static int alloc_register(void) {
  for (int i = 0; i < 4; i++) {
    if (freereg[i]) {
      freereg[i] = 0;           // Mark it as in use
      return (i);
    }
  }
  fatal("Out of registers");
  return (NOREG);               // Keep -Wall happy
}

// Return a register to the list of available registers.
// Check to see if it's not already there.
static void free_register(int reg) {
  if (freereg[reg] != 0)
    fatald("Error trying to free register", reg);
  freereg[reg] = 1;
}

// We have to store large integer literal values in memory.
// Keep a list of them which will be output in the postamble
#define MAXINTS 1024
int Intlist[MAXINTS];
static int Intslot = 0;

// Determine the offset of a large integer
// literal from the .L3 label. If the integer
// isn't in the list, add it.
static void set_int_offset(int val) {
  int offset = -1;

  // See if it is already there
  for (int i = 0; i < Intslot; i++) {
    if (Intlist[i] == val) {
      offset = 4 * i;           // Each entry is output as one .word
      break;
    }
  }

  // Not in the list, so add it
  if (offset == -1) {
    offset = 4 * Intslot;
    if (Intslot == MAXINTS)
      fatal("Out of int slots in set_int_offset()");
    Intlist[Intslot++] = val;
  }
  // Load r3 with this offset
  fprintf(Outfile, "\tldr\tr3, .L3+%d\n", offset);
}

// Print out the assembly preamble
void cgpreamble() {
  freeall_registers();
  fputs("\t.text\n", Outfile);
}

// Print out the assembly postamble
// NOTE(review): this and the functions below index a Symtable[]
// array by id, whereas cg.c in this directory takes
// struct symtable pointers -- confirm that this file is still built
void cgpostamble() {

  // Print out the global variables
  fprintf(Outfile, ".L2:\n");
  for (int i = 0; i < Globs; i++) {
    if (Symtable[i].stype == S_VARIABLE)
      fprintf(Outfile, "\t.word %s\n", Symtable[i].name);
  }

  // Print out the integer literals
  fprintf(Outfile, ".L3:\n");
  for (int i = 0; i < Intslot; i++) {
    fprintf(Outfile, "\t.word %d\n", Intlist[i]);
  }
}

// Print out a function preamble
void cgfuncpreamble(int id) {
  char *name = Symtable[id].name;
  fprintf(Outfile,
          "\t.text\n"
          "\t.globl\t%s\n"
          "\t.type\t%s, \%%function\n"
          "%s:\n" "\tpush\t{fp, lr}\n"
          "\tadd\tfp, sp, #4\n"
          "\tsub\tsp, sp, #8\n" "\tstr\tr0, [fp, #-8]\n", name, name, name);
}

// Print out a function postamble
void cgfuncpostamble(int id) {
  cglabel(Symtable[id].endlabel);
  fputs("\tsub\tsp, fp, #4\n" "\tpop\t{fp, pc}\n" "\t.align\t2\n", Outfile);
}

// Load an integer literal value into a register.
// Return the number of the register.
int cgloadint(int value, int type) {
  // Get a new register
  int r = alloc_register();

  // If the literal value is small, do it with one instruction
  if (value <= 1000)
    fprintf(Outfile, "\tmov\t%s, #%d\n", reglist[r], value);
  else {
    // Otherwise point r3 at the literal's slot after .L3 and load from it
    set_int_offset(value);
    fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]);
  }
  return (r);
}

// Determine the offset of a variable from the .L2
// label. Yes, this is inefficient code.
static void set_var_offset(int id) {
  int offset = 0;
  // Walk the symbol table up to id.
  // Find S_VARIABLEs and add on 4 until
  // we get to our variable

  for (int i = 0; i < id; i++) {
    if (Symtable[i].stype == S_VARIABLE)
      offset += 4;
  }
  // Load r3 with this offset
  fprintf(Outfile, "\tldr\tr3, .L2+%d\n", offset);
}

// Load a value from a variable into a register.
// Return the number of the register
int cgloadglob(int id) {
  // Get a new register
  int r = alloc_register();

  // Get the offset to the variable
  set_var_offset(id);

  switch (Symtable[id].type) {
    case P_CHAR:
      fprintf(Outfile, "\tldrb\t%s, [r3]\n", reglist[r]);
      break;
    case P_INT:
    case P_LONG:
    case P_CHARPTR:
    case P_INTPTR:
    case P_LONGPTR:
      fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]);
      break;
    default:
      fatald("Bad type in cgloadglob:", Symtable[id].type);
  }
  return (r);
}

// Add two registers together and return
// the number of the register with the result
int cgadd(int r1, int r2) {
  fprintf(Outfile, "\tadd\t%s, %s, %s\n", reglist[r2], reglist[r1],
          reglist[r2]);
  free_register(r1);
  return (r2);
}

// Subtract the second register from the first and
// return the number of the register with the result
int cgsub(int r1, int r2) {
  fprintf(Outfile, "\tsub\t%s, %s, %s\n", reglist[r1], reglist[r1],
          reglist[r2]);
  free_register(r2);
  return (r1);
}

// Multiply two registers together and return
// the number of the register
// with the result
int cgmul(int r1, int r2) {
  fprintf(Outfile, "\tmul\t%s, %s, %s\n", reglist[r2], reglist[r1],
          reglist[r2]);
  free_register(r1);
  return (r2);
}

// Divide the first register by the second and
// return the number of the register with the result
int cgdiv(int r1, int r2) {

  // To do a divide: r0 holds the dividend, r1 holds the divisor.
  // The quotient is in r0.
  fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r1]);
  fprintf(Outfile, "\tmov\tr1, %s\n", reglist[r2]);
  fprintf(Outfile, "\tbl\t__aeabi_idiv\n");
  fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r1]);
  free_register(r2);
  return (r1);
}

// Call a function with one argument from the given register
// Return the register with the result
int cgcall(int r, int id) {
  // The argument goes out in r0 and the result comes back in r0
  fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]);
  fprintf(Outfile, "\tbl\t%s\n", Symtable[id].name);
  fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r]);
  return (r);
}

// Shift a register left by a constant
int cgshlconst(int r, int val) {
  fprintf(Outfile, "\tlsl\t%s, %s, #%d\n", reglist[r], reglist[r], val);
  return (r);
}

// Store a register's value into a variable
int cgstorglob(int r, int id) {

  // Get the offset to the variable
  set_var_offset(id);

  switch (Symtable[id].type) {
    case P_CHAR:
      fprintf(Outfile, "\tstrb\t%s, [r3]\n", reglist[r]);
      break;
    case P_INT:
    case P_LONG:
    case P_CHARPTR:
    case P_INTPTR:
    case P_LONGPTR:
      fprintf(Outfile, "\tstr\t%s, [r3]\n", reglist[r]);
      break;
    default:
      fatald("Bad type in cgstorglob:", Symtable[id].type);
  }
  return (r);
}

// Given a P_XXX type value, return the
// size of a primitive type in bytes.
int cgprimsize(int type) { if (ptrtype(type)) return (4); switch (type) { case P_CHAR: return (1); case P_INT: case P_LONG: return (4); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Symtable[id].type); fprintf(Outfile, "\t.data\n" "\t.globl\t%s\n", Symtable[id].name); switch (typesize) { case 1: fprintf(Outfile, "%s:\t.byte\t0\n", Symtable[id].name); break; case 4: fprintf(Outfile, "%s:\t.long\t0\n", Symtable[id].name); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "moveq", "movne", "movlt", "movgt", "movle", "movge" }; // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "movne", "moveq", "movge", "movle", "movgt", "movlt" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #1\n", cmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #0\n", invcmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\tuxtb\t%s, %s\n", reglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tb\tL%d\n", l); } // List of inverted branch instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *brlist[] = { "bne", "beq", "bge", "ble", "bgt", "blt" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", brlist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[reg]); cgjump(Symtable[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. Return a new register int cgaddress(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); fprintf(Outfile, "\tmov\t%s, r3\n", reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tldrb\t%s, [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [%s]\n", reglist[r1], reglist[r2]); break; case P_INT: case P_LONG: fprintf(Outfile, "\tstr\t%s, [%s]\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 35_Preprocessor/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { 
no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\tsection .text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\tsection .data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch(type) { case P_CHAR: return (offset); case P_INT: case P_LONG: break; default: fatald("Bad type in calc_aligned_offset:", type); } // Here we have an int or a long. Align it on a 4-byte offset // I put the generic code here so it can be reused elsewhere. alignment= 4; offset = (offset + direction * (alignment-1)) & ~(alignment-1); return (offset); } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. 
// We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "r10", "r11", "r12", "r13", "r9", "r8", "rcx", "rdx", "rsi", "rdi" }; static char *breglist[] = { "r10b", "r11b", "r12b", "r13b", "r9b", "r8b", "cl", "dl", "sil", "dil" }; static char *dreglist[] = { "r10d", "r11d", "r12d", "r13d", "r9d", "r8d", "ecx", "edx", "esi", "edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\textern\tprintint\n", Outfile); fputs("\textern\tprintchar\n", Outfile); fputs("\textern\topen\n", Outfile); fputs("\textern\tclose\n", Outfile); fputs("\textern\tread\n", Outfile); fputs("\textern\twrite\n", Outfile); fputs("\textern\tprintf\n", Outfile); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(struct symtable *sym) { char *name = sym->name; struct symtable *parm, *locvar; int cnt; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the rsp and rbp fprintf(Outfile, "\tglobal\t%s\n" "%s:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n", name, name); // Copy any in-register parameters to the stack, up to six of them // The remaining parameters are already on the stack for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) { if (cnt > 6) { parm->posn = paramOffset; paramOffset += 8; } else { parm->posn = newlocaloffset(parm->type); cgstorlocal(paramReg--, parm); } } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. 
for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) { locvar->posn = newlocaloffset(locvar->type); } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\tadd\trsp, %d\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->endlabel); fprintf(Outfile, "\tadd\trsp, %d\n", stackOffset); fputs("\tpop rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadglob(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); } else // Print out the code to initialise it switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, 
"\tdec\tdword [%s]\n", sym->name); fprintf(Outfile, "\tmovsx\t%s, word [%s]\n", dreglist[r], sym->name); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword [%s]\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadlocal(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->posn); fprintf(Outfile, "\tmov\t%s, [rbp+%d]\n", reglist[r], sym->posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->posn); } else switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->posn); fprintf(Outfile, "\tmovzx\t%s, byte [rbp+%d]\n", reglist[r], sym->posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", sym->posn); fprintf(Outfile, "\tmovsx\t%s, word [rbp+%d]\n", reglist[r], sym->posn); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, 
"\tdec\tdword\t[rbp+%d]\n", sym->posn); break; default: fatald("Bad type in cgloadlocal:", sym->type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, L%d\n", reglist[r], label); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tand\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\tor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshl\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshr\t%s, 
cl\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tneg\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnot\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r], breglist[r]); return (r); } // Convert an integer value to a boolean value. Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, byte %s\n", reglist[r], breglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\tadd\trsp, %d\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmov\t%s, rax\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpush\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmov\t%s, %s\n", reglist[FIRSTPARAMREG - argposn + 1], reglist[r]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsal\t%s, %d\n", reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, dreglist[r]); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\tqword\t[rbp+%d], %s\n", sym->posn, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\tbyte\t[rbp+%d], %s\n", sym->posn, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\tdword\t[rbp+%d], %s\n", sym->posn, dreglist[r]); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the type size = typesize(node->type, node->ctype); // Generate the global identity and the label cgdataseg(); fprintf(Outfile, "\tsection\t.data\n" "\tglobal\t%s\n", node->name); fprintf(Outfile, "%s:", node->name); // Generate the space for this type // original version for (int i = 0; i < node->size; i++) { switch(size) { case 1: fprintf(Outfile, "\tdb\t0\n"); break; case 4: fprintf(Outfile, 
"\tdd\t0\n"); break; case 8: fprintf(Outfile, "\tdq\t0\n"); break; default: for (int i = 0; i < size; i++) fprintf(Outfile, "\tdb\t0\n"); } } /* compact version using times instead of loop switch(size) { case 1: fprintf(Outfile, "\ttimes\t%d\tdb\t0\n", node->size); break; case 4: fprintf(Outfile, "\ttimes\t%d\tdd\t0\n", node->size); break; case 8: fprintf(Outfile, "\ttimes\t%d\tdq\t0\n", node->size); break; default: fprintf(Outfile, "\ttimes\t%d\tdb\t0\n", size); } */ } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\tdb\t%d\n", *cptr); } fprintf(Outfile, "\tdb\t0\n"); /* put string in readable format on a single line // probably went overboard with error checking int comma = 0, quote = 0, start = 1; fprintf(Outfile, "\tdb\t"); for (cptr=strvalue; *cptr; cptr++) { if ( ! isprint(*cptr) ) if (comma || start) { fprintf(Outfile, "%d, ", *cptr); start = 0; comma = 1; } else if (quote) { fprintf(Outfile, "\', %d, ", *cptr); comma = 1; quote = 0; } else { fprintf(Outfile, "%d, ", *cptr); comma = 1; quote = 0; } else if (start || comma) { fprintf(Outfile, "\'%c", *cptr); start = comma = 0; quote = 1; } else { fprintf(Outfile, "%c", *cptr); comma = 0; quote = 1; } } if (comma || start) fprintf(Outfile, "0\n"); else fprintf(Outfile, "\', 0\n"); */ } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzx\teax, %s\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmov\teax, %s\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } cgjump(sym->endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL) fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r], sym->name); else fprintf(Outfile, "\tlea\t%s, [rbp+%d]\n", reglist[r], sym->posn); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovsx\t%s, dword [%s]\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { //Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmov\t[%s], byte %s\n", reglist[r2], breglist[r1]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 35_Preprocessor/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Putback; // Character put back by scanner extern_ struct symtable *Functionid; // Symbol ptr of the current function extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ char *Infilename; // Name of file we are parsing extern_ char *Outfilename; // Name of file we opened as Outfile extern_ struct token Token; // Last token scanned extern_ char Text[TEXTLEN + 1]; // Last identifier scanned // Symbol table lists extern_ struct 
symtable *Globhead, *Globtail; // Global variables and functions extern_ struct symtable *Loclhead, *Locltail; // Local variables extern_ struct symtable *Parmhead, *Parmtail; // Local parameters extern_ struct symtable *Membhead, *Membtail; // Temp list of struct/union members extern_ struct symtable *Structhead, *Structtail; // List of struct types extern_ struct symtable *Unionhead, *Uniontail; // List of union types extern_ struct symtable *Enumhead, *Enumtail; // List of enum types and values extern_ struct symtable *Typehead, *Typetail; // List of typedefs // Command-line flags extern_ int O_dumpAST; // If true, dump the AST trees extern_ int O_keepasm; // If true, keep any assembly files extern_ int O_assemble; // If true, assemble the assembly files extern_ int O_dolink; // If true, link the object files extern_ int O_verbose; // If true, print info on compilation stages ================================================ FILE: 35_Preprocessor/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 static struct symtable *composite_declaration(int type); static void enum_declaration(void); int typedef_declaration(struct symtable **ctype); int type_of_typedef(char *name, struct symtable **ctype); // Parse the current token and return a // primitive type enum value, a pointer // to any composite type and possibly // modify the class of the type. // Also scan in the next token. 
int parse_type(struct symtable **ctype, int *class) { int type, exstatic=1; // See if the class has been changed to extern (later, static) while (exstatic) { switch (Token.token) { case T_EXTERN: *class= C_EXTERN; scan(&Token); break; default: exstatic= 0; } } // Now work on the actual type keyword switch (Token.token) { case T_VOID: type = P_VOID; scan(&Token); break; case T_CHAR: type = P_CHAR; scan(&Token); break; case T_INT: type = P_INT; scan(&Token); break; case T_LONG: type = P_LONG; scan(&Token); break; // For the following, if we have a ';' after the // parsing then there is no type, so return -1 case T_STRUCT: type = P_STRUCT; *ctype = composite_declaration(P_STRUCT); if (Token.token == T_SEMI) type = -1; break; case T_UNION: type = P_UNION; *ctype = composite_declaration(P_UNION); if (Token.token == T_SEMI) type = -1; break; case T_ENUM: type = P_INT; // Enums are really ints enum_declaration(); if (Token.token == T_SEMI) type = -1; break; case T_TYPEDEF: type = typedef_declaration(ctype); if (Token.token == T_SEMI) type = -1; break; case T_IDENT: type = type_of_typedef(Text, ctype); break; default: fatald("Illegal type, token", Token.token); } // Scan in one or more further '*' tokens // and determine the correct pointer type while (1) { if (Token.token != T_STAR) break; type = pointer_to(type); scan(&Token); } // We leave with the next token already scanned return (type); } // variable_declaration: type identifier ';' // | type identifier '[' INTLIT ']' ';' // ; // // Parse the declaration of a scalar variable or an array // with a given size. // The identifier has been scanned & we have the type. 
// class is the variable's class // Return the pointer to variable's entry in the symbol table struct symtable *var_declaration(int type, struct symtable *ctype, int class) { struct symtable *sym = NULL; // See if this has already been declared switch (class) { case C_EXTERN: case C_GLOBAL: if (findglob(Text) != NULL) fatals("Duplicate global variable declaration", Text); case C_LOCAL: case C_PARAM: if (findlocl(Text) != NULL) fatals("Duplicate local variable declaration", Text); case C_MEMBER: if (findmember(Text) != NULL) fatals("Duplicate struct/union member declaration", Text); } // Text now has the identifier's name. // If the next token is a '[' if (Token.token == T_LBRACKET) { // Skip past the '[' scan(&Token); // Check we have an array size if (Token.token == T_INTLIT) { // Add this as a known array and generate its space in assembly. // We treat the array as a pointer to its elements' type switch (class) { case C_EXTERN: case C_GLOBAL: sym = addglob(Text, pointer_to(type), ctype, S_ARRAY, class, Token.intvalue); break; case C_LOCAL: case C_PARAM: case C_MEMBER: fatal ("For now, declaration of non-global arrays is not implemented"); } } // Ensure we have a following ']' scan(&Token); match(T_RBRACKET, "]"); } else { // Add this as a known scalar // and generate its space in assembly switch (class) { case C_EXTERN: case C_GLOBAL: sym = addglob(Text, type, ctype, S_VARIABLE, class, 1); break; case C_LOCAL: sym = addlocl(Text, type, ctype, S_VARIABLE, 1); break; case C_PARAM: sym = addparm(Text, type, ctype, S_VARIABLE, 1); break; case C_MEMBER: sym = addmemb(Text, type, ctype, S_VARIABLE, 1); break; } } return (sym); } // var_declaration_list: // | variable_declaration // | variable_declaration separate_token var_declaration_list ; // // When called to parse function parameters, separate_token is ','. // When called to parse members of a struct/union, separate_token is ';'. // // Parse a list of variables. 
// Add them as symbols to one of the symbol table lists, and return the // number of variables. If funcsym is not NULL, there is an existing function // prototype, so compare each variable's type against this prototype. static int var_declaration_list(struct symtable *funcsym, int class, int separate_token, int end_token) { int type; int paramcnt = 0; struct symtable *protoptr = NULL; struct symtable *ctype; // If there is a prototype, get the pointer // to the first prototype parameter if (funcsym != NULL) protoptr = funcsym->member; // Loop until the final end token while (Token.token != end_token) { // Get the type and identifier type = parse_type(&ctype, &class); ident(); // Check that this type matches the prototype if there is one if (protoptr != NULL) { if (type != protoptr->type) fatald("Type doesn't match prototype for parameter", paramcnt + 1); protoptr = protoptr->next; } else { // Add a new parameter to the right symbol table list, based on the class var_declaration(type, ctype, class); } paramcnt++; // Must have a separate_token or ')' at this point if ((Token.token != separate_token) && (Token.token != end_token)) fatald("Unexpected token in parameter list", Token.token); if (Token.token == separate_token) scan(&Token); } // Check that the number of parameters in this list matches // any existing prototype if ((funcsym != NULL) && (paramcnt != funcsym->nelems)) fatals("Parameter count mismatch for function", funcsym->name); // Return the count of parameters return (paramcnt); } // // function_declaration: type identifier '(' parameter_list ')' ; // | type identifier '(' parameter_list ')' compound_statement ; // // Parse the declaration of function. // The identifier has been scanned & we have the type. struct ASTnode *function_declaration(int type) { struct ASTnode *tree, *finalstmt; struct symtable *oldfuncsym, *newfuncsym = NULL; int endlabel, paramcnt; // Text has the identifier's name. If this exists and is a // function, get the id. 
Otherwise, set oldfuncsym to NULL. if ((oldfuncsym = findsymbol(Text)) != NULL) if (oldfuncsym->stype != S_FUNCTION) oldfuncsym = NULL; // If this is a new function declaration, get a // label-id for the end label, and add the function // to the symbol table, if (oldfuncsym == NULL) { endlabel = genlabel(); // Assumtion: functions only return scalar types, so NULL below newfuncsym = addglob(Text, type, NULL, S_FUNCTION, C_GLOBAL, endlabel); } // Scan in the '(', any parameters and the ')'. // Pass in any existing function prototype pointer lparen(); paramcnt = var_declaration_list(oldfuncsym, C_PARAM, T_COMMA, T_RPAREN); rparen(); // If this is a new function declaration, update the // function symbol entry with the number of parameters. // Also copy the parameter list into the function's node. if (newfuncsym) { newfuncsym->nelems = paramcnt; newfuncsym->member = Parmhead; oldfuncsym = newfuncsym; } // Clear out the parameter list Parmhead = Parmtail = NULL; // Declaration ends in a semicolon, only a prototype. if (Token.token == T_SEMI) { scan(&Token); return (NULL); } // This is not just a prototype. // Set the Functionid global to the function's symbol pointer Functionid = oldfuncsym; // Get the AST tree for the compound statement tree = compound_statement(); // If the function type isn't P_VOID .. if (type != P_VOID) { // Error if no statements in the function if (tree == NULL) fatal("No statements in function with non-void type"); // Check that the last AST operation in the // compound statement was a return statement finalstmt = (tree->op == A_GLUE) ? tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); } // Return an A_FUNCTION node which has the function's symbol pointer // and the compound statement sub-tree return (mkastunary(A_FUNCTION, type, tree, oldfuncsym, endlabel)); } // Parse composite type declarations: structs or unions. 
// Either find an existing struct/union declaration, or build // a struct/union symbol table entry and return its pointer. static struct symtable *composite_declaration(int type) { struct symtable *ctype = NULL; struct symtable *m; int offset; // Skip the struct/union keyword scan(&Token); // See if there is a following struct/union name if (Token.token == T_IDENT) { // Find any matching composite type if (type == P_STRUCT) ctype = findstruct(Text); else ctype = findunion(Text); scan(&Token); } // If the next token isn't an LBRACE , this is // the usage of an existing struct/union type. // Return the pointer to the type. if (Token.token != T_LBRACE) { if (ctype == NULL) fatals("unknown struct/union type", Text); return (ctype); } // Ensure this struct/union type hasn't been // previously defined if (ctype) fatals("previously defined struct/union", Text); // Build the composite type and skip the left brace if (type == P_STRUCT) ctype = addstruct(Text, P_STRUCT, NULL, 0, 0); else ctype = addunion(Text, P_UNION, NULL, 0, 0); scan(&Token); // Scan in the list of members and attach // to the struct type's node var_declaration_list(NULL, C_MEMBER, T_SEMI, T_RBRACE); rbrace(); ctype->member = Membhead; Membhead = Membtail = NULL; // Set the offset of the initial member // and find the first free byte after it m = ctype->member; m->posn = 0; offset = typesize(m->type, m->ctype); // Set the position of each successive member in the composite type // Unions are easy. 
For structs, align the member and find the next free byte for (m = m->next; m != NULL; m = m->next) { // Set the offset for this member if (type == P_STRUCT) m->posn = genalign(m->type, offset, 1); else m->posn = 0; // Get the offset of the next free byte after this member offset += typesize(m->type, m->ctype); } // Set the overall size of the composite type ctype->size = offset; return (ctype); } // Parse an enum declaration static void enum_declaration(void) { struct symtable *etype = NULL; char *name; int intval = 0; // Skip the enum keyword. scan(&Token); // If there's a following enum type name, get a // pointer to any existing enum type node. if (Token.token == T_IDENT) { etype = findenumtype(Text); name = strdup(Text); // As it gets tromped soon scan(&Token); } // If the next token isn't a LBRACE, check // that we have an enum type name, then return if (Token.token != T_LBRACE) { if (etype == NULL) fatals("undeclared enum type:", name); return; } // We do have an LBRACE. Skip it scan(&Token); // If we have an enum type name, ensure that it // hasn't been declared before. if (etype != NULL) fatals("enum type redeclared:", etype->name); else // Build an enum type node for this identifier etype = addenum(name, C_ENUMTYPE, 0); // Loop to get all the enum values while (1) { // Ensure we have an identifier // Copy it in case there's an int literal coming up ident(); name = strdup(Text); // Ensure this enum value hasn't been declared before etype = findenumval(name); if (etype != NULL) fatals("enum value redeclared:", Text); // If the next token is an '=', skip it and // get the following int literal if (Token.token == T_ASSIGN) { scan(&Token); if (Token.token != T_INTLIT) fatal("Expected int literal after '='"); intval = Token.intvalue; scan(&Token); } // Build an enum value node for this identifier. // Increment the value for the next enum identifier. 
etype = addenum(name, C_ENUMVAL, intval++); // Bail out on a right curly bracket, else get a comma if (Token.token == T_RBRACE) break; comma(); } scan(&Token); // Skip over the right curly bracket } // Parse a typedef declaration and return the type // and ctype that it represents int typedef_declaration(struct symtable **ctype) { int type, class=0; // Skip the typedef keyword. scan(&Token); // Get the actual type following the keyword type = parse_type(ctype, &class); if (class != 0) fatal("Can't have extern in a typedef declaration"); // See if the typedef identifier already exists if (findtypedef(Text) != NULL) fatals("redefinition of typedef", Text); // It doesn't exist so add it to the typedef list addtypedef(Text, type, *ctype, 0, 0); scan(&Token); return (type); } // Given a typedef name, return the type it represents int type_of_typedef(char *name, struct symtable **ctype) { struct symtable *t; // Look up the typedef in the list t = findtypedef(name); if (t == NULL) fatals("unknown type", name); scan(&Token); *ctype = t->ctype; return (t->type); } // Parse one or more global declarations, either // variables, functions or structs void global_declarations(void) { struct ASTnode *tree; struct symtable *ctype; int type, class= C_GLOBAL; while (1) { // Stop when we have reached EOF if (Token.token == T_EOF) break; // Get the type type = parse_type(&ctype, &class); // We might have just parsed a struct, union or enum // declaration with no associated variable. // The next token might be a ';'. Loop back if it is. // XXX: I'm not happy with this as it allows // "struct fred;" as an accepted statement if (type == -1) { semi(); continue; } // We have to read past the identifier // to see either a '(' for a function declaration // or a ',' or ';' for a variable declaration. // Text is filled in by the ident() call. 
ident(); if (Token.token == T_LPAREN) { // Parse the function declaration tree = function_declaration(type); // Only a function prototype, no code if (tree == NULL) continue; // A real function, generate the assembly code for it if (O_dumpAST) { dumpAST(tree, NOLABEL, 0); fprintf(stdout, "\n\n"); } genAST(tree, NOLABEL, 0); // Now free the symbols associated // with this function freeloclsyms(); } else { // Parse the global variable declaration // and skip past the trailing semicolon var_declaration(type, ctype, class); semi(); } } } ================================================ FILE: 35_Preprocessor/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c void reject_token(struct token *t); int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue); struct ASTnode *mkastleaf(int op, int type, struct symtable *sym, int intvalue); struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, struct symtable *sym, int intvalue); void dumpAST(struct ASTnode *n, int label, int parentASTop); // gen.c int genlabel(void); int genAST(struct ASTnode *n, int reg, int parentASTop); void genpreamble(); void genpostamble(); void genfreeregs(); void genglobsym(struct symtable *node); int genglobstr(char *strvalue); int genprimsize(int type); int genalign(int type, int offset, int direction); void genreturn(int reg, int id); // cg.c int cgprimsize(int type); int cgalign(int type, int offset, int direction); void cgtextseg(); void cgdataseg(); void freeall_registers(void); void cgpreamble(); void cgpostamble(); void cgfuncpreamble(struct symtable *sym); void cgfuncpostamble(struct symtable *sym); int cgloadint(int value, int type); int cgloadglob(struct symtable *sym, int op); int cgloadlocal(struct symtable *sym, int op); int cgloadglobstr(int label); int 
cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdiv(int r1, int r2); int cgshlconst(int r, int val); int cgcall(struct symtable *sym, int numargs); void cgcopyarg(int r, int argposn); int cgstorglob(int r, struct symtable *sym); int cgstorlocal(int r, struct symtable *sym); void cgglobsym(struct symtable *node); void cgglobstr(int l, char *strvalue); int cgcompare_and_set(int ASTop, int r1, int r2); int cgcompare_and_jump(int ASTop, int r1, int r2, int label); void cglabel(int l); void cgjump(int l); int cgwiden(int r, int oldtype, int newtype); void cgreturn(int reg, struct symtable *sym); int cgaddress(struct symtable *sym); int cgderef(int r, int type); int cgstorderef(int r1, int r2, int type); int cgnegate(int r); int cginvert(int r); int cglognot(int r); int cgboolean(int r, int op, int label); int cgand(int r1, int r2); int cgor(int r1, int r2); int cgxor(int r1, int r2); int cgshl(int r1, int r2); int cgshr(int r1, int r2); // expr.c struct ASTnode *binexpr(int ptp); // stmt.c struct ASTnode *compound_statement(void); // misc.c void match(int t, char *what); void semi(void); void lbrace(void); void rbrace(void); void lparen(void); void rparen(void); void ident(void); void comma(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node); struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int size, int posn); struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int size); struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int size); struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype, int size); struct symtable *addstruct(char *name, int type, struct symtable *ctype, int stype, int size); struct symtable *addunion(char *name, int 
type, struct symtable *ctype, int stype, int size); struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int size); struct symtable *addenum(char *name, int class, int value); struct symtable *addtypedef(char *name, int type, struct symtable *ctype, int stype, int size); struct symtable *findglob(char *s); struct symtable *findlocl(char *s); struct symtable *findsymbol(char *s); struct symtable *findmember(char *s); struct symtable *findstruct(char *s); struct symtable *findunion(char *s); struct symtable *findenumtype(char *s); struct symtable *findenumval(char *s); struct symtable *findtypedef(char *s); void clear_symtable(void); void freeloclsyms(void); // decl.c struct symtable *var_declaration(int type, struct symtable *ctype, int class); struct ASTnode *function_declaration(int type); void global_declarations(void); // types.c int inttype(int type); int ptrtype(int type); int parse_type(struct symtable **ctype, int *class); int pointer_to(int type); int value_at(int type); int typesize(int type, struct symtable *ctype); struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op); ================================================ FILE: 35_Preprocessor/defs.h ================================================ #include #include #include #include // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 enum { TEXTLEN = 512 // Length of identifiers in input }; // Commands and default filenames #define AOUT "a.out" #ifdef __NASM__ #define ASCMD "nasm -f elf64 -o " #define LDCMD "cc -no-pie -fno-plt -Wall -o " #else #define ASCMD "as -o " #define LDCMD "cc -o " #endif #define CPPCMD "cpp -nostdinc -isystem " // Token types enum { T_EOF, // Binary operators T_ASSIGN, T_LOGOR, T_LOGAND, T_OR, T_XOR, T_AMPER, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, T_LSHIFT, T_RSHIFT, T_PLUS, T_MINUS, T_STAR, T_SLASH, // Other operators T_INC, T_DEC, T_INVERT, T_LOGNOT, // Type keywords T_VOID, T_CHAR, T_INT, T_LONG, // Other keywords 
T_IF, T_ELSE, T_WHILE, T_FOR, T_RETURN, T_STRUCT, T_UNION, T_ENUM, T_TYPEDEF, T_EXTERN, // Structural tokens T_INTLIT, T_STRLIT, T_SEMI, T_IDENT, T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, T_LBRACKET, T_RBRACKET, T_COMMA, T_DOT, T_ARROW }; // Token structure struct token { int token; // Token type, from the enum list above int intvalue; // For T_INTLIT, the integer value }; // AST node types. The first few line up // with the related tokens enum { A_ASSIGN = 1, A_LOGOR, A_LOGAND, A_OR, A_XOR, A_AND, A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_LSHIFT, A_RSHIFT, A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_INTLIT, A_STRLIT, A_IDENT, A_GLUE, A_IF, A_WHILE, A_FUNCTION, A_WIDEN, A_RETURN, A_FUNCCALL, A_DEREF, A_ADDR, A_SCALE, A_PREINC, A_PREDEC, A_POSTINC, A_POSTDEC, A_NEGATE, A_INVERT, A_LOGNOT, A_TOBOOL }; // Primitive types. The bottom 4 bits is an integer // value that represents the level of indirection, // e.g. 0= no pointer, 1= pointer, 2= pointer pointer etc. enum { P_NONE, P_VOID = 16, P_CHAR = 32, P_INT = 48, P_LONG = 64, P_STRUCT=80, P_UNION=96 }; // Structural types enum { S_VARIABLE, S_FUNCTION, S_ARRAY }; // Storage classes enum { C_GLOBAL = 1, // Globally visible symbol C_LOCAL, // Locally visible symbol C_PARAM, // Locally visible function parameter C_EXTERN, // External globally visible symbol C_STRUCT, // A struct C_UNION, // A union C_MEMBER, // Member of a struct or union C_ENUMTYPE, // A named enumeration type C_ENUMVAL, // A named enumeration value C_TYPEDEF // A named typedef }; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol struct symtable *ctype; // If struct/union, ptr to that type int stype; // Structural type for the symbol int class; // Storage class for the symbol union { int size; // Number of elements in the symbol int endlabel; // For functions, the end label }; union { int nelems; // For functions, # of params int posn; // For locals, the negative offset // from the stack base 
pointer }; struct symtable *next; // Next symbol in one list struct symtable *member; // First member of a function, struct, }; // union or enum // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates int rvalue; // True if the node is an rvalue struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; struct symtable *sym; // For many AST nodes, the pointer to union { // the symbol in the symbol table int intvalue; // For A_INTLIT, the integer value int size; // For A_SCALE, the size to scale by }; }; enum { NOREG = -1, // Use NOREG when the AST generation // functions have no register to return NOLABEL = 0 // Use NOLABEL when we have no label to // pass to genAST() }; ================================================ FILE: 35_Preprocessor/expr.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of expressions // Copyright (c) 2019 Warren Toomey, GPL3 // expression_list: // | expression // | expression ',' expression_list // ; // Parse a list of zero or more comma-separated expressions and // return an AST composed of A_GLUE nodes with the left-hand child // being the sub-tree of previous expressions (or NULL) and the right-hand // child being the next expression. Each A_GLUE node will have size field // set to the number of expressions in the tree at this point. If no // expressions are parsed, NULL is returned static struct ASTnode *expression_list(void) { struct ASTnode *tree = NULL; struct ASTnode *child = NULL; int exprcount = 0; // Loop until the final right parentheses while (Token.token != T_RPAREN) { // Parse the next expression and increment the expression count child = binexpr(0); exprcount++; // Build an A_GLUE AST node with the previous tree as the left child // and the new expression as the right child. Store the expression count. 
tree = mkastnode(A_GLUE, P_NONE, tree, NULL, child, NULL, exprcount); // Must have a ',' or ')' at this point switch (Token.token) { case T_COMMA: scan(&Token); break; case T_RPAREN: break; default: fatald("Unexpected token in expression list", Token.token); } } // Return the tree of expressions return (tree); } // Parse a function call and return its AST static struct ASTnode *funccall(void) { struct ASTnode *tree; struct symtable *funcptr; // Check that the identifier has been defined as a function, // then make a leaf node for it. if ((funcptr = findsymbol(Text)) == NULL || funcptr->stype != S_FUNCTION) { fatals("Undeclared function", Text); } // Get the '(' lparen(); // Parse the argument expression list tree = expression_list(); // XXX Check type of each argument against the function's prototype // Build the function call AST node. Store the // function's return type as this node's type. // Also record the function's symbol-id tree = mkastunary(A_FUNCCALL, funcptr->type, tree, funcptr, 0); // Get the ')' rparen(); return (tree); } // Parse the index into an array and return an AST tree for it static struct ASTnode *array_access(void) { struct ASTnode *left, *right; struct symtable *aryptr; // Check that the identifier has been defined as an array // then make a leaf node for it that points at the base if ((aryptr = findsymbol(Text)) == NULL || aryptr->stype != S_ARRAY) { fatals("Undeclared array", Text); } left = mkastleaf(A_ADDR, aryptr->type, aryptr, 0); // Get the '[' scan(&Token); // Parse the following expression right = binexpr(0); // Get the ']' match(T_RBRACKET, "]"); // Ensure that this is of int type if (!inttype(right->type)) fatal("Array index is not of integer type"); // Scale the index by the size of the element's type right = modify_type(right, left->type, A_ADD); // Return an AST tree where the array's base has the offset // added to it, and dereference the element. Still an lvalue // at this point. 
left = mkastnode(A_ADD, aryptr->type, left, NULL, right, NULL, 0); left = mkastunary(A_DEREF, value_at(left->type), left, NULL, 0); return (left); } // Parse the member reference of a struct or union // and return an AST tree for it. If withpointer is true, // the access is through a pointer to the member. static struct ASTnode *member_access(int withpointer) { struct ASTnode *left, *right; struct symtable *compvar; struct symtable *typeptr; struct symtable *m; // Check that the identifier has been declared as a // struct/union or a struct/union pointer if ((compvar = findsymbol(Text)) == NULL) fatals("Undeclared variable", Text); if (withpointer && compvar->type != pointer_to(P_STRUCT) && compvar->type != pointer_to(P_UNION)) fatals("Undeclared variable", Text); if (!withpointer && compvar->type != P_STRUCT && compvar->type != P_UNION) fatals("Undeclared variable", Text); // If a pointer to a struct/union, get the pointer's value. // Otherwise, make a leaf node that points at the base // Either way, it's an rvalue if (withpointer) { left = mkastleaf(A_IDENT, pointer_to(compvar->type), compvar, 0); } else left = mkastleaf(A_ADDR, compvar->type, compvar, 0); left->rvalue = 1; // Get the details of the composite type typeptr = compvar->ctype; // Skip the '.' or '->' token and get the member's name scan(&Token); ident(); // Find the matching member's name in the type // Die if we can't find it for (m = typeptr->member; m != NULL; m = m->next) if (!strcmp(m->name, Text)) break; if (m == NULL) fatals("No member found in struct/union: ", Text); // Build an A_INTLIT node with the offset right = mkastleaf(A_INTLIT, P_INT, NULL, m->posn); // Add the member's offset to the base of the struct/union // and dereference it. Still an lvalue at this point left = mkastnode(A_ADD, pointer_to(m->type), left, NULL, right, NULL, 0); left = mkastunary(A_DEREF, m->type, left, NULL, 0); return (left); } // Parse a postfix expression and return // an AST node representing it. 
The // identifier is already in Text. static struct ASTnode *postfix(void) { struct ASTnode *n; struct symtable *varptr; struct symtable *enumptr; // If the identifier matches an enum value, // return an A_INTLIT node if ((enumptr = findenumval(Text)) != NULL) { scan(&Token); return (mkastleaf(A_INTLIT, P_INT, NULL, enumptr->posn)); } // Scan in the next token to see if we have a postfix expression scan(&Token); // Function call if (Token.token == T_LPAREN) return (funccall()); // An array reference if (Token.token == T_LBRACKET) return (array_access()); // Access into a struct or union if (Token.token == T_DOT) return (member_access(0)); if (Token.token == T_ARROW) return (member_access(1)); // A variable. Check that the variable exists. if ((varptr = findsymbol(Text)) == NULL || varptr->stype != S_VARIABLE) fatals("Unknown variable", Text); switch (Token.token) { // Post-increment: skip over the token case T_INC: scan(&Token); n = mkastleaf(A_POSTINC, varptr->type, varptr, 0); break; // Post-decrement: skip over the token case T_DEC: scan(&Token); n = mkastleaf(A_POSTDEC, varptr->type, varptr, 0); break; // Just a variable reference default: n = mkastleaf(A_IDENT, varptr->type, varptr, 0); } return (n); } // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; int id; switch (Token.token) { case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. // Make it a P_CHAR if it's within the P_CHAR range if ((Token.intvalue) >= 0 && (Token.intvalue < 256)) n = mkastleaf(A_INTLIT, P_CHAR, NULL, Token.intvalue); else n = mkastleaf(A_INTLIT, P_INT, NULL, Token.intvalue); break; case T_STRLIT: // For a STRLIT token, generate the assembly for it. // Then make a leaf AST node for it. id is the string's label. 
id = genglobstr(Text); n = mkastleaf(A_STRLIT, pointer_to(P_CHAR), NULL, id); break; case T_IDENT: return (postfix()); case T_LPAREN: // Beginning of a parenthesised expression, skip the '('. // Scan in the expression and the right parenthesis scan(&Token); n = binexpr(0); rparen(); return (n); default: fatald("Expecting a primary expression, got token", Token.token); } // Scan in the next token and return the leaf node scan(&Token); return (n); } // Convert a binary operator token into a binary AST operation. // We rely on a 1:1 mapping from token to AST operation static int binastop(int tokentype) { if (tokentype > T_EOF && tokentype <= T_SLASH) return (tokentype); fatald("Syntax error, token", tokentype); return (0); // Keep -Wall happy } // Return true if a token is right-associative, // false otherwise. static int rightassoc(int tokentype) { if (tokentype == T_ASSIGN) return (1); return (0); } // Operator precedence for each token. Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 20, 30, // T_EOF, T_ASSIGN, T_LOGOR, T_LOGAND 40, 50, 60, // T_OR, T_XOR, T_AMPER 70, 70, // T_EQ, T_NE 80, 80, 80, 80, // T_LT, T_GT, T_LE, T_GE 90, 90, // T_LSHIFT, T_RSHIFT 100, 100, // T_PLUS, T_MINUS 110, 110 // T_STAR, T_SLASH }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec; if (tokentype > T_SLASH) fatald("Token with no precedence in op_precedence:", tokentype); prec = OpPrec[tokentype]; if (prec == 0) fatald("Syntax error, token", tokentype); return (prec); } // prefix_expression: primary // | '*' prefix_expression // | '&' prefix_expression // | '-' prefix_expression // | '++' prefix_expression // | '--' prefix_expression // ; // Parse a prefix expression and return // a sub-tree representing it. 
struct ASTnode *prefix(void) { struct ASTnode *tree; switch (Token.token) { case T_AMPER: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Ensure that it's an identifier if (tree->op != A_IDENT) fatal("& operator must be followed by an identifier"); // Now change the operator to A_ADDR and the type to // a pointer to the original type tree->op = A_ADDR; tree->type = pointer_to(tree->type); break; case T_STAR: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's either another deref or an // identifier if (tree->op != A_IDENT && tree->op != A_DEREF) fatal("* operator must be followed by an identifier or *"); // Prepend an A_DEREF operation to the tree tree = mkastunary(A_DEREF, value_at(tree->type), tree, NULL, 0); break; case T_MINUS: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_NEGATE operation to the tree and // make the child an rvalue. Because chars are unsigned, // also widen this to int so that it's signed tree->rvalue = 1; tree = modify_type(tree, P_INT, 0); tree = mkastunary(A_NEGATE, tree->type, tree, NULL, 0); break; case T_INVERT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_INVERT operation to the tree and // make the child an rvalue. tree->rvalue = 1; tree = mkastunary(A_INVERT, tree->type, tree, NULL, 0); break; case T_LOGNOT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_LOGNOT operation to the tree and // make the child an rvalue. 
tree->rvalue = 1; tree = mkastunary(A_LOGNOT, tree->type, tree, NULL, 0); break; case T_INC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("++ operator must be followed by an identifier"); // Prepend an A_PREINC operation to the tree tree = mkastunary(A_PREINC, tree->type, tree, NULL, 0); break; case T_DEC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("-- operator must be followed by an identifier"); // Prepend an A_PREDEC operation to the tree tree = mkastunary(A_PREDEC, tree->type, tree, NULL, 0); break; default: tree = primary(); } return (tree); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; struct ASTnode *ltemp, *rtemp; int ASTop; int tokentype; // Get the tree on the left. // Fetch the next token at the same time. 
left = prefix(); // If we hit one of several terminating tokens, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA) { left->rvalue = 1; return (left); } // While the precedence of this token is more than that of the // previous token precedence, or it's right associative and // equal to the previous token's precedence while ((op_precedence(tokentype) > ptp) || (rightassoc(tokentype) && op_precedence(tokentype) == ptp)) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Determine the operation to be performed on the sub-trees ASTop = binastop(tokentype); if (ASTop == A_ASSIGN) { // Assignment // Make the right tree into an rvalue right->rvalue = 1; // Ensure the right's type matches the left right = modify_type(right, left->type, 0); if (right == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree. However, switch // left and right around, so that the right expression's // code will be generated before the left expression ltemp = left; left = right; right = ltemp; } else { // We are not doing an assignment, so both trees should be rvalues // Convert both trees into rvalue if they are lvalue trees left->rvalue = 1; right->rvalue = 1; // Ensure the two types are compatible by trying // to modify each tree to match the other's type. ltemp = modify_type(left, right->type, ASTop); rtemp = modify_type(right, left->type, ASTop); if (ltemp == NULL && rtemp == NULL) fatal("Incompatible types in binary expression"); if (ltemp != NULL) left = ltemp; if (rtemp != NULL) right = rtemp; } // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. left = mkastnode(binastop(tokentype), left->type, left, NULL, right, NULL, 0); // Update the details of the current token. 
// If we hit a terminating token, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA) { left->rvalue = 1; return (left); } } // Return the tree we have when the precedence // is the same or lower left->rvalue = 1; return (left); } ================================================ FILE: 35_Preprocessor/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number int genlabel(void) { static int id = 1; return (id++); } // Generate the code for an IF statement // and an optional ELSE clause static int genIF(struct ASTnode *n) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. // When there is no ELSE clause, Lfalse _is_ // the ending label! Lfalse = genlabel(); if (n->right) Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. genAST(n->left, Lfalse, n->op); genfreeregs(); // Generate the true compound statement genAST(n->mid, NOLABEL, n->op); genfreeregs(); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOLABEL, n->op); genfreeregs(); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = genlabel(); Lend = genlabel(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. 
genAST(n->left, Lend, n->op); genfreeregs(); // Generate the compound statement for the body genAST(n->right, NOLABEL, n->op); genfreeregs(); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Generate the code to copy the arguments of a // function call to its parameters, then call the // function itself. Return the register that holds // the function's return value. static int gen_funccall(struct ASTnode *n) { struct ASTnode *gluetree = n->left; int reg; int numargs = 0; // If there is a list of arguments, walk this list // from the last argument (right-hand child) to the // first while (gluetree) { // Calculate the expression's value reg = genAST(gluetree->right, NOLABEL, gluetree->op); // Copy this into the n'th function parameter: size is 1, 2, 3, ... cgcopyarg(reg, gluetree->size); // Keep the first (highest) number of arguments if (numargs == 0) numargs = gluetree->size; genfreeregs(); gluetree = gluetree->left; } // Call the function, clean up the stack (based on numargs), // and return its result return (cgcall(n->sym, numargs)); } // Given an AST, an optional label, and the AST op // of the parent, generate assembly code recursively. // Return the register id with the tree's final value. 
int genAST(struct ASTnode *n, int label, int parentASTop) { int leftreg, rightreg; // We have some specific AST node handling at the top // so that we don't evaluate the child sub-trees immediately switch (n->op) { case A_IF: return (genIF(n)); case A_WHILE: return (genWHILE(n)); case A_FUNCCALL: return (gen_funccall(n)); case A_GLUE: // Do each child statement, and free the // registers after each child genAST(n->left, NOLABEL, n->op); genfreeregs(); genAST(n->right, NOLABEL, n->op); genfreeregs(); return (NOREG); case A_FUNCTION: // Generate the function's preamble before the code // in the child sub-tree cgfuncpreamble(n->sym); genAST(n->left, NOLABEL, n->op); cgfuncpostamble(n->sym); return (NOREG); } // General AST node handling below // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, NOLABEL, n->op); if (n->right) rightreg = genAST(n->right, NOLABEL, n->op); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdiv(leftreg, rightreg)); case A_AND: return (cgand(leftreg, rightreg)); case A_OR: return (cgor(leftreg, rightreg)); case A_XOR: return (cgxor(leftreg, rightreg)); case A_LSHIFT: return (cgshl(leftreg, rightreg)); case A_RSHIFT: return (cgshr(leftreg, rightreg)); case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, compare registers // and set one to 1 or 0 based on the comparison. 
if (parentASTop == A_IF || parentASTop == A_WHILE) return (cgcompare_and_jump(n->op, leftreg, rightreg, label)); else return (cgcompare_and_set(n->op, leftreg, rightreg)); case A_INTLIT: return (cgloadint(n->intvalue, n->type)); case A_STRLIT: return (cgloadglobstr(n->intvalue)); case A_IDENT: // Load our value if we are an rvalue // or we are being dereferenced if (n->rvalue || parentASTop == A_DEREF) { if (n->sym->class == C_GLOBAL) { return (cgloadglob(n->sym, n->op)); } else { return (cgloadlocal(n->sym, n->op)); } } else return (NOREG); case A_ASSIGN: // Are we assigning to an identifier or through a pointer? switch (n->right->op) { case A_IDENT: if (n->right->sym->class == C_GLOBAL) return (cgstorglob(leftreg, n->right->sym)); else return (cgstorlocal(leftreg, n->right->sym)); case A_DEREF: return (cgstorderef(leftreg, rightreg, n->right->type)); default: fatald("Can't A_ASSIGN in genAST(), op", n->op); } case A_WIDEN: // Widen the child's type to the parent's type return (cgwiden(leftreg, n->left->type, n->type)); case A_RETURN: cgreturn(leftreg, Functionid); return (NOREG); case A_ADDR: return (cgaddress(n->sym)); case A_DEREF: // If we are an rvalue, dereference to get the value we point at, // otherwise leave it for A_ASSIGN to store through the pointer if (n->rvalue) return (cgderef(leftreg, n->left->type)); else return (leftreg); case A_SCALE: // Small optimisation: use shift if the // scale value is a known power of two switch (n->size) { case 2: return (cgshlconst(leftreg, 1)); case 4: return (cgshlconst(leftreg, 2)); case 8: return (cgshlconst(leftreg, 3)); default: // Load a register with the size and // multiply the leftreg by this size rightreg = cgloadint(n->size, P_INT); return (cgmul(leftreg, rightreg)); } case A_POSTINC: case A_POSTDEC: // Load and decrement the variable's value into a register // and post increment/decrement it if (n->sym->class == C_GLOBAL) return (cgloadglob(n->sym, n->op)); else return (cgloadlocal(n->sym, n->op)); case 
A_PREINC: case A_PREDEC: // Load and decrement the variable's value into a register // and pre increment/decrement it if (n->left->sym->class == C_GLOBAL) return (cgloadglob(n->left->sym, n->op)); else return (cgloadlocal(n->left->sym, n->op)); case A_NEGATE: return (cgnegate(leftreg)); case A_INVERT: return (cginvert(leftreg)); case A_LOGNOT: return (cglognot(leftreg)); case A_TOBOOL: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, set the register // to 0 or 1 based on it's zeroeness or non-zeroeness return (cgboolean(leftreg, parentASTop, label)); default: fatald("Unknown AST operator", n->op); } return (NOREG); // Keep -Wall happy } void genpreamble() { cgpreamble(); } void genpostamble() { cgpostamble(); } void genfreeregs() { freeall_registers(); } void genglobsym(struct symtable *node) { cgglobsym(node); } int genglobstr(char *strvalue) { int l = genlabel(); cgglobstr(l, strvalue); return (l); } int genprimsize(int type) { return (cgprimsize(type)); } int genalign(int type, int offset, int direction) { return (cgalign(type, offset, direction)); } ================================================ FILE: 35_Preprocessor/include/fcntl.h ================================================ #ifndef _FCNTL_H_ # define _FCNTL_H_ #define O_RDONLY 00 #define O_WRONLY 01 #define O_RDWR 02 int open(char *pathname, int flags); #endif // _FCNTL_H_ ================================================ FILE: 35_Preprocessor/include/stddef.h ================================================ #ifndef _STDDEF_H_ # define _STDDEF_H_ typedef long size_t; #endif //_STDDEF_H_ ================================================ FILE: 35_Preprocessor/include/stdio.h ================================================ #ifndef _STDIO_H_ # define _STDIO_H_ #include // This FILE definition will do for now typedef char * FILE; FILE *fopen(char *pathname, char *mode); size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream); size_t fwrite(void 
*ptr, size_t size, size_t nmemb, FILE *stream); int fclose(FILE *stream); int printf(char *format); int fprintf(FILE *stream, char *format); #endif // _STDIO_H_ ================================================ FILE: 35_Preprocessor/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Given a string with a '.' and at least a 1-character suffix // after the '.', change the suffix to be the given character. // Return the new string or NULL if the original string could // not be modified char *alter_suffix(char *str, char suffix) { char *posn; char *newstr; // Clone the string if ((newstr = strdup(str)) == NULL) return (NULL); // Find the '.' if ((posn = strrchr(newstr, '.')) == NULL) return (NULL); // Ensure there is a suffix posn++; if (*posn == '\0') return (NULL); // Change the suffix and NUL-terminate the string *posn++ = suffix; *posn = '\0'; return (newstr); } // Given an input filename, compile that file // down to assembly code. 
Return the new file's name static char *do_compile(char *filename) { char cmd[TEXTLEN]; // Change the input file's suffix to .s Outfilename = alter_suffix(filename, 's'); if (Outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .c on the end\n", filename); exit(1); } // Generate the pre-processor command snprintf(cmd, TEXTLEN, "%s %s %s", CPPCMD, INCDIR, filename); // Open up the pre-processor pipe if ((Infile = popen(cmd, "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", filename, strerror(errno)); exit(1); } Infilename = filename; // Create the output file if ((Outfile = fopen(Outfilename, "w")) == NULL) { fprintf(stderr, "Unable to create %s: %s\n", Outfilename, strerror(errno)); exit(1); } Line = 1; // Reset the scanner Putback = '\n'; clear_symtable(); // Clear the symbol table if (O_verbose) printf("compiling %s\n", filename); scan(&Token); // Get the first token from the input genpreamble(); // Output the preamble global_declarations(); // Parse the global declarations genpostamble(); // Output the postamble fclose(Outfile); // Close the output file return (Outfilename); } // Given an input filename, assemble that file // down to object code. Return the object filename char *do_assemble(char *filename) { char cmd[TEXTLEN]; int err; char *outfilename = alter_suffix(filename, 'o'); if (outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .s on the end\n", filename); exit(1); } // Build the assembly command and run it snprintf(cmd, TEXTLEN, "%s %s %s", ASCMD, outfilename, filename); if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Assembly of %s failed\n", filename); exit(1); } return (outfilename); } // Given a list of object files and an output filename, // link all of the object filenames together. 
void do_link(char *outfilename, char *objlist[]) { int cnt, size = TEXTLEN; char cmd[TEXTLEN], *cptr; int err; // Start with the linker command and the output file cptr = cmd; cnt = snprintf(cptr, size, "%s %s ", LDCMD, outfilename); cptr += cnt; size -= cnt; // Now append each object file while (*objlist != NULL) { cnt = snprintf(cptr, size, "%s ", *objlist); cptr += cnt; size -= cnt; objlist++; } if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Linking failed\n"); exit(1); } } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s [-vcST] [-o outfile] file [file ...]\n", prog); fprintf(stderr, " -v give verbose output of the compilation stages\n"); fprintf(stderr, " -c generate object files but don't link them\n"); fprintf(stderr, " -S generate assembly files but don't link them\n"); fprintf(stderr, " -T dump the AST trees for each input file\n"); fprintf(stderr, " -o outfile, produce the outfile executable file\n"); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. 
enum { MAXOBJ = 100 }; int main(int argc, char *argv[]) { char *outfilename = AOUT; char *asmfile, *objfile; char *objlist[MAXOBJ]; int i, objcnt = 0; // Initialise our variables O_dumpAST = 0; O_keepasm = 0; O_assemble = 0; O_verbose = 0; O_dolink = 1; // Scan for command-line options for (i = 1; i < argc; i++) { // No leading '-', stop scanning for options if (*argv[i] != '-') break; // For each option in this argument for (int j = 1; (*argv[i] == '-') && argv[i][j]; j++) { switch (argv[i][j]) { case 'o': outfilename = argv[++i]; // Save & skip to next argument break; case 'T': O_dumpAST = 1; break; case 'c': O_assemble = 1; O_keepasm = 0; O_dolink = 0; break; case 'S': O_keepasm = 1; O_assemble = 0; O_dolink = 0; break; case 'v': O_verbose = 1; break; default: usage(argv[0]); } } } // Ensure we have at lease one input file argument if (i >= argc) usage(argv[0]); // Work on each input file in turn while (i < argc) { asmfile = do_compile(argv[i]); // Compile the source file if (O_dolink || O_assemble) { objfile = do_assemble(asmfile); // Assemble it to object forma if (objcnt == (MAXOBJ - 2)) { fprintf(stderr, "Too many object files for the compiler to handle\n"); exit(1); } objlist[objcnt++] = objfile; // Add the object file's name objlist[objcnt] = NULL; // to the list of object files } if (!O_keepasm) // Remove the assembly file if unlink(asmfile); // we don't need to keep it i++; } // Now link all the object files together if (O_dolink) { do_link(outfilename, objlist); // If we don't need to keep the object // files, then remove them if (!O_assemble) { for (i = 0; objlist[i] != NULL; i++) unlink(objlist[i]); } } return (0); } ================================================ FILE: 35_Preprocessor/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" #include // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current token is t, // and fetch the next token. 
Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Match a comma and fetch the next token void comma(void) { match(T_COMMA, "comma"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d of %s\n", s, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d of %s\n", s1, s2, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d of %s\n", s, d, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d of %s\n", s, c, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } ================================================ FILE: 35_Preprocessor/scan.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { char *p; p = strchr(s, c); return (p ? p - s : -1); } // Get the next character from the input file. 
static int next(void) { int c, l; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return (c); } c = fgetc(Infile); // Read from input file while (c == '#') { // We've hit a pre-processor statement scan(&Token); // Get the line number into l if (Token.token != T_INTLIT) fatals("Expecting pre-processor line number, got:", Text); l = Token.intvalue; scan(&Token); // Get the filename in Text if (Token.token != T_STRLIT) fatals("Expecting pre-processor file name, got:", Text); if (Text[0] != '<') { // If this is a real filename if (strcmp(Text, Infilename)) // and not the one we have now Infilename = strdup(Text); // save it. Then update the line num Line = l; } while ((c = fgetc(Infile)) != '\n'); // Skip to the end of the line c = fgetc(Infile); // and get the next character } if ('\n' == c) Line++; // Increment line count return (c); } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. static int skip(void) { int c; c = next(); while (' ' == c || '\t' == c || '\n' == c || '\r' == c || '\f' == c) { c = next(); } return (c); } // Return the next character from a character // or string literal static int scanch(void) { int c; // Get the next input character and interpret // metacharacters that start with a backslash c = next(); if (c == '\\') { switch (c = next()) { case 'a': return '\a'; case 'b': return '\b'; case 'f': return '\f'; case 'n': return '\n'; case 'r': return '\r'; case 't': return '\t'; case 'v': return '\v'; case '\\': return '\\'; case '"': return '"'; case '\'': return '\''; default: fatalc("unknown escape sequence", c); } } return (c); // Just an ordinary old character! } // Scan and return an integer literal // value from the input file. 
static int scanint(int c) { int k, val = 0; // Convert each character into an int value while ((k = chrpos("0123456789", c)) >= 0) { val = val * 10 + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan in a string literal from the input file, // and store it in buf[]. Return the length of // the string. static int scanstr(char *buf) { int i, c; // Loop while we have enough buffer space for (i = 0; i < TEXTLEN - 1; i++) { // Get the next char and append to buf // Return when we hit the ending double quote if ((c = scanch()) == '"') { buf[i] = 0; return (i); } buf[i] = c; } // Ran out of buf[] space fatal("String literal too long"); return (0); } // Scan an identifier from the input file and // store it in buf[]. Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { fatal("Identifier too long"); } else if (i < lim - 1) { buf[i++] = c; } c = next(); } // We hit a non-valid character, put it back. // NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. 
static int keyword(char *s) { switch (*s) { case 'c': if (!strcmp(s, "char")) return (T_CHAR); break; case 'e': if (!strcmp(s, "else")) return (T_ELSE); if (!strcmp(s, "enum")) return (T_ENUM); if (!strcmp(s, "extern")) return (T_EXTERN); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'l': if (!strcmp(s, "long")) return (T_LONG); break; case 'r': if (!strcmp(s, "return")) return (T_RETURN); break; case 's': if (!strcmp(s, "struct")) return (T_STRUCT); break; case 't': if (!strcmp(s, "typedef")) return (T_TYPEDEF); break; case 'u': if (!strcmp(s, "union")) return (T_UNION); break; case 'v': if (!strcmp(s, "void")) return (T_VOID); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; } return (0); } // A pointer to a rejected token static struct token *Rejtoken = NULL; // Reject the token that we just scanned void reject_token(struct token *t) { if (Rejtoken != NULL) fatal("Can't reject token twice"); Rejtoken = t; } // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. 
int scan(struct token *t) { int c, tokentype; // If we have any rejected token, return it if (Rejtoken != NULL) { t = Rejtoken; Rejtoken = NULL; return (1); } // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': if ((c = next()) == '+') { t->token = T_INC; } else { putback(c); t->token = T_PLUS; } break; case '-': if ((c = next()) == '-') { t->token = T_DEC; } else if (c == '>') { t->token = T_ARROW; } else { putback(c); t->token = T_MINUS; } break; case '*': t->token = T_STAR; break; case '/': t->token = T_SLASH; break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case '[': t->token = T_LBRACKET; break; case ']': t->token = T_RBRACKET; break; case '~': t->token = T_INVERT; break; case '^': t->token = T_XOR; break; case ',': t->token = T_COMMA; break; case '.': t->token = T_DOT; break; case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { putback(c); t->token = T_LOGNOT; } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else if (c == '<') { t->token = T_LSHIFT; } else { putback(c); t->token = T_LT; } break; case '>': if ((c = next()) == '=') { t->token = T_GE; } else if (c == '>') { t->token = T_RSHIFT; } else { putback(c); t->token = T_GT; } break; case '&': if ((c = next()) == '&') { t->token = T_LOGAND; } else { putback(c); t->token = T_AMPER; } break; case '|': if ((c = next()) == '|') { t->token = T_LOGOR; } else { putback(c); t->token = T_OR; } break; case '\'': // If it's a quote, scan in the // literal character value and // the trailing quote t->intvalue = scanch(); t->token = T_INTLIT; if (next() != '\'') fatal("Expected '\\'' at end of char literal"); break; case '"': // Scan in a literal 
string scanstr(Text); t->token = T_STRLIT; break; default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if ((tokentype = keyword(Text)) != 0) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token return (1); } ================================================ FILE: 35_Preprocessor/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // Prototypes static struct ASTnode *single_statement(void); // compound_statement: // empty, i.e. no statement // | statement // | statement statements // ; // // statement: declaration // | expression_statement // | function_call // | if_statement // | while_statement // | for_statement // | return_statement // ; // if_statement: if_head // | if_head 'else' compound_statement // ; // // if_head: 'if' '(' true_false_expression ')' compound_statement ; // // Parse an IF statement including any // optional ELSE clause and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. 
condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); rparen(); // Get the AST for the compound statement trueAST = compound_statement(); // If we have an 'else', skip it // and get the AST for the compound statement if (Token.token == T_ELSE) { scan(&Token); falseAST = compound_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, P_NONE, condAST, trueAST, falseAST, NULL, 0)); } // while_statement: 'while' '(' true_false_expression ')' compound_statement ; // // Parse a WHILE statement and return its AST static struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); rparen(); // Get the AST for the compound statement bodyAST = compound_statement(); // Build and return the AST for this statement return (mkastnode(A_WHILE, P_NONE, condAST, NULL, bodyAST, NULL, 0)); } // for_statement: 'for' '(' preop_statement ';' // true_false_expression ';' // postop_statement ')' compound_statement ; // // preop_statement: statement (for now) // postop_statement: statement (for now) // // Parse a FOR statement and return its AST static struct ASTnode *for_statement(void) { struct ASTnode *condAST, *bodyAST; struct ASTnode *preopAST, *postopAST; struct ASTnode *tree; // Ensure we have 'for' '(' match(T_FOR, "for"); lparen(); // Get the pre_op statement and the ';' preopAST = single_statement(); semi(); // Get the condition and the ';'. // Force a non-comparison to be boolean // the tree's operation is a comparison. 
condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); semi(); // Get the post_op statement and the ')' postopAST = single_statement(); rparen(); // Get the compound statement which is the body bodyAST = compound_statement(); // For now, all four sub-trees have to be non-NULL. // Later on, we'll change the semantics for when some are missing // Glue the compound statement and the postop tree tree = mkastnode(A_GLUE, P_NONE, bodyAST, NULL, postopAST, NULL, 0); // Make a WHILE loop with the condition and this new body tree = mkastnode(A_WHILE, P_NONE, condAST, NULL, tree, NULL, 0); // And glue the preop tree to the A_WHILE tree return (mkastnode(A_GLUE, P_NONE, preopAST, NULL, tree, NULL, 0)); } // return_statement: 'return' '(' expression ')' ; // // Parse a return statement and return its AST static struct ASTnode *return_statement(void) { struct ASTnode *tree; // Can't return a value if function returns P_VOID if (Functionid->type == P_VOID) fatal("Can't return from a void function"); // Ensure we have 'return' '(' match(T_RETURN, "return"); lparen(); // Parse the following expression tree = binexpr(0); // Ensure this is compatible with the function's type tree = modify_type(tree, Functionid->type, 0); if (tree == NULL) fatal("Incompatible type to return"); // Add on the A_RETURN node tree = mkastunary(A_RETURN, P_NONE, tree, NULL, 0); // Get the ')' rparen(); return (tree); } // Parse a single statement and return its AST static struct ASTnode *single_statement(void) { int type, class= C_LOCAL; struct symtable *ctype; switch (Token.token) { case T_IDENT: // We have to see if the identifier matches a typedef. // If not do the default code in this switch statement. // Otherwise, fall down to the parse_type() call. 
if (findtypedef(Text) == NULL) return (binexpr(0)); case T_CHAR: case T_INT: case T_LONG: case T_STRUCT: case T_UNION: case T_ENUM: case T_TYPEDEF: // The beginning of a variable declaration. // Parse the type and get the identifier. // Then parse the rest of the declaration // and skip over the semicolon type = parse_type(&ctype, &class); ident(); var_declaration(type, ctype, class); semi(); return (NULL); // No AST generated here case T_IF: return (if_statement()); case T_WHILE: return (while_statement()); case T_FOR: return (for_statement()); case T_RETURN: return (return_statement()); default: // For now, see if this is an expression. // This catches assignment statements. return (binexpr(0)); } return (NULL); // Keep -Wall happy } // Parse a compound statement // and return its AST struct ASTnode *compound_statement(void) { struct ASTnode *left = NULL; struct ASTnode *tree; // Require a left curly bracket lbrace(); while (1) { // Parse a single statement tree = single_statement(); // Some statements must be followed by a semicolon if (tree != NULL && (tree->op == A_ASSIGN || tree->op == A_RETURN || tree->op == A_FUNCCALL)) semi(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, P_NONE, left, NULL, tree, NULL, 0); } // When we hit a right curly bracket, // skip past it and return the AST if (Token.token == T_RBRACE) { rbrace(); return (left); } } } ================================================ FILE: 35_Preprocessor/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 // Append a node to the singly-linked list pointed to by head or tail void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node) { // Check for valid pointers if (head == NULL || tail == NULL || node == 
NULL) fatal("Either head, tail or node is NULL in appendsym"); // Append to the list if (*tail) { (*tail)->next = node; *tail = node; } else *head = *tail = node; node->next = NULL; } // Create a symbol node to be added to a symbol table list. // Set up the node's: // + type: char, int etc. // + ctype: composite type pointer for struct/union // + structural type: var, function, array etc. // + size: number of elements, or endlabel: end label for a function // + posn: Position information for local symbols // Return a pointer to the new node struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int size, int posn) { // Get a new node struct symtable *node = (struct symtable *) malloc(sizeof(struct symtable)); if (node == NULL) fatal("Unable to malloc a symbol table node in newsym"); // Fill in the values node->name = strdup(name); node->type = type; node->ctype = ctype; node->stype = stype; node->class = class; node->size = size; node->posn = posn; node->next = NULL; node->member = NULL; // Generate any global space if (class == C_GLOBAL) genglobsym(node); return (node); } // Add a symbol to the global symbol list struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int size) { struct symtable *sym = newsym(name, type, ctype, stype, class, size, 0); appendsym(&Globhead, &Globtail, sym); return (sym); } // Add a symbol to the local symbol list struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_LOCAL, size, 0); appendsym(&Loclhead, &Locltail, sym); return (sym); } // Add a symbol to the parameter list struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_PARAM, size, 0); appendsym(&Parmhead, &Parmtail, sym); return (sym); } // Add a symbol to the temporary member list struct symtable 
*addmemb(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_MEMBER, size, 0); appendsym(&Membhead, &Membtail, sym); return (sym); } // Add a struct to the struct list struct symtable *addstruct(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_STRUCT, size, 0); appendsym(&Structhead, &Structtail, sym); return (sym); } // Add a struct to the union list struct symtable *addunion(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_UNION, size, 0); appendsym(&Unionhead, &Uniontail, sym); return (sym); } // Add an enum type or value to the enum list. // Class is C_ENUMTYPE or C_ENUMVAL. // Use posn to store the int value. struct symtable *addenum(char *name, int class, int value) { struct symtable *sym = newsym(name, P_INT, NULL, 0, class, 0, value); appendsym(&Enumhead, &Enumtail, sym); return (sym); } // Add a typedef to the typedef list struct symtable *addtypedef(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_TYPEDEF, size, 0); appendsym(&Typehead, &Typetail, sym); return (sym); } // Search for a symbol in a specific list. // Return a pointer to the found node or NULL if not found. // If class is not zero, also match on the given class static struct symtable *findsyminlist(char *s, struct symtable *list, int class) { for (; list != NULL; list = list->next) if ((list->name != NULL) && !strcmp(s, list->name)) if (class==0 || class== list->class) return (list); return (NULL); } // Determine if the symbol s is in the global symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findglob(char *s) { return (findsyminlist(s, Globhead, 0)); } // Determine if the symbol s is in the local symbol table. 
// Return a pointer to the found node or NULL if not found. struct symtable *findlocl(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } return (findsyminlist(s, Loclhead, 0)); } // Determine if the symbol s is in the symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findsymbol(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } // Otherwise, try the local and global symbol lists node = findsyminlist(s, Loclhead, 0); if (node) return (node); return (findsyminlist(s, Globhead, 0)); } // Find a member in the member list // Return a pointer to the found node or NULL if not found. struct symtable *findmember(char *s) { return (findsyminlist(s, Membhead, 0)); } // Find a struct in the struct list // Return a pointer to the found node or NULL if not found. struct symtable *findstruct(char *s) { return (findsyminlist(s, Structhead, 0)); } // Find a struct in the union list // Return a pointer to the found node or NULL if not found. struct symtable *findunion(char *s) { return (findsyminlist(s, Unionhead, 0)); } // Find an enum type in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumtype(char *s) { return (findsyminlist(s, Enumhead, C_ENUMTYPE)); } // Find an enum value in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumval(char *s) { return (findsyminlist(s, Enumhead, C_ENUMVAL)); } // Find a type in the tyedef list // Return a pointer to the found node or NULL if not found. 
struct symtable *findtypedef(char *s) { return (findsyminlist(s, Typehead, 0)); } // Reset the contents of the symbol table void clear_symtable(void) { Globhead = Globtail = NULL; Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Membhead = Membtail = NULL; Structhead = Structtail = NULL; Unionhead = Uniontail = NULL; Enumhead = Enumtail = NULL; Typehead = Typetail = NULL; } // Clear all the entries in the local symbol table void freeloclsyms(void) { Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Functionid = NULL; } ================================================ FILE: 35_Preprocessor/tests/err.input31.c ================================================ Expecting a primary expression, got token:15 on line 5 of input31.c ================================================ FILE: 35_Preprocessor/tests/err.input32.c ================================================ Unknown variable:cow on line 4 of input32.c ================================================ FILE: 35_Preprocessor/tests/err.input33.c ================================================ Incompatible type to return on line 4 of input33.c ================================================ FILE: 35_Preprocessor/tests/err.input34.c ================================================ For now, declaration of non-global arrays is not implemented on line 4 of input34.c ================================================ FILE: 35_Preprocessor/tests/err.input35.c ================================================ Duplicate local variable declaration:a on line 4 of input35.c ================================================ FILE: 35_Preprocessor/tests/err.input36.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input36.c ================================================ FILE: 35_Preprocessor/tests/err.input37.c ================================================ Unexpected token in parameter list:15 on line 3 of input37.c 
================================================ FILE: 35_Preprocessor/tests/err.input38.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input38.c ================================================ FILE: 35_Preprocessor/tests/err.input39.c ================================================ No statements in function with non-void type on line 4 of input39.c ================================================ FILE: 35_Preprocessor/tests/err.input40.c ================================================ No return for function with non-void type on line 4 of input40.c ================================================ FILE: 35_Preprocessor/tests/err.input41.c ================================================ Can't return from a void function on line 3 of input41.c ================================================ FILE: 35_Preprocessor/tests/err.input42.c ================================================ Undeclared function:fred on line 3 of input42.c ================================================ FILE: 35_Preprocessor/tests/err.input43.c ================================================ Undeclared array:b on line 3 of input43.c ================================================ FILE: 35_Preprocessor/tests/err.input44.c ================================================ Unknown variable:z on line 3 of input44.c ================================================ FILE: 35_Preprocessor/tests/err.input45.c ================================================ & operator must be followed by an identifier on line 3 of input45.c ================================================ FILE: 35_Preprocessor/tests/err.input46.c ================================================ * operator must be followed by an identifier or * on line 3 of input46.c ================================================ FILE: 35_Preprocessor/tests/err.input47.c ================================================ ++ operator must be followed by an identifier on line 3 of input47.c 
================================================ FILE: 35_Preprocessor/tests/err.input48.c ================================================ -- operator must be followed by an identifier on line 3 of input48.c ================================================ FILE: 35_Preprocessor/tests/err.input49.c ================================================ Incompatible expression in assignment on line 6 of input49.c ================================================ FILE: 35_Preprocessor/tests/err.input50.c ================================================ Incompatible types in binary expression on line 6 of input50.c ================================================ FILE: 35_Preprocessor/tests/err.input51.c ================================================ Expected '\'' at end of char literal on line 4 of input51.c ================================================ FILE: 35_Preprocessor/tests/err.input52.c ================================================ Unrecognised character:$ on line 5 of input52.c ================================================ FILE: 35_Preprocessor/tests/err.input56.c ================================================ unknown struct/union type:var1 on line 2 of input56.c ================================================ FILE: 35_Preprocessor/tests/err.input57.c ================================================ previously defined struct/union:fred on line 2 of input57.c ================================================ FILE: 35_Preprocessor/tests/err.input59.c ================================================ Undeclared variable:y on line 3 of input59.c ================================================ FILE: 35_Preprocessor/tests/err.input60.c ================================================ Undeclared variable:x on line 3 of input60.c ================================================ FILE: 35_Preprocessor/tests/err.input61.c ================================================ Undeclared variable:x on line 3 of input61.c ================================================ 
FILE: 35_Preprocessor/tests/err.input64.c ================================================ undeclared enum type::fred on line 1 of input64.c ================================================ FILE: 35_Preprocessor/tests/err.input65.c ================================================ enum type redeclared::fred on line 2 of input65.c ================================================ FILE: 35_Preprocessor/tests/err.input66.c ================================================ enum value redeclared::z on line 2 of input66.c ================================================ FILE: 35_Preprocessor/tests/err.input68.c ================================================ redefinition of typedef:FOO on line 2 of input68.c ================================================ FILE: 35_Preprocessor/tests/err.input69.c ================================================ unknown type:FLOO on line 2 of input69.c ================================================ FILE: 35_Preprocessor/tests/input01.c ================================================ int printf(char *fmt); void main() { printf("%d\n", 12 * 3); printf("%d\n", 18 - 2 * 4); printf("%d\n", 1 + 2 + 9 - 5/2 + 3*5); } ================================================ FILE: 35_Preprocessor/tests/input02.c ================================================ int printf(char *fmt); void main() { int fred; int jim; fred= 5; jim= 12; printf("%d\n", fred + jim); } ================================================ FILE: 35_Preprocessor/tests/input03.c ================================================ int printf(char *fmt); void main() { int x; x= 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); } ================================================ FILE: 35_Preprocessor/tests/input04.c ================================================ int printf(char *fmt); void main() { int x; x= 7 < 9; printf("%d\n", x); x= 7 <= 9; printf("%d\n", x); x= 7 != 9; printf("%d\n", x); x= 7 == 7; 
printf("%d\n", x); x= 7 >= 7; printf("%d\n", x); x= 7 <= 7; printf("%d\n", x); x= 9 > 7; printf("%d\n", x); x= 9 >= 7; printf("%d\n", x); x= 9 != 7; printf("%d\n", x); } ================================================ FILE: 35_Preprocessor/tests/input05.c ================================================ int printf(char *fmt); void main() { int i; int j; i=6; j=12; if (i < j) { printf("%d\n", i); } else { printf("%d\n", j); } } ================================================ FILE: 35_Preprocessor/tests/input06.c ================================================ int printf(char *fmt); void main() { int i; i=1; while (i <= 10) { printf("%d\n", i); i= i + 1; } } ================================================ FILE: 35_Preprocessor/tests/input07.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 35_Preprocessor/tests/input08.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 35_Preprocessor/tests/input09.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { printf("%d\n", 2 * b - a); } } ================================================ FILE: 35_Preprocessor/tests/input10.c ================================================ int printf(char *fmt); void main() { int i; char j; j= 20; printf("%d\n", j); i= 10; printf("%d\n", i); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 2; j= j + 1) { printf("%d\n", j); } } ================================================ FILE: 35_Preprocessor/tests/input11.c ================================================ int printf(char *fmt); int main() { int i; char j; 
long k; i= 10; printf("%d\n", i); j= 20; printf("%d\n", j); k= 30; printf("%d\n", k); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 4; j= j + 1) { printf("%d\n", j); } for (k= 1; k <= 5; k= k + 1) { printf("%d\n", k); } return(i); printf("%d\n", 12345); return(3); } ================================================ FILE: 35_Preprocessor/tests/input12.c ================================================ int printf(char *fmt); int fred() { return(5); } void main() { int x; x= fred(2); printf("%d\n", x); } ================================================ FILE: 35_Preprocessor/tests/input13.c ================================================ int printf(char *fmt); int fred() { return(56); } void main() { int dummy; int result; dummy= printf("%d\n", 23); result= fred(10); dummy= printf("%d\n", result); } ================================================ FILE: 35_Preprocessor/tests/input14.c ================================================ int printf(char *fmt); int fred() { return(20); } int main() { int result; printf("%d\n", 10); result= fred(15); printf("%d\n", result); printf("%d\n", fred(15)+10); return(0); } ================================================ FILE: 35_Preprocessor/tests/input15.c ================================================ int printf(char *fmt); int main() { char a; char *b; char c; int d; int *e; int f; a= 18; printf("%d\n", a); b= &a; c= *b; printf("%d\n", c); d= 12; printf("%d\n", d); e= &d; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 35_Preprocessor/tests/input16.c ================================================ int printf(char *fmt); int c; int d; int *e; int f; int main() { c= 12; d=18; printf("%d\n", c); e= &c + 1; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 35_Preprocessor/tests/input17.c ================================================ int printf(char *fmt); int main() { char a; char *b; int d; int *e; b= &a; *b= 19; 
printf("%d\n", a); e= &d; *e= 12; printf("%d\n", d); return(0); } ================================================ FILE: 35_Preprocessor/tests/input18.c ================================================ int printf(char *fmt); int main() { int a; int b; a= b= 34; printf("%d\n", a); printf("%d\n", b); return(0); } ================================================ FILE: 35_Preprocessor/tests/input18a.c ================================================ int printf(char *fmt); int a; int *b; char c; char *d; int main() { b= &a; *b= 15; printf("%d\n", a); d= &c; *d= 16; printf("%d\n", c); return(0); } ================================================ FILE: 35_Preprocessor/tests/input19.c ================================================ int printf(char *fmt); int a; int b; int c; int d; int e; int main() { a= 2; b= 4; c= 3; d= 2; e= (a+b) * (c+d); printf("%d\n", e); return(0); } ================================================ FILE: 35_Preprocessor/tests/input20.c ================================================ int printf(char *fmt); int a; int b[25]; int main() { b[3]= 12; a= b[3]; printf("%d\n", a); return(0); } ================================================ FILE: 35_Preprocessor/tests/input21.c ================================================ int printf(char *fmt); char c; char *str; int main() { c= '\n'; printf("%d\n", c); for (str= "Hello world\n"; *str != 0; str= str + 1) { printf("%c", *str); } return(0); } ================================================ FILE: 35_Preprocessor/tests/input22.c ================================================ int printf(char *fmt); char a; char b; char c; int d; int e; int f; long g; long h; long i; int main() { b= 5; c= 7; a= b + c++; printf("%d\n", a); e= 5; f= 7; d= e + f++; printf("%d\n", d); h= 5; i= 7; g= h + i++; printf("%d\n", g); a= b-- + c; printf("%d\n", a); d= e-- + f; printf("%d\n", d); g= h-- + i; printf("%d\n", g); a= ++b + c; printf("%d\n", a); d= ++e + f; printf("%d\n", d); g= ++h + i; printf("%d\n", g); a= b * --c; 
printf("%d\n", a); d= e * --f; printf("%d\n", d); g= h * --i; printf("%d\n", g); return(0); } ================================================ FILE: 35_Preprocessor/tests/input23.c ================================================ int printf(char *fmt); char *str; int x; int main() { x= -23; printf("%d\n", x); printf("%d\n", -10 * -10); x= 1; x= ~x; printf("%d\n", x); x= 2 > 5; printf("%d\n", x); x= !x; printf("%d\n", x); x= !x; printf("%d\n", x); x= 13; if (x) { printf("%d\n", 13); } x= 0; if (!x) { printf("%d\n", 14); } for (str= "Hello world\n"; *str; str++) { printf("%c", *str); } return(0); } ================================================ FILE: 35_Preprocessor/tests/input24.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { a= 42; b= 19; printf("%d\n", a & b); printf("%d\n", a | b); printf("%d\n", a ^ b); printf("%d\n", 1 << 3); printf("%d\n", 63 >> 3); return(0); } ================================================ FILE: 35_Preprocessor/tests/input25.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { char z; int y; int x; x= 10; y= 20; z= 30; printf("%d\n", x); printf("%d\n", y); printf("%d\n", z); a= 5; b= 15; c= 25; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); return(0); } ================================================ FILE: 35_Preprocessor/tests/input26.c ================================================ int printf(char *fmt); int main(int a, char b, long c, int d, int e, int f, int g, int h) { int i; int j; int k; a= 13; printf("%d\n", a); b= 23; printf("%d\n", b); c= 34; printf("%d\n", c); d= 44; printf("%d\n", d); e= 54; printf("%d\n", e); f= 64; printf("%d\n", f); g= 74; printf("%d\n", g); h= 84; printf("%d\n", h); i= 94; printf("%d\n", i); j= 95; printf("%d\n", j); k= 96; printf("%d\n", k); return(0); } ================================================ FILE: 35_Preprocessor/tests/input27.c 
================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int param5(int a, int b, int c, int d, int e) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param2(int a, int b) { int c; int d; int e; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param0() { int a; int b; int c; int d; int e; a= 1; b= 2; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int main() { param8(1,2,3,4,5,6,7,8); param5(1,2,3,4,5); param2(1,2); param0(); return(0); } ================================================ FILE: 35_Preprocessor/tests/input28.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 35_Preprocessor/tests/input29.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h); int fred(int a, int b, int c); int main(); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { 
return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 35_Preprocessor/tests/input30.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input30.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 35_Preprocessor/tests/input31.c ================================================ int printf(char *fmt); int main() { int x; x= 2 + + 3 - * / ; } ================================================ FILE: 35_Preprocessor/tests/input32.c ================================================ int printf(char *fmt); int main() { pizza cow llama sausage; } ================================================ FILE: 35_Preprocessor/tests/input33.c ================================================ int printf(char *fmt); int main() { char *z; return(z); } ================================================ FILE: 35_Preprocessor/tests/input34.c ================================================ int printf(char *fmt); int main() { int a[12]; return(0); } ================================================ FILE: 35_Preprocessor/tests/input35.c ================================================ int printf(char *fmt); int fred(int a, int b) { int a; return(a); } ================================================ FILE: 35_Preprocessor/tests/input36.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c); ================================================ FILE: 35_Preprocessor/tests/input37.c ================================================ int printf(char *fmt); int 
fred(int a, char b +, int z); ================================================ FILE: 35_Preprocessor/tests/input38.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c, int g); ================================================ FILE: 35_Preprocessor/tests/input39.c ================================================ int printf(char *fmt); int main() { int a; } ================================================ FILE: 35_Preprocessor/tests/input40.c ================================================ int printf(char *fmt); int main() { int a; a= 5; } ================================================ FILE: 35_Preprocessor/tests/input41.c ================================================ int printf(char *fmt); void fred() { return(5); } ================================================ FILE: 35_Preprocessor/tests/input42.c ================================================ int printf(char *fmt); int main() { fred(5); } ================================================ FILE: 35_Preprocessor/tests/input43.c ================================================ int printf(char *fmt); int main() { int a; a= b[4]; } ================================================ FILE: 35_Preprocessor/tests/input44.c ================================================ int printf(char *fmt); int main() { int a; a= z; } ================================================ FILE: 35_Preprocessor/tests/input45.c ================================================ int printf(char *fmt); int main() { int a; a= &5; } ================================================ FILE: 35_Preprocessor/tests/input46.c ================================================ int printf(char *fmt); int main() { int a; a= *5; } ================================================ FILE: 35_Preprocessor/tests/input47.c ================================================ int printf(char *fmt); int main() { int a; a= ++5; } ================================================ FILE: 
35_Preprocessor/tests/input48.c ================================================ int printf(char *fmt); int main() { int a; a= --5; } ================================================ FILE: 35_Preprocessor/tests/input49.c ================================================ int printf(char *fmt); int main() { int x; char y; y= x; } ================================================ FILE: 35_Preprocessor/tests/input50.c ================================================ int printf(char *fmt); int main() { char *a; char *b; a= a + b; } ================================================ FILE: 35_Preprocessor/tests/input51.c ================================================ int printf(char *fmt); int main() { char a; a= 'fred'; } ================================================ FILE: 35_Preprocessor/tests/input52.c ================================================ int printf(char *fmt); int main() { int a; a= $5.00; } ================================================ FILE: 35_Preprocessor/tests/input53.c ================================================ int printf(char *fmt); int main() { printf("Hello world, %d\n", 23); return(0); } ================================================ FILE: 35_Preprocessor/tests/input54.c ================================================ int printf(char *fmt); int main() { int i; for (i=0; i < 20; i++) { printf("Hello world, %d\n", i); } return(0); } ================================================ FILE: 35_Preprocessor/tests/input55.c ================================================ int printf(char *fmt); int main(int argc, char **argv) { int i; char *argument; printf("Hello world\n"); for (i=0; i < argc; i++) { argument= *argv; argv= argv + 1; printf("Argument %d is %s\n", i, argument); } return(0); } ================================================ FILE: 35_Preprocessor/tests/input56.c ================================================ struct foo { int x; }; struct mary var1; ================================================ FILE: 
35_Preprocessor/tests/input57.c ================================================ struct fred { int x; } ; struct fred { char y; } ; ================================================ FILE: 35_Preprocessor/tests/input58.c ================================================ int printf(char *fmt); struct fred { int x; char y; long z; }; struct fred var2; struct fred *varptr; int main() { long result; var2.x= 12; printf("%d\n", var2.x); var2.y= 'c'; printf("%d\n", var2.y); var2.z= 4005; printf("%d\n", var2.z); result= var2.x + var2.y + var2.z; printf("%d\n", result); varptr= &var2; result= varptr->x + varptr->y + varptr->z; printf("%d\n", result); return(0); } ================================================ FILE: 35_Preprocessor/tests/input59.c ================================================ int main() { int x; x= y.foo; } ================================================ FILE: 35_Preprocessor/tests/input60.c ================================================ int main() { int x; x= x.foo; } ================================================ FILE: 35_Preprocessor/tests/input61.c ================================================ int main() { int x; x= x->foo; } ================================================ FILE: 35_Preprocessor/tests/input62.c ================================================ int printf(char *fmt); union fred { char w; int x; int y; long z; }; union fred var1; union fred *varptr; int main() { var1.x= 65; printf("%d\n", var1.x); var1.x= 66; printf("%d\n", var1.x); printf("%d\n", var1.y); printf("The next two depend on the endian of the platform\n"); printf("%d\n", var1.w); printf("%d\n", var1.z); varptr= &var1; varptr->x= 67; printf("%d\n", varptr->x); printf("%d\n", varptr->y); return(0); } ================================================ FILE: 35_Preprocessor/tests/input63.c ================================================ int printf(char *fmt); enum fred { apple=1, banana, carrot, pear=10, peach, mango, papaya }; enum jane { aple=1, bnana, crrot, par=10, 
pech, mago, paaya }; enum fred var1; enum jane var2; enum fred var3; int main() { var1= carrot + pear + mango; printf("%d\n", var1); return(0); } ================================================ FILE: 35_Preprocessor/tests/input64.c ================================================ enum fred var3; ================================================ FILE: 35_Preprocessor/tests/input65.c ================================================ enum fred { x, y, z }; enum fred { a, b }; ================================================ FILE: 35_Preprocessor/tests/input66.c ================================================ enum fred { x, y, z }; enum mary { a, b, z }; ================================================ FILE: 35_Preprocessor/tests/input67.c ================================================ int printf(char *fmt); typedef int FOO; FOO var1; struct bar { int x; int y} ; typedef struct bar BAR; BAR var2; int main() { var1= 5; printf("%d\n", var1); var2.x= 7; var2.y= 10; printf("%d\n", var2.x + var2.y); return(0); } ================================================ FILE: 35_Preprocessor/tests/input68.c ================================================ typedef int FOO; typedef char FOO; ================================================ FILE: 35_Preprocessor/tests/input69.c ================================================ typedef int FOO; FLOO y; ================================================ FILE: 35_Preprocessor/tests/input70.c ================================================ #include typedef int FOO; int main() { FOO x; x= 56; printf("%d\n", x); return(0); } ================================================ FILE: 35_Preprocessor/tests/mktests ================================================
#!/bin/sh
# Make the output files for each test.
#
# For every input*c file that has neither an out.<file> nor an
# err.<file> next to it, compile it with ../cwj. If the compiler wrote
# anything to stderr, that text is kept as err.<file>. Otherwise the
# input is built with cc and the program's stdout is kept as out.<file>.

# Build our compiler if needed
[ -f ../cwj ] || (cd ..; make install)

for i in input*c
do
  # Only inputs without any recorded result need work
  if [ ! -f "out.$i" ] && [ ! -f "err.$i" ]
  then
    # Capture whatever our compiler reports on stderr
    ../cwj -o out $i 2> "err.$i"

    # If the err file is empty, drop it and record the
    # output of the cc-built program instead
    [ -s "err.$i" ] || {
      rm -f "err.$i"
      cc -o out $i ../lib/printint.c
      ./out > "out.$i"
    }
  fi

  # Remove the build products before moving to the next input
  rm -f out out.s
done
================================================ FILE: 35_Preprocessor/tests/out.input01.c ================================================ 36 10 25 ================================================ FILE: 35_Preprocessor/tests/out.input02.c ================================================ 17 ================================================ FILE: 35_Preprocessor/tests/out.input03.c ================================================ 1 2 3 4 5 ================================================ FILE: 35_Preprocessor/tests/out.input04.c ================================================ 1 1 1 1 1 1 1 1 1 ================================================ FILE: 35_Preprocessor/tests/out.input05.c ================================================ 6 ================================================ FILE: 35_Preprocessor/tests/out.input06.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 35_Preprocessor/tests/out.input07.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 35_Preprocessor/tests/out.input08.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 35_Preprocessor/tests/out.input09.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 35_Preprocessor/tests/out.input10.c ================================================ 20 10 1 2 3 4 5 253 254 255 0 1 ================================================ FILE: 35_Preprocessor/tests/out.input11.c ================================================ 10 20 30 1 2 3 4 5 253 254 255 0 1 2 3 1 2 3 4 5 ================================================ FILE: 35_Preprocessor/tests/out.input12.c ================================================ 5
================================================ FILE: 35_Preprocessor/tests/out.input13.c ================================================ 23 56 ================================================ FILE: 35_Preprocessor/tests/out.input14.c ================================================ 10 20 30 ================================================ FILE: 35_Preprocessor/tests/out.input15.c ================================================ 18 18 12 12 ================================================ FILE: 35_Preprocessor/tests/out.input16.c ================================================ 12 18 ================================================ FILE: 35_Preprocessor/tests/out.input17.c ================================================ 19 12 ================================================ FILE: 35_Preprocessor/tests/out.input18.c ================================================ 34 34 ================================================ FILE: 35_Preprocessor/tests/out.input18a.c ================================================ 15 16 ================================================ FILE: 35_Preprocessor/tests/out.input19.c ================================================ 30 ================================================ FILE: 35_Preprocessor/tests/out.input20.c ================================================ 12 ================================================ FILE: 35_Preprocessor/tests/out.input21.c ================================================ 10 Hello world ================================================ FILE: 35_Preprocessor/tests/out.input22.c ================================================ 12 12 12 13 13 13 13 13 13 35 35 35 ================================================ FILE: 35_Preprocessor/tests/out.input23.c ================================================ -23 100 -2 0 1 0 13 14 Hello world ================================================ FILE: 35_Preprocessor/tests/out.input24.c ================================================ 2 59 57 8 7 
================================================ FILE: 35_Preprocessor/tests/out.input25.c ================================================ 10 20 30 5 15 25 ================================================ FILE: 35_Preprocessor/tests/out.input26.c ================================================ 13 23 34 44 54 64 74 84 94 95 96 ================================================ FILE: 35_Preprocessor/tests/out.input27.c ================================================ 1 2 3 4 5 6 7 8 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 ================================================ FILE: 35_Preprocessor/tests/out.input28.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 35_Preprocessor/tests/out.input29.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 35_Preprocessor/tests/out.input30.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input30.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 35_Preprocessor/tests/out.input53.c ================================================ Hello world, 23 ================================================ FILE: 35_Preprocessor/tests/out.input54.c ================================================ Hello world, 0 Hello world, 1 Hello world, 2 Hello world, 3 Hello world, 4 Hello world, 5 Hello world, 6 Hello world, 7 Hello world, 8 Hello world, 9 Hello world, 10 Hello world, 11 Hello world, 12 Hello world, 13 Hello world, 14 Hello world, 15 Hello world, 16 Hello world, 17 Hello world, 18 Hello world, 19 ================================================ FILE: 
35_Preprocessor/tests/out.input55.c ================================================ Hello world Argument 0 is ./out ================================================ FILE: 35_Preprocessor/tests/out.input58.c ================================================ 12 99 4005 4116 4116 ================================================ FILE: 35_Preprocessor/tests/out.input62.c ================================================ 65 66 66 The next two depend on the endian of the platform 66 66 67 67 ================================================ FILE: 35_Preprocessor/tests/out.input63.c ================================================ 25 ================================================ FILE: 35_Preprocessor/tests/out.input67.c ================================================ 5 17 ================================================ FILE: 35_Preprocessor/tests/out.input70.c ================================================ 56 ================================================ FILE: 35_Preprocessor/tests/runtests ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! -f ../cwj ] then (cd ..; make install) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" # Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../cwj -o out $i ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. 
Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../cwj $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.s "trial.$i" done ================================================ FILE: 35_Preprocessor/tests/runtestsn ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! -f ../compn ] then (cd ..; make install) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" # Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../compn -o out $i ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../compn $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" 
-eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.o out.s "trial.$i" done ================================================ FILE: 35_Preprocessor/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->type = type; n->left = left; n->mid = mid; n->right = right; n->sym = sym; n->intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, struct symtable *sym, int intvalue) { return (mkastnode(op, type, NULL, NULL, NULL, sym, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, struct symtable *sym, int intvalue) { return (mkastnode(op, type, left, NULL, NULL, sym, intvalue)); } // Generate and return a new label number // just for AST dumping purposes static int gendumplabel(void) { static int id = 1; return (id++); } // Given an AST tree, print it out and follow the // traversal of the tree that genAST() follows void dumpAST(struct ASTnode *n, int label, int level) { int Lfalse, Lstart, Lend; switch (n->op) { case A_IF: Lfalse = gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_IF"); if (n->right) { Lend = gendumplabel(); fprintf(stdout, ", end L%d", Lend); } fprintf(stdout, "\n"); dumpAST(n->left, Lfalse, level + 2); dumpAST(n->mid, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); return; case A_WHILE: Lstart = gendumplabel(); 
for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_WHILE, start L%d\n", Lstart); Lend = gendumplabel(); dumpAST(n->left, Lend, level + 2); dumpAST(n->right, NOLABEL, level + 2); return; } // Reset level to -2 for A_GLUE if (n->op == A_GLUE) level = -2; // General AST node handling if (n->left) dumpAST(n->left, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); for (int i = 0; i < level; i++) fprintf(stdout, " "); switch (n->op) { case A_GLUE: fprintf(stdout, "\n\n"); return; case A_FUNCTION: fprintf(stdout, "A_FUNCTION %s\n", n->sym->name); return; case A_ADD: fprintf(stdout, "A_ADD\n"); return; case A_SUBTRACT: fprintf(stdout, "A_SUBTRACT\n"); return; case A_MULTIPLY: fprintf(stdout, "A_MULTIPLY\n"); return; case A_DIVIDE: fprintf(stdout, "A_DIVIDE\n"); return; case A_EQ: fprintf(stdout, "A_EQ\n"); return; case A_NE: fprintf(stdout, "A_NE\n"); return; case A_LT: fprintf(stdout, "A_LE\n"); return; case A_GT: fprintf(stdout, "A_GT\n"); return; case A_LE: fprintf(stdout, "A_LE\n"); return; case A_GE: fprintf(stdout, "A_GE\n"); return; case A_INTLIT: fprintf(stdout, "A_INTLIT %d\n", n->intvalue); return; case A_STRLIT: fprintf(stdout, "A_STRLIT rval label L%d\n", n->intvalue); return; case A_IDENT: if (n->rvalue) fprintf(stdout, "A_IDENT rval %s\n", n->sym->name); else fprintf(stdout, "A_IDENT %s\n", n->sym->name); return; case A_ASSIGN: fprintf(stdout, "A_ASSIGN\n"); return; case A_WIDEN: fprintf(stdout, "A_WIDEN\n"); return; case A_RETURN: fprintf(stdout, "A_RETURN\n"); return; case A_FUNCCALL: fprintf(stdout, "A_FUNCCALL %s\n", n->sym->name); return; case A_ADDR: fprintf(stdout, "A_ADDR %s\n", n->sym->name); return; case A_DEREF: if (n->rvalue) fprintf(stdout, "A_DEREF rval\n"); else fprintf(stdout, "A_DEREF\n"); return; case A_SCALE: fprintf(stdout, "A_SCALE %d\n", n->size); return; case A_PREINC: fprintf(stdout, "A_PREINC %s\n", n->sym->name); return; case A_PREDEC: fprintf(stdout, "A_PREDEC %s\n", n->sym->name); 
return; case A_POSTINC: fprintf(stdout, "A_POSTINC\n"); return; case A_POSTDEC: fprintf(stdout, "A_POSTDEC\n"); return; case A_NEGATE: fprintf(stdout, "A_NEGATE\n"); return; default: fatald("Unknown dumpAST operator", n->op); } } ================================================ FILE: 35_Preprocessor/types.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Types and type handling // Copyright (c) 2019 Warren Toomey, GPL3 // Return true if a type is an int type // of any size, false otherwise int inttype(int type) { return ((type & 0xf) == 0); } // Return true if a type is of pointer type int ptrtype(int type) { return ((type & 0xf) != 0); } // Given a primitive type, return // the type which is a pointer to it int pointer_to(int type) { if ((type & 0xf) == 0xf) fatald("Unrecognised in pointer_to: type", type); return (type + 1); } // Given a primitive pointer type, return // the type which it points to int value_at(int type) { if ((type & 0xf) == 0x0) fatald("Unrecognised in value_at: type", type); return (type - 1); } // Given a type and a composite type pointer, return // the size of this type in bytes int typesize(int type, struct symtable *ctype) { if (type == P_STRUCT || type == P_UNION) return (ctype->size); return (genprimsize(type)); } // Given an AST tree and a type which we want it to become, // possibly modify the tree by widening or scaling so that // it is compatible with this type. Return the original tree // if no changes occurred, a modified tree, or NULL if the // tree is not compatible with the given type. // If this will be part of a binary operation, the AST op is not zero. 
struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op) { int ltype; int lsize, rsize; ltype = tree->type; // XXX No idea on these yet if (ltype == P_STRUCT || ltype == P_UNION) fatal("Don't know how to do this yet"); if (rtype == P_STRUCT || rtype == P_UNION) fatal("Don't know how to do this yet"); // Compare scalar int types if (inttype(ltype) && inttype(rtype)) { // Both types same, nothing to do if (ltype == rtype) return (tree); // Get the sizes for each type lsize = typesize(ltype, NULL); // XXX Fix soon rsize = typesize(rtype, NULL); // XXX Fix soon // Tree's size is too big if (lsize > rsize) return (NULL); // Widen to the right if (rsize > lsize) return (mkastunary(A_WIDEN, rtype, tree, NULL, 0)); } // For pointers on the left if (ptrtype(ltype)) { // OK is same type on right and not doing a binary op if (op == 0 && ltype == rtype) return (tree); } // We can scale only on A_ADD or A_SUBTRACT operation if (op == A_ADD || op == A_SUBTRACT) { // Left is int type, right is pointer type and the size // of the original type is >1: scale the left if (inttype(ltype) && ptrtype(rtype)) { rsize = genprimsize(value_at(rtype)); if (rsize > 1) return (mkastunary(A_SCALE, rtype, tree, NULL, rsize)); else return (tree); // Size 1, no need to scale } } // If we get here, the types are not compatible return (NULL); } ================================================ FILE: 36_Break_Continue/Makefile ================================================ # Define the location of the include directory # and the location to install the compiler binary INCDIR=/tmp/include BINDIR=/tmp HSRCS= data.h decl.h defs.h SRCS= cg.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c ARMSRCS= cg_arm.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c cwj: $(SRCS) $(HSRCS) cc -o cwj -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCS) compn: $(SRCN) $(HSRCS) cc -D__NASM__ -o 
compn -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCN) cwjarm: $(ARMSRCS) $(HSRCS) cc -o cwjarm -g -Wall $(ARMSRCS) cp cwjarm cwj install: cwj mkdir -p $(INCDIR) rsync -a include/. $(INCDIR) cp cwj $(BINDIR) chmod +x $(BINDIR)/cwj installn: compn mkdir -p $(INCDIR) rsync -a include/. $(INCDIR) cp compn $(BINDIR) chmod +x $(BINDIR)/compn clean: rm -f cwj cwjarm compn *.o *.s out a.out test: install tests/runtests (cd tests; chmod +x runtests; ./runtests) armtest: cwjarm tests/runtests (cd tests; chmod +x runtests; ./runtests) testn: installn tests/runtestsn (cd tests; chmod +x runtestsn; ./runtestsn) ================================================ FILE: 36_Break_Continue/Readme.md ================================================ # Part 36: `break` and `continue` A while back, when I wrote another [simple compiler](https://github.com/DoctorWkt/h-compiler) for a typeless language, I didn't use an abstract syntax tree. This made it awkward to add the `break` and `continue` keywords to the language. Here, we do have an AST tree for each function. This makes it much easier to implement `break` and `continue`. I'll outline the reasons for this below. ## Adding the `break` and `continue` Unsurprisingly, we have two new tokens T_BREAK and T_CONTINUE, and the scanner code in `scan.c` recognises the `break` and `continue` keywords. As always, browse the code to see how this is done. ## New AST Node Types We also have two new AST node types in `defs.h`: A_BREAK and A_CONTINUE. When we parse a `break` keyword, we can generate an A_BREAK AST leaf; ditto an A_CONTINUE leaf for the `continue` keyword. Then, when we walk the AST to generate the assembly code, when we encounter an A_BREAK node, we need to generate an assembly jump to the label at the end of the loop that we are currently in. And for A_CONTINUE, we jump to the label just before the loop condition is evaluated. Now, how do we know which loop we are in? 
## Tracking the Most Recent Loop Loops can be nested, and so there can be any number of loop labels in use at any point. This is what I found difficult when I wrote my previous compiler. Now that we have an AST which we traverse recursively, we can pass the details of the latest loop's labels down to our children in the AST tree. We already do this sort of thing to get to the end of an 'if' or 'while' statement. Here's some of the code for generating the assembly for 'if' from `gen.c`: ```c // Generate the code for an IF statement // and an optional ELSE clause static int genIF(struct ASTnode *n) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. Lfalse = genlabel(); Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. genAST(n->left, Lfalse, n->op); ``` The left-hand AST child is the one that evaluates the 'if' statement's condition, so it needs access to the label that we have just generated. So when we generate this child's assembly output with `genAST()`, we also pass in the label's details. For loops, we need to pass to `genAST()` the label which is at the loop's end and also the label just before the code that evaluates the loop's condition. To this end, I've changed the interface to `genAST()`: ```c int genAST(struct ASTnode *n, int iflabel, int looptoplabel, int loopendlabel, int parentASTop); ``` We keep the existing `iflabel` and augment this with the two loop labels. Now we need to pass to `genAST()` the labels that are generated for each loop. So, in the code to generate the 'while' loop code: ```c static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels Lstart = genlabel(); Lend = genlabel(); // Generate the condition code followed // by a jump to the end label. 
genAST(n->left, Lend, Lstart, Lend, n->op); // Generate the compound statement for the body genAST(n->right, NOLABEL, Lstart, Lend, n->op); ... } ``` ## `genAST()` is Recursive Now, what about nested loops? Consider the code: ``` L1: while (x < 10) { if (x == 6) break; L2: while (y < 10) { if (y == 6) break; y++; } L3: x++; } L4: ``` the `if (y == 6) break` should leave the inner loop and jump to the `x++` code (i.e. L3), and the `if (x == 6) break;` code should leave the outer loop and jump to label L4. This works because `genAST()` calls `genWHILE()` for the outer loop. This calls `genAST(L1, L4)` so that the first `break` sees these loop labels. Then, when we hit the second loop, `genWHILE()` is called again. It generates new loop labels and calls `genAST(L2, L3)` to generate the inner loop code. Thus, the second `break` sees the L2 and L3 labels, not the L1 and L4 labels. Finally, once the inner compound statement is generated, the inside `genAST()` returns, and we get back to the code which sees L1 and L4 as the loop labels. ## Implications of the Above What this means, in terms of implementation, is that anywhere that something calls `genAST()` (including itself), and we could be in a loop, then the current loop labels must get propagated down to the children involved. We have already seen the change to `genWHILE()` to pass to `genAST()` the new loop labels. Let's look at where else we need to propagate loop labels. When I first implemented `break`, I wrote this test program ```c int main() { int x; x = 0; while (x < 100) { printf("%d\n", x); if (x == 14) { break; } x = x + 1; } printf("Done\n"); return (0); } ``` and generated the assembly for it. The `break` was being turned into a jump to label L0, i.e. the loop's end label wasn't getting to the code dealing with `break`.
Looking at a stack trace for the compiler, I realised that: + The `genAST()` for the function called + `genWHILE()` for the loop which generated the labels and passed them to + `genAST()` for the loop body, which called + `genIF()` which passed **no** labels in to + `genAST()` for the 'if' body. Hence, the `break` never saw the labels. So I also had to modify the argument list for `genIF()`: ```c static int genIF(struct ASTnode *n, int looptoplabel, int loopendlabel); ``` I won't go through all the code in `gen.c`, but open up the file in an editor or text viewer and look for all the `genAST()` calls to see where the loop labels do get propagated. Finally, we actually do need to generate the assembly code for `break` and `continue`. Here is the code to do it in `genAST()` in `gen.c`: ```c case A_BREAK: cgjump(loopendlabel); return (NOREG); case A_CONTINUE: cgjump(looptoplabel); return (NOREG); ``` ## Parsing `break` and `continue` This time around I covered the code generation side before the parsing, but now it's time to get to the parsing of these new keywords. Luckily the syntax is either `break ;` or `continue ;`. So it would seem that they should be easy to parse. There is, of course, a small wrinkle. We parse individual statements in `single_statement()` in `stmt.c`, so the change is small: ```c case T_BREAK: return (break_statement()); case T_CONTINUE: return (continue_statement()); ``` with a slight change in `compound_statement()` to ensure that the statement is followed by a semicolon: ```c compound_statement(void) { struct ASTnode *left = NULL; struct ASTnode *tree; ... while (1) { // Parse a single statement tree = single_statement(); // Some statements must be followed by a semicolon if (tree != NULL && (tree->op == A_ASSIGN || tree->op == A_RETURN || tree->op == A_FUNCCALL || tree->op == A_BREAK || tree->op == A_CONTINUE)) semi(); ... } ``` Now the wrinkle. 
The following program is not legal: ```c int main() { break; } ``` as there is no loop to break out of. We need to track the depth of the loops we are parsing, and only allow a `break` or `continue` statement when the depth is not zero. Thus, the functions to parse these keywords look like: ```c // Parse a break statement and return its AST static struct ASTnode *break_statement(void) { if (Looplevel == 0) fatal("no loop to break out from"); scan(&Token); return (mkastleaf(A_BREAK, 0, NULL, 0)); } // continue_statement: 'continue' ; // // Parse a continue statement and return its AST static struct ASTnode *continue_statement(void) { if (Looplevel == 0) fatal("no loop to continue to"); scan(&Token); return (mkastleaf(A_CONTINUE, 0, NULL, 0)); } ``` ## Loop Levels We are going to need a `Looplevel` variable to track the level of the loops being parsed. This is in `data.h`: ```c extern_ int Looplevel; // Depth of nested loops ``` We need to set the level up as required. Each time we start a new function, the level is set to zero (in `decl.c`): ```c // Parse the declaration of function. struct ASTnode *function_declaration(int type) { ... // Get the AST tree for the compound statement and mark // that we have parsed no loops yet Looplevel= 0; tree = compound_statement(); ... } ``` Now, each time we parse a loop, we increment the loop level for the loop's body (in `stmt.c`): ```c // Parse a WHILE statement and return its AST static struct ASTnode *while_statement(void) { ... // Get the AST for the compound statement. // Update the loop depth in the process Looplevel++; bodyAST = compound_statement(); Looplevel--; ... } // Parse a FOR statement and return its AST static struct ASTnode *for_statement(void) { ... // Get the compound statement which is the body // Update the loop depth in the process Looplevel++; bodyAST = compound_statement(); Looplevel--; ... } ``` And this gives us the ability to determine whether or not we are inside a loop.
## The Test Code Here is the test code, `tests/input71.c`: ```c #include <stdio.h> int main() { int x; x = 0; while (x < 100) { if (x == 5) { x = x + 2; continue; } printf("%d\n", x); if (x == 14) { break; } x = x + 1; } printf("Done\n"); return (0); } ``` As I still haven't solved the "dangling else" problem, the `break` statement has to be enclosed in '{' ... '}' to make it into a compound statement. Apart from that, the code works as expected: ``` 0 1 2 3 4 7 8 9 10 11 12 13 14 Done ``` ## Conclusion and What's Next I knew that adding support for `break` and `continue` was going to be easier than it was for my earlier compiler, because of the AST. However, there were still some minor issues and wrinkles that we had to deal with in the process of implementing them. Now that we have the `break` keyword in the language, I will attempt to add `switch` statements in the next part of our compiler writing journey. This is going to require the addition of switch jump tables, and I know this is going to be complicated. So get ready for an interesting next step. [Next step](../37_Switch/Readme.md) ================================================ FILE: 36_Break_Continue/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section we are outputting in to enum { no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\t.text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\t.data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes.
int cgprimsize(int type) {
  // Every pointer type has the same size
  if (ptrtype(type))
    return (8);
  switch (type) {
    case P_CHAR:
      return (1);
    case P_INT:
      return (4);
    case P_LONG:
      return (8);
    default:
      fatald("Bad type in cgprimsize:", type);
  }
  return (0);			// Keep -Wall happy
}

// Given a scalar type, an existing memory offset
// (which hasn't been allocated to anything yet)
// and a direction (1 is up, -1 is down), calculate
// and return a suitably aligned memory offset
// for this scalar type. This could be the original
// offset, or it could be above/below the original.
//
// P_CHAR is returned unchanged. P_INT, P_LONG and pointer
// types are aligned on a 4-byte boundary. Any other type
// aborts through fatald().
int cgalign(int type, int offset, int direction) {
  int alignment;

  // We don't need to do this on x86-64, but let's
  // align chars on any offset and align ints/pointers
  // on a 4-byte alignment
  switch (type) {
    case P_CHAR:
      return (offset);
    case P_INT:
    case P_LONG:
      break;
    default:
      // Pointer types are scalars too, and cgprimsize() accepts
      // them, so they must not be rejected here: a pointer is
      // aligned the same way as an int or a long. Only types
      // that are neither int-like nor pointers are an error.
      if (!ptrtype(type))
	fatald("Bad type in calc_aligned_offset:", type);
  }

  // Here we have an int, a long or a pointer.
  // Align it on a 4-byte offset.
  // I put the generic code here so it can be reused elsewhere.
  alignment = 4;
  offset = (offset + direction * (alignment - 1)) & ~(alignment - 1);
  return (offset);
}

// Position of next local variable relative to stack base pointer.
// We store the offset as positive to make aligning the stack pointer easier
static int localOffset;
static int stackOffset;

// Create the position of a new local variable.
// Returns the negated running total, which callers store
// as the variable's position.
static int newlocaloffset(int type) {
  // Decrement the offset by a minimum of 4 bytes
  // and allocate on the stack
  localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4;
  return (-localOffset);
}

// List of available registers and their names.
// We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "%r10", "%r11", "%r12", "%r13", "%r9", "%r8", "%rcx", "%rdx", "%rsi", "%rdi" }; static char *breglist[] = { "%r10b", "%r11b", "%r12b", "%r13b", "%r9b", "%r8b", "%cl", "%dl", "%sil", "%dil" }; static char *dreglist[] = { "%r10d", "%r11d", "%r12d", "%r13d", "%r9d", "%r8d", "%ecx", "%edx", "%esi", "%edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(struct symtable *sym) { char *name = sym->name; struct symtable *parm, *locvar; int cnt; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the %rsp and %rsp fprintf(Outfile, "\t.globl\t%s\n" "\t.type\t%s, @function\n" "%s:\n" "\tpushq\t%%rbp\n" "\tmovq\t%%rsp, %%rbp\n", name, name, name); // Copy any in-register parameters to the stack, up to six of them // The remaining parameters are already on the stack for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) { if (cnt > 6) { parm->posn = paramOffset; paramOffset += 8; } else { parm->posn = newlocaloffset(parm->type); cgstorlocal(paramReg--, parm); } } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) { locvar->posn = newlocaloffset(locvar->type); } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\taddq\t$%d,%%rsp\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->endlabel); fprintf(Outfile, "\taddq\t$%d,%%rsp\n", stackOffset); fputs("\tpopq %rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. 
int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadglob(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name); } else // Print out the code to initialise it switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovzbq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovslq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. 
int cgloadlocal(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->posn); fprintf(Outfile, "\tmovq\t%d(%%rbp), %s\n", sym->posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->posn); } else switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->posn); fprintf(Outfile, "\tmovzbq\t%d(%%rbp), %s\n", sym->posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->posn); fprintf(Outfile, "\tmovslq\t%d(%%rbp), %s\n", sym->posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->posn); break; default: fatald("Bad type in cgloadlocal:", sym->type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tleaq\tL%d(%%rip), %s\n", label, reglist[r]); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, 
"\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tandq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\torq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txorq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshlq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshrq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tnegq\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnotq\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); return (r); } // Convert an integer value to a boolean value. 
Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s@PLT\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\taddq\t$%d, %%rsp\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpushq\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmovq\t%s, %s\n", reglist[r], reglist[FIRSTPARAMREG - argposn + 1]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsalq\t$%d, %s\n", val, reglist[r]); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %s(%%rip)\n", reglist[r], sym->name); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %s(%%rip)\n", breglist[r], sym->name); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %s(%%rip)\n", dreglist[r], sym->name); break; default: fatald("Bad type in cgstorglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %d(%%rbp)\n", reglist[r], sym->posn); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %d(%%rbp)\n", breglist[r], sym->posn); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %d(%%rbp)\n", dreglist[r], sym->posn); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the type size = typesize(node->type, node->ctype); // Generate the global identity and the label cgdataseg(); fprintf(Outfile, "\t.globl\t%s\n", node->name); fprintf(Outfile, "%s:", node->name); // Generate the space for this type switch (size) { case 1: fprintf(Outfile, "\t.byte\t0\n"); break; case 4: fprintf(Outfile, "\t.long\t0\n"); break; case 8: fprintf(Outfile, "\t.quad\t0\n"); 
break; default: for (int i=0; i < size; i++) fprintf(Outfile, "\t.byte\t0\n"); } } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\t.byte\t%d\n", *cptr); } fprintf(Outfile, "\t.byte\t0\n"); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzbl\t%s, %%eax\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %%eax\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } cgjump(sym->endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL) fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", sym->name, reglist[r]); else fprintf(Outfile, "\tleaq\t%d(%%rbp), %s\n", sym->posn, reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzbq\t(%s), %s\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovslq\t(%s), %s\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { // Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmovb\t%s, (%s)\n", breglist[r1], reglist[r2]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 36_Break_Continue/cg_arm.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for ARMv6 on Raspberry Pi // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. static int freereg[4]; static char *reglist[4] = { "r4", "r5", "r6", "r7" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. 
static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // We have to store large integer literal values in memory. // Keep a list of them which will be output in the postamble #define MAXINTS 1024 int Intlist[MAXINTS]; static int Intslot = 0; // Determine the offset of a large integer // literal from the .L3 label. If the integer // isn't in the list, add it. static void set_int_offset(int val) { int offset = -1; // See if it is already there for (int i = 0; i < Intslot; i++) { if (Intlist[i] == val) { offset = 4 * i; break; } } // Not in the list, so add it if (offset == -1) { offset = 4 * Intslot; if (Intslot == MAXINTS) fatal("Out of int slots in set_int_offset()"); Intlist[Intslot++] = val; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L3+%d\n", offset); } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Print out the assembly postamble void cgpostamble() { // Print out the global variables fprintf(Outfile, ".L2:\n"); for (int i = 0; i < Globs; i++) { if (Symtable[i].stype == S_VARIABLE) fprintf(Outfile, "\t.word %s\n", Symtable[i].name); } // Print out the integer literals fprintf(Outfile, ".L3:\n"); for (int i = 0; i < Intslot; i++) { fprintf(Outfile, "\t.word %d\n", Intlist[i]); } } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, \%%function\n" "%s:\n" "\tpush\t{fp, lr}\n" "\tadd\tfp, sp, #4\n" "\tsub\tsp, sp, #8\n" "\tstr\tr0, [fp, #-8]\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { 
cglabel(Symtable[id].endlabel); fputs("\tsub\tsp, fp, #4\n" "\tpop\t{fp, pc}\n" "\t.align\t2\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); // If the literal value is small, do it with one instruction if (value <= 1000) fprintf(Outfile, "\tmov\t%s, #%d\n", reglist[r], value); else { set_int_offset(value); fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); } return (r); } // Determine the offset of a variable from the .L2 // label. Yes, this is inefficient code. static void set_var_offset(int id) { int offset = 0; // Walk the symbol table up to id. // Find S_VARIABLEs and add on 4 until // we get to our variable for (int i = 0; i < id; i++) { if (Symtable[i].stype == S_VARIABLE) offset += 4; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L2+%d\n", offset); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tldrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s, %s\n", reglist[r1], reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register 
with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\tmul\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { // To do a divide: r0 holds the dividend, r1 holds the divisor. // The quotient is in r0. fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r1]); fprintf(Outfile, "\tmov\tr1, %s\n", reglist[r2]); fprintf(Outfile, "\tbl\t__aeabi_idiv\n"); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r1]); free_register(r2); return (r1); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]); fprintf(Outfile, "\tbl\t%s\n", Symtable[id].name); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r]); return (r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tlsl\t%s, %s, #%d\n", reglist[r], reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tstr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgstorglob:", Symtable[id].type); } return (r); } // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { if (ptrtype(type)) return (4); switch (type) { case P_CHAR: return (1); case P_INT: case P_LONG: return (4); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Symtable[id].type); fprintf(Outfile, "\t.data\n" "\t.globl\t%s\n", Symtable[id].name); switch (typesize) { case 1: fprintf(Outfile, "%s:\t.byte\t0\n", Symtable[id].name); break; case 4: fprintf(Outfile, "%s:\t.long\t0\n", Symtable[id].name); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "moveq", "movne", "movlt", "movgt", "movle", "movge" }; // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "movne", "moveq", "movge", "movle", "movgt", "movlt" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #1\n", cmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #0\n", invcmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\tuxtb\t%s, %s\n", reglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tb\tL%d\n", l); } // List of inverted branch instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *brlist[] = { "bne", "beq", "bge", "ble", "bgt", "blt" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", brlist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[reg]); cgjump(Symtable[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. Return a new register int cgaddress(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); fprintf(Outfile, "\tmov\t%s, r3\n", reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tldrb\t%s, [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [%s]\n", reglist[r1], reglist[r2]); break; case P_INT: case P_LONG: fprintf(Outfile, "\tstr\t%s, [%s]\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 36_Break_Continue/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { 
no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\tsection .text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\tsection .data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch(type) { case P_CHAR: return (offset); case P_INT: case P_LONG: break; default: fatald("Bad type in calc_aligned_offset:", type); } // Here we have an int or a long. Align it on a 4-byte offset // I put the generic code here so it can be reused elsewhere. alignment= 4; offset = (offset + direction * (alignment-1)) & ~(alignment-1); return (offset); } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. 
// We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "r10", "r11", "r12", "r13", "r9", "r8", "rcx", "rdx", "rsi", "rdi" }; static char *breglist[] = { "r10b", "r11b", "r12b", "r13b", "r9b", "r8b", "cl", "dl", "sil", "dil" }; static char *dreglist[] = { "r10d", "r11d", "r12d", "r13d", "r9d", "r8d", "ecx", "edx", "esi", "edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\textern\tprintint\n", Outfile); fputs("\textern\tprintchar\n", Outfile); fputs("\textern\topen\n", Outfile); fputs("\textern\tclose\n", Outfile); fputs("\textern\tread\n", Outfile); fputs("\textern\twrite\n", Outfile); fputs("\textern\tprintf\n", Outfile); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(struct symtable *sym) { char *name = sym->name; struct symtable *parm, *locvar; int cnt; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the rsp and rbp fprintf(Outfile, "\tglobal\t%s\n" "%s:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n", name, name); // Copy any in-register parameters to the stack, up to six of them // The remaining parameters are already on the stack for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) { if (cnt > 6) { parm->posn = paramOffset; paramOffset += 8; } else { parm->posn = newlocaloffset(parm->type); cgstorlocal(paramReg--, parm); } } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. 
for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) { locvar->posn = newlocaloffset(locvar->type); } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\tadd\trsp, %d\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->endlabel); fprintf(Outfile, "\tadd\trsp, %d\n", stackOffset); fputs("\tpop rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadglob(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); } else // Print out the code to initialise it switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, 
"\tdec\tdword [%s]\n", sym->name); fprintf(Outfile, "\tmovsx\t%s, word [%s]\n", dreglist[r], sym->name); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword [%s]\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadlocal(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->posn); fprintf(Outfile, "\tmov\t%s, [rbp+%d]\n", reglist[r], sym->posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->posn); } else switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->posn); fprintf(Outfile, "\tmovzx\t%s, byte [rbp+%d]\n", reglist[r], sym->posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", sym->posn); fprintf(Outfile, "\tmovsx\t%s, word [rbp+%d]\n", reglist[r], sym->posn); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, 
"\tdec\tdword\t[rbp+%d]\n", sym->posn); break; default: fatald("Bad type in cgloadlocal:", sym->type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, L%d\n", reglist[r], label); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tand\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\tor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshl\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshr\t%s, 
cl\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tneg\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnot\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r], breglist[r]); return (r); } // Convert an integer value to a boolean value. Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, byte %s\n", reglist[r], breglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\tadd\trsp, %d\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmov\t%s, rax\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpush\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmov\t%s, %s\n", reglist[FIRSTPARAMREG - argposn + 1], reglist[r]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsal\t%s, %d\n", reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, dreglist[r]); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\tqword\t[rbp+%d], %s\n", sym->posn, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\tbyte\t[rbp+%d], %s\n", sym->posn, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\tdword\t[rbp+%d], %s\n", sym->posn, dreglist[r]); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the type size = typesize(node->type, node->ctype); // Generate the global identity and the label cgdataseg(); fprintf(Outfile, "\tsection\t.data\n" "\tglobal\t%s\n", node->name); fprintf(Outfile, "%s:", node->name); // Generate the space for this type // original version for (int i = 0; i < node->size; i++) { switch(size) { case 1: fprintf(Outfile, "\tdb\t0\n"); break; case 4: fprintf(Outfile, 
"\tdd\t0\n"); break; case 8: fprintf(Outfile, "\tdq\t0\n"); break; default: for (int i = 0; i < size; i++) fprintf(Outfile, "\tdb\t0\n"); } } /* compact version using times instead of loop switch(size) { case 1: fprintf(Outfile, "\ttimes\t%d\tdb\t0\n", node->size); break; case 4: fprintf(Outfile, "\ttimes\t%d\tdd\t0\n", node->size); break; case 8: fprintf(Outfile, "\ttimes\t%d\tdq\t0\n", node->size); break; default: fprintf(Outfile, "\ttimes\t%d\tdb\t0\n", size); } */ } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\tdb\t%d\n", *cptr); } fprintf(Outfile, "\tdb\t0\n"); /* put string in readable format on a single line // probably went overboard with error checking int comma = 0, quote = 0, start = 1; fprintf(Outfile, "\tdb\t"); for (cptr=strvalue; *cptr; cptr++) { if ( ! isprint(*cptr) ) if (comma || start) { fprintf(Outfile, "%d, ", *cptr); start = 0; comma = 1; } else if (quote) { fprintf(Outfile, "\', %d, ", *cptr); comma = 1; quote = 0; } else { fprintf(Outfile, "%d, ", *cptr); comma = 1; quote = 0; } else if (start || comma) { fprintf(Outfile, "\'%c", *cptr); start = comma = 0; quote = 1; } else { fprintf(Outfile, "%c", *cptr); comma = 0; quote = 1; } } if (comma || start) fprintf(Outfile, "0\n"); else fprintf(Outfile, "\', 0\n"); */ } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzx\teax, %s\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmov\teax, %s\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } cgjump(sym->endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL) fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r], sym->name); else fprintf(Outfile, "\tlea\t%s, [rbp+%d]\n", reglist[r], sym->posn); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovsx\t%s, dword [%s]\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { //Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmov\t[%s], byte %s\n", reglist[r2], breglist[r1]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 36_Break_Continue/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Putback; // Character put back by scanner extern_ struct symtable *Functionid; // Symbol ptr of the current function extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ char *Infilename; // Name of file we are parsing extern_ char *Outfilename; // Name of file we opened as Outfile extern_ struct token Token; // Last token scanned extern_ char Text[TEXTLEN + 1]; // Last identifier scanned extern_ int Looplevel; // Depth of 
nested loops // Symbol table lists extern_ struct symtable *Globhead, *Globtail; // Global variables and functions extern_ struct symtable *Loclhead, *Locltail; // Local variables extern_ struct symtable *Parmhead, *Parmtail; // Local parameters extern_ struct symtable *Membhead, *Membtail; // Temp list of struct/union members extern_ struct symtable *Structhead, *Structtail; // List of struct types extern_ struct symtable *Unionhead, *Uniontail; // List of union types extern_ struct symtable *Enumhead, *Enumtail; // List of enum types and values extern_ struct symtable *Typehead, *Typetail; // List of typedefs // Command-line flags extern_ int O_dumpAST; // If true, dump the AST trees extern_ int O_keepasm; // If true, keep any assembly files extern_ int O_assemble; // If true, assemble the assembly files extern_ int O_dolink; // If true, link the object files extern_ int O_verbose; // If true, print info on compilation stages ================================================ FILE: 36_Break_Continue/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 static struct symtable *composite_declaration(int type); static void enum_declaration(void); int typedef_declaration(struct symtable **ctype); int type_of_typedef(char *name, struct symtable **ctype); // Parse the current token and return a // primitive type enum value, a pointer // to any composite type and possibly // modify the class of the type. // Also scan in the next token. 
int parse_type(struct symtable **ctype, int *class) { int type, exstatic=1; // See if the class has been changed to extern (later, static) while (exstatic) { switch (Token.token) { case T_EXTERN: *class= C_EXTERN; scan(&Token); break; default: exstatic= 0; } } // Now work on the actual type keyword switch (Token.token) { case T_VOID: type = P_VOID; scan(&Token); break; case T_CHAR: type = P_CHAR; scan(&Token); break; case T_INT: type = P_INT; scan(&Token); break; case T_LONG: type = P_LONG; scan(&Token); break; // For the following, if we have a ';' after the // parsing then there is no type, so return -1 case T_STRUCT: type = P_STRUCT; *ctype = composite_declaration(P_STRUCT); if (Token.token == T_SEMI) type = -1; break; case T_UNION: type = P_UNION; *ctype = composite_declaration(P_UNION); if (Token.token == T_SEMI) type = -1; break; case T_ENUM: type = P_INT; // Enums are really ints enum_declaration(); if (Token.token == T_SEMI) type = -1; break; case T_TYPEDEF: type = typedef_declaration(ctype); if (Token.token == T_SEMI) type = -1; break; case T_IDENT: type = type_of_typedef(Text, ctype); break; default: fatald("Illegal type, token", Token.token); } // Scan in one or more further '*' tokens // and determine the correct pointer type while (1) { if (Token.token != T_STAR) break; type = pointer_to(type); scan(&Token); } // We leave with the next token already scanned return (type); } // variable_declaration: type identifier ';' // | type identifier '[' INTLIT ']' ';' // ; // // Parse the declaration of a scalar variable or an array // with a given size. // The identifier has been scanned & we have the type. 
// class is the variable's class // Return the pointer to variable's entry in the symbol table struct symtable *var_declaration(int type, struct symtable *ctype, int class) { struct symtable *sym = NULL; // See if this has already been declared switch (class) { case C_EXTERN: case C_GLOBAL: if (findglob(Text) != NULL) fatals("Duplicate global variable declaration", Text); case C_LOCAL: case C_PARAM: if (findlocl(Text) != NULL) fatals("Duplicate local variable declaration", Text); case C_MEMBER: if (findmember(Text) != NULL) fatals("Duplicate struct/union member declaration", Text); } // Text now has the identifier's name. // If the next token is a '[' if (Token.token == T_LBRACKET) { // Skip past the '[' scan(&Token); // Check we have an array size if (Token.token == T_INTLIT) { // Add this as a known array and generate its space in assembly. // We treat the array as a pointer to its elements' type switch (class) { case C_EXTERN: case C_GLOBAL: sym = addglob(Text, pointer_to(type), ctype, S_ARRAY, class, Token.intvalue); break; case C_LOCAL: case C_PARAM: case C_MEMBER: fatal ("For now, declaration of non-global arrays is not implemented"); } } // Ensure we have a following ']' scan(&Token); match(T_RBRACKET, "]"); } else { // Add this as a known scalar // and generate its space in assembly switch (class) { case C_EXTERN: case C_GLOBAL: sym = addglob(Text, type, ctype, S_VARIABLE, class, 1); break; case C_LOCAL: sym = addlocl(Text, type, ctype, S_VARIABLE, 1); break; case C_PARAM: sym = addparm(Text, type, ctype, S_VARIABLE, 1); break; case C_MEMBER: sym = addmemb(Text, type, ctype, S_VARIABLE, 1); break; } } return (sym); } // var_declaration_list: // | variable_declaration // | variable_declaration separate_token var_declaration_list ; // // When called to parse function parameters, separate_token is ','. // When called to parse members of a struct/union, separate_token is ';'. // // Parse a list of variables. 
// Add them as symbols to one of the symbol table lists, and return the // number of variables. If funcsym is not NULL, there is an existing function // prototype, so compare each variable's type against this prototype. static int var_declaration_list(struct symtable *funcsym, int class, int separate_token, int end_token) { int type; int paramcnt = 0; struct symtable *protoptr = NULL; struct symtable *ctype; // If there is a prototype, get the pointer // to the first prototype parameter if (funcsym != NULL) protoptr = funcsym->member; // Loop until the final end token while (Token.token != end_token) { // Get the type and identifier type = parse_type(&ctype, &class); ident(); // Check that this type matches the prototype if there is one if (protoptr != NULL) { if (type != protoptr->type) fatald("Type doesn't match prototype for parameter", paramcnt + 1); protoptr = protoptr->next; } else { // Add a new parameter to the right symbol table list, based on the class var_declaration(type, ctype, class); } paramcnt++; // Must have a separate_token or ')' at this point if ((Token.token != separate_token) && (Token.token != end_token)) fatald("Unexpected token in parameter list", Token.token); if (Token.token == separate_token) scan(&Token); } // Check that the number of parameters in this list matches // any existing prototype if ((funcsym != NULL) && (paramcnt != funcsym->nelems)) fatals("Parameter count mismatch for function", funcsym->name); // Return the count of parameters return (paramcnt); } // // function_declaration: type identifier '(' parameter_list ')' ; // | type identifier '(' parameter_list ')' compound_statement ; // // Parse the declaration of function. // The identifier has been scanned & we have the type. struct ASTnode *function_declaration(int type) { struct ASTnode *tree, *finalstmt; struct symtable *oldfuncsym, *newfuncsym = NULL; int endlabel, paramcnt; // Text has the identifier's name. If this exists and is a // function, get the id. 
Otherwise, set oldfuncsym to NULL. if ((oldfuncsym = findsymbol(Text)) != NULL) if (oldfuncsym->stype != S_FUNCTION) oldfuncsym = NULL; // If this is a new function declaration, get a // label-id for the end label, and add the function // to the symbol table, if (oldfuncsym == NULL) { endlabel = genlabel(); // Assumtion: functions only return scalar types, so NULL below newfuncsym = addglob(Text, type, NULL, S_FUNCTION, C_GLOBAL, endlabel); } // Scan in the '(', any parameters and the ')'. // Pass in any existing function prototype pointer lparen(); paramcnt = var_declaration_list(oldfuncsym, C_PARAM, T_COMMA, T_RPAREN); rparen(); // If this is a new function declaration, update the // function symbol entry with the number of parameters. // Also copy the parameter list into the function's node. if (newfuncsym) { newfuncsym->nelems = paramcnt; newfuncsym->member = Parmhead; oldfuncsym = newfuncsym; } // Clear out the parameter list Parmhead = Parmtail = NULL; // Declaration ends in a semicolon, only a prototype. if (Token.token == T_SEMI) { scan(&Token); return (NULL); } // This is not just a prototype. // Set the Functionid global to the function's symbol pointer Functionid = oldfuncsym; // Get the AST tree for the compound statement and mark // that we have parsed no loops yet Looplevel= 0; tree = compound_statement(); // If the function type isn't P_VOID .. if (type != P_VOID) { // Error if no statements in the function if (tree == NULL) fatal("No statements in function with non-void type"); // Check that the last AST operation in the // compound statement was a return statement finalstmt = (tree->op == A_GLUE) ? 
tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); } // Return an A_FUNCTION node which has the function's symbol pointer // and the compound statement sub-tree return (mkastunary(A_FUNCTION, type, tree, oldfuncsym, endlabel)); } // Parse composite type declarations: structs or unions. // Either find an existing struct/union declaration, or build // a struct/union symbol table entry and return its pointer. static struct symtable *composite_declaration(int type) { struct symtable *ctype = NULL; struct symtable *m; int offset; // Skip the struct/union keyword scan(&Token); // See if there is a following struct/union name if (Token.token == T_IDENT) { // Find any matching composite type if (type == P_STRUCT) ctype = findstruct(Text); else ctype = findunion(Text); scan(&Token); } // If the next token isn't an LBRACE , this is // the usage of an existing struct/union type. // Return the pointer to the type. if (Token.token != T_LBRACE) { if (ctype == NULL) fatals("unknown struct/union type", Text); return (ctype); } // Ensure this struct/union type hasn't been // previously defined if (ctype) fatals("previously defined struct/union", Text); // Build the composite type and skip the left brace if (type == P_STRUCT) ctype = addstruct(Text, P_STRUCT, NULL, 0, 0); else ctype = addunion(Text, P_UNION, NULL, 0, 0); scan(&Token); // Scan in the list of members and attach // to the struct type's node var_declaration_list(NULL, C_MEMBER, T_SEMI, T_RBRACE); rbrace(); ctype->member = Membhead; Membhead = Membtail = NULL; // Set the offset of the initial member // and find the first free byte after it m = ctype->member; m->posn = 0; offset = typesize(m->type, m->ctype); // Set the position of each successive member in the composite type // Unions are easy. 
For structs, align the member and find the next free byte for (m = m->next; m != NULL; m = m->next) { // Set the offset for this member if (type == P_STRUCT) m->posn = genalign(m->type, offset, 1); else m->posn = 0; // Get the offset of the next free byte after this member offset += typesize(m->type, m->ctype); } // Set the overall size of the composite type ctype->size = offset; return (ctype); } // Parse an enum declaration static void enum_declaration(void) { struct symtable *etype = NULL; char *name; int intval = 0; // Skip the enum keyword. scan(&Token); // If there's a following enum type name, get a // pointer to any existing enum type node. if (Token.token == T_IDENT) { etype = findenumtype(Text); name = strdup(Text); // As it gets tromped soon scan(&Token); } // If the next token isn't a LBRACE, check // that we have an enum type name, then return if (Token.token != T_LBRACE) { if (etype == NULL) fatals("undeclared enum type:", name); return; } // We do have an LBRACE. Skip it scan(&Token); // If we have an enum type name, ensure that it // hasn't been declared before. if (etype != NULL) fatals("enum type redeclared:", etype->name); else // Build an enum type node for this identifier etype = addenum(name, C_ENUMTYPE, 0); // Loop to get all the enum values while (1) { // Ensure we have an identifier // Copy it in case there's an int literal coming up ident(); name = strdup(Text); // Ensure this enum value hasn't been declared before etype = findenumval(name); if (etype != NULL) fatals("enum value redeclared:", Text); // If the next token is an '=', skip it and // get the following int literal if (Token.token == T_ASSIGN) { scan(&Token); if (Token.token != T_INTLIT) fatal("Expected int literal after '='"); intval = Token.intvalue; scan(&Token); } // Build an enum value node for this identifier. // Increment the value for the next enum identifier. 
etype = addenum(name, C_ENUMVAL, intval++); // Bail out on a right curly bracket, else get a comma if (Token.token == T_RBRACE) break; comma(); } scan(&Token); // Skip over the right curly bracket } // Parse a typedef declaration and return the type // and ctype that it represents int typedef_declaration(struct symtable **ctype) { int type, class=0; // Skip the typedef keyword. scan(&Token); // Get the actual type following the keyword type = parse_type(ctype, &class); if (class != 0) fatal("Can't have extern in a typedef declaration"); // See if the typedef identifier already exists if (findtypedef(Text) != NULL) fatals("redefinition of typedef", Text); // It doesn't exist so add it to the typedef list addtypedef(Text, type, *ctype, 0, 0); scan(&Token); return (type); } // Given a typedef name, return the type it represents int type_of_typedef(char *name, struct symtable **ctype) { struct symtable *t; // Look up the typedef in the list t = findtypedef(name); if (t == NULL) fatals("unknown type", name); scan(&Token); *ctype = t->ctype; return (t->type); } // Parse one or more global declarations, either // variables, functions or structs void global_declarations(void) { struct ASTnode *tree; struct symtable *ctype; int type, class= C_GLOBAL; while (1) { // Stop when we have reached EOF if (Token.token == T_EOF) break; // Get the type type = parse_type(&ctype, &class); // We might have just parsed a struct, union or enum // declaration with no associated variable. // The next token might be a ';'. Loop back if it is. // XXX: I'm not happy with this as it allows // "struct fred;" as an accepted statement if (type == -1) { semi(); continue; } // We have to read past the identifier // to see either a '(' for a function declaration // or a ',' or ';' for a variable declaration. // Text is filled in by the ident() call. 
ident(); if (Token.token == T_LPAREN) { // Parse the function declaration tree = function_declaration(type); // Only a function prototype, no code if (tree == NULL) continue; // A real function, generate the assembly code for it if (O_dumpAST) { dumpAST(tree, NOLABEL, 0); fprintf(stdout, "\n\n"); } genAST(tree, NOLABEL, NOLABEL, NOLABEL, 0); // Now free the symbols associated // with this function freeloclsyms(); } else { // Parse the global variable declaration // and skip past the trailing semicolon var_declaration(type, ctype, class); semi(); } } } ================================================ FILE: 36_Break_Continue/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c void reject_token(struct token *t); int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue); struct ASTnode *mkastleaf(int op, int type, struct symtable *sym, int intvalue); struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, struct symtable *sym, int intvalue); void dumpAST(struct ASTnode *n, int label, int parentASTop); // gen.c int genlabel(void); int genAST(struct ASTnode *n, int iflabel, int looptoplabel, int loopendlabel, int parentASTop); void genpreamble(); void genpostamble(); void genfreeregs(); void genglobsym(struct symtable *node); int genglobstr(char *strvalue); int genprimsize(int type); int genalign(int type, int offset, int direction); void genreturn(int reg, int id); // cg.c int cgprimsize(int type); int cgalign(int type, int offset, int direction); void cgtextseg(); void cgdataseg(); void freeall_registers(void); void cgpreamble(); void cgpostamble(); void cgfuncpreamble(struct symtable *sym); void cgfuncpostamble(struct symtable *sym); int cgloadint(int value, int type); int cgloadglob(struct symtable *sym, int op); int 
cgloadlocal(struct symtable *sym, int op); int cgloadglobstr(int label); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdiv(int r1, int r2); int cgshlconst(int r, int val); int cgcall(struct symtable *sym, int numargs); void cgcopyarg(int r, int argposn); int cgstorglob(int r, struct symtable *sym); int cgstorlocal(int r, struct symtable *sym); void cgglobsym(struct symtable *node); void cgglobstr(int l, char *strvalue); int cgcompare_and_set(int ASTop, int r1, int r2); int cgcompare_and_jump(int ASTop, int r1, int r2, int label); void cglabel(int l); void cgjump(int l); int cgwiden(int r, int oldtype, int newtype); void cgreturn(int reg, struct symtable *sym); int cgaddress(struct symtable *sym); int cgderef(int r, int type); int cgstorderef(int r1, int r2, int type); int cgnegate(int r); int cginvert(int r); int cglognot(int r); int cgboolean(int r, int op, int label); int cgand(int r1, int r2); int cgor(int r1, int r2); int cgxor(int r1, int r2); int cgshl(int r1, int r2); int cgshr(int r1, int r2); // expr.c struct ASTnode *binexpr(int ptp); // stmt.c struct ASTnode *compound_statement(void); // misc.c void match(int t, char *what); void semi(void); void lbrace(void); void rbrace(void); void lparen(void); void rparen(void); void ident(void); void comma(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node); struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int size, int posn); struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int size); struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int size); struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype, int size); struct symtable *addstruct(char *name, int type, struct symtable 
*ctype, int stype, int size); struct symtable *addunion(char *name, int type, struct symtable *ctype, int stype, int size); struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int size); struct symtable *addenum(char *name, int class, int value); struct symtable *addtypedef(char *name, int type, struct symtable *ctype, int stype, int size); struct symtable *findglob(char *s); struct symtable *findlocl(char *s); struct symtable *findsymbol(char *s); struct symtable *findmember(char *s); struct symtable *findstruct(char *s); struct symtable *findunion(char *s); struct symtable *findenumtype(char *s); struct symtable *findenumval(char *s); struct symtable *findtypedef(char *s); void clear_symtable(void); void freeloclsyms(void); // decl.c struct symtable *var_declaration(int type, struct symtable *ctype, int class); struct ASTnode *function_declaration(int type); void global_declarations(void); // types.c int inttype(int type); int ptrtype(int type); int parse_type(struct symtable **ctype, int *class); int pointer_to(int type); int value_at(int type); int typesize(int type, struct symtable *ctype); struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op); ================================================ FILE: 36_Break_Continue/defs.h ================================================ #include #include #include #include // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 enum { TEXTLEN = 512 // Length of identifiers in input }; // Commands and default filenames #define AOUT "a.out" #ifdef __NASM__ #define ASCMD "nasm -f elf64 -o " #define LDCMD "cc -no-pie -fno-plt -Wall -o " #else #define ASCMD "as -o " #define LDCMD "cc -o " #endif #define CPPCMD "cpp -nostdinc -isystem " // Token types enum { T_EOF, // Binary operators T_ASSIGN, T_LOGOR, T_LOGAND, T_OR, T_XOR, T_AMPER, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, T_LSHIFT, T_RSHIFT, T_PLUS, T_MINUS, T_STAR, T_SLASH, // Other operators T_INC, T_DEC, T_INVERT, 
T_LOGNOT, // Type keywords T_VOID, T_CHAR, T_INT, T_LONG, // Other keywords T_IF, T_ELSE, T_WHILE, T_FOR, T_RETURN, T_STRUCT, T_UNION, T_ENUM, T_TYPEDEF, T_EXTERN, T_BREAK, T_CONTINUE, // Structural tokens T_INTLIT, T_STRLIT, T_SEMI, T_IDENT, T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, T_LBRACKET, T_RBRACKET, T_COMMA, T_DOT, T_ARROW }; // Token structure struct token { int token; // Token type, from the enum list above int intvalue; // For T_INTLIT, the integer value }; // AST node types. The first few line up // with the related tokens enum { A_ASSIGN = 1, A_LOGOR, A_LOGAND, A_OR, A_XOR, A_AND, A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_LSHIFT, A_RSHIFT, A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_INTLIT, A_STRLIT, A_IDENT, A_GLUE, A_IF, A_WHILE, A_FUNCTION, A_WIDEN, A_RETURN, A_FUNCCALL, A_DEREF, A_ADDR, A_SCALE, A_PREINC, A_PREDEC, A_POSTINC, A_POSTDEC, A_NEGATE, A_INVERT, A_LOGNOT, A_TOBOOL, A_BREAK, A_CONTINUE }; // Primitive types. The bottom 4 bits is an integer // value that represents the level of indirection, // e.g. 0= no pointer, 1= pointer, 2= pointer pointer etc. 
enum { P_NONE, P_VOID = 16, P_CHAR = 32, P_INT = 48, P_LONG = 64, P_STRUCT=80, P_UNION=96 }; // Structural types enum { S_VARIABLE, S_FUNCTION, S_ARRAY }; // Storage classes enum { C_GLOBAL = 1, // Globally visible symbol C_LOCAL, // Locally visible symbol C_PARAM, // Locally visible function parameter C_EXTERN, // External globally visible symbol C_STRUCT, // A struct C_UNION, // A union C_MEMBER, // Member of a struct or union C_ENUMTYPE, // A named enumeration type C_ENUMVAL, // A named enumeration value C_TYPEDEF // A named typedef }; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol struct symtable *ctype; // If struct/union, ptr to that type int stype; // Structural type for the symbol int class; // Storage class for the symbol union { int size; // Number of elements in the symbol int endlabel; // For functions, the end label }; union { int nelems; // For functions, # of params int posn; // For locals, the negative offset // from the stack base pointer }; struct symtable *next; // Next symbol in one list struct symtable *member; // First member of a function, struct, }; // union or enum // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates int rvalue; // True if the node is an rvalue struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; struct symtable *sym; // For many AST nodes, the pointer to union { // the symbol in the symbol table int intvalue; // For A_INTLIT, the integer value int size; // For A_SCALE, the size to scale by }; }; enum { NOREG = -1, // Use NOREG when the AST generation // functions have no register to return NOLABEL = 0 // Use NOLABEL when we have no label to // pass to genAST() }; ================================================ FILE: 36_Break_Continue/expr.c ================================================ #include 
"defs.h" #include "data.h" #include "decl.h" // Parsing of expressions // Copyright (c) 2019 Warren Toomey, GPL3 // expression_list: // | expression // | expression ',' expression_list // ; // Parse a list of zero or more comma-separated expressions and // return an AST composed of A_GLUE nodes with the left-hand child // being the sub-tree of previous expressions (or NULL) and the right-hand // child being the next expression. Each A_GLUE node will have size field // set to the number of expressions in the tree at this point. If no // expressions are parsed, NULL is returned static struct ASTnode *expression_list(void) { struct ASTnode *tree = NULL; struct ASTnode *child = NULL; int exprcount = 0; // Loop until the final right parentheses while (Token.token != T_RPAREN) { // Parse the next expression and increment the expression count child = binexpr(0); exprcount++; // Build an A_GLUE AST node with the previous tree as the left child // and the new expression as the right child. Store the expression count. tree = mkastnode(A_GLUE, P_NONE, tree, NULL, child, NULL, exprcount); // Must have a ',' or ')' at this point switch (Token.token) { case T_COMMA: scan(&Token); break; case T_RPAREN: break; default: fatald("Unexpected token in expression list", Token.token); } } // Return the tree of expressions return (tree); } // Parse a function call and return its AST static struct ASTnode *funccall(void) { struct ASTnode *tree; struct symtable *funcptr; // Check that the identifier has been defined as a function, // then make a leaf node for it. if ((funcptr = findsymbol(Text)) == NULL || funcptr->stype != S_FUNCTION) { fatals("Undeclared function", Text); } // Get the '(' lparen(); // Parse the argument expression list tree = expression_list(); // XXX Check type of each argument against the function's prototype // Build the function call AST node. Store the // function's return type as this node's type. 
// Also record the function's symbol-id tree = mkastunary(A_FUNCCALL, funcptr->type, tree, funcptr, 0); // Get the ')' rparen(); return (tree); } // Parse the index into an array and return an AST tree for it static struct ASTnode *array_access(void) { struct ASTnode *left, *right; struct symtable *aryptr; // Check that the identifier has been defined as an array // then make a leaf node for it that points at the base if ((aryptr = findsymbol(Text)) == NULL || aryptr->stype != S_ARRAY) { fatals("Undeclared array", Text); } left = mkastleaf(A_ADDR, aryptr->type, aryptr, 0); // Get the '[' scan(&Token); // Parse the following expression right = binexpr(0); // Get the ']' match(T_RBRACKET, "]"); // Ensure that this is of int type if (!inttype(right->type)) fatal("Array index is not of integer type"); // Scale the index by the size of the element's type right = modify_type(right, left->type, A_ADD); // Return an AST tree where the array's base has the offset // added to it, and dereference the element. Still an lvalue // at this point. left = mkastnode(A_ADD, aryptr->type, left, NULL, right, NULL, 0); left = mkastunary(A_DEREF, value_at(left->type), left, NULL, 0); return (left); } // Parse the member reference of a struct or union // and return an AST tree for it. If withpointer is true, // the access is through a pointer to the member. static struct ASTnode *member_access(int withpointer) { struct ASTnode *left, *right; struct symtable *compvar; struct symtable *typeptr; struct symtable *m; // Check that the identifier has been declared as a // struct/union or a struct/union pointer if ((compvar = findsymbol(Text)) == NULL) fatals("Undeclared variable", Text); if (withpointer && compvar->type != pointer_to(P_STRUCT) && compvar->type != pointer_to(P_UNION)) fatals("Undeclared variable", Text); if (!withpointer && compvar->type != P_STRUCT && compvar->type != P_UNION) fatals("Undeclared variable", Text); // If a pointer to a struct/union, get the pointer's value. 
// Otherwise, make a leaf node that points at the base // Either way, it's an rvalue if (withpointer) { left = mkastleaf(A_IDENT, pointer_to(compvar->type), compvar, 0); } else left = mkastleaf(A_ADDR, compvar->type, compvar, 0); left->rvalue = 1; // Get the details of the composite type typeptr = compvar->ctype; // Skip the '.' or '->' token and get the member's name scan(&Token); ident(); // Find the matching member's name in the type // Die if we can't find it for (m = typeptr->member; m != NULL; m = m->next) if (!strcmp(m->name, Text)) break; if (m == NULL) fatals("No member found in struct/union: ", Text); // Build an A_INTLIT node with the offset right = mkastleaf(A_INTLIT, P_INT, NULL, m->posn); // Add the member's offset to the base of the struct/union // and dereference it. Still an lvalue at this point left = mkastnode(A_ADD, pointer_to(m->type), left, NULL, right, NULL, 0); left = mkastunary(A_DEREF, m->type, left, NULL, 0); return (left); } // Parse a postfix expression and return // an AST node representing it. The // identifier is already in Text. static struct ASTnode *postfix(void) { struct ASTnode *n; struct symtable *varptr; struct symtable *enumptr; // If the identifier matches an enum value, // return an A_INTLIT node if ((enumptr = findenumval(Text)) != NULL) { scan(&Token); return (mkastleaf(A_INTLIT, P_INT, NULL, enumptr->posn)); } // Scan in the next token to see if we have a postfix expression scan(&Token); // Function call if (Token.token == T_LPAREN) return (funccall()); // An array reference if (Token.token == T_LBRACKET) return (array_access()); // Access into a struct or union if (Token.token == T_DOT) return (member_access(0)); if (Token.token == T_ARROW) return (member_access(1)); // A variable. Check that the variable exists. 
if ((varptr = findsymbol(Text)) == NULL || varptr->stype != S_VARIABLE) fatals("Unknown variable", Text); switch (Token.token) { // Post-increment: skip over the token case T_INC: scan(&Token); n = mkastleaf(A_POSTINC, varptr->type, varptr, 0); break; // Post-decrement: skip over the token case T_DEC: scan(&Token); n = mkastleaf(A_POSTDEC, varptr->type, varptr, 0); break; // Just a variable reference default: n = mkastleaf(A_IDENT, varptr->type, varptr, 0); } return (n); } // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; int id; switch (Token.token) { case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. // Make it a P_CHAR if it's within the P_CHAR range if ((Token.intvalue) >= 0 && (Token.intvalue < 256)) n = mkastleaf(A_INTLIT, P_CHAR, NULL, Token.intvalue); else n = mkastleaf(A_INTLIT, P_INT, NULL, Token.intvalue); break; case T_STRLIT: // For a STRLIT token, generate the assembly for it. // Then make a leaf AST node for it. id is the string's label. id = genglobstr(Text); n = mkastleaf(A_STRLIT, pointer_to(P_CHAR), NULL, id); break; case T_IDENT: return (postfix()); case T_LPAREN: // Beginning of a parenthesised expression, skip the '('. // Scan in the expression and the right parenthesis scan(&Token); n = binexpr(0); rparen(); return (n); default: fatald("Expecting a primary expression, got token", Token.token); } // Scan in the next token and return the leaf node scan(&Token); return (n); } // Convert a binary operator token into a binary AST operation. // We rely on a 1:1 mapping from token to AST operation static int binastop(int tokentype) { if (tokentype > T_EOF && tokentype <= T_SLASH) return (tokentype); fatald("Syntax error, token", tokentype); return (0); // Keep -Wall happy } // Return true if a token is right-associative, // false otherwise. 
static int rightassoc(int tokentype) { if (tokentype == T_ASSIGN) return (1); return (0); } // Operator precedence for each token. Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 20, 30, // T_EOF, T_ASSIGN, T_LOGOR, T_LOGAND 40, 50, 60, // T_OR, T_XOR, T_AMPER 70, 70, // T_EQ, T_NE 80, 80, 80, 80, // T_LT, T_GT, T_LE, T_GE 90, 90, // T_LSHIFT, T_RSHIFT 100, 100, // T_PLUS, T_MINUS 110, 110 // T_STAR, T_SLASH }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec; if (tokentype > T_SLASH) fatald("Token with no precedence in op_precedence:", tokentype); prec = OpPrec[tokentype]; if (prec == 0) fatald("Syntax error, token", tokentype); return (prec); } // prefix_expression: primary // | '*' prefix_expression // | '&' prefix_expression // | '-' prefix_expression // | '++' prefix_expression // | '--' prefix_expression // ; // Parse a prefix expression and return // a sub-tree representing it. 
struct ASTnode *prefix(void) { struct ASTnode *tree; switch (Token.token) { case T_AMPER: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Ensure that it's an identifier if (tree->op != A_IDENT) fatal("& operator must be followed by an identifier"); // Now change the operator to A_ADDR and the type to // a pointer to the original type tree->op = A_ADDR; tree->type = pointer_to(tree->type); break; case T_STAR: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's either another deref or an // identifier if (tree->op != A_IDENT && tree->op != A_DEREF) fatal("* operator must be followed by an identifier or *"); // Prepend an A_DEREF operation to the tree tree = mkastunary(A_DEREF, value_at(tree->type), tree, NULL, 0); break; case T_MINUS: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_NEGATE operation to the tree and // make the child an rvalue. Because chars are unsigned, // also widen this to int so that it's signed tree->rvalue = 1; tree = modify_type(tree, P_INT, 0); tree = mkastunary(A_NEGATE, tree->type, tree, NULL, 0); break; case T_INVERT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_INVERT operation to the tree and // make the child an rvalue. tree->rvalue = 1; tree = mkastunary(A_INVERT, tree->type, tree, NULL, 0); break; case T_LOGNOT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_LOGNOT operation to the tree and // make the child an rvalue. 
tree->rvalue = 1; tree = mkastunary(A_LOGNOT, tree->type, tree, NULL, 0); break; case T_INC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("++ operator must be followed by an identifier"); // Prepend an A_PREINC operation to the tree tree = mkastunary(A_PREINC, tree->type, tree, NULL, 0); break; case T_DEC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("-- operator must be followed by an identifier"); // Prepend an A_PREDEC operation to the tree tree = mkastunary(A_PREDEC, tree->type, tree, NULL, 0); break; default: tree = primary(); } return (tree); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; struct ASTnode *ltemp, *rtemp; int ASTop; int tokentype; // Get the tree on the left. // Fetch the next token at the same time. 
left = prefix(); // If we hit one of several terminating tokens, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA) { left->rvalue = 1; return (left); } // While the precedence of this token is more than that of the // previous token precedence, or it's right associative and // equal to the previous token's precedence while ((op_precedence(tokentype) > ptp) || (rightassoc(tokentype) && op_precedence(tokentype) == ptp)) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Determine the operation to be performed on the sub-trees ASTop = binastop(tokentype); if (ASTop == A_ASSIGN) { // Assignment // Make the right tree into an rvalue right->rvalue = 1; // Ensure the right's type matches the left right = modify_type(right, left->type, 0); if (right == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree. However, switch // left and right around, so that the right expression's // code will be generated before the left expression ltemp = left; left = right; right = ltemp; } else { // We are not doing an assignment, so both trees should be rvalues // Convert both trees into rvalue if they are lvalue trees left->rvalue = 1; right->rvalue = 1; // Ensure the two types are compatible by trying // to modify each tree to match the other's type. ltemp = modify_type(left, right->type, ASTop); rtemp = modify_type(right, left->type, ASTop); if (ltemp == NULL && rtemp == NULL) fatal("Incompatible types in binary expression"); if (ltemp != NULL) left = ltemp; if (rtemp != NULL) right = rtemp; } // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. left = mkastnode(binastop(tokentype), left->type, left, NULL, right, NULL, 0); // Update the details of the current token. 
// If we hit a terminating token, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA) { left->rvalue = 1; return (left); } } // Return the tree we have when the precedence // is the same or lower left->rvalue = 1; return (left); } ================================================ FILE: 36_Break_Continue/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number int genlabel(void) { static int id = 1; return (id++); } // Generate the code for an IF statement // and an optional ELSE clause static int genIF(struct ASTnode *n, int looptoplabel, int loopendlabel) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. // When there is no ELSE clause, Lfalse _is_ // the ending label! Lfalse = genlabel(); if (n->right) Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. genAST(n->left, Lfalse, NOLABEL, NOLABEL, n->op); genfreeregs(); // Generate the true compound statement genAST(n->mid, NOLABEL, looptoplabel, loopendlabel, n->op); genfreeregs(); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); genfreeregs(); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = genlabel(); Lend = genlabel(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. 
genAST(n->left, Lend, Lstart, Lend, n->op); genfreeregs(); // Generate the compound statement for the body genAST(n->right, NOLABEL, Lstart, Lend, n->op); genfreeregs(); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Generate the code to copy the arguments of a // function call to its parameters, then call the // function itself. Return the register that holds // the function's return value. static int gen_funccall(struct ASTnode *n) { struct ASTnode *gluetree = n->left; int reg; int numargs = 0; // If there is a list of arguments, walk this list // from the last argument (right-hand child) to the // first while (gluetree) { // Calculate the expression's value reg = genAST(gluetree->right, NOLABEL, NOLABEL, NOLABEL, gluetree->op); // Copy this into the n'th function parameter: size is 1, 2, 3, ... cgcopyarg(reg, gluetree->size); // Keep the first (highest) number of arguments if (numargs == 0) numargs = gluetree->size; genfreeregs(); gluetree = gluetree->left; } // Call the function, clean up the stack (based on numargs), // and return its result return (cgcall(n->sym, numargs)); } // Given an AST, an optional label, and the AST op // of the parent, generate assembly code recursively. // Return the register id with the tree's final value. 
int genAST(struct ASTnode *n, int iflabel, int looptoplabel, int loopendlabel, int parentASTop) { int leftreg, rightreg; // We have some specific AST node handling at the top // so that we don't evaluate the child sub-trees immediately switch (n->op) { case A_IF: return (genIF(n, looptoplabel, loopendlabel)); case A_WHILE: return (genWHILE(n)); case A_FUNCCALL: return (gen_funccall(n)); case A_GLUE: // Do each child statement, and free the // registers after each child genAST(n->left, iflabel, looptoplabel, loopendlabel, n->op); genfreeregs(); genAST(n->right, iflabel, looptoplabel, loopendlabel, n->op); genfreeregs(); return (NOREG); case A_FUNCTION: // Generate the function's preamble before the code // in the child sub-tree cgfuncpreamble(n->sym); genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op); cgfuncpostamble(n->sym); return (NOREG); } // General AST node handling below // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op); if (n->right) rightreg = genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdiv(leftreg, rightreg)); case A_AND: return (cgand(leftreg, rightreg)); case A_OR: return (cgor(leftreg, rightreg)); case A_XOR: return (cgxor(leftreg, rightreg)); case A_LSHIFT: return (cgshl(leftreg, rightreg)); case A_RSHIFT: return (cgshr(leftreg, rightreg)); case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, compare registers // and set one to 1 or 0 based on the comparison. 
if (parentASTop == A_IF || parentASTop == A_WHILE) return (cgcompare_and_jump(n->op, leftreg, rightreg, iflabel)); else return (cgcompare_and_set(n->op, leftreg, rightreg)); case A_INTLIT: return (cgloadint(n->intvalue, n->type)); case A_STRLIT: return (cgloadglobstr(n->intvalue)); case A_IDENT: // Load our value if we are an rvalue // or we are being dereferenced if (n->rvalue || parentASTop == A_DEREF) { if (n->sym->class == C_GLOBAL) { return (cgloadglob(n->sym, n->op)); } else { return (cgloadlocal(n->sym, n->op)); } } else return (NOREG); case A_ASSIGN: // Are we assigning to an identifier or through a pointer? switch (n->right->op) { case A_IDENT: if (n->right->sym->class == C_GLOBAL) return (cgstorglob(leftreg, n->right->sym)); else return (cgstorlocal(leftreg, n->right->sym)); case A_DEREF: return (cgstorderef(leftreg, rightreg, n->right->type)); default: fatald("Can't A_ASSIGN in genAST(), op", n->op); } case A_WIDEN: // Widen the child's type to the parent's type return (cgwiden(leftreg, n->left->type, n->type)); case A_RETURN: cgreturn(leftreg, Functionid); return (NOREG); case A_ADDR: return (cgaddress(n->sym)); case A_DEREF: // If we are an rvalue, dereference to get the value we point at, // otherwise leave it for A_ASSIGN to store through the pointer if (n->rvalue) return (cgderef(leftreg, n->left->type)); else return (leftreg); case A_SCALE: // Small optimisation: use shift if the // scale value is a known power of two switch (n->size) { case 2: return (cgshlconst(leftreg, 1)); case 4: return (cgshlconst(leftreg, 2)); case 8: return (cgshlconst(leftreg, 3)); default: // Load a register with the size and // multiply the leftreg by this size rightreg = cgloadint(n->size, P_INT); return (cgmul(leftreg, rightreg)); } case A_POSTINC: case A_POSTDEC: // Load and decrement the variable's value into a register // and post increment/decrement it if (n->sym->class == C_GLOBAL) return (cgloadglob(n->sym, n->op)); else return (cgloadlocal(n->sym, n->op)); case 
A_PREINC: case A_PREDEC: // Load and decrement the variable's value into a register // and pre increment/decrement it if (n->left->sym->class == C_GLOBAL) return (cgloadglob(n->left->sym, n->op)); else return (cgloadlocal(n->left->sym, n->op)); case A_NEGATE: return (cgnegate(leftreg)); case A_INVERT: return (cginvert(leftreg)); case A_LOGNOT: return (cglognot(leftreg)); case A_TOBOOL: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, set the register // to 0 or 1 based on it's zeroeness or non-zeroeness return (cgboolean(leftreg, parentASTop, iflabel)); case A_BREAK: cgjump(loopendlabel); return (NOREG); case A_CONTINUE: cgjump(looptoplabel); return (NOREG); default: fatald("Unknown AST operator", n->op); } return (NOREG); // Keep -Wall happy } void genpreamble() { cgpreamble(); } void genpostamble() { cgpostamble(); } void genfreeregs() { freeall_registers(); } void genglobsym(struct symtable *node) { cgglobsym(node); } int genglobstr(char *strvalue) { int l = genlabel(); cgglobstr(l, strvalue); return (l); } int genprimsize(int type) { return (cgprimsize(type)); } int genalign(int type, int offset, int direction) { return (cgalign(type, offset, direction)); } ================================================ FILE: 36_Break_Continue/include/fcntl.h ================================================ #ifndef _FCNTL_H_ # define _FCNTL_H_ #define O_RDONLY 00 #define O_WRONLY 01 #define O_RDWR 02 int open(char *pathname, int flags); #endif // _FCNTL_H_ ================================================ FILE: 36_Break_Continue/include/stddef.h ================================================ #ifndef _STDDEF_H_ # define _STDDEF_H_ typedef long size_t; #endif //_STDDEF_H_ ================================================ FILE: 36_Break_Continue/include/stdio.h ================================================ #ifndef _STDIO_H_ # define _STDIO_H_ #include // This FILE definition will do for now typedef char * FILE; FILE 
*fopen(char *pathname, char *mode); size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream); size_t fwrite(void *ptr, size_t size, size_t nmemb, FILE *stream); int fclose(FILE *stream); int printf(char *format); int fprintf(FILE *stream, char *format); #endif // _STDIO_H_ ================================================ FILE: 36_Break_Continue/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Given a string with a '.' and at least a 1-character suffix // after the '.', change the suffix to be the given character. // Return the new string or NULL if the original string could // not be modified char *alter_suffix(char *str, char suffix) { char *posn; char *newstr; // Clone the string if ((newstr = strdup(str)) == NULL) return (NULL); // Find the '.' if ((posn = strrchr(newstr, '.')) == NULL) return (NULL); // Ensure there is a suffix posn++; if (*posn == '\0') return (NULL); // Change the suffix and NUL-terminate the string *posn++ = suffix; *posn = '\0'; return (newstr); } // Given an input filename, compile that file // down to assembly code. 
Return the new file's name static char *do_compile(char *filename) { char cmd[TEXTLEN]; // Change the input file's suffix to .s Outfilename = alter_suffix(filename, 's'); if (Outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .c on the end\n", filename); exit(1); } // Generate the pre-processor command snprintf(cmd, TEXTLEN, "%s %s %s", CPPCMD, INCDIR, filename); // Open up the pre-processor pipe if ((Infile = popen(cmd, "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", filename, strerror(errno)); exit(1); } Infilename = filename; // Create the output file if ((Outfile = fopen(Outfilename, "w")) == NULL) { fprintf(stderr, "Unable to create %s: %s\n", Outfilename, strerror(errno)); exit(1); } Line = 1; // Reset the scanner Putback = '\n'; clear_symtable(); // Clear the symbol table if (O_verbose) printf("compiling %s\n", filename); scan(&Token); // Get the first token from the input genpreamble(); // Output the preamble global_declarations(); // Parse the global declarations genpostamble(); // Output the postamble fclose(Outfile); // Close the output file return (Outfilename); } // Given an input filename, assemble that file // down to object code. Return the object filename char *do_assemble(char *filename) { char cmd[TEXTLEN]; int err; char *outfilename = alter_suffix(filename, 'o'); if (outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .s on the end\n", filename); exit(1); } // Build the assembly command and run it snprintf(cmd, TEXTLEN, "%s %s %s", ASCMD, outfilename, filename); if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Assembly of %s failed\n", filename); exit(1); } return (outfilename); } // Given a list of object files and an output filename, // link all of the object filenames together. 
void do_link(char *outfilename, char *objlist[]) { int cnt, size = TEXTLEN; char cmd[TEXTLEN], *cptr; int err; // Start with the linker command and the output file cptr = cmd; cnt = snprintf(cptr, size, "%s %s ", LDCMD, outfilename); cptr += cnt; size -= cnt; // Now append each object file while (*objlist != NULL) { cnt = snprintf(cptr, size, "%s ", *objlist); cptr += cnt; size -= cnt; objlist++; } if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Linking failed\n"); exit(1); } } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s [-vcST] [-o outfile] file [file ...]\n", prog); fprintf(stderr, " -v give verbose output of the compilation stages\n"); fprintf(stderr, " -c generate object files but don't link them\n"); fprintf(stderr, " -S generate assembly files but don't link them\n"); fprintf(stderr, " -T dump the AST trees for each input file\n"); fprintf(stderr, " -o outfile, produce the outfile executable file\n"); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. 
enum { MAXOBJ = 100 }; int main(int argc, char *argv[]) { char *outfilename = AOUT; char *asmfile, *objfile; char *objlist[MAXOBJ]; int i, objcnt = 0; // Initialise our variables O_dumpAST = 0; O_keepasm = 0; O_assemble = 0; O_verbose = 0; O_dolink = 1; // Scan for command-line options for (i = 1; i < argc; i++) { // No leading '-', stop scanning for options if (*argv[i] != '-') break; // For each option in this argument for (int j = 1; (*argv[i] == '-') && argv[i][j]; j++) { switch (argv[i][j]) { case 'o': outfilename = argv[++i]; // Save & skip to next argument break; case 'T': O_dumpAST = 1; break; case 'c': O_assemble = 1; O_keepasm = 0; O_dolink = 0; break; case 'S': O_keepasm = 1; O_assemble = 0; O_dolink = 0; break; case 'v': O_verbose = 1; break; default: usage(argv[0]); } } } // Ensure we have at lease one input file argument if (i >= argc) usage(argv[0]); // Work on each input file in turn while (i < argc) { asmfile = do_compile(argv[i]); // Compile the source file if (O_dolink || O_assemble) { objfile = do_assemble(asmfile); // Assemble it to object forma if (objcnt == (MAXOBJ - 2)) { fprintf(stderr, "Too many object files for the compiler to handle\n"); exit(1); } objlist[objcnt++] = objfile; // Add the object file's name objlist[objcnt] = NULL; // to the list of object files } if (!O_keepasm) // Remove the assembly file if unlink(asmfile); // we don't need to keep it i++; } // Now link all the object files together if (O_dolink) { do_link(outfilename, objlist); // If we don't need to keep the object // files, then remove them if (!O_assemble) { for (i = 0; objlist[i] != NULL; i++) unlink(objlist[i]); } } return (0); } ================================================ FILE: 36_Break_Continue/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" #include // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current token is t, // and fetch the next token. 
Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Match a comma and fetch the next token void comma(void) { match(T_COMMA, "comma"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d of %s\n", s, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d of %s\n", s1, s2, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d of %s\n", s, d, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d of %s\n", s, c, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } ================================================ FILE: 36_Break_Continue/scan.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { char *p; p = strchr(s, c); return (p ? p - s : -1); } // Get the next character from the input file. 
static int next(void) { int c, l; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return (c); } c = fgetc(Infile); // Read from input file while (c == '#') { // We've hit a pre-processor statement scan(&Token); // Get the line number into l if (Token.token != T_INTLIT) fatals("Expecting pre-processor line number, got:", Text); l = Token.intvalue; scan(&Token); // Get the filename in Text if (Token.token != T_STRLIT) fatals("Expecting pre-processor file name, got:", Text); if (Text[0] != '<') { // If this is a real filename if (strcmp(Text, Infilename)) // and not the one we have now Infilename = strdup(Text); // save it. Then update the line num Line = l; } while ((c = fgetc(Infile)) != '\n'); // Skip to the end of the line c = fgetc(Infile); // and get the next character } if ('\n' == c) Line++; // Increment line count return (c); } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. static int skip(void) { int c; c = next(); while (' ' == c || '\t' == c || '\n' == c || '\r' == c || '\f' == c) { c = next(); } return (c); } // Return the next character from a character // or string literal static int scanch(void) { int c; // Get the next input character and interpret // metacharacters that start with a backslash c = next(); if (c == '\\') { switch (c = next()) { case 'a': return '\a'; case 'b': return '\b'; case 'f': return '\f'; case 'n': return '\n'; case 'r': return '\r'; case 't': return '\t'; case 'v': return '\v'; case '\\': return '\\'; case '"': return '"'; case '\'': return '\''; default: fatalc("unknown escape sequence", c); } } return (c); // Just an ordinary old character! } // Scan and return an integer literal // value from the input file. 
static int scanint(int c) { int k, val = 0; // Convert each character into an int value while ((k = chrpos("0123456789", c)) >= 0) { val = val * 10 + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan in a string literal from the input file, // and store it in buf[]. Return the length of // the string. static int scanstr(char *buf) { int i, c; // Loop while we have enough buffer space for (i = 0; i < TEXTLEN - 1; i++) { // Get the next char and append to buf // Return when we hit the ending double quote if ((c = scanch()) == '"') { buf[i] = 0; return (i); } buf[i] = c; } // Ran out of buf[] space fatal("String literal too long"); return (0); } // Scan an identifier from the input file and // store it in buf[]. Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { fatal("Identifier too long"); } else if (i < lim - 1) { buf[i++] = c; } c = next(); } // We hit a non-valid character, put it back. // NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. 
static int keyword(char *s) { switch (*s) { case 'b': if (!strcmp(s, "break")) return (T_BREAK); break; case 'c': if (!strcmp(s, "char")) return (T_CHAR); if (!strcmp(s, "continue")) return (T_CONTINUE); break; case 'e': if (!strcmp(s, "else")) return (T_ELSE); if (!strcmp(s, "enum")) return (T_ENUM); if (!strcmp(s, "extern")) return (T_EXTERN); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'l': if (!strcmp(s, "long")) return (T_LONG); break; case 'r': if (!strcmp(s, "return")) return (T_RETURN); break; case 's': if (!strcmp(s, "struct")) return (T_STRUCT); break; case 't': if (!strcmp(s, "typedef")) return (T_TYPEDEF); break; case 'u': if (!strcmp(s, "union")) return (T_UNION); break; case 'v': if (!strcmp(s, "void")) return (T_VOID); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; } return (0); } // A pointer to a rejected token static struct token *Rejtoken = NULL; // Reject the token that we just scanned void reject_token(struct token *t) { if (Rejtoken != NULL) fatal("Can't reject token twice"); Rejtoken = t; } // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. 
int scan(struct token *t) { int c, tokentype; // If we have any rejected token, return it if (Rejtoken != NULL) { t = Rejtoken; Rejtoken = NULL; return (1); } // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': if ((c = next()) == '+') { t->token = T_INC; } else { putback(c); t->token = T_PLUS; } break; case '-': if ((c = next()) == '-') { t->token = T_DEC; } else if (c == '>') { t->token = T_ARROW; } else { putback(c); t->token = T_MINUS; } break; case '*': t->token = T_STAR; break; case '/': t->token = T_SLASH; break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case '[': t->token = T_LBRACKET; break; case ']': t->token = T_RBRACKET; break; case '~': t->token = T_INVERT; break; case '^': t->token = T_XOR; break; case ',': t->token = T_COMMA; break; case '.': t->token = T_DOT; break; case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { putback(c); t->token = T_LOGNOT; } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else if (c == '<') { t->token = T_LSHIFT; } else { putback(c); t->token = T_LT; } break; case '>': if ((c = next()) == '=') { t->token = T_GE; } else if (c == '>') { t->token = T_RSHIFT; } else { putback(c); t->token = T_GT; } break; case '&': if ((c = next()) == '&') { t->token = T_LOGAND; } else { putback(c); t->token = T_AMPER; } break; case '|': if ((c = next()) == '|') { t->token = T_LOGOR; } else { putback(c); t->token = T_OR; } break; case '\'': // If it's a quote, scan in the // literal character value and // the trailing quote t->intvalue = scanch(); t->token = T_INTLIT; if (next() != '\'') fatal("Expected '\\'' at end of char literal"); break; case '"': // Scan in a literal 
string scanstr(Text); t->token = T_STRLIT; break; default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if ((tokentype = keyword(Text)) != 0) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token return (1); } ================================================ FILE: 36_Break_Continue/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // Prototypes static struct ASTnode *single_statement(void); // compound_statement: // empty, i.e. no statement // | statement // | statement statements // ; // // statement: declaration // | expression_statement // | function_call // | if_statement // | while_statement // | for_statement // | return_statement // ; // if_statement: if_head // | if_head 'else' compound_statement // ; // // if_head: 'if' '(' true_false_expression ')' compound_statement ; // // Parse an IF statement including any // optional ELSE clause and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. 
condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); rparen(); // Get the AST for the compound statement trueAST = compound_statement(); // If we have an 'else', skip it // and get the AST for the compound statement if (Token.token == T_ELSE) { scan(&Token); falseAST = compound_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, P_NONE, condAST, trueAST, falseAST, NULL, 0)); } // while_statement: 'while' '(' true_false_expression ')' compound_statement ; // // Parse a WHILE statement and return its AST static struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); rparen(); // Get the AST for the compound statement. // Update the loop depth in the process Looplevel++; bodyAST = compound_statement(); Looplevel--; // Build and return the AST for this statement return (mkastnode(A_WHILE, P_NONE, condAST, NULL, bodyAST, NULL, 0)); } // for_statement: 'for' '(' preop_statement ';' // true_false_expression ';' // postop_statement ')' compound_statement ; // // preop_statement: statement (for now) // postop_statement: statement (for now) // // Parse a FOR statement and return its AST static struct ASTnode *for_statement(void) { struct ASTnode *condAST, *bodyAST; struct ASTnode *preopAST, *postopAST; struct ASTnode *tree; // Ensure we have 'for' '(' match(T_FOR, "for"); lparen(); // Get the pre_op statement and the ';' preopAST = single_statement(); semi(); // Get the condition and the ';'. // Force a non-comparison to be boolean // the tree's operation is a comparison. 
condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); semi(); // Get the post_op statement and the ')' postopAST = single_statement(); rparen(); // Get the compound statement which is the body // Update the loop depth in the process Looplevel++; bodyAST = compound_statement(); Looplevel--; // For now, all four sub-trees have to be non-NULL. // Later on, we'll change the semantics for when some are missing // Glue the compound statement and the postop tree tree = mkastnode(A_GLUE, P_NONE, bodyAST, NULL, postopAST, NULL, 0); // Make a WHILE loop with the condition and this new body tree = mkastnode(A_WHILE, P_NONE, condAST, NULL, tree, NULL, 0); // And glue the preop tree to the A_WHILE tree return (mkastnode(A_GLUE, P_NONE, preopAST, NULL, tree, NULL, 0)); } // return_statement: 'return' '(' expression ')' ; // // Parse a return statement and return its AST static struct ASTnode *return_statement(void) { struct ASTnode *tree; // Can't return a value if function returns P_VOID if (Functionid->type == P_VOID) fatal("Can't return from a void function"); // Ensure we have 'return' '(' match(T_RETURN, "return"); lparen(); // Parse the following expression tree = binexpr(0); // Ensure this is compatible with the function's type tree = modify_type(tree, Functionid->type, 0); if (tree == NULL) fatal("Incompatible type to return"); // Add on the A_RETURN node tree = mkastunary(A_RETURN, P_NONE, tree, NULL, 0); // Get the ')' rparen(); return (tree); } // break_statement: 'break' ; // // Parse a break statement and return its AST static struct ASTnode *break_statement(void) { if (Looplevel == 0) fatal("no loop to break out from"); scan(&Token); return (mkastleaf(A_BREAK, 0, NULL, 0)); } // continue_statement: 'continue' ; // // Parse a continue statement and return its AST static struct ASTnode *continue_statement(void) { if (Looplevel == 0) fatal("no loop to continue to"); scan(&Token); return 
(mkastleaf(A_CONTINUE, 0, NULL, 0)); } // Parse a single statement and return its AST static struct ASTnode *single_statement(void) { int type, class = C_LOCAL; struct symtable *ctype; switch (Token.token) { case T_IDENT: // We have to see if the identifier matches a typedef. // If not do the default code in this switch statement. // Otherwise, fall down to the parse_type() call. if (findtypedef(Text) == NULL) return (binexpr(0)); case T_CHAR: case T_INT: case T_LONG: case T_STRUCT: case T_UNION: case T_ENUM: case T_TYPEDEF: // The beginning of a variable declaration. // Parse the type and get the identifier. // Then parse the rest of the declaration // and skip over the semicolon type = parse_type(&ctype, &class); ident(); var_declaration(type, ctype, class); semi(); return (NULL); // No AST generated here case T_IF: return (if_statement()); case T_WHILE: return (while_statement()); case T_FOR: return (for_statement()); case T_RETURN: return (return_statement()); case T_BREAK: return (break_statement()); case T_CONTINUE: return (continue_statement()); default: // For now, see if this is an expression. // This catches assignment statements. 
return (binexpr(0)); } return (NULL); // Keep -Wall happy } // Parse a compound statement // and return its AST struct ASTnode *compound_statement(void) { struct ASTnode *left = NULL; struct ASTnode *tree; // Require a left curly bracket lbrace(); while (1) { // Parse a single statement tree = single_statement(); // Some statements must be followed by a semicolon if (tree != NULL && (tree->op == A_ASSIGN || tree->op == A_RETURN || tree->op == A_FUNCCALL || tree->op == A_BREAK || tree->op == A_CONTINUE)) semi(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, P_NONE, left, NULL, tree, NULL, 0); } // When we hit a right curly bracket, // skip past it and return the AST if (Token.token == T_RBRACE) { rbrace(); return (left); } } } ================================================ FILE: 36_Break_Continue/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 // Append a node to the singly-linked list pointed to by head or tail void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node) { // Check for valid pointers if (head == NULL || tail == NULL || node == NULL) fatal("Either head, tail or node is NULL in appendsym"); // Append to the list if (*tail) { (*tail)->next = node; *tail = node; } else *head = *tail = node; node->next = NULL; } // Create a symbol node to be added to a symbol table list. // Set up the node's: // + type: char, int etc. // + ctype: composite type pointer for struct/union // + structural type: var, function, array etc. 
// + size: number of elements, or endlabel: end label for a function // + posn: Position information for local symbols // Return a pointer to the new node struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int size, int posn) { // Get a new node struct symtable *node = (struct symtable *) malloc(sizeof(struct symtable)); if (node == NULL) fatal("Unable to malloc a symbol table node in newsym"); // Fill in the values node->name = strdup(name); node->type = type; node->ctype = ctype; node->stype = stype; node->class = class; node->size = size; node->posn = posn; node->next = NULL; node->member = NULL; // Generate any global space if (class == C_GLOBAL) genglobsym(node); return (node); } // Add a symbol to the global symbol list struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int size) { struct symtable *sym = newsym(name, type, ctype, stype, class, size, 0); appendsym(&Globhead, &Globtail, sym); return (sym); } // Add a symbol to the local symbol list struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_LOCAL, size, 0); appendsym(&Loclhead, &Locltail, sym); return (sym); } // Add a symbol to the parameter list struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_PARAM, size, 0); appendsym(&Parmhead, &Parmtail, sym); return (sym); } // Add a symbol to the temporary member list struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_MEMBER, size, 0); appendsym(&Membhead, &Membtail, sym); return (sym); } // Add a struct to the struct list struct symtable *addstruct(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_STRUCT, 
size, 0); appendsym(&Structhead, &Structtail, sym); return (sym); } // Add a struct to the union list struct symtable *addunion(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_UNION, size, 0); appendsym(&Unionhead, &Uniontail, sym); return (sym); } // Add an enum type or value to the enum list. // Class is C_ENUMTYPE or C_ENUMVAL. // Use posn to store the int value. struct symtable *addenum(char *name, int class, int value) { struct symtable *sym = newsym(name, P_INT, NULL, 0, class, 0, value); appendsym(&Enumhead, &Enumtail, sym); return (sym); } // Add a typedef to the typedef list struct symtable *addtypedef(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_TYPEDEF, size, 0); appendsym(&Typehead, &Typetail, sym); return (sym); } // Search for a symbol in a specific list. // Return a pointer to the found node or NULL if not found. // If class is not zero, also match on the given class static struct symtable *findsyminlist(char *s, struct symtable *list, int class) { for (; list != NULL; list = list->next) if ((list->name != NULL) && !strcmp(s, list->name)) if (class==0 || class== list->class) return (list); return (NULL); } // Determine if the symbol s is in the global symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findglob(char *s) { return (findsyminlist(s, Globhead, 0)); } // Determine if the symbol s is in the local symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findlocl(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } return (findsyminlist(s, Loclhead, 0)); } // Determine if the symbol s is in the symbol table. // Return a pointer to the found node or NULL if not found. 
struct symtable *findsymbol(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } // Otherwise, try the local and global symbol lists node = findsyminlist(s, Loclhead, 0); if (node) return (node); return (findsyminlist(s, Globhead, 0)); } // Find a member in the member list // Return a pointer to the found node or NULL if not found. struct symtable *findmember(char *s) { return (findsyminlist(s, Membhead, 0)); } // Find a struct in the struct list // Return a pointer to the found node or NULL if not found. struct symtable *findstruct(char *s) { return (findsyminlist(s, Structhead, 0)); } // Find a struct in the union list // Return a pointer to the found node or NULL if not found. struct symtable *findunion(char *s) { return (findsyminlist(s, Unionhead, 0)); } // Find an enum type in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumtype(char *s) { return (findsyminlist(s, Enumhead, C_ENUMTYPE)); } // Find an enum value in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumval(char *s) { return (findsyminlist(s, Enumhead, C_ENUMVAL)); } // Find a type in the tyedef list // Return a pointer to the found node or NULL if not found. 
struct symtable *findtypedef(char *s) { return (findsyminlist(s, Typehead, 0)); } // Reset the contents of the symbol table void clear_symtable(void) { Globhead = Globtail = NULL; Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Membhead = Membtail = NULL; Structhead = Structtail = NULL; Unionhead = Uniontail = NULL; Enumhead = Enumtail = NULL; Typehead = Typetail = NULL; } // Clear all the entries in the local symbol table void freeloclsyms(void) { Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Functionid = NULL; } ================================================ FILE: 36_Break_Continue/tests/err.input31.c ================================================ Expecting a primary expression, got token:15 on line 5 of input31.c ================================================ FILE: 36_Break_Continue/tests/err.input32.c ================================================ Unknown variable:cow on line 4 of input32.c ================================================ FILE: 36_Break_Continue/tests/err.input33.c ================================================ Incompatible type to return on line 4 of input33.c ================================================ FILE: 36_Break_Continue/tests/err.input34.c ================================================ For now, declaration of non-global arrays is not implemented on line 4 of input34.c ================================================ FILE: 36_Break_Continue/tests/err.input35.c ================================================ Duplicate local variable declaration:a on line 4 of input35.c ================================================ FILE: 36_Break_Continue/tests/err.input36.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input36.c ================================================ FILE: 36_Break_Continue/tests/err.input37.c ================================================ Unexpected token in parameter list:15 on line 3 of input37.c 
================================================ FILE: 36_Break_Continue/tests/err.input38.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input38.c ================================================ FILE: 36_Break_Continue/tests/err.input39.c ================================================ No statements in function with non-void type on line 4 of input39.c ================================================ FILE: 36_Break_Continue/tests/err.input40.c ================================================ No return for function with non-void type on line 4 of input40.c ================================================ FILE: 36_Break_Continue/tests/err.input41.c ================================================ Can't return from a void function on line 3 of input41.c ================================================ FILE: 36_Break_Continue/tests/err.input42.c ================================================ Undeclared function:fred on line 3 of input42.c ================================================ FILE: 36_Break_Continue/tests/err.input43.c ================================================ Undeclared array:b on line 3 of input43.c ================================================ FILE: 36_Break_Continue/tests/err.input44.c ================================================ Unknown variable:z on line 3 of input44.c ================================================ FILE: 36_Break_Continue/tests/err.input45.c ================================================ & operator must be followed by an identifier on line 3 of input45.c ================================================ FILE: 36_Break_Continue/tests/err.input46.c ================================================ * operator must be followed by an identifier or * on line 3 of input46.c ================================================ FILE: 36_Break_Continue/tests/err.input47.c ================================================ ++ operator must be followed by an identifier on 
line 3 of input47.c ================================================ FILE: 36_Break_Continue/tests/err.input48.c ================================================ -- operator must be followed by an identifier on line 3 of input48.c ================================================ FILE: 36_Break_Continue/tests/err.input49.c ================================================ Incompatible expression in assignment on line 6 of input49.c ================================================ FILE: 36_Break_Continue/tests/err.input50.c ================================================ Incompatible types in binary expression on line 6 of input50.c ================================================ FILE: 36_Break_Continue/tests/err.input51.c ================================================ Expected '\'' at end of char literal on line 4 of input51.c ================================================ FILE: 36_Break_Continue/tests/err.input52.c ================================================ Unrecognised character:$ on line 5 of input52.c ================================================ FILE: 36_Break_Continue/tests/err.input56.c ================================================ unknown struct/union type:var1 on line 2 of input56.c ================================================ FILE: 36_Break_Continue/tests/err.input57.c ================================================ previously defined struct/union:fred on line 2 of input57.c ================================================ FILE: 36_Break_Continue/tests/err.input59.c ================================================ Undeclared variable:y on line 3 of input59.c ================================================ FILE: 36_Break_Continue/tests/err.input60.c ================================================ Undeclared variable:x on line 3 of input60.c ================================================ FILE: 36_Break_Continue/tests/err.input61.c ================================================ Undeclared variable:x on line 3 of input61.c 
================================================ FILE: 36_Break_Continue/tests/err.input64.c ================================================ undeclared enum type::fred on line 1 of input64.c ================================================ FILE: 36_Break_Continue/tests/err.input65.c ================================================ enum type redeclared::fred on line 2 of input65.c ================================================ FILE: 36_Break_Continue/tests/err.input66.c ================================================ enum value redeclared::z on line 2 of input66.c ================================================ FILE: 36_Break_Continue/tests/err.input68.c ================================================ redefinition of typedef:FOO on line 2 of input68.c ================================================ FILE: 36_Break_Continue/tests/err.input69.c ================================================ unknown type:FLOO on line 2 of input69.c ================================================ FILE: 36_Break_Continue/tests/err.input72.c ================================================ no loop to break out from on line 1 of input72.c ================================================ FILE: 36_Break_Continue/tests/err.input73.c ================================================ no loop to continue to on line 1 of input73.c ================================================ FILE: 36_Break_Continue/tests/input01.c ================================================ int printf(char *fmt); void main() { printf("%d\n", 12 * 3); printf("%d\n", 18 - 2 * 4); printf("%d\n", 1 + 2 + 9 - 5/2 + 3*5); } ================================================ FILE: 36_Break_Continue/tests/input02.c ================================================ int printf(char *fmt); void main() { int fred; int jim; fred= 5; jim= 12; printf("%d\n", fred + jim); } ================================================ FILE: 36_Break_Continue/tests/input03.c ================================================ int printf(char *fmt); 
void main() { int x; x= 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); } ================================================ FILE: 36_Break_Continue/tests/input04.c ================================================ int printf(char *fmt); void main() { int x; x= 7 < 9; printf("%d\n", x); x= 7 <= 9; printf("%d\n", x); x= 7 != 9; printf("%d\n", x); x= 7 == 7; printf("%d\n", x); x= 7 >= 7; printf("%d\n", x); x= 7 <= 7; printf("%d\n", x); x= 9 > 7; printf("%d\n", x); x= 9 >= 7; printf("%d\n", x); x= 9 != 7; printf("%d\n", x); } ================================================ FILE: 36_Break_Continue/tests/input05.c ================================================ int printf(char *fmt); void main() { int i; int j; i=6; j=12; if (i < j) { printf("%d\n", i); } else { printf("%d\n", j); } } ================================================ FILE: 36_Break_Continue/tests/input06.c ================================================ int printf(char *fmt); void main() { int i; i=1; while (i <= 10) { printf("%d\n", i); i= i + 1; } } ================================================ FILE: 36_Break_Continue/tests/input07.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 36_Break_Continue/tests/input08.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 36_Break_Continue/tests/input09.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { printf("%d\n", 2 * b - a); } } ================================================ FILE: 
36_Break_Continue/tests/input10.c ================================================ int printf(char *fmt); void main() { int i; char j; j= 20; printf("%d\n", j); i= 10; printf("%d\n", i); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 2; j= j + 1) { printf("%d\n", j); } } ================================================ FILE: 36_Break_Continue/tests/input11.c ================================================ int printf(char *fmt); int main() { int i; char j; long k; i= 10; printf("%d\n", i); j= 20; printf("%d\n", j); k= 30; printf("%d\n", k); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 4; j= j + 1) { printf("%d\n", j); } for (k= 1; k <= 5; k= k + 1) { printf("%d\n", k); } return(i); printf("%d\n", 12345); return(3); } ================================================ FILE: 36_Break_Continue/tests/input12.c ================================================ int printf(char *fmt); int fred() { return(5); } void main() { int x; x= fred(2); printf("%d\n", x); } ================================================ FILE: 36_Break_Continue/tests/input13.c ================================================ int printf(char *fmt); int fred() { return(56); } void main() { int dummy; int result; dummy= printf("%d\n", 23); result= fred(10); dummy= printf("%d\n", result); } ================================================ FILE: 36_Break_Continue/tests/input14.c ================================================ int printf(char *fmt); int fred() { return(20); } int main() { int result; printf("%d\n", 10); result= fred(15); printf("%d\n", result); printf("%d\n", fred(15)+10); return(0); } ================================================ FILE: 36_Break_Continue/tests/input15.c ================================================ int printf(char *fmt); int main() { char a; char *b; char c; int d; int *e; int f; a= 18; printf("%d\n", a); b= &a; c= *b; printf("%d\n", c); d= 12; printf("%d\n", d); e= &d; f= *e; printf("%d\n", f); return(0); } 
================================================ FILE: 36_Break_Continue/tests/input16.c ================================================ int printf(char *fmt); int c; int d; int *e; int f; int main() { c= 12; d=18; printf("%d\n", c); e= &c + 1; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 36_Break_Continue/tests/input17.c ================================================ int printf(char *fmt); int main() { char a; char *b; int d; int *e; b= &a; *b= 19; printf("%d\n", a); e= &d; *e= 12; printf("%d\n", d); return(0); } ================================================ FILE: 36_Break_Continue/tests/input18.c ================================================ int printf(char *fmt); int main() { int a; int b; a= b= 34; printf("%d\n", a); printf("%d\n", b); return(0); } ================================================ FILE: 36_Break_Continue/tests/input18a.c ================================================ int printf(char *fmt); int a; int *b; char c; char *d; int main() { b= &a; *b= 15; printf("%d\n", a); d= &c; *d= 16; printf("%d\n", c); return(0); } ================================================ FILE: 36_Break_Continue/tests/input19.c ================================================ int printf(char *fmt); int a; int b; int c; int d; int e; int main() { a= 2; b= 4; c= 3; d= 2; e= (a+b) * (c+d); printf("%d\n", e); return(0); } ================================================ FILE: 36_Break_Continue/tests/input20.c ================================================ int printf(char *fmt); int a; int b[25]; int main() { b[3]= 12; a= b[3]; printf("%d\n", a); return(0); } ================================================ FILE: 36_Break_Continue/tests/input21.c ================================================ int printf(char *fmt); char c; char *str; int main() { c= '\n'; printf("%d\n", c); for (str= "Hello world\n"; *str != 0; str= str + 1) { printf("%c", *str); } return(0); } ================================================ FILE: 
36_Break_Continue/tests/input22.c ================================================ int printf(char *fmt); char a; char b; char c; int d; int e; int f; long g; long h; long i; int main() { b= 5; c= 7; a= b + c++; printf("%d\n", a); e= 5; f= 7; d= e + f++; printf("%d\n", d); h= 5; i= 7; g= h + i++; printf("%d\n", g); a= b-- + c; printf("%d\n", a); d= e-- + f; printf("%d\n", d); g= h-- + i; printf("%d\n", g); a= ++b + c; printf("%d\n", a); d= ++e + f; printf("%d\n", d); g= ++h + i; printf("%d\n", g); a= b * --c; printf("%d\n", a); d= e * --f; printf("%d\n", d); g= h * --i; printf("%d\n", g); return(0); } ================================================ FILE: 36_Break_Continue/tests/input23.c ================================================ int printf(char *fmt); char *str; int x; int main() { x= -23; printf("%d\n", x); printf("%d\n", -10 * -10); x= 1; x= ~x; printf("%d\n", x); x= 2 > 5; printf("%d\n", x); x= !x; printf("%d\n", x); x= !x; printf("%d\n", x); x= 13; if (x) { printf("%d\n", 13); } x= 0; if (!x) { printf("%d\n", 14); } for (str= "Hello world\n"; *str; str++) { printf("%c", *str); } return(0); } ================================================ FILE: 36_Break_Continue/tests/input24.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { a= 42; b= 19; printf("%d\n", a & b); printf("%d\n", a | b); printf("%d\n", a ^ b); printf("%d\n", 1 << 3); printf("%d\n", 63 >> 3); return(0); } ================================================ FILE: 36_Break_Continue/tests/input25.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { char z; int y; int x; x= 10; y= 20; z= 30; printf("%d\n", x); printf("%d\n", y); printf("%d\n", z); a= 5; b= 15; c= 25; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); return(0); } ================================================ FILE: 36_Break_Continue/tests/input26.c ================================================ int printf(char 
*fmt); int main(int a, char b, long c, int d, int e, int f, int g, int h) { int i; int j; int k; a= 13; printf("%d\n", a); b= 23; printf("%d\n", b); c= 34; printf("%d\n", c); d= 44; printf("%d\n", d); e= 54; printf("%d\n", e); f= 64; printf("%d\n", f); g= 74; printf("%d\n", g); h= 84; printf("%d\n", h); i= 94; printf("%d\n", i); j= 95; printf("%d\n", j); k= 96; printf("%d\n", k); return(0); } ================================================ FILE: 36_Break_Continue/tests/input27.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int param5(int a, int b, int c, int d, int e) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param2(int a, int b) { int c; int d; int e; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param0() { int a; int b; int c; int d; int e; a= 1; b= 2; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int main() { param8(1,2,3,4,5,6,7,8); param5(1,2,3,4,5); param2(1,2); param0(); return(0); } ================================================ FILE: 36_Break_Continue/tests/input28.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ 
FILE: 36_Break_Continue/tests/input29.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h); int fred(int a, int b, int c); int main(); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 36_Break_Continue/tests/input30.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input30.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 36_Break_Continue/tests/input31.c ================================================ int printf(char *fmt); int main() { int x; x= 2 + + 3 - * / ; } ================================================ FILE: 36_Break_Continue/tests/input32.c ================================================ int printf(char *fmt); int main() { pizza cow llama sausage; } ================================================ FILE: 36_Break_Continue/tests/input33.c ================================================ int printf(char *fmt); int main() { char *z; return(z); } ================================================ FILE: 36_Break_Continue/tests/input34.c ================================================ int printf(char *fmt); int main() { int a[12]; return(0); } ================================================ FILE: 36_Break_Continue/tests/input35.c 
================================================ int printf(char *fmt); int fred(int a, int b) { int a; return(a); } ================================================ FILE: 36_Break_Continue/tests/input36.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c); ================================================ FILE: 36_Break_Continue/tests/input37.c ================================================ int printf(char *fmt); int fred(int a, char b +, int z); ================================================ FILE: 36_Break_Continue/tests/input38.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c, int g); ================================================ FILE: 36_Break_Continue/tests/input39.c ================================================ int printf(char *fmt); int main() { int a; } ================================================ FILE: 36_Break_Continue/tests/input40.c ================================================ int printf(char *fmt); int main() { int a; a= 5; } ================================================ FILE: 36_Break_Continue/tests/input41.c ================================================ int printf(char *fmt); void fred() { return(5); } ================================================ FILE: 36_Break_Continue/tests/input42.c ================================================ int printf(char *fmt); int main() { fred(5); } ================================================ FILE: 36_Break_Continue/tests/input43.c ================================================ int printf(char *fmt); int main() { int a; a= b[4]; } ================================================ FILE: 36_Break_Continue/tests/input44.c ================================================ int printf(char *fmt); int main() { int a; a= z; } ================================================ FILE: 36_Break_Continue/tests/input45.c 
================================================ int printf(char *fmt); int main() { int a; a= &5; } ================================================ FILE: 36_Break_Continue/tests/input46.c ================================================ int printf(char *fmt); int main() { int a; a= *5; } ================================================ FILE: 36_Break_Continue/tests/input47.c ================================================ int printf(char *fmt); int main() { int a; a= ++5; } ================================================ FILE: 36_Break_Continue/tests/input48.c ================================================ int printf(char *fmt); int main() { int a; a= --5; } ================================================ FILE: 36_Break_Continue/tests/input49.c ================================================ int printf(char *fmt); int main() { int x; char y; y= x; } ================================================ FILE: 36_Break_Continue/tests/input50.c ================================================ int printf(char *fmt); int main() { char *a; char *b; a= a + b; } ================================================ FILE: 36_Break_Continue/tests/input51.c ================================================ int printf(char *fmt); int main() { char a; a= 'fred'; } ================================================ FILE: 36_Break_Continue/tests/input52.c ================================================ int printf(char *fmt); int main() { int a; a= $5.00; } ================================================ FILE: 36_Break_Continue/tests/input53.c ================================================ int printf(char *fmt); int main() { printf("Hello world, %d\n", 23); return(0); } ================================================ FILE: 36_Break_Continue/tests/input54.c ================================================ int printf(char *fmt); int main() { int i; for (i=0; i < 20; i++) { printf("Hello world, %d\n", i); } return(0); } ================================================ FILE: 
36_Break_Continue/tests/input55.c ================================================ int printf(char *fmt); int main(int argc, char **argv) { int i; char *argument; printf("Hello world\n"); for (i=0; i < argc; i++) { argument= *argv; argv= argv + 1; printf("Argument %d is %s\n", i, argument); } return(0); } ================================================ FILE: 36_Break_Continue/tests/input56.c ================================================ struct foo { int x; }; struct mary var1; ================================================ FILE: 36_Break_Continue/tests/input57.c ================================================ struct fred { int x; } ; struct fred { char y; } ; ================================================ FILE: 36_Break_Continue/tests/input58.c ================================================ int printf(char *fmt); struct fred { int x; char y; long z; }; struct fred var2; struct fred *varptr; int main() { long result; var2.x= 12; printf("%d\n", var2.x); var2.y= 'c'; printf("%d\n", var2.y); var2.z= 4005; printf("%d\n", var2.z); result= var2.x + var2.y + var2.z; printf("%d\n", result); varptr= &var2; result= varptr->x + varptr->y + varptr->z; printf("%d\n", result); return(0); } ================================================ FILE: 36_Break_Continue/tests/input59.c ================================================ int main() { int x; x= y.foo; } ================================================ FILE: 36_Break_Continue/tests/input60.c ================================================ int main() { int x; x= x.foo; } ================================================ FILE: 36_Break_Continue/tests/input61.c ================================================ int main() { int x; x= x->foo; } ================================================ FILE: 36_Break_Continue/tests/input62.c ================================================ int printf(char *fmt); union fred { char w; int x; int y; long z; }; union fred var1; union fred *varptr; int main() { var1.x= 65; printf("%d\n", 
var1.x); var1.x= 66; printf("%d\n", var1.x); printf("%d\n", var1.y); printf("The next two depend on the endian of the platform\n"); printf("%d\n", var1.w); printf("%d\n", var1.z); varptr= &var1; varptr->x= 67; printf("%d\n", varptr->x); printf("%d\n", varptr->y); return(0); } ================================================ FILE: 36_Break_Continue/tests/input63.c ================================================ int printf(char *fmt); enum fred { apple=1, banana, carrot, pear=10, peach, mango, papaya }; enum jane { aple=1, bnana, crrot, par=10, pech, mago, paaya }; enum fred var1; enum jane var2; enum fred var3; int main() { var1= carrot + pear + mango; printf("%d\n", var1); return(0); } ================================================ FILE: 36_Break_Continue/tests/input64.c ================================================ enum fred var3; ================================================ FILE: 36_Break_Continue/tests/input65.c ================================================ enum fred { x, y, z }; enum fred { a, b }; ================================================ FILE: 36_Break_Continue/tests/input66.c ================================================ enum fred { x, y, z }; enum mary { a, b, z }; ================================================ FILE: 36_Break_Continue/tests/input67.c ================================================ int printf(char *fmt); typedef int FOO; FOO var1; struct bar { int x; int y} ; typedef struct bar BAR; BAR var2; int main() { var1= 5; printf("%d\n", var1); var2.x= 7; var2.y= 10; printf("%d\n", var2.x + var2.y); return(0); } ================================================ FILE: 36_Break_Continue/tests/input68.c ================================================ typedef int FOO; typedef char FOO; ================================================ FILE: 36_Break_Continue/tests/input69.c ================================================ typedef int FOO; FLOO y; ================================================ FILE: 
36_Break_Continue/tests/input70.c ================================================ #include typedef int FOO; int main() { FOO x; x= 56; printf("%d\n", x); return(0); } ================================================ FILE: 36_Break_Continue/tests/input71.c ================================================ #include int main() { int x; x = 0; while (x < 100) { if (x == 5) { x = x + 2; continue; } printf("%d\n", x); if (x == 14) { break; } x = x + 1; } printf("Done\n"); return (0); } ================================================ FILE: 36_Break_Continue/tests/input72.c ================================================ int main() { break; } ================================================ FILE: 36_Break_Continue/tests/input73.c ================================================ int main() { continue; } ================================================ FILE: 36_Break_Continue/tests/mktests ================================================ #!/bin/sh # Make the output files for each test # Build our compiler if needed if [ ! -f ../cwj ] then (cd ..; make install) fi for i in input*c do if [ ! -f "out.$i" -a ! -f "err.$i" ] then ../cwj -o out $i 2> "err.$i" # If the err file is empty if [ ! 
-s "err.$i" ] then rm -f "err.$i" cc -o out $i ../lib/printint.c ./out > "out.$i" fi fi rm -f out out.s done ================================================ FILE: 36_Break_Continue/tests/out.input01.c ================================================ 36 10 25 ================================================ FILE: 36_Break_Continue/tests/out.input02.c ================================================ 17 ================================================ FILE: 36_Break_Continue/tests/out.input03.c ================================================ 1 2 3 4 5 ================================================ FILE: 36_Break_Continue/tests/out.input04.c ================================================ 1 1 1 1 1 1 1 1 1 ================================================ FILE: 36_Break_Continue/tests/out.input05.c ================================================ 6 ================================================ FILE: 36_Break_Continue/tests/out.input06.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 36_Break_Continue/tests/out.input07.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 36_Break_Continue/tests/out.input08.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 36_Break_Continue/tests/out.input09.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 36_Break_Continue/tests/out.input10.c ================================================ 20 10 1 2 3 4 5 253 254 255 0 1 ================================================ FILE: 36_Break_Continue/tests/out.input11.c ================================================ 10 20 30 1 2 3 4 5 253 254 255 0 1 2 3 1 2 3 4 5 ================================================ FILE: 36_Break_Continue/tests/out.input12.c 
================================================ 5 ================================================ FILE: 36_Break_Continue/tests/out.input13.c ================================================ 23 56 ================================================ FILE: 36_Break_Continue/tests/out.input14.c ================================================ 10 20 30 ================================================ FILE: 36_Break_Continue/tests/out.input15.c ================================================ 18 18 12 12 ================================================ FILE: 36_Break_Continue/tests/out.input16.c ================================================ 12 18 ================================================ FILE: 36_Break_Continue/tests/out.input17.c ================================================ 19 12 ================================================ FILE: 36_Break_Continue/tests/out.input18.c ================================================ 34 34 ================================================ FILE: 36_Break_Continue/tests/out.input18a.c ================================================ 15 16 ================================================ FILE: 36_Break_Continue/tests/out.input19.c ================================================ 30 ================================================ FILE: 36_Break_Continue/tests/out.input20.c ================================================ 12 ================================================ FILE: 36_Break_Continue/tests/out.input21.c ================================================ 10 Hello world ================================================ FILE: 36_Break_Continue/tests/out.input22.c ================================================ 12 12 12 13 13 13 13 13 13 35 35 35 ================================================ FILE: 36_Break_Continue/tests/out.input23.c ================================================ -23 100 -2 0 1 0 13 14 Hello world ================================================ FILE: 36_Break_Continue/tests/out.input24.c 
================================================ 2 59 57 8 7 ================================================ FILE: 36_Break_Continue/tests/out.input25.c ================================================ 10 20 30 5 15 25 ================================================ FILE: 36_Break_Continue/tests/out.input26.c ================================================ 13 23 34 44 54 64 74 84 94 95 96 ================================================ FILE: 36_Break_Continue/tests/out.input27.c ================================================ 1 2 3 4 5 6 7 8 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 ================================================ FILE: 36_Break_Continue/tests/out.input28.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 36_Break_Continue/tests/out.input29.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 36_Break_Continue/tests/out.input30.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input30.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 36_Break_Continue/tests/out.input53.c ================================================ Hello world, 23 ================================================ FILE: 36_Break_Continue/tests/out.input54.c ================================================ Hello world, 0 Hello world, 1 Hello world, 2 Hello world, 3 Hello world, 4 Hello world, 5 Hello world, 6 Hello world, 7 Hello world, 8 Hello world, 9 Hello world, 10 Hello world, 11 Hello world, 12 Hello world, 13 Hello world, 14 Hello world, 15 Hello world, 16 Hello world, 17 Hello 
world, 18 Hello world, 19 ================================================ FILE: 36_Break_Continue/tests/out.input55.c ================================================ Hello world Argument 0 is ./out ================================================ FILE: 36_Break_Continue/tests/out.input58.c ================================================ 12 99 4005 4116 4116 ================================================ FILE: 36_Break_Continue/tests/out.input62.c ================================================ 65 66 66 The next two depend on the endian of the platform 66 66 67 67 ================================================ FILE: 36_Break_Continue/tests/out.input63.c ================================================ 25 ================================================ FILE: 36_Break_Continue/tests/out.input67.c ================================================ 5 17 ================================================ FILE: 36_Break_Continue/tests/out.input70.c ================================================ 56 ================================================ FILE: 36_Break_Continue/tests/out.input71.c ================================================ 0 1 2 3 4 7 8 9 10 11 12 13 14 Done ================================================ FILE: 36_Break_Continue/tests/runtests ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! -f ../cwj ] then (cd ..; make install) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" 
# Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../cwj -o out $i ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../cwj $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.s "trial.$i" done ================================================ FILE: 36_Break_Continue/tests/runtestsn ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! -f ../compn ] then (cd ..; make install) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" # Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../compn -o out $i ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. 
Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../compn $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.o out.s "trial.$i" done ================================================ FILE: 36_Break_Continue/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->type = type; n->left = left; n->mid = mid; n->right = right; n->sym = sym; n->intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, struct symtable *sym, int intvalue) { return (mkastnode(op, type, NULL, NULL, NULL, sym, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, struct symtable *sym, int intvalue) { return (mkastnode(op, type, left, NULL, NULL, sym, intvalue)); } // Generate and return a new label number // just for AST dumping purposes static int gendumplabel(void) { static int id = 1; return (id++); } // Given an AST tree, print it out and follow the // traversal of the tree that genAST() follows void dumpAST(struct ASTnode *n, int label, int level) { int Lfalse, Lstart, Lend; switch (n->op) { case A_IF: Lfalse = gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_IF"); if (n->right) { Lend = gendumplabel(); fprintf(stdout, ", end L%d", Lend); } fprintf(stdout, "\n"); dumpAST(n->left, Lfalse, level + 2); 
dumpAST(n->mid, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); return; case A_WHILE: Lstart = gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_WHILE, start L%d\n", Lstart); Lend = gendumplabel(); dumpAST(n->left, Lend, level + 2); dumpAST(n->right, NOLABEL, level + 2); return; } // Reset level to -2 for A_GLUE if (n->op == A_GLUE) level = -2; // General AST node handling if (n->left) dumpAST(n->left, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); for (int i = 0; i < level; i++) fprintf(stdout, " "); switch (n->op) { case A_GLUE: fprintf(stdout, "\n\n"); return; case A_FUNCTION: fprintf(stdout, "A_FUNCTION %s\n", n->sym->name); return; case A_ADD: fprintf(stdout, "A_ADD\n"); return; case A_SUBTRACT: fprintf(stdout, "A_SUBTRACT\n"); return; case A_MULTIPLY: fprintf(stdout, "A_MULTIPLY\n"); return; case A_DIVIDE: fprintf(stdout, "A_DIVIDE\n"); return; case A_EQ: fprintf(stdout, "A_EQ\n"); return; case A_NE: fprintf(stdout, "A_NE\n"); return; case A_LT: fprintf(stdout, "A_LE\n"); return; case A_GT: fprintf(stdout, "A_GT\n"); return; case A_LE: fprintf(stdout, "A_LE\n"); return; case A_GE: fprintf(stdout, "A_GE\n"); return; case A_INTLIT: fprintf(stdout, "A_INTLIT %d\n", n->intvalue); return; case A_STRLIT: fprintf(stdout, "A_STRLIT rval label L%d\n", n->intvalue); return; case A_IDENT: if (n->rvalue) fprintf(stdout, "A_IDENT rval %s\n", n->sym->name); else fprintf(stdout, "A_IDENT %s\n", n->sym->name); return; case A_ASSIGN: fprintf(stdout, "A_ASSIGN\n"); return; case A_WIDEN: fprintf(stdout, "A_WIDEN\n"); return; case A_RETURN: fprintf(stdout, "A_RETURN\n"); return; case A_FUNCCALL: fprintf(stdout, "A_FUNCCALL %s\n", n->sym->name); return; case A_ADDR: fprintf(stdout, "A_ADDR %s\n", n->sym->name); return; case A_DEREF: if (n->rvalue) fprintf(stdout, "A_DEREF rval\n"); else fprintf(stdout, "A_DEREF\n"); return; case A_SCALE: fprintf(stdout, "A_SCALE %d\n", n->size); return; 
case A_PREINC: fprintf(stdout, "A_PREINC %s\n", n->sym->name); return; case A_PREDEC: fprintf(stdout, "A_PREDEC %s\n", n->sym->name); return; case A_POSTINC: fprintf(stdout, "A_POSTINC\n"); return; case A_POSTDEC: fprintf(stdout, "A_POSTDEC\n"); return; case A_NEGATE: fprintf(stdout, "A_NEGATE\n"); return; default: fatald("Unknown dumpAST operator", n->op); } } ================================================ FILE: 36_Break_Continue/types.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Types and type handling // Copyright (c) 2019 Warren Toomey, GPL3 // Return true if a type is an int type // of any size, false otherwise int inttype(int type) { return ((type & 0xf) == 0); } // Return true if a type is of pointer type int ptrtype(int type) { return ((type & 0xf) != 0); } // Given a primitive type, return // the type which is a pointer to it int pointer_to(int type) { if ((type & 0xf) == 0xf) fatald("Unrecognised in pointer_to: type", type); return (type + 1); } // Given a primitive pointer type, return // the type which it points to int value_at(int type) { if ((type & 0xf) == 0x0) fatald("Unrecognised in value_at: type", type); return (type - 1); } // Given a type and a composite type pointer, return // the size of this type in bytes int typesize(int type, struct symtable *ctype) { if (type == P_STRUCT || type == P_UNION) return (ctype->size); return (genprimsize(type)); } // Given an AST tree and a type which we want it to become, // possibly modify the tree by widening or scaling so that // it is compatible with this type. Return the original tree // if no changes occurred, a modified tree, or NULL if the // tree is not compatible with the given type. // If this will be part of a binary operation, the AST op is not zero. 
struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op) { int ltype; int lsize, rsize; ltype = tree->type; // XXX No idea on these yet if (ltype == P_STRUCT || ltype == P_UNION) fatal("Don't know how to do this yet"); if (rtype == P_STRUCT || rtype == P_UNION) fatal("Don't know how to do this yet"); // Compare scalar int types if (inttype(ltype) && inttype(rtype)) { // Both types same, nothing to do if (ltype == rtype) return (tree); // Get the sizes for each type lsize = typesize(ltype, NULL); // XXX Fix soon rsize = typesize(rtype, NULL); // XXX Fix soon // Tree's size is too big if (lsize > rsize) return (NULL); // Widen to the right if (rsize > lsize) return (mkastunary(A_WIDEN, rtype, tree, NULL, 0)); } // For pointers on the left if (ptrtype(ltype)) { // OK is same type on right and not doing a binary op if (op == 0 && ltype == rtype) return (tree); } // We can scale only on A_ADD or A_SUBTRACT operation if (op == A_ADD || op == A_SUBTRACT) { // Left is int type, right is pointer type and the size // of the original type is >1: scale the left if (inttype(ltype) && ptrtype(rtype)) { rsize = genprimsize(value_at(rtype)); if (rsize > 1) return (mkastunary(A_SCALE, rtype, tree, NULL, rsize)); else return (tree); // Size 1, no need to scale } } // If we get here, the types are not compatible return (NULL); } ================================================ FILE: 37_Switch/Makefile ================================================ # Define the location of the include directory # and the location to install the compiler binary INCDIR=/tmp/include BINDIR=/tmp HSRCS= data.h decl.h defs.h SRCS= cg.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c ARMSRCS= cg_arm.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c cwj: $(SRCS) $(HSRCS) cc -o cwj -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCS) compn: $(SRCN) $(HSRCS) cc -D__NASM__ -o compn -g 
-Wall -DINCDIR=\"$(INCDIR)\" $(SRCN) cwjarm: $(ARMSRCS) $(HSRCS) cc -o cwjarm -g -Wall $(ARMSRCS) cp cwjarm cwj install: cwj mkdir -p $(INCDIR) rsync -a include/. $(INCDIR) cp cwj $(BINDIR) chmod +x $(BINDIR)/cwj installn: compn mkdir -p $(INCDIR) rsync -a include/. $(INCDIR) cp compn $(BINDIR) chmod +x $(BINDIR)/compn clean: rm -f cwj cwjarm compn *.o *.s out a.out test: install tests/runtests (cd tests; chmod +x runtests; ./runtests) armtest: cwjarm tests/runtests (cd tests; chmod +x runtests; ./runtests) testn: installn tests/runtestsn (cd tests; chmod +x runtestsn; ./runtestsn) ================================================ FILE: 37_Switch/Readme.md ================================================ # Part 37: Switch Statements In this part of our compiler writing journey, we are going to implement the 'switch' statement. This is really tricky for several reasons which I'll cover. So let's start with an example and look at the implications. ## An Example Switch Statement ```c switch(x) { case 1: printf("One\n"); break; case 2: printf("Two\n"); break; case 3: printf("Three\n"); default: printf("More than two\n"); } ``` This is like a multi-way 'if' statement where the value of `x` chooses the branch to take. However, we need to insert the `break` statement to bypass all the other branches; if we leave out the `break` statement, the branch that we are in falls through and continues on with the execution of the next branch. The expression for the 'switch' decision has to be integer, and all of the case options have to be integer literals. We can't say `case 3*y+17`, for example. The `default` case catches all values that are not given by previous cases. It has to appear as the last case in the list. Also, we cannot have duplicate case values, so `case 2: ...; case 2` is not permitted. ## Converting the Above into Assembly One way to translate a 'switch' statement into assembly is to treat it as a multi-way 'if' statement. 
This would mean that we would compare `x` against integer values, one after the other, and go into or skip over sections of assembly code as required. This would work but it makes the assembly code inefficient, especially if you consider this example: ```c switch (2 * x - (18 +y)/z) { ... } ``` Given our current "[KISS](https://en.wikipedia.org/wiki/KISS_principle)" compiler's operation, we would have to evaluate the expression again and again for each comparison against a literal value. It makes more sense to evaluate the 'switch' expression once. Then, compare this value against a table of case literal values. When we find a match, we jump to the code branch associated with the case values. This is known as a [jump table](https://en.wikipedia.org/wiki/Branch_table). It means that, for each case option, we will need to create a label to place at the beginning of the code for this option. As an example, the jump table for the first example above might look like: | Case Value | Label | |:----------:|:-----:| | 1 | L18 | | 2 | L19 | | 3 | L22 | | default | L26 | We also need a label to mark the code after the 'switch' statement. When one code branch wants to `break;`, we jump to this 'switch' end label. Otherwise, we let the code branch fall through into the next code branch. ## Parsing Implications All of the above is fine and good, except that we have to parse a 'switch' statement from top to bottom. This means that we won't know how big the jump table should be until after we have parsed all of the cases. This also means that, unless we perform some clever tricks, we will have generated the assembly code for all the cases *before* we can generate the jump table. As you know, I'm writing this compiler following the "KISS principle": keep it simple, stupid! So I am avoiding the clever tricks, but this means that, yes, we are going to delay the output of the jump table until after we generate all of the assembly code for the various cases. 
Visually, here is how we are going to lay out our code: ![](Figs/switch_logic.png) The code to evaluate the switch decision is at the top, as we parse it first. We don't want to continue on into the first case, so we can jump to a label which we will output later. Then we parse each case statement and generate the corresponding assembly code. We will have already generated an "end of switch" label, so we can jump to it. Again, we will output this label later. As we generate each case, we get a label for it and output this label. Once all the cases and the default case (if any) are output, we can now generate the jump table. But now we need some code to walk the jump table, compare the switch decision against each case value, and jump appropriately. We could generate this assembly code for each and every 'switch' statement but, if this jump handling code is large, we will be wasting memory. It's better to have one copy of the jump handling code in memory, but now we have to jump to it! Even worse, this code doesn't know which register holds the switch decision result, so we will have to copy this register into a known register, and copy the base of the jump table into a known register. What we have done here is trade off complexity in the parsing and code generation for a spaghetti of assembly code with jumps all over the place. Well, the CPU can deal with the jump spaghetti, so for now it's a fair tradeoff. Obviously, a production compiler would do things differently. The red lines in the diagram show the flow of execution from the switch decision to loading the registers to the jump table handling and finally to the specific case code. The green line shows that the base address of the jump table is passed to the jump table handling code. Finally, the blue lines show that the case ended with a `break;` which jumped to the end of the switch assembly code. So, the assembly output is ugly but it does work. 
Now that we've seen how we are going to implement 'switch' statements, let's actually do it. ## New Keywords and Tokens We have two new tokens, T_CASE and T_DEFAULT, to go along with the new `case` and `default` keywords. As always, browse the code to see how this is done. ## New AST Node Types We need to build the AST tree to represent 'switch' statements. The structure of a 'switch' statement is in no way a binary tree like our expressions. But it is *our* AST tree, so we can shape it any way that suits us. So I sat down for a bit and decided to go with this structure: ![](Figs/switch_ast.png) The root of the 'switch' tree is A_SWITCH. On the left is the sub-tree with the expression that calculates the switch's condition. On the right we have a linked list of A_CASE nodes, one for each case. Finally, there is an optional A_DEFAULT to capture any default case. The `intvalue` field in each A_CASE node will hold the case value which the expression must match. The left child sub-tree will hold the details of the compound statement which is the case's body. At this point, we don't have any jump labels or the jump table: we will generate this later. ## Parsing the Switch Statement With all the above on-board, we're now ready to look at the parsing of a 'switch' statement. There is quite a lot of error checking code here, so I will take it in small sections. This code is in `stmt.c` and is called from `single_statement()`: ```c case T_SWITCH: return (switch_statement()); ``` Let's go.. 
```c // Parse a switch statement and return its AST static struct ASTnode *switch_statement(void) { struct ASTnode *left, *n, *c, *casetree= NULL, *casetail; int inloop=1, casecount=0; int seendefault=0; int ASTop, casevalue; // Skip the 'switch' and '(' scan(&Token); lparen(); // Get the switch expression, the ')' and the '{' left= binexpr(0); rparen(); lbrace(); // Ensure that this is of int type if (!inttype(left->type)) fatal("Switch expression is not of integer type"); ``` OK, so there's a lot of local variables at the top which should clue you in that we will have to deal with some state in this function. This first section is easy, though: parse the `switch (expression) {` syntax, get the AST for the expression and ensure that it is of integer type. ```c // Build an A_SWITCH subtree with the expression as // the child n= mkastunary(A_SWITCH, 0, left, NULL, 0); // Now parse the cases Switchlevel++; ``` We've got the switch decision tree, so we can now build the A_SWITCH node which we will return. Do you remember that we could only let a `break;` occur when we are inside at least one loop? Well, now we also have to let `break;` happen when there is at least one 'switch' statement. Thus, there is a new global variable, `Switchlevel` to record this. ```c // Now parse the cases Switchlevel++; while (inloop) { switch(Token.token) { // Leave the loop when we hit a '}' case T_RBRACE: if (casecount==0) fatal("No cases in switch"); inloop=0; break; ... } ``` The loop is controlled by `inloop` which starts at one. When we hit a '}' token, we reset it to zero and break out of this 'switch' statement, thus ending the loop. We also check that we have seen at least one case. > It's a bit weird using a 'switch' statement to parse 'switch' statements. 
Now we move on to the parsing of `case` and `default`: ```c case T_CASE: case T_DEFAULT: // Ensure this isn't after a previous 'default' if (seendefault) fatal("case or default after existing default"); ``` We have a lot of common code to perform, so both tokens fall into the same code. First, ensure that we haven't already seen a default case, as this has to be the last case in the series. ```c // Set the AST operation. Scan the case value if required if (Token.token==T_DEFAULT) { ASTop= A_DEFAULT; seendefault= 1; scan(&Token); } else ... ``` If we are parsing `default:`, then there is no following integer value. Skip over the keyword and record that we have seen a default case. ```c } else { ASTop= A_CASE; scan(&Token); left= binexpr(0); // Ensure the case value is an integer literal if (left->op != A_INTLIT) fatal("Expecting integer literal for case value"); casevalue= left->intvalue; // Walk the list of existing case values to ensure // that there isn't a duplicate case value for (c= casetree; c != NULL; c= c -> right) if (casevalue == c->intvalue) fatal("Duplicate case value"); } ``` This code deals specifically with `case <value>:`. We read in the value after the case using `binexpr()`. Now, I could have been "clever" and called `primary()` instead which goes straight to parsing integer literals. However, `primary()` can call `binexpr()` anyway, so it really doesn't make any difference: we are still going to have to error check the resulting tree to ensure that it is an A_INTLIT node only. Then we walk the list of previous A_CASE nodes that we have (`casetree` points to the head of this list) to ensure that we don't have any duplicate case values. Along the way, we have set the `ASTop` variable to either A_CASE for a case with an integer literal value or A_DEFAULT for the default case. We can now perform the code common to both. 
```c // Scan the ':' and get the compound expression match(T_COLON, ":"); left= compound_statement(); casecount++; // Build a sub-tree with the compound statement as the left child // and link it in to the growing A_CASE tree if (casetree==NULL) { casetree= casetail= mkastunary(ASTop, 0, left, NULL, casevalue); } else { casetail->right= mkastunary(ASTop, 0, left, NULL, casevalue); casetail= casetail->right; } break; ``` Check that the next token is a ':'. Get the AST sub-tree with the compound statement in it. Build an A_CASE or A_DEFAULT node with this sub-tree as the left child, and link this to the linked list of A_CASE/A_DEFAULT nodes: `casetree` is the head and `casetail` is the tail of this list. ```c default: fatald("Unexpected token in switch", Token.token); } } ``` There should only be `case` and `default` keywords in the 'switch' body, so ensure that this is the case. ```c Switchlevel--; // We have a sub-tree with the cases and any default. Put the // case count into the A_SWITCH node and attach the case tree. n->intvalue= casecount; n->right= casetree; rbrace(); return(n); ``` We've finally parsed all of the cases and the default case, and we now have the count of them and the list which `casetree` points to. Add these values to the A_SWITCH node and return this as the final tree. OK, so that was a substantial amount of parsing. Now we need to turn our attention to code generation. ## Switch Code Generation: An Example. At this point I think it would be worth seeing the assembly output of an example 'switch' statement so that you can see how the code matches the graphic of execution flow that I gave at the top. Here is the example: ```c #include <stdio.h> int x; int y; int main() { switch(x) { case 1: { y= 5; break; } case 2: { y= 7; break; } case 3: { y= 9; } default: { y= 100; } } return(0); } ``` First up, yes we do need '{' ... '}' around the case bodies. 
This is because I still haven't solved the "dangling else" problem, so all compound statements have to be surrounded by '{' ... '}'. I'm going to leave out the jump table handling code for now, but here is the assembly output for this example: ![](Figs/switch_logic2.png) The code that loads `x` into a register is at the top, and it jumps down past the jump table. As the jump table handling code doesn't know which register this will be, we always load the value into `%rax`, and we load the jump table's base address into `%rdx`. The jump table itself has this structure: + First is the number of cases with integer values + Next is a set of value/label pairs, one for each case + Finally there is the label of the default case. If there is no default case, this has to be the label of the 'switch' end, so that we do no code if there is no matching case. The jump table handling code (which we will look at soon) interprets the jump table and jumps to one of the labels in this table. Let's assume that we have jumped to `L11` which is `case 2:`. We perform the code for this case option. This option has a `break;` statement, so there is a jump to `L9` which is the label for the end of the 'switch' statement. ## The Jump Table Handling Code You already know that x86-64 assembly code isn't my forte. Therefore, I've borrowed the jump table handling code directly from [SubC](http://www.t3x.org/subc/). I've added it to the `cgpreamble()` function in `cg.c`, so that it is output for every assembly file that we create. 
Here is the commented code: ``` # internal switch(expr) routine # %rdx = switch table, %rax = expr switch: pushq %rsi # Save %rsi movq %rdx,%rsi # Base of jump table -> %rsi movq %rax,%rbx # Switch value -> %rbx cld # Clear direction flag lodsq # Load count of cases into %rcx, movq %rax,%rcx # incrementing %rsi in the process next: lodsq # Get the case value into %rdx movq %rax,%rdx lodsq # and the label address into %rax cmpq %rdx,%rbx # Does switch value match the case? jnz no # No, jump over this code popq %rsi # Restore %rsi jmp *%rax # and jump to the chosen case no: loop next # Loop for the number of cases lodsq # Out of loop, load default label address popq %rsi # Restore %rsi jmp *%rax # and jump to the default case ``` We need to thank Nils Holm for writing this, as I would never have arrived at this code! Now we can look at how the above assembly code gets generated. Fortunately, we already have lots of useful functions in `cg.c` which we can reuse. ## Generating the Assembly Code In `genAST()` in `gen.c`, up near the top we identify an A_SWITCH node and call a function to deal with this node and the tree below it. ```c case A_SWITCH: return (genSWITCH(n)); ``` So let's look at this new function in stages: ```c // Generate the code for a SWITCH statement static int genSWITCH(struct ASTnode *n) { int *caseval, *caselabel; int Ljumptop, Lend; int i, reg, defaultlabel = 0, casecount = 0; struct ASTnode *c; // Create arrays for the case values and associated labels. // Ensure that we have at least one position in each array. caseval = (int *) malloc((n->intvalue + 1) * sizeof(int)); caselabel = (int *) malloc((n->intvalue + 1) * sizeof(int)); ``` The reason for the `+1` here is that we may have a default case which needs a label even though it doesn't have a case value. ```c // Generate labels for the top of the jump table, and the // end of the switch statement. Set a default label for // the end of the switch, in case we don't have a default. 
Ljumptop = genlabel(); Lend = genlabel(); defaultlabel = Lend; ``` These labels are made but not output as assembly yet. Until we have a default label, we set it to `Lend`. ```c // Output the code to calculate the switch condition reg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, 0); cgjump(Ljumptop); genfreeregs(); ``` We output the code to jump to the code after the jump table even though it hasn't been output. We can also free all the registers at this point. ```c // Walk the right-child linked list to // generate the code for each case for (i = 0, c = n->right; c != NULL; i++, c = c->right) { // Get a label for this case. Store it // and the case value in the arrays. // Record if it is the default case. caselabel[i] = genlabel(); caseval[i] = c->intvalue; cglabel(caselabel[i]); if (c->op == A_DEFAULT) defaultlabel = caselabel[i]; else casecount++; // Generate the case code. Pass in the end label for the breaks genAST(c->left, NOLABEL, NOLABEL, Lend, 0); genfreeregs(); } ``` This is the code that both generates the label for each case and also outputs the assembly code which is the body of the case. We store the case value and the case label in the two arrays. And, if this is the default case, we can update `defaultlabel` with the correct label. Also note that `genAST()` gets passed `Lend` which is the label after our 'switch' code. This allows any `break;` in the case body to jump out to what comes next. ```c // Ensure the last case jumps past the switch table cgjump(Lend); // Now output the switch table and the end label. cgswitch(reg, casecount, Ljumptop, caselabel, caseval, defaultlabel); cglabel(Lend); return (NOREG); } ``` We can't rely on the programmer to end their last case with a `break;` statement, so we force the last case to have a jump to the end of the switch statement. 
At this point we have: + the register which has the switch value + the array of case values + the array of case labels + the number of cases + some useful labels We pass all of these into `cgswitch()` in `cg.c`, and (apart from the code from SubC) this is the only new assembly code we need to introduce for this part. ## `cgswitch()` Here, we need to build the jump table and load the registers so that we can jump to the `switch` assembly code. As a reminder, here is the jump table structure: + First is the number of cases with integer values + Next is a set of value/label pairs, one for each case + Finally there is the label of the default case. If there is no default case, this has to be the label of the 'switch' end, so that we do no code if there is no matching case. For our example, the jump table looks like: ``` L14: # Switch jump table .quad 3 # Three case values .quad 1, L10 # case 1: jump to L10 .quad 2, L11 # case 2: jump to L11 .quad 3, L12 # case 3: jump to L12 .quad L13 # default: jump to L13 ``` Here is how we generate all of this. ```c // Generate a switch jump table and the code to // load the registers and call the switch() code void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; // Get a label for the switch table label = genlabel(); cglabel(label); ``` This is the `L14:` above. ```c // Heuristic. If we have no cases, create one case // which points to the default case if (casecount == 0) { caseval[0] = 0; caselabel[0] = defaultlabel; casecount = 1; } ``` We must have at least one case value/label pair in the jump table. This code makes one that points at the default case. The case value is irrelevant: if it matches, fine. If not, we jump to the default case anyway. ```c // Generate the switch jump table. 
fprintf(Outfile, "\t.quad\t%d\n", casecount); for (i = 0; i < casecount; i++) fprintf(Outfile, "\t.quad\t%d, L%d\n", caseval[i], caselabel[i]); fprintf(Outfile, "\t.quad\tL%d\n", defaultlabel); ``` Here is the code to generate the jump table. Nice and easy. ```c // Load the specific registers cglabel(toplabel); fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); fprintf(Outfile, "\tleaq\tL%d(%%rip), %%rdx\n", label); fprintf(Outfile, "\tjmp\tswitch\n"); } ``` Finally, load the `%rax` register with the switch value, load `%rdx` with the label of the jump table and call the `switch` code. ## Testing The Code I've augmented our example with a loop so that all cases in the 'switch' statement get tested. This is the file `tests/input74.c`: ```c #include <stdio.h> int main() { int x; int y; y= 0; for (x=0; x < 5; x++) { switch(x) { case 1: { y= 5; break; } case 2: { y= 7; break; } case 3: { y= 9; } default: { y= 100; } } printf("%d\n", y); } return(0); } ``` And here is the output from the program: ``` 100 5 7 100 100 ``` Note that the value 9 is not output, because we fall into the default case when we are doing case 3. ## Conclusion and What's Next We've just implemented our first really big new statement in our compiler, the 'switch' statement. As I've never done this before, I essentially followed the SubC implementation. There are many other, more efficient, ways to implement 'switch', but I applied the "KISS principle" here. That said, it still was quite a complicated implementation. If you are still reading at this point, congratulations on your staying power! I'm starting to get annoyed with the compulsory '{' ... '}' around all of our compound statements. So, in the next part of our compiler writing journey, I will bite the bullet and attempt to solve the "dangling else" problem. 
[Next step](../38_Dangling_Else/Readme.md) ================================================ FILE: 37_Switch/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\t.text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\t.data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch (type) { case P_CHAR: return (offset); case P_INT: case P_LONG: break; default: fatald("Bad type in calc_aligned_offset:", type); } // Here we have an int or a long. Align it on a 4-byte offset // I put the generic code here so it can be reused elsewhere. alignment = 4; offset = (offset + direction * (alignment - 1)) & ~(alignment - 1); return (offset); } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. 
static int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. // We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "%r10", "%r11", "%r12", "%r13", "%r9", "%r8", "%rcx", "%rdx", "%rsi", "%rdi" }; static char *breglist[] = { "%r10b", "%r11b", "%r12b", "%r13b", "%r9b", "%r8b", "%cl", "%dl", "%sil", "%dil" }; static char *dreglist[] = { "%r10d", "%r11d", "%r12d", "%r13d", "%r9d", "%r8d", "%ecx", "%edx", "%esi", "%edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) {
  // A non-zero entry means the register is already marked free
  if (freereg[reg] != 0)
    fatald("Error trying to free register", reg);
  freereg[reg] = 1;
}

// Print out the assembly preamble: mark all registers free, select
// the text segment and emit the shared "switch" jump-table routine.
// NOTE(review): the emitted header comment says %rsi holds the switch
// table, but the routine copies the table base from %rdx into %rsi.
// The routine also overwrites %rbx, %rcx and %rdx.
void cgpreamble() {
  freeall_registers();
  cgtextseg();
  fprintf(Outfile,
	  "# internal switch(expr) routine\n"
	  "# %%rsi = switch table, %%rax = expr\n"
	  "# from SubC: http://www.t3x.org/subc/\n"
	  "\n"
	  "switch:\n"
	  " pushq %%rsi\n"
	  " movq %%rdx, %%rsi\n"
	  " movq %%rax, %%rbx\n"
	  " cld\n"
	  " lodsq\n"
	  " movq %%rax, %%rcx\n"
	  "next:\n"
	  " lodsq\n"
	  " movq %%rax, %%rdx\n"
	  " lodsq\n"
	  " cmpq %%rdx, %%rbx\n"
	  " jnz no\n"
	  " popq %%rsi\n"
	  " jmp *%%rax\n"
	  "no:\n"
	  " loop next\n"
	  " lodsq\n"
	  " popq %%rsi\n"
	  " jmp *%%rax\n"
	  "\n");
}

// Nothing to do
void cgpostamble() {
}

// Print out a function preamble: emit the function label and frame
// setup, give every parameter and local a stack position, and lower
// the stack pointer by the 16-byte-rounded total size of the locals.
void cgfuncpreamble(struct symtable *sym) {
  char *name = sym->name;
  struct symtable *parm, *locvar;
  int cnt;
  int paramOffset = 16;		// Any pushed params start at this stack offset
  int paramReg = FIRSTPARAMREG;	// Index to the first param register in above reg lists

  // Output in the text segment, reset local offset
  cgtextseg();
  localOffset = 0;

  // Output the function start, save the %rbp and %rsp
  fprintf(Outfile,
	  "\t.globl\t%s\n"
	  "\t.type\t%s, @function\n"
	  "%s:\n"
	  "\tpushq\t%%rbp\n"
	  "\tmovq\t%%rsp, %%rbp\n", name, name, name);

  // Copy any in-register parameters to the stack, up to six of them
  // The remaining parameters are already on the stack
  for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) {
    if (cnt > 6) {
      // Seventh and later parameters: positive offsets, 8 bytes apart
      parm->posn = paramOffset;
      paramOffset += 8;
    } else {
      // paramReg walks down the register lists, one register per parameter
      parm->posn = newlocaloffset(parm->type);
      cgstorlocal(paramReg--, parm);
    }
  }

  // For the remainder, if they are a parameter then they are
  // already on the stack. If only a local, make a stack position.
  for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) {
    locvar->posn = newlocaloffset(locvar->type);
  }

  // Align the stack pointer to be a multiple of 16
  // less than its previous value
  stackOffset = (localOffset + 15) & ~15;
  fprintf(Outfile, "\taddq\t$%d,%%rsp\n", -stackOffset);
}

// Print out a function postamble: emit the function's end label,
// add stackOffset back to the stack pointer, restore %rbp and return
void cgfuncpostamble(struct symtable *sym) {
  cglabel(sym->endlabel);
  fprintf(Outfile, "\taddq\t$%d,%%rsp\n", stackOffset);
  fputs("\tpopq %rbp\n" "\tret\n", Outfile);
}

// Load an integer literal value into a register.
// Return the number of the register.
// For x86-64, we don't need to worry about the type.
int cgloadint(int value, int type) {
  // Get a new register
  int r = alloc_register();

  fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]);
  return (r);
}

// Load a value from a variable into a register.
// Return the number of the register. If the
// operation is pre- or post-increment/decrement,
// also perform this action.
// Pre-operations are emitted before the load and post-operations
// after it, so the register holds the value the expression yields.
int cgloadglob(struct symtable *sym, int op) {
  // Get a new register
  int r = alloc_register();

  // 8-byte types use the quadword instructions
  if (cgprimsize(sym->type) == 8) {
    if (op == A_PREINC)
      fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name);
    if (op == A_PREDEC)
      fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name);
    fprintf(Outfile, "\tmovq\t%s(%%rip), %s\n", sym->name, reglist[r]);
    if (op == A_POSTINC)
      fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name);
    if (op == A_POSTDEC)
      fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name);
  } else
    // Print out the code to initialise it
    switch (sym->type) {
      case P_CHAR:
	// Byte-sized: loaded with zero extension (movzbq)
	if (op == A_PREINC)
	  fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name);
	if (op == A_PREDEC)
	  fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name);
	fprintf(Outfile, "\tmovzbq\t%s(%%rip), %s\n", sym->name, reglist[r]);
	if (op == A_POSTINC)
	  fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name);
	if (op == A_POSTDEC)
	  fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name);
	break;
      case P_INT:
	// 4-byte: loaded with sign extension (movslq)
	if (op == A_PREINC)
	  fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name);
	if (op == A_PREDEC)
	  fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name);
	fprintf(Outfile, "\tmovslq\t%s(%%rip), %s\n", sym->name, reglist[r]);
	if (op == A_POSTINC)
	  fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name);
	if (op == A_POSTDEC)
	  fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name);
	break;
      default:
	fatald("Bad type in cgloadglob:", sym->type);
    }
  return (r);
}

// Load a value from a local variable into a register.
// Return the number of the register. If the
// operation is pre- or post-increment/decrement,
// also perform this action.
// The variable is addressed as sym->posn relative to %rbp.
int cgloadlocal(struct symtable *sym, int op) {
  // Get a new register
  int r = alloc_register();

  // Print out the code to initialise it
  if (cgprimsize(sym->type) == 8) {
    if (op == A_PREINC)
      fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->posn);
    if (op == A_PREDEC)
      fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->posn);
    fprintf(Outfile, "\tmovq\t%d(%%rbp), %s\n", sym->posn, reglist[r]);
    if (op == A_POSTINC)
      fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->posn);
    if (op == A_POSTDEC)
      fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->posn);
  } else
    switch (sym->type) {
      case P_CHAR:
	// Byte-sized: loaded with zero extension (movzbq)
	if (op == A_PREINC)
	  fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->posn);
	if (op == A_PREDEC)
	  fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->posn);
	fprintf(Outfile, "\tmovzbq\t%d(%%rbp), %s\n", sym->posn, reglist[r]);
	if (op == A_POSTINC)
	  fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->posn);
	if (op == A_POSTDEC)
	  fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->posn);
	break;
      case P_INT:
	// 4-byte: loaded with sign extension (movslq)
	if (op == A_PREINC)
	  fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->posn);
	if (op == A_PREDEC)
	  fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->posn);
	fprintf(Outfile, "\tmovslq\t%d(%%rbp), %s\n", sym->posn, reglist[r]);
	if (op == A_POSTINC)
	  fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->posn);
	if (op == A_POSTDEC)
	  fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->posn);
	break;
      default:
	fatald("Bad type in cgloadlocal:", sym->type);
    }
  return (r);
}

// Given the label number of a global string,
// load its address into a new
register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tleaq\tL%d(%%rip), %s\n", label, reglist[r]); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tandq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\torq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txorq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshlq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshrq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tnegq\t%s\n", reglist[r]); return (r); } 
// Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnotq\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); return (r); } // Convert an integer value to a boolean value. Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s@PLT\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\taddq\t$%d, %%rsp\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpushq\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmovq\t%s, %s\n", reglist[r], reglist[FIRSTPARAMREG - argposn + 1]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsalq\t$%d, %s\n", val, reglist[r]); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %s(%%rip)\n", reglist[r], sym->name); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %s(%%rip)\n", breglist[r], sym->name); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %s(%%rip)\n", dreglist[r], sym->name); break; default: fatald("Bad type in cgstorglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %d(%%rbp)\n", reglist[r], sym->posn); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %d(%%rbp)\n", breglist[r], sym->posn); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %d(%%rbp)\n", dreglist[r], sym->posn); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the type size = typesize(node->type, node->ctype); // Generate the global identity and the label cgdataseg(); fprintf(Outfile, "\t.globl\t%s\n", node->name); fprintf(Outfile, "%s:", node->name); // Generate the space for this type switch (size) { case 1: fprintf(Outfile, "\t.byte\t0\n"); break; case 4: fprintf(Outfile, "\t.long\t0\n"); break; case 8: fprintf(Outfile, "\t.quad\t0\n"); 
break; default: for (int i = 0; i < size; i++) fprintf(Outfile, "\t.byte\t0\n"); } } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\t.byte\t%d\n", *cptr); } fprintf(Outfile, "\t.byte\t0\n"); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzbl\t%s, %%eax\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %%eax\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } cgjump(sym->endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL) fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", sym->name, reglist[r]); else fprintf(Outfile, "\tleaq\t%d(%%rbp), %s\n", sym->posn, reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzbq\t(%s), %s\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovslq\t(%s), %s\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { // Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmovb\t%s, (%s)\n", breglist[r1], reglist[r2]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } // Generate a switch jump table and the code to // load the registers and call the switch() code void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; // Get a label for the switch table label = genlabel(); cglabel(label); // Heuristic. If we have no cases, create one case // which points to the default case if (casecount == 0) { caseval[0] = 0; caselabel[0] = defaultlabel; casecount = 1; } // Generate the switch jump table. 
fprintf(Outfile, "\t.quad\t%d\n", casecount); for (i = 0; i < casecount; i++) fprintf(Outfile, "\t.quad\t%d, L%d\n", caseval[i], caselabel[i]); fprintf(Outfile, "\t.quad\tL%d\n", defaultlabel); // Load the specific registers cglabel(toplabel); fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); fprintf(Outfile, "\tleaq\tL%d(%%rip), %%rdx\n", label); fprintf(Outfile, "\tjmp\tswitch\n"); } ================================================ FILE: 37_Switch/cg_arm.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for ARMv6 on Raspberry Pi // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. static int freereg[4]; static char *reglist[4] = { "r4", "r5", "r6", "r7" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // We have to store large integer literal values in memory. // Keep a list of them which will be output in the postamble #define MAXINTS 1024 int Intlist[MAXINTS]; static int Intslot = 0; // Determine the offset of a large integer // literal from the .L3 label. If the integer // isn't in the list, add it. 
static void set_int_offset(int val) { int offset = -1; // See if it is already there for (int i = 0; i < Intslot; i++) { if (Intlist[i] == val) { offset = 4 * i; break; } } // Not in the list, so add it if (offset == -1) { offset = 4 * Intslot; if (Intslot == MAXINTS) fatal("Out of int slots in set_int_offset()"); Intlist[Intslot++] = val; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L3+%d\n", offset); } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Print out the assembly postamble void cgpostamble() { // Print out the global variables fprintf(Outfile, ".L2:\n"); for (int i = 0; i < Globs; i++) { if (Symtable[i].stype == S_VARIABLE) fprintf(Outfile, "\t.word %s\n", Symtable[i].name); } // Print out the integer literals fprintf(Outfile, ".L3:\n"); for (int i = 0; i < Intslot; i++) { fprintf(Outfile, "\t.word %d\n", Intlist[i]); } } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, \%%function\n" "%s:\n" "\tpush\t{fp, lr}\n" "\tadd\tfp, sp, #4\n" "\tsub\tsp, sp, #8\n" "\tstr\tr0, [fp, #-8]\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Symtable[id].endlabel); fputs("\tsub\tsp, fp, #4\n" "\tpop\t{fp, pc}\n" "\t.align\t2\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); // If the literal value is small, do it with one instruction if (value <= 1000) fprintf(Outfile, "\tmov\t%s, #%d\n", reglist[r], value); else { set_int_offset(value); fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); } return (r); } // Determine the offset of a variable from the .L2 // label. Yes, this is inefficient code. static void set_var_offset(int id) { int offset = 0; // Walk the symbol table up to id. 
// Find S_VARIABLEs and add on 4 until // we get to our variable for (int i = 0; i < id; i++) { if (Symtable[i].stype == S_VARIABLE) offset += 4; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L2+%d\n", offset); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tldrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s, %s\n", reglist[r1], reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\tmul\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { // To do a divide: r0 holds the dividend, r1 holds the divisor. // The quotient is in r0. 
fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r1]); fprintf(Outfile, "\tmov\tr1, %s\n", reglist[r2]); fprintf(Outfile, "\tbl\t__aeabi_idiv\n"); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r1]); free_register(r2); return (r1); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]); fprintf(Outfile, "\tbl\t%s\n", Symtable[id].name); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r]); return (r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tlsl\t%s, %s, #%d\n", reglist[r], reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tstr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgstorglob:", Symtable[id].type); } return (r); } // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { if (ptrtype(type)) return (4); switch (type) { case P_CHAR: return (1); case P_INT: case P_LONG: return (4); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Symtable[id].type); fprintf(Outfile, "\t.data\n" "\t.globl\t%s\n", Symtable[id].name); switch (typesize) { case 1: fprintf(Outfile, "%s:\t.byte\t0\n", Symtable[id].name); break; case 4: fprintf(Outfile, "%s:\t.long\t0\n", Symtable[id].name); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "moveq", "movne", "movlt", "movgt", "movle", "movge" }; // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "movne", "moveq", "movge", "movle", "movgt", "movlt" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #1\n", cmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #0\n", invcmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\tuxtb\t%s, %s\n", reglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tb\tL%d\n", l); } // List of inverted branch instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *brlist[] = { "bne", "beq", "bge", "ble", "bgt", "blt" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", brlist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[reg]); cgjump(Symtable[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. Return a new register int cgaddress(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); fprintf(Outfile, "\tmov\t%s, r3\n", reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tldrb\t%s, [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [%s]\n", reglist[r1], reglist[r2]); break; case P_INT: case P_LONG: fprintf(Outfile, "\tstr\t%s, [%s]\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 37_Switch/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { no_seg, 
text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\tsection .text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\tsection .data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch (type) { case P_CHAR: return (offset); case P_INT: case P_LONG: break; default: fatald("Bad type in calc_aligned_offset:", type); } // Here we have an int or a long. Align it on a 4-byte offset // I put the generic code here so it can be reused elsewhere. alignment = 4; offset = (offset + direction * (alignment - 1)) & ~(alignment - 1); return (offset); } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. 
// We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "r10", "r11", "r12", "r13", "r9", "r8", "rcx", "rdx", "rsi", "rdi" }; static char *breglist[] = { "r10b", "r11b", "r12b", "r13b", "r9b", "r8b", "cl", "dl", "sil", "dil" }; static char *dreglist[] = { "r10d", "r11d", "r12d", "r13d", "r9d", "r8d", "ecx", "edx", "esi", "edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\textern\tprintint\n", Outfile); fputs("\textern\tprintchar\n", Outfile); fputs("\textern\topen\n", Outfile); fputs("\textern\tclose\n", Outfile); fputs("\textern\tread\n", Outfile); fputs("\textern\twrite\n", Outfile); fputs("\textern\tprintf\n", Outfile); cgtextseg(); fprintf(Outfile, "; internal switch(expr) routine\n" "; rsi = switch table, rax = expr\n" "; from SubC: http://www.t3x.org/subc/\n" "\n" "switch:\n" " push rsi\n" " mov rsi, rdx\n" " mov rbx, rax\n" " cld\n" " lodsq\n" " mov rcx, rax\n" "next:\n" " lodsq\n" " mov rdx, rax\n" " lodsq\n" " cmp rbx, rdx\n" " jnz no\n" " pop rsi\n" " jmp rax\n" "no:\n" " loop next\n" " lodsq\n" " pop rsi\n" " jmp rax\n" "\n"); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(struct symtable *sym) { char *name = sym->name; struct symtable *parm, *locvar; int cnt; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the rsp and rbp fprintf(Outfile, "\tglobal\t%s\n" "%s:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n", name, name); // Copy any in-register parameters to the stack, up to six of them // The remaining parameters are already on the stack for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) { if (cnt > 6) { parm->posn = paramOffset; paramOffset += 8; } else { parm->posn = newlocaloffset(parm->type); cgstorlocal(paramReg--, parm); } } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. 
for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) { locvar->posn = newlocaloffset(locvar->type); } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\tadd\trsp, %d\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->endlabel); fprintf(Outfile, "\tadd\trsp, %d\n", stackOffset); fputs("\tpop rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadglob(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); } else // Print out the code to initialise it switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, 
"\tdec\tdword [%s]\n", sym->name); fprintf(Outfile, "\tmovsx\t%s, word [%s]\n", dreglist[r], sym->name); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword [%s]\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadlocal(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->posn); fprintf(Outfile, "\tmov\t%s, [rbp+%d]\n", reglist[r], sym->posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->posn); } else switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->posn); fprintf(Outfile, "\tmovzx\t%s, byte [rbp+%d]\n", reglist[r], sym->posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", sym->posn); fprintf(Outfile, "\tmovsx\t%s, dword [rbp+%d]\n", reglist[r], sym->posn); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, 
"\tdec\tdword\t[rbp+%d]\n", sym->posn); break; default: fatald("Bad type in cgloadlocal:", sym->type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, L%d\n", reglist[r], label); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tand\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\tor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshl\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshr\t%s, 
cl\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tneg\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnot\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r], breglist[r]); return (r); } // Convert an integer value to a boolean value. Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, byte %s\n", reglist[r], breglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\tadd\trsp, %d\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmov\t%s, rax\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpush\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmov\t%s, %s\n", reglist[FIRSTPARAMREG - argposn + 1], reglist[r]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsal\t%s, %d\n", reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, dreglist[r]); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\tqword\t[rbp+%d], %s\n", sym->posn, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\tbyte\t[rbp+%d], %s\n", sym->posn, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\tdword\t[rbp+%d], %s\n", sym->posn, dreglist[r]); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the type size = typesize(node->type, node->ctype); // Generate the global identity and the label cgdataseg(); fprintf(Outfile, "\tsection\t.data\n" "\tglobal\t%s\n", node->name); fprintf(Outfile, "%s:", node->name); // Generate the space for this type // original version for (int i = 0; i < node->size; i++) { switch(size) { case 1: fprintf(Outfile, "\tdb\t0\n"); break; case 4: fprintf(Outfile, 
"\tdd\t0\n"); break; case 8: fprintf(Outfile, "\tdq\t0\n"); break; default: for (int i = 0; i < size; i++) fprintf(Outfile, "\tdb\t0\n"); } } /* compact version using times instead of loop switch(size) { case 1: fprintf(Outfile, "\ttimes\t%d\tdb\t0\n", node->size); break; case 4: fprintf(Outfile, "\ttimes\t%d\tdd\t0\n", node->size); break; case 8: fprintf(Outfile, "\ttimes\t%d\tdq\t0\n", node->size); break; default: fprintf(Outfile, "\ttimes\t%d\tdb\t0\n", size); } */ } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\tdb\t%d\n", *cptr); } fprintf(Outfile, "\tdb\t0\n"); /* put string in readable format on a single line // probably went overboard with error checking int comma = 0, quote = 0, start = 1; fprintf(Outfile, "\tdb\t"); for (cptr=strvalue; *cptr; cptr++) { if ( ! isprint(*cptr) ) if (comma || start) { fprintf(Outfile, "%d, ", *cptr); start = 0; comma = 1; } else if (quote) { fprintf(Outfile, "\', %d, ", *cptr); comma = 1; quote = 0; } else { fprintf(Outfile, "%d, ", *cptr); comma = 1; quote = 0; } else if (start || comma) { fprintf(Outfile, "\'%c", *cptr); start = comma = 0; quote = 1; } else { fprintf(Outfile, "%c", *cptr); comma = 0; quote = 1; } } if (comma || start) fprintf(Outfile, "0\n"); else fprintf(Outfile, "\', 0\n"); */ } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzx\teax, %s\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmov\teax, %s\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } cgjump(sym->endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL) fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r], sym->name); else fprintf(Outfile, "\tlea\t%s, [rbp+%d]\n", reglist[r], sym->posn); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovsx\t%s, dword [%s]\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { //Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmov\t[%s], byte %s\n", reglist[r2], breglist[r1]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } // Generate a switch jump table and the code to // load the registers and call the switch() code void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; // Get a label for the switch table label = genlabel(); cglabel(label); // Heuristic. If we have no cases, create one case // which points to the default case if (casecount == 0) { caseval[0] = 0; caselabel[0] = defaultlabel; casecount = 1; } // Generate the switch jump table. 
fprintf(Outfile, "\tdq\t%d\n", casecount); for (i = 0; i < casecount; i++) fprintf(Outfile, "\tdq\t%d, L%d\n", caseval[i], caselabel[i]); fprintf(Outfile, "\tdq\tL%d\n", defaultlabel); // Load the specific registers cglabel(toplabel); fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); fprintf(Outfile, "\tmov\trdx, L%d\n", label); fprintf(Outfile, "\tjmp\tswitch\n"); } ================================================ FILE: 37_Switch/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Putback; // Character put back by scanner extern_ struct symtable *Functionid; // Symbol ptr of the current function extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ char *Infilename; // Name of file we are parsing extern_ char *Outfilename; // Name of file we opened as Outfile extern_ struct token Token; // Last token scanned extern_ char Text[TEXTLEN + 1]; // Last identifier scanned extern_ int Looplevel; // Depth of nested loops extern_ int Switchlevel; // Depth of nested switches // Symbol table lists extern_ struct symtable *Globhead, *Globtail; // Global variables and functions extern_ struct symtable *Loclhead, *Locltail; // Local variables extern_ struct symtable *Parmhead, *Parmtail; // Local parameters extern_ struct symtable *Membhead, *Membtail; // Temp list of struct/union members extern_ struct symtable *Structhead, *Structtail; // List of struct types extern_ struct symtable *Unionhead, *Uniontail; // List of union types extern_ struct symtable *Enumhead, *Enumtail; // List of enum types and values extern_ struct symtable *Typehead, *Typetail; // List of typedefs // Command-line flags extern_ int O_dumpAST; // If true, dump the AST trees extern_ int O_keepasm; // If true, keep any assembly files extern_ int O_assemble; // If true, assemble the assembly files extern_ int O_dolink; 
// If true, link the object files extern_ int O_verbose; // If true, print info on compilation stages ================================================ FILE: 37_Switch/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 static struct symtable *composite_declaration(int type); static void enum_declaration(void); int typedef_declaration(struct symtable **ctype); int type_of_typedef(char *name, struct symtable **ctype); // Parse the current token and return a // primitive type enum value, a pointer // to any composite type and possibly // modify the class of the type. // Also scan in the next token. int parse_type(struct symtable **ctype, int *class) { int type, exstatic=1; // See if the class has been changed to extern (later, static) while (exstatic) { switch (Token.token) { case T_EXTERN: *class= C_EXTERN; scan(&Token); break; default: exstatic= 0; } } // Now work on the actual type keyword switch (Token.token) { case T_VOID: type = P_VOID; scan(&Token); break; case T_CHAR: type = P_CHAR; scan(&Token); break; case T_INT: type = P_INT; scan(&Token); break; case T_LONG: type = P_LONG; scan(&Token); break; // For the following, if we have a ';' after the // parsing then there is no type, so return -1 case T_STRUCT: type = P_STRUCT; *ctype = composite_declaration(P_STRUCT); if (Token.token == T_SEMI) type = -1; break; case T_UNION: type = P_UNION; *ctype = composite_declaration(P_UNION); if (Token.token == T_SEMI) type = -1; break; case T_ENUM: type = P_INT; // Enums are really ints enum_declaration(); if (Token.token == T_SEMI) type = -1; break; case T_TYPEDEF: type = typedef_declaration(ctype); if (Token.token == T_SEMI) type = -1; break; case T_IDENT: type = type_of_typedef(Text, ctype); break; default: fatald("Illegal type, token", Token.token); } // Scan in one or more further '*' tokens // and determine the correct pointer type while (1) { 
if (Token.token != T_STAR) break; type = pointer_to(type); scan(&Token); } // We leave with the next token already scanned return (type); } // variable_declaration: type identifier ';' // | type identifier '[' INTLIT ']' ';' // ; // // Parse the declaration of a scalar variable or an array // with a given size. // The identifier has been scanned & we have the type. // class is the variable's class // Return the pointer to variable's entry in the symbol table struct symtable *var_declaration(int type, struct symtable *ctype, int class) { struct symtable *sym = NULL; // See if this has already been declared switch (class) { case C_EXTERN: case C_GLOBAL: if (findglob(Text) != NULL) fatals("Duplicate global variable declaration", Text); case C_LOCAL: case C_PARAM: if (findlocl(Text) != NULL) fatals("Duplicate local variable declaration", Text); case C_MEMBER: if (findmember(Text) != NULL) fatals("Duplicate struct/union member declaration", Text); } // Text now has the identifier's name. // If the next token is a '[' if (Token.token == T_LBRACKET) { // Skip past the '[' scan(&Token); // Check we have an array size if (Token.token == T_INTLIT) { // Add this as a known array and generate its space in assembly. 
// We treat the array as a pointer to its elements' type switch (class) { case C_EXTERN: case C_GLOBAL: sym = addglob(Text, pointer_to(type), ctype, S_ARRAY, class, Token.intvalue); break; case C_LOCAL: case C_PARAM: case C_MEMBER: fatal ("For now, declaration of non-global arrays is not implemented"); } } // Ensure we have a following ']' scan(&Token); match(T_RBRACKET, "]"); } else { // Add this as a known scalar // and generate its space in assembly switch (class) { case C_EXTERN: case C_GLOBAL: sym = addglob(Text, type, ctype, S_VARIABLE, class, 1); break; case C_LOCAL: sym = addlocl(Text, type, ctype, S_VARIABLE, 1); break; case C_PARAM: sym = addparm(Text, type, ctype, S_VARIABLE, 1); break; case C_MEMBER: sym = addmemb(Text, type, ctype, S_VARIABLE, 1); break; } } return (sym); } // var_declaration_list: // | variable_declaration // | variable_declaration separate_token var_declaration_list ; // // When called to parse function parameters, separate_token is ','. // When called to parse members of a struct/union, separate_token is ';'. // // Parse a list of variables. // Add them as symbols to one of the symbol table lists, and return the // number of variables. If funcsym is not NULL, there is an existing function // prototype, so compare each variable's type against this prototype. 
static int var_declaration_list(struct symtable *funcsym, int class, int separate_token, int end_token) { int type; int paramcnt = 0; struct symtable *protoptr = NULL; struct symtable *ctype; // If there is a prototype, get the pointer // to the first prototype parameter if (funcsym != NULL) protoptr = funcsym->member; // Loop until the final end token while (Token.token != end_token) { // Get the type and identifier type = parse_type(&ctype, &class); ident(); // Check that this type matches the prototype if there is one if (protoptr != NULL) { if (type != protoptr->type) fatald("Type doesn't match prototype for parameter", paramcnt + 1); protoptr = protoptr->next; } else { // Add a new parameter to the right symbol table list, based on the class var_declaration(type, ctype, class); } paramcnt++; // Must have a separate_token or ')' at this point if ((Token.token != separate_token) && (Token.token != end_token)) fatald("Unexpected token in parameter list", Token.token); if (Token.token == separate_token) scan(&Token); } // Check that the number of parameters in this list matches // any existing prototype if ((funcsym != NULL) && (paramcnt != funcsym->nelems)) fatals("Parameter count mismatch for function", funcsym->name); // Return the count of parameters return (paramcnt); } // // function_declaration: type identifier '(' parameter_list ')' ; // | type identifier '(' parameter_list ')' compound_statement ; // // Parse the declaration of function. // The identifier has been scanned & we have the type. struct ASTnode *function_declaration(int type) { struct ASTnode *tree, *finalstmt; struct symtable *oldfuncsym, *newfuncsym = NULL; int endlabel, paramcnt; // Text has the identifier's name. If this exists and is a // function, get the id. Otherwise, set oldfuncsym to NULL. 
if ((oldfuncsym = findsymbol(Text)) != NULL) if (oldfuncsym->stype != S_FUNCTION) oldfuncsym = NULL; // If this is a new function declaration, get a // label-id for the end label, and add the function // to the symbol table, if (oldfuncsym == NULL) { endlabel = genlabel(); // Assumtion: functions only return scalar types, so NULL below newfuncsym = addglob(Text, type, NULL, S_FUNCTION, C_GLOBAL, endlabel); } // Scan in the '(', any parameters and the ')'. // Pass in any existing function prototype pointer lparen(); paramcnt = var_declaration_list(oldfuncsym, C_PARAM, T_COMMA, T_RPAREN); rparen(); // If this is a new function declaration, update the // function symbol entry with the number of parameters. // Also copy the parameter list into the function's node. if (newfuncsym) { newfuncsym->nelems = paramcnt; newfuncsym->member = Parmhead; oldfuncsym = newfuncsym; } // Clear out the parameter list Parmhead = Parmtail = NULL; // Declaration ends in a semicolon, only a prototype. if (Token.token == T_SEMI) { scan(&Token); return (NULL); } // This is not just a prototype. // Set the Functionid global to the function's symbol pointer Functionid = oldfuncsym; // Get the AST tree for the compound statement and mark // that we have parsed no loops or switches yet Looplevel= 0; Switchlevel= 0; tree = compound_statement(); // If the function type isn't P_VOID .. if (type != P_VOID) { // Error if no statements in the function if (tree == NULL) fatal("No statements in function with non-void type"); // Check that the last AST operation in the // compound statement was a return statement finalstmt = (tree->op == A_GLUE) ? tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); } // Return an A_FUNCTION node which has the function's symbol pointer // and the compound statement sub-tree return (mkastunary(A_FUNCTION, type, tree, oldfuncsym, endlabel)); } // Parse composite type declarations: structs or unions. 
// Either find an existing struct/union declaration, or build // a struct/union symbol table entry and return its pointer. static struct symtable *composite_declaration(int type) { struct symtable *ctype = NULL; struct symtable *m; int offset; // Skip the struct/union keyword scan(&Token); // See if there is a following struct/union name if (Token.token == T_IDENT) { // Find any matching composite type if (type == P_STRUCT) ctype = findstruct(Text); else ctype = findunion(Text); scan(&Token); } // If the next token isn't an LBRACE , this is // the usage of an existing struct/union type. // Return the pointer to the type. if (Token.token != T_LBRACE) { if (ctype == NULL) fatals("unknown struct/union type", Text); return (ctype); } // Ensure this struct/union type hasn't been // previously defined if (ctype) fatals("previously defined struct/union", Text); // Build the composite type and skip the left brace if (type == P_STRUCT) ctype = addstruct(Text, P_STRUCT, NULL, 0, 0); else ctype = addunion(Text, P_UNION, NULL, 0, 0); scan(&Token); // Scan in the list of members and attach // to the struct type's node var_declaration_list(NULL, C_MEMBER, T_SEMI, T_RBRACE); rbrace(); ctype->member = Membhead; Membhead = Membtail = NULL; // Set the offset of the initial member // and find the first free byte after it m = ctype->member; m->posn = 0; offset = typesize(m->type, m->ctype); // Set the position of each successive member in the composite type // Unions are easy. 
For structs, align the member and find the next free byte for (m = m->next; m != NULL; m = m->next) { // Set the offset for this member if (type == P_STRUCT) m->posn = genalign(m->type, offset, 1); else m->posn = 0; // Get the offset of the next free byte after this member offset += typesize(m->type, m->ctype); } // Set the overall size of the composite type ctype->size = offset; return (ctype); } // Parse an enum declaration static void enum_declaration(void) { struct symtable *etype = NULL; char *name; int intval = 0; // Skip the enum keyword. scan(&Token); // If there's a following enum type name, get a // pointer to any existing enum type node. if (Token.token == T_IDENT) { etype = findenumtype(Text); name = strdup(Text); // As it gets tromped soon scan(&Token); } // If the next token isn't a LBRACE, check // that we have an enum type name, then return if (Token.token != T_LBRACE) { if (etype == NULL) fatals("undeclared enum type:", name); return; } // We do have an LBRACE. Skip it scan(&Token); // If we have an enum type name, ensure that it // hasn't been declared before. if (etype != NULL) fatals("enum type redeclared:", etype->name); else // Build an enum type node for this identifier etype = addenum(name, C_ENUMTYPE, 0); // Loop to get all the enum values while (1) { // Ensure we have an identifier // Copy it in case there's an int literal coming up ident(); name = strdup(Text); // Ensure this enum value hasn't been declared before etype = findenumval(name); if (etype != NULL) fatals("enum value redeclared:", Text); // If the next token is an '=', skip it and // get the following int literal if (Token.token == T_ASSIGN) { scan(&Token); if (Token.token != T_INTLIT) fatal("Expected int literal after '='"); intval = Token.intvalue; scan(&Token); } // Build an enum value node for this identifier. // Increment the value for the next enum identifier. 
etype = addenum(name, C_ENUMVAL, intval++); // Bail out on a right curly bracket, else get a comma if (Token.token == T_RBRACE) break; comma(); } scan(&Token); // Skip over the right curly bracket } // Parse a typedef declaration and return the type // and ctype that it represents int typedef_declaration(struct symtable **ctype) { int type, class=0; // Skip the typedef keyword. scan(&Token); // Get the actual type following the keyword type = parse_type(ctype, &class); if (class != 0) fatal("Can't have extern in a typedef declaration"); // See if the typedef identifier already exists if (findtypedef(Text) != NULL) fatals("redefinition of typedef", Text); // It doesn't exist so add it to the typedef list addtypedef(Text, type, *ctype, 0, 0); scan(&Token); return (type); } // Given a typedef name, return the type it represents int type_of_typedef(char *name, struct symtable **ctype) { struct symtable *t; // Look up the typedef in the list t = findtypedef(name); if (t == NULL) fatals("unknown type", name); scan(&Token); *ctype = t->ctype; return (t->type); } // Parse one or more global declarations, either // variables, functions or structs void global_declarations(void) { struct ASTnode *tree; struct symtable *ctype; int type, class= C_GLOBAL; while (1) { // Stop when we have reached EOF if (Token.token == T_EOF) break; // Get the type type = parse_type(&ctype, &class); // We might have just parsed a struct, union or enum // declaration with no associated variable. // The next token might be a ';'. Loop back if it is. // XXX: I'm not happy with this as it allows // "struct fred;" as an accepted statement if (type == -1) { semi(); continue; } // We have to read past the identifier // to see either a '(' for a function declaration // or a ',' or ';' for a variable declaration. // Text is filled in by the ident() call. 
ident(); if (Token.token == T_LPAREN) { // Parse the function declaration tree = function_declaration(type); // Only a function prototype, no code if (tree == NULL) continue; // A real function, generate the assembly code for it if (O_dumpAST) { dumpAST(tree, NOLABEL, 0); fprintf(stdout, "\n\n"); } genAST(tree, NOLABEL, NOLABEL, NOLABEL, 0); // Now free the symbols associated // with this function freeloclsyms(); } else { // Parse the global variable declaration // and skip past the trailing semicolon var_declaration(type, ctype, class); semi(); } } } ================================================ FILE: 37_Switch/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c void reject_token(struct token *t); int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue); struct ASTnode *mkastleaf(int op, int type, struct symtable *sym, int intvalue); struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, struct symtable *sym, int intvalue); void dumpAST(struct ASTnode *n, int label, int parentASTop); // gen.c int genlabel(void); int genAST(struct ASTnode *n, int iflabel, int looptoplabel, int loopendlabel, int parentASTop); void genpreamble(); void genpostamble(); void genfreeregs(); void genglobsym(struct symtable *node); int genglobstr(char *strvalue); int genprimsize(int type); int genalign(int type, int offset, int direction); void genreturn(int reg, int id); // cg.c int cgprimsize(int type); int cgalign(int type, int offset, int direction); void cgtextseg(); void cgdataseg(); void freeall_registers(void); void cgpreamble(); void cgpostamble(); void cgfuncpreamble(struct symtable *sym); void cgfuncpostamble(struct symtable *sym); int cgloadint(int value, int type); int cgloadglob(struct symtable *sym, int op); int cgloadlocal(struct 
symtable *sym, int op); int cgloadglobstr(int label); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdiv(int r1, int r2); int cgshlconst(int r, int val); int cgcall(struct symtable *sym, int numargs); void cgcopyarg(int r, int argposn); int cgstorglob(int r, struct symtable *sym); int cgstorlocal(int r, struct symtable *sym); void cgglobsym(struct symtable *node); void cgglobstr(int l, char *strvalue); int cgcompare_and_set(int ASTop, int r1, int r2); int cgcompare_and_jump(int ASTop, int r1, int r2, int label); void cglabel(int l); void cgjump(int l); int cgwiden(int r, int oldtype, int newtype); void cgreturn(int reg, struct symtable *sym); int cgaddress(struct symtable *sym); int cgderef(int r, int type); int cgstorderef(int r1, int r2, int type); int cgnegate(int r); int cginvert(int r); int cglognot(int r); int cgboolean(int r, int op, int label); int cgand(int r1, int r2); int cgor(int r1, int r2); int cgxor(int r1, int r2); int cgshl(int r1, int r2); int cgshr(int r1, int r2); void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel); // expr.c struct ASTnode *binexpr(int ptp); // stmt.c struct ASTnode *compound_statement(void); // misc.c void match(int t, char *what); void semi(void); void lbrace(void); void rbrace(void); void lparen(void); void rparen(void); void ident(void); void comma(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node); struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int size, int posn); struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int size); struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int size); struct symtable *addparm(char *name, int type, struct symtable *ctype, int 
stype, int size); struct symtable *addstruct(char *name, int type, struct symtable *ctype, int stype, int size); struct symtable *addunion(char *name, int type, struct symtable *ctype, int stype, int size); struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int size); struct symtable *addenum(char *name, int class, int value); struct symtable *addtypedef(char *name, int type, struct symtable *ctype, int stype, int size); struct symtable *findglob(char *s); struct symtable *findlocl(char *s); struct symtable *findsymbol(char *s); struct symtable *findmember(char *s); struct symtable *findstruct(char *s); struct symtable *findunion(char *s); struct symtable *findenumtype(char *s); struct symtable *findenumval(char *s); struct symtable *findtypedef(char *s); void clear_symtable(void); void freeloclsyms(void); // decl.c struct symtable *var_declaration(int type, struct symtable *ctype, int class); struct ASTnode *function_declaration(int type); void global_declarations(void); // types.c int inttype(int type); int ptrtype(int type); int parse_type(struct symtable **ctype, int *class); int pointer_to(int type); int value_at(int type); int typesize(int type, struct symtable *ctype); struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op); ================================================ FILE: 37_Switch/defs.h ================================================ #include #include #include #include // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 enum { TEXTLEN = 512 // Length of identifiers in input }; // Commands and default filenames #define AOUT "a.out" #ifdef __NASM__ #define ASCMD "nasm -f elf64 -o " #define LDCMD "cc -no-pie -fno-plt -Wall -o " #else #define ASCMD "as -o " #define LDCMD "cc -o " #endif #define CPPCMD "cpp -nostdinc -isystem " // Token types enum { T_EOF, // Binary operators T_ASSIGN, T_LOGOR, T_LOGAND, T_OR, T_XOR, T_AMPER, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, T_LSHIFT, T_RSHIFT, 
T_PLUS, T_MINUS, T_STAR, T_SLASH, // Other operators T_INC, T_DEC, T_INVERT, T_LOGNOT, // Type keywords T_VOID, T_CHAR, T_INT, T_LONG, // Other keywords T_IF, T_ELSE, T_WHILE, T_FOR, T_RETURN, T_STRUCT, T_UNION, T_ENUM, T_TYPEDEF, T_EXTERN, T_BREAK, T_CONTINUE, T_SWITCH, T_CASE, T_DEFAULT, // Structural tokens T_INTLIT, T_STRLIT, T_SEMI, T_IDENT, T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, T_LBRACKET, T_RBRACKET, T_COMMA, T_DOT, T_ARROW, T_COLON }; // Token structure struct token { int token; // Token type, from the enum list above int intvalue; // For T_INTLIT, the integer value }; // AST node types. The first few line up // with the related tokens enum { A_ASSIGN = 1, A_LOGOR, A_LOGAND, A_OR, A_XOR, A_AND, A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_LSHIFT, A_RSHIFT, A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_INTLIT, A_STRLIT, A_IDENT, A_GLUE, A_IF, A_WHILE, A_FUNCTION, A_WIDEN, A_RETURN, A_FUNCCALL, A_DEREF, A_ADDR, A_SCALE, A_PREINC, A_PREDEC, A_POSTINC, A_POSTDEC, A_NEGATE, A_INVERT, A_LOGNOT, A_TOBOOL, A_BREAK, A_CONTINUE, A_SWITCH, A_CASE, A_DEFAULT }; // Primitive types. The bottom 4 bits is an integer // value that represents the level of indirection, // e.g. 0= no pointer, 1= pointer, 2= pointer pointer etc. 
enum { P_NONE, P_VOID = 16, P_CHAR = 32, P_INT = 48, P_LONG = 64, P_STRUCT=80, P_UNION=96 }; // Structural types enum { S_VARIABLE, S_FUNCTION, S_ARRAY }; // Storage classes enum { C_GLOBAL = 1, // Globally visible symbol C_LOCAL, // Locally visible symbol C_PARAM, // Locally visible function parameter C_EXTERN, // External globally visible symbol C_STRUCT, // A struct C_UNION, // A union C_MEMBER, // Member of a struct or union C_ENUMTYPE, // A named enumeration type C_ENUMVAL, // A named enumeration value C_TYPEDEF // A named typedef }; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol struct symtable *ctype; // If struct/union, ptr to that type int stype; // Structural type for the symbol int class; // Storage class for the symbol union { int size; // Number of elements in the symbol int endlabel; // For functions, the end label }; union { int nelems; // For functions, # of params int posn; // For locals, the negative offset // from the stack base pointer }; struct symtable *next; // Next symbol in one list struct symtable *member; // First member of a function, struct, }; // union or enum // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates int rvalue; // True if the node is an rvalue struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; struct symtable *sym; // For many AST nodes, the pointer to union { // the symbol in the symbol table int intvalue; // For A_INTLIT, the integer value int size; // For A_SCALE, the size to scale by }; }; enum { NOREG = -1, // Use NOREG when the AST generation // functions have no register to return NOLABEL = 0 // Use NOLABEL when we have no label to // pass to genAST() }; ================================================ FILE: 37_Switch/expr.c ================================================ #include 
"defs.h"
#include "data.h"
#include "decl.h"

// Parsing of expressions
// Copyright (c) 2019 Warren Toomey, GPL3

// expression_list: <null>
//        | expression
//        | expression ',' expression_list
//        ;

// Parse a list of zero or more comma-separated expressions and
// return an AST composed of A_GLUE nodes with the left-hand child
// being the sub-tree of previous expressions (or NULL) and the right-hand
// child being the next expression. Each A_GLUE node will have size field
// set to the number of expressions in the tree at this point. If no
// expressions are parsed, NULL is returned.
//
// The closing ')' is NOT consumed here: the loop only stops on it, and
// the caller (funccall) matches it. Note that a ',' immediately followed
// by ')' is accepted, because the loop condition is re-tested after the
// comma has been skipped.
static struct ASTnode *expression_list(void) {
  struct ASTnode *tree = NULL;
  struct ASTnode *child = NULL;
  int exprcount = 0;

  // Loop until the final right parentheses
  while (Token.token != T_RPAREN) {

    // Parse the next expression and increment the expression count
    child = binexpr(0);
    exprcount++;

    // Build an A_GLUE AST node with the previous tree as the left child
    // and the new expression as the right child. Store the expression count.
    tree = mkastnode(A_GLUE, P_NONE, tree, NULL, child, NULL, exprcount);

    // Must have a ',' or ')' at this point
    switch (Token.token) {
      case T_COMMA:
        scan(&Token);
        break;
      case T_RPAREN:
        break;
      default:
        fatald("Unexpected token in expression list", Token.token);
    }
  }

  // Return the tree of expressions
  return (tree);
}

// Parse a function call and return its AST.
// On entry the function's name is in Text and the
// current token is the '(' that follows it.
static struct ASTnode *funccall(void) {
  struct ASTnode *tree;
  struct symtable *funcptr;

  // Check that the identifier has been defined as a function,
  // then make a leaf node for it. This lookup must happen before
  // any further scanning, as parsing the arguments can overwrite Text.
  if ((funcptr = findsymbol(Text)) == NULL || funcptr->stype != S_FUNCTION) {
    fatals("Undeclared function", Text);
  }
  // Get the '('
  lparen();

  // Parse the argument expression list
  tree = expression_list();

  // XXX Check type of each argument against the function's prototype

  // Build the function call AST node. Store the
  // function's return type as this node's type.
  // Also record the function's symbol-id
  tree = mkastunary(A_FUNCCALL, funcptr->type, tree, funcptr, 0);

  // Get the ')'
  rparen();
  return (tree);
}

// Parse the index into an array and return an AST tree for it.
// On entry the array's name is in Text and the current token is '['.
static struct ASTnode *array_access(void) {
  struct ASTnode *left, *right;
  struct symtable *aryptr;

  // Check that the identifier has been defined as an array
  // then make a leaf node for it that points at the base
  if ((aryptr = findsymbol(Text)) == NULL || aryptr->stype != S_ARRAY) {
    fatals("Undeclared array", Text);
  }
  left = mkastleaf(A_ADDR, aryptr->type, aryptr, 0);

  // Get the '['
  scan(&Token);

  // Parse the following expression
  right = binexpr(0);

  // Get the ']'
  match(T_RBRACKET, "]");

  // Ensure that this is of int type
  if (!inttype(right->type))
    fatal("Array index is not of integer type");

  // Scale the index by the size of the element's type
  // NOTE(review): the result of modify_type() is not checked for NULL
  // here, unlike in binexpr() -- confirm it cannot fail for an
  // integer index against the array's type.
  right = modify_type(right, left->type, A_ADD);

  // Return an AST tree where the array's base has the offset
  // added to it, and dereference the element. Still an lvalue
  // at this point.
  left = mkastnode(A_ADD, aryptr->type, left, NULL, right, NULL, 0);
  left = mkastunary(A_DEREF, value_at(left->type), left, NULL, 0);
  return (left);
}

// Parse the member reference of a struct or union
// and return an AST tree for it. If withpointer is true,
// the access is through a pointer to the member.
// On entry the variable's name is in Text and the current
// token is the '.' or '->'.
static struct ASTnode *member_access(int withpointer) {
  struct ASTnode *left, *right;
  struct symtable *compvar;
  struct symtable *typeptr;
  struct symtable *m;

  // Check that the identifier has been declared as a
  // struct/union or a struct/union pointer. All three failures
  // report the same "Undeclared variable" message, even when the
  // variable exists but has the wrong type.
  if ((compvar = findsymbol(Text)) == NULL)
    fatals("Undeclared variable", Text);
  if (withpointer && compvar->type != pointer_to(P_STRUCT)
      && compvar->type != pointer_to(P_UNION))
    fatals("Undeclared variable", Text);
  if (!withpointer && compvar->type != P_STRUCT && compvar->type != P_UNION)
    fatals("Undeclared variable", Text);

  // If a pointer to a struct/union, get the pointer's value.
  // Otherwise, make a leaf node that points at the base
  // Either way, it's an rvalue
  if (withpointer) {
    // NOTE(review): compvar->type is already a pointer type here (checked
    // above), so this node's type is a pointer to that pointer -- confirm
    // this is intended.
    left = mkastleaf(A_IDENT, pointer_to(compvar->type), compvar, 0);
  } else
    left = mkastleaf(A_ADDR, compvar->type, compvar, 0);
  left->rvalue = 1;

  // Get the details of the composite type
  typeptr = compvar->ctype;

  // Skip the '.' or '->' token and get the member's name.
  // ident() scans past the member name; Text is still read below, so this
  // relies on the following token not overwriting Text.
  scan(&Token);
  ident();

  // Find the matching member's name in the type
  // Die if we can't find it
  for (m = typeptr->member; m != NULL; m = m->next)
    if (!strcmp(m->name, Text))
      break;

  if (m == NULL)
    fatals("No member found in struct/union: ", Text);

  // Build an A_INTLIT node with the offset
  right = mkastleaf(A_INTLIT, P_INT, NULL, m->posn);

  // Add the member's offset to the base of the struct/union
  // and dereference it. Still an lvalue at this point
  left = mkastnode(A_ADD, pointer_to(m->type), left, NULL, right, NULL, 0);
  left = mkastunary(A_DEREF, m->type, left, NULL, 0);
  return (left);
}

// Parse a postfix expression and return
// an AST node representing it. The
// identifier is already in Text.
static struct ASTnode *postfix(void) {
  struct ASTnode *n;
  struct symtable *varptr;
  struct symtable *enumptr;

  // If the identifier matches an enum value,
  // return an A_INTLIT node
  if ((enumptr = findenumval(Text)) != NULL) {
    scan(&Token);
    return (mkastleaf(A_INTLIT, P_INT, NULL, enumptr->posn));
  }
  // Scan in the next token to see if we have a postfix expression
  scan(&Token);

  // Function call
  if (Token.token == T_LPAREN)
    return (funccall());

  // An array reference
  if (Token.token == T_LBRACKET)
    return (array_access());

  // Access into a struct or union
  if (Token.token == T_DOT)
    return (member_access(0));
  if (Token.token == T_ARROW)
    return (member_access(1));

  // A variable. Check that the variable exists.
  if ((varptr = findsymbol(Text)) == NULL || varptr->stype != S_VARIABLE)
    fatals("Unknown variable", Text);

  switch (Token.token) {
      // Post-increment: skip over the token
    case T_INC:
      scan(&Token);
      n = mkastleaf(A_POSTINC, varptr->type, varptr, 0);
      break;

      // Post-decrement: skip over the token
    case T_DEC:
      scan(&Token);
      n = mkastleaf(A_POSTDEC, varptr->type, varptr, 0);
      break;

      // Just a variable reference
    default:
      n = mkastleaf(A_IDENT, varptr->type, varptr, 0);
  }
  return (n);
}

// Parse a primary factor and return an
// AST node representing it.
// Literals fall out of the switch and have the following token
// scanned at the bottom; identifiers and parenthesised expressions
// return directly because their parsers have already moved past
// their last token.
static struct ASTnode *primary(void) {
  struct ASTnode *n;
  int id;

  switch (Token.token) {
    case T_INTLIT:
      // For an INTLIT token, make a leaf AST node for it.
      // Make it a P_CHAR if it's within the P_CHAR range (0 to 255)
      if ((Token.intvalue) >= 0 && (Token.intvalue < 256))
        n = mkastleaf(A_INTLIT, P_CHAR, NULL, Token.intvalue);
      else
        n = mkastleaf(A_INTLIT, P_INT, NULL, Token.intvalue);
      break;

    case T_STRLIT:
      // For a STRLIT token, generate the assembly for it.
      // Then make a leaf AST node for it. id is the string's label.
      id = genglobstr(Text);
      n = mkastleaf(A_STRLIT, pointer_to(P_CHAR), NULL, id);
      break;

    case T_IDENT:
      return (postfix());

    case T_LPAREN:
      // Beginning of a parenthesised expression, skip the '('.
      // Scan in the expression and the right parenthesis
      scan(&Token);
      n = binexpr(0);
      rparen();
      return (n);

    default:
      fatald("Expecting a primary expression, got token", Token.token);
  }

  // Scan in the next token and return the leaf node
  scan(&Token);
  return (n);
}

// Convert a binary operator token into a binary AST operation.
// We rely on a 1:1 mapping from token to AST operation: the token
// number is returned unchanged as the AST operation for every token
// after T_EOF up to and including T_SLASH; anything else is fatal.
static int binastop(int tokentype) {
  if (tokentype > T_EOF && tokentype <= T_SLASH)
    return (tokentype);
  fatald("Syntax error, token", tokentype);
  return (0);                   // Keep -Wall happy
}

// Return true if a token is right-associative,
// false otherwise.
static int rightassoc(int tokentype) { if (tokentype == T_ASSIGN) return (1); return (0); } // Operator precedence for each token. Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 20, 30, // T_EOF, T_ASSIGN, T_LOGOR, T_LOGAND 40, 50, 60, // T_OR, T_XOR, T_AMPER 70, 70, // T_EQ, T_NE 80, 80, 80, 80, // T_LT, T_GT, T_LE, T_GE 90, 90, // T_LSHIFT, T_RSHIFT 100, 100, // T_PLUS, T_MINUS 110, 110 // T_STAR, T_SLASH }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec; if (tokentype > T_SLASH) fatald("Token with no precedence in op_precedence:", tokentype); prec = OpPrec[tokentype]; if (prec == 0) fatald("Syntax error, token", tokentype); return (prec); } // prefix_expression: primary // | '*' prefix_expression // | '&' prefix_expression // | '-' prefix_expression // | '++' prefix_expression // | '--' prefix_expression // ; // Parse a prefix expression and return // a sub-tree representing it. 
struct ASTnode *prefix(void) { struct ASTnode *tree; switch (Token.token) { case T_AMPER: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Ensure that it's an identifier if (tree->op != A_IDENT) fatal("& operator must be followed by an identifier"); // Now change the operator to A_ADDR and the type to // a pointer to the original type tree->op = A_ADDR; tree->type = pointer_to(tree->type); break; case T_STAR: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's either another deref or an // identifier if (tree->op != A_IDENT && tree->op != A_DEREF) fatal("* operator must be followed by an identifier or *"); // Prepend an A_DEREF operation to the tree tree = mkastunary(A_DEREF, value_at(tree->type), tree, NULL, 0); break; case T_MINUS: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_NEGATE operation to the tree and // make the child an rvalue. Because chars are unsigned, // also widen this to int so that it's signed tree->rvalue = 1; tree = modify_type(tree, P_INT, 0); tree = mkastunary(A_NEGATE, tree->type, tree, NULL, 0); break; case T_INVERT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_INVERT operation to the tree and // make the child an rvalue. tree->rvalue = 1; tree = mkastunary(A_INVERT, tree->type, tree, NULL, 0); break; case T_LOGNOT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_LOGNOT operation to the tree and // make the child an rvalue. 
tree->rvalue = 1; tree = mkastunary(A_LOGNOT, tree->type, tree, NULL, 0); break; case T_INC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("++ operator must be followed by an identifier"); // Prepend an A_PREINC operation to the tree tree = mkastunary(A_PREINC, tree->type, tree, NULL, 0); break; case T_DEC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("-- operator must be followed by an identifier"); // Prepend an A_PREDEC operation to the tree tree = mkastunary(A_PREDEC, tree->type, tree, NULL, 0); break; default: tree = primary(); } return (tree); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; struct ASTnode *ltemp, *rtemp; int ASTop; int tokentype; // Get the tree on the left. // Fetch the next token at the same time. 
left = prefix(); // If we hit one of several terminating tokens, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON) { left->rvalue = 1; return (left); } // While the precedence of this token is more than that of the // previous token precedence, or it's right associative and // equal to the previous token's precedence while ((op_precedence(tokentype) > ptp) || (rightassoc(tokentype) && op_precedence(tokentype) == ptp)) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Determine the operation to be performed on the sub-trees ASTop = binastop(tokentype); if (ASTop == A_ASSIGN) { // Assignment // Make the right tree into an rvalue right->rvalue = 1; // Ensure the right's type matches the left right = modify_type(right, left->type, 0); if (right == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree. However, switch // left and right around, so that the right expression's // code will be generated before the left expression ltemp = left; left = right; right = ltemp; } else { // We are not doing an assignment, so both trees should be rvalues // Convert both trees into rvalue if they are lvalue trees left->rvalue = 1; right->rvalue = 1; // Ensure the two types are compatible by trying // to modify each tree to match the other's type. ltemp = modify_type(left, right->type, ASTop); rtemp = modify_type(right, left->type, ASTop); if (ltemp == NULL && rtemp == NULL) fatal("Incompatible types in binary expression"); if (ltemp != NULL) left = ltemp; if (rtemp != NULL) right = rtemp; } // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. left = mkastnode(binastop(tokentype), left->type, left, NULL, right, NULL, 0); // Update the details of the current token. 
// If we hit a terminating token, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON) { left->rvalue = 1; return (left); } } // Return the tree we have when the precedence // is the same or lower left->rvalue = 1; return (left); } ================================================ FILE: 37_Switch/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number int genlabel(void) { static int id = 1; return (id++); } // Generate the code for an IF statement // and an optional ELSE clause static int genIF(struct ASTnode *n, int looptoplabel, int loopendlabel) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. // When there is no ELSE clause, Lfalse _is_ // the ending label! Lfalse = genlabel(); if (n->right) Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. genAST(n->left, Lfalse, NOLABEL, NOLABEL, n->op); genfreeregs(); // Generate the true compound statement genAST(n->mid, NOLABEL, looptoplabel, loopendlabel, n->op); genfreeregs(); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); genfreeregs(); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = genlabel(); Lend = genlabel(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. 
genAST(n->left, Lend, Lstart, Lend, n->op); genfreeregs(); // Generate the compound statement for the body genAST(n->right, NOLABEL, Lstart, Lend, n->op); genfreeregs(); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Generate the code for a SWITCH statement static int genSWITCH(struct ASTnode *n) { int *caseval, *caselabel; int Ljumptop, Lend; int i, reg, defaultlabel = 0, casecount = 0; struct ASTnode *c; // Create arrays for the case values and associated labels. // Ensure that we have at least one position in each array. caseval = (int *) malloc((n->intvalue + 1) * sizeof(int)); caselabel = (int *) malloc((n->intvalue + 1) * sizeof(int)); // Generate labels for the top of the jump table, and the // end of the switch statement. Set a default label for // the end of the switch, in case we don't have a default. Ljumptop = genlabel(); Lend = genlabel(); defaultlabel = Lend; // Output the code to calculate the switch condition reg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, 0); cgjump(Ljumptop); genfreeregs(); // Walk the right-child linked list to // generate the code for each case for (i = 0, c = n->right; c != NULL; i++, c = c->right) { // Get a label for this case. Store it // and the case value in the arrays. // Record if it is the default case. caselabel[i] = genlabel(); caseval[i] = c->intvalue; cglabel(caselabel[i]); if (c->op == A_DEFAULT) defaultlabel = caselabel[i]; else casecount++; // Generate the case code. Pass in the end label for the breaks genAST(c->left, NOLABEL, NOLABEL, Lend, 0); genfreeregs(); } // Ensure the last case jumps past the switch table cgjump(Lend); // Now output the switch table and the end label. cgswitch(reg, casecount, Ljumptop, caselabel, caseval, defaultlabel); cglabel(Lend); return (NOREG); } // Generate the code to copy the arguments of a // function call to its parameters, then call the // function itself. 
Return the register that holds // the function's return value. static int gen_funccall(struct ASTnode *n) { struct ASTnode *gluetree = n->left; int reg; int numargs = 0; // If there is a list of arguments, walk this list // from the last argument (right-hand child) to the // first while (gluetree) { // Calculate the expression's value reg = genAST(gluetree->right, NOLABEL, NOLABEL, NOLABEL, gluetree->op); // Copy this into the n'th function parameter: size is 1, 2, 3, ... cgcopyarg(reg, gluetree->size); // Keep the first (highest) number of arguments if (numargs == 0) numargs = gluetree->size; genfreeregs(); gluetree = gluetree->left; } // Call the function, clean up the stack (based on numargs), // and return its result return (cgcall(n->sym, numargs)); } // Given an AST, an optional label, and the AST op // of the parent, generate assembly code recursively. // Return the register id with the tree's final value. int genAST(struct ASTnode *n, int iflabel, int looptoplabel, int loopendlabel, int parentASTop) { int leftreg, rightreg; // We have some specific AST node handling at the top // so that we don't evaluate the child sub-trees immediately switch (n->op) { case A_IF: return (genIF(n, looptoplabel, loopendlabel)); case A_WHILE: return (genWHILE(n)); case A_SWITCH: return (genSWITCH(n)); case A_FUNCCALL: return (gen_funccall(n)); case A_GLUE: // Do each child statement, and free the // registers after each child genAST(n->left, iflabel, looptoplabel, loopendlabel, n->op); genfreeregs(); genAST(n->right, iflabel, looptoplabel, loopendlabel, n->op); genfreeregs(); return (NOREG); case A_FUNCTION: // Generate the function's preamble before the code // in the child sub-tree cgfuncpreamble(n->sym); genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op); cgfuncpostamble(n->sym); return (NOREG); } // General AST node handling below // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op); if (n->right) rightreg = 
genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdiv(leftreg, rightreg)); case A_AND: return (cgand(leftreg, rightreg)); case A_OR: return (cgor(leftreg, rightreg)); case A_XOR: return (cgxor(leftreg, rightreg)); case A_LSHIFT: return (cgshl(leftreg, rightreg)); case A_RSHIFT: return (cgshr(leftreg, rightreg)); case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, compare registers // and set one to 1 or 0 based on the comparison. if (parentASTop == A_IF || parentASTop == A_WHILE) return (cgcompare_and_jump(n->op, leftreg, rightreg, iflabel)); else return (cgcompare_and_set(n->op, leftreg, rightreg)); case A_INTLIT: return (cgloadint(n->intvalue, n->type)); case A_STRLIT: return (cgloadglobstr(n->intvalue)); case A_IDENT: // Load our value if we are an rvalue // or we are being dereferenced if (n->rvalue || parentASTop == A_DEREF) { if (n->sym->class == C_GLOBAL) { return (cgloadglob(n->sym, n->op)); } else { return (cgloadlocal(n->sym, n->op)); } } else return (NOREG); case A_ASSIGN: // Are we assigning to an identifier or through a pointer? 
switch (n->right->op) { case A_IDENT: if (n->right->sym->class == C_GLOBAL) return (cgstorglob(leftreg, n->right->sym)); else return (cgstorlocal(leftreg, n->right->sym)); case A_DEREF: return (cgstorderef(leftreg, rightreg, n->right->type)); default: fatald("Can't A_ASSIGN in genAST(), op", n->op); } case A_WIDEN: // Widen the child's type to the parent's type return (cgwiden(leftreg, n->left->type, n->type)); case A_RETURN: cgreturn(leftreg, Functionid); return (NOREG); case A_ADDR: return (cgaddress(n->sym)); case A_DEREF: // If we are an rvalue, dereference to get the value we point at, // otherwise leave it for A_ASSIGN to store through the pointer if (n->rvalue) return (cgderef(leftreg, n->left->type)); else return (leftreg); case A_SCALE: // Small optimisation: use shift if the // scale value is a known power of two switch (n->size) { case 2: return (cgshlconst(leftreg, 1)); case 4: return (cgshlconst(leftreg, 2)); case 8: return (cgshlconst(leftreg, 3)); default: // Load a register with the size and // multiply the leftreg by this size rightreg = cgloadint(n->size, P_INT); return (cgmul(leftreg, rightreg)); } case A_POSTINC: case A_POSTDEC: // Load and decrement the variable's value into a register // and post increment/decrement it if (n->sym->class == C_GLOBAL) return (cgloadglob(n->sym, n->op)); else return (cgloadlocal(n->sym, n->op)); case A_PREINC: case A_PREDEC: // Load and decrement the variable's value into a register // and pre increment/decrement it if (n->left->sym->class == C_GLOBAL) return (cgloadglob(n->left->sym, n->op)); else return (cgloadlocal(n->left->sym, n->op)); case A_NEGATE: return (cgnegate(leftreg)); case A_INVERT: return (cginvert(leftreg)); case A_LOGNOT: return (cglognot(leftreg)); case A_TOBOOL: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. 
Otherwise, set the register // to 0 or 1 based on it's zeroeness or non-zeroeness return (cgboolean(leftreg, parentASTop, iflabel)); case A_BREAK: cgjump(loopendlabel); return (NOREG); case A_CONTINUE: cgjump(looptoplabel); return (NOREG); default: fatald("Unknown AST operator", n->op); } return (NOREG); // Keep -Wall happy } void genpreamble() { cgpreamble(); } void genpostamble() { cgpostamble(); } void genfreeregs() { freeall_registers(); } void genglobsym(struct symtable *node) { cgglobsym(node); } int genglobstr(char *strvalue) { int l = genlabel(); cgglobstr(l, strvalue); return (l); } int genprimsize(int type) { return (cgprimsize(type)); } int genalign(int type, int offset, int direction) { return (cgalign(type, offset, direction)); } ================================================ FILE: 37_Switch/include/fcntl.h ================================================ #ifndef _FCNTL_H_ # define _FCNTL_H_ #define O_RDONLY 00 #define O_WRONLY 01 #define O_RDWR 02 int open(char *pathname, int flags); #endif // _FCNTL_H_ ================================================ FILE: 37_Switch/include/stddef.h ================================================ #ifndef _STDDEF_H_ # define _STDDEF_H_ typedef long size_t; #endif //_STDDEF_H_ ================================================ FILE: 37_Switch/include/stdio.h ================================================ #ifndef _STDIO_H_ # define _STDIO_H_ #include // This FILE definition will do for now typedef char * FILE; FILE *fopen(char *pathname, char *mode); size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream); size_t fwrite(void *ptr, size_t size, size_t nmemb, FILE *stream); int fclose(FILE *stream); int printf(char *format); int fprintf(FILE *stream, char *format); #endif // _STDIO_H_ ================================================ FILE: 37_Switch/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include #include // 
Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Given a string with a '.' and at least a 1-character suffix // after the '.', change the suffix to be the given character. // Return the new string or NULL if the original string could // not be modified char *alter_suffix(char *str, char suffix) { char *posn; char *newstr; // Clone the string if ((newstr = strdup(str)) == NULL) return (NULL); // Find the '.' if ((posn = strrchr(newstr, '.')) == NULL) return (NULL); // Ensure there is a suffix posn++; if (*posn == '\0') return (NULL); // Change the suffix and NUL-terminate the string *posn++ = suffix; *posn = '\0'; return (newstr); } // Given an input filename, compile that file // down to assembly code. Return the new file's name static char *do_compile(char *filename) { char cmd[TEXTLEN]; // Change the input file's suffix to .s Outfilename = alter_suffix(filename, 's'); if (Outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .c on the end\n", filename); exit(1); } // Generate the pre-processor command snprintf(cmd, TEXTLEN, "%s %s %s", CPPCMD, INCDIR, filename); // Open up the pre-processor pipe if ((Infile = popen(cmd, "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", filename, strerror(errno)); exit(1); } Infilename = filename; // Create the output file if ((Outfile = fopen(Outfilename, "w")) == NULL) { fprintf(stderr, "Unable to create %s: %s\n", Outfilename, strerror(errno)); exit(1); } Line = 1; // Reset the scanner Putback = '\n'; clear_symtable(); // Clear the symbol table if (O_verbose) printf("compiling %s\n", filename); scan(&Token); // Get the first token from the input genpreamble(); // Output the preamble global_declarations(); // Parse the global declarations genpostamble(); // Output the postamble fclose(Outfile); // Close the output file return (Outfilename); } // Given an input filename, assemble that file // down to object code. 
Return the object filename char *do_assemble(char *filename) { char cmd[TEXTLEN]; int err; char *outfilename = alter_suffix(filename, 'o'); if (outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .s on the end\n", filename); exit(1); } // Build the assembly command and run it snprintf(cmd, TEXTLEN, "%s %s %s", ASCMD, outfilename, filename); if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Assembly of %s failed\n", filename); exit(1); } return (outfilename); } // Given a list of object files and an output filename, // link all of the object filenames together. void do_link(char *outfilename, char *objlist[]) { int cnt, size = TEXTLEN; char cmd[TEXTLEN], *cptr; int err; // Start with the linker command and the output file cptr = cmd; cnt = snprintf(cptr, size, "%s %s ", LDCMD, outfilename); cptr += cnt; size -= cnt; // Now append each object file while (*objlist != NULL) { cnt = snprintf(cptr, size, "%s ", *objlist); cptr += cnt; size -= cnt; objlist++; } if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Linking failed\n"); exit(1); } } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s [-vcST] [-o outfile] file [file ...]\n", prog); fprintf(stderr, " -v give verbose output of the compilation stages\n"); fprintf(stderr, " -c generate object files but don't link them\n"); fprintf(stderr, " -S generate assembly files but don't link them\n"); fprintf(stderr, " -T dump the AST trees for each input file\n"); fprintf(stderr, " -o outfile, produce the outfile executable file\n"); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. 
enum { MAXOBJ = 100 }; int main(int argc, char *argv[]) { char *outfilename = AOUT; char *asmfile, *objfile; char *objlist[MAXOBJ]; int i, objcnt = 0; // Initialise our variables O_dumpAST = 0; O_keepasm = 0; O_assemble = 0; O_verbose = 0; O_dolink = 1; // Scan for command-line options for (i = 1; i < argc; i++) { // No leading '-', stop scanning for options if (*argv[i] != '-') break; // For each option in this argument for (int j = 1; (*argv[i] == '-') && argv[i][j]; j++) { switch (argv[i][j]) { case 'o': outfilename = argv[++i]; // Save & skip to next argument break; case 'T': O_dumpAST = 1; break; case 'c': O_assemble = 1; O_keepasm = 0; O_dolink = 0; break; case 'S': O_keepasm = 1; O_assemble = 0; O_dolink = 0; break; case 'v': O_verbose = 1; break; default: usage(argv[0]); } } } // Ensure we have at lease one input file argument if (i >= argc) usage(argv[0]); // Work on each input file in turn while (i < argc) { asmfile = do_compile(argv[i]); // Compile the source file if (O_dolink || O_assemble) { objfile = do_assemble(asmfile); // Assemble it to object forma if (objcnt == (MAXOBJ - 2)) { fprintf(stderr, "Too many object files for the compiler to handle\n"); exit(1); } objlist[objcnt++] = objfile; // Add the object file's name objlist[objcnt] = NULL; // to the list of object files } if (!O_keepasm) // Remove the assembly file if unlink(asmfile); // we don't need to keep it i++; } // Now link all the object files together if (O_dolink) { do_link(outfilename, objlist); // If we don't need to keep the object // files, then remove them if (!O_assemble) { for (i = 0; objlist[i] != NULL; i++) unlink(objlist[i]); } } return (0); } ================================================ FILE: 37_Switch/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" #include // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current token is t, // and fetch the next token. 
Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Match a comma and fetch the next token void comma(void) { match(T_COMMA, "comma"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d of %s\n", s, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d of %s\n", s1, s2, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d of %s\n", s, d, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d of %s\n", s, c, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } ================================================ FILE: 37_Switch/scan.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { char *p; p = strchr(s, c); return (p ? p - s : -1); } // Get the next character from the input file. 
static int next(void) { int c, l; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return (c); } c = fgetc(Infile); // Read from input file while (c == '#') { // We've hit a pre-processor statement scan(&Token); // Get the line number into l if (Token.token != T_INTLIT) fatals("Expecting pre-processor line number, got:", Text); l = Token.intvalue; scan(&Token); // Get the filename in Text if (Token.token != T_STRLIT) fatals("Expecting pre-processor file name, got:", Text); if (Text[0] != '<') { // If this is a real filename if (strcmp(Text, Infilename)) // and not the one we have now Infilename = strdup(Text); // save it. Then update the line num Line = l; } while ((c = fgetc(Infile)) != '\n'); // Skip to the end of the line c = fgetc(Infile); // and get the next character } if ('\n' == c) Line++; // Increment line count return (c); } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. static int skip(void) { int c; c = next(); while (' ' == c || '\t' == c || '\n' == c || '\r' == c || '\f' == c) { c = next(); } return (c); } // Return the next character from a character // or string literal static int scanch(void) { int c; // Get the next input character and interpret // metacharacters that start with a backslash c = next(); if (c == '\\') { switch (c = next()) { case 'a': return '\a'; case 'b': return '\b'; case 'f': return '\f'; case 'n': return '\n'; case 'r': return '\r'; case 't': return '\t'; case 'v': return '\v'; case '\\': return '\\'; case '"': return '"'; case '\'': return '\''; default: fatalc("unknown escape sequence", c); } } return (c); // Just an ordinary old character! } // Scan and return an integer literal // value from the input file. 
static int scanint(int c) { int k, val = 0; // Convert each character into an int value while ((k = chrpos("0123456789", c)) >= 0) { val = val * 10 + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan in a string literal from the input file, // and store it in buf[]. Return the length of // the string. static int scanstr(char *buf) { int i, c; // Loop while we have enough buffer space for (i = 0; i < TEXTLEN - 1; i++) { // Get the next char and append to buf // Return when we hit the ending double quote if ((c = scanch()) == '"') { buf[i] = 0; return (i); } buf[i] = c; } // Ran out of buf[] space fatal("String literal too long"); return (0); } // Scan an identifier from the input file and // store it in buf[]. Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { fatal("Identifier too long"); } else if (i < lim - 1) { buf[i++] = c; } c = next(); } // We hit a non-valid character, put it back. // NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. 
static int keyword(char *s) { switch (*s) { case 'b': if (!strcmp(s, "break")) return (T_BREAK); break; case 'c': if (!strcmp(s, "case")) return (T_CASE); if (!strcmp(s, "char")) return (T_CHAR); if (!strcmp(s, "continue")) return (T_CONTINUE); break; case 'd': if (!strcmp(s, "default")) return (T_DEFAULT); break; case 'e': if (!strcmp(s, "else")) return (T_ELSE); if (!strcmp(s, "enum")) return (T_ENUM); if (!strcmp(s, "extern")) return (T_EXTERN); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'l': if (!strcmp(s, "long")) return (T_LONG); break; case 'r': if (!strcmp(s, "return")) return (T_RETURN); break; case 's': if (!strcmp(s, "struct")) return (T_STRUCT); if (!strcmp(s, "switch")) return (T_SWITCH); break; case 't': if (!strcmp(s, "typedef")) return (T_TYPEDEF); break; case 'u': if (!strcmp(s, "union")) return (T_UNION); break; case 'v': if (!strcmp(s, "void")) return (T_VOID); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; } return (0); } // A pointer to a rejected token static struct token *Rejtoken = NULL; // Reject the token that we just scanned void reject_token(struct token *t) { if (Rejtoken != NULL) fatal("Can't reject token twice"); Rejtoken = t; } // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. 
int scan(struct token *t) { int c, tokentype; // If we have any rejected token, return it if (Rejtoken != NULL) { t = Rejtoken; Rejtoken = NULL; return (1); } // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': if ((c = next()) == '+') { t->token = T_INC; } else { putback(c); t->token = T_PLUS; } break; case '-': if ((c = next()) == '-') { t->token = T_DEC; } else if (c == '>') { t->token = T_ARROW; } else { putback(c); t->token = T_MINUS; } break; case '*': t->token = T_STAR; break; case '/': t->token = T_SLASH; break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case '[': t->token = T_LBRACKET; break; case ']': t->token = T_RBRACKET; break; case '~': t->token = T_INVERT; break; case '^': t->token = T_XOR; break; case ',': t->token = T_COMMA; break; case '.': t->token = T_DOT; break; case ':': t->token = T_COLON; break; case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { putback(c); t->token = T_LOGNOT; } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else if (c == '<') { t->token = T_LSHIFT; } else { putback(c); t->token = T_LT; } break; case '>': if ((c = next()) == '=') { t->token = T_GE; } else if (c == '>') { t->token = T_RSHIFT; } else { putback(c); t->token = T_GT; } break; case '&': if ((c = next()) == '&') { t->token = T_LOGAND; } else { putback(c); t->token = T_AMPER; } break; case '|': if ((c = next()) == '|') { t->token = T_LOGOR; } else { putback(c); t->token = T_OR; } break; case '\'': // If it's a quote, scan in the // literal character value and // the trailing quote t->intvalue = scanch(); t->token = T_INTLIT; if (next() != '\'') fatal("Expected '\\'' at end of char literal"); 
break; case '"': // Scan in a literal string scanstr(Text); t->token = T_STRLIT; break; default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if ((tokentype = keyword(Text)) != 0) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token return (1); } ================================================ FILE: 37_Switch/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // Prototypes static struct ASTnode *single_statement(void); // compound_statement: // empty, i.e. no statement // | statement // | statement statements // ; // // statement: declaration // | expression_statement // | function_call // | if_statement // | while_statement // | for_statement // | return_statement // ; // if_statement: if_head // | if_head 'else' compound_statement // ; // // if_head: 'if' '(' true_false_expression ')' compound_statement ; // // Parse an IF statement including any // optional ELSE clause and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. 
condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); rparen(); // Get the AST for the compound statement trueAST = compound_statement(); // If we have an 'else', skip it // and get the AST for the compound statement if (Token.token == T_ELSE) { scan(&Token); falseAST = compound_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, P_NONE, condAST, trueAST, falseAST, NULL, 0)); } // while_statement: 'while' '(' true_false_expression ')' compound_statement ; // // Parse a WHILE statement and return its AST static struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); rparen(); // Get the AST for the compound statement. // Update the loop depth in the process Looplevel++; bodyAST = compound_statement(); Looplevel--; // Build and return the AST for this statement return (mkastnode(A_WHILE, P_NONE, condAST, NULL, bodyAST, NULL, 0)); } // for_statement: 'for' '(' preop_statement ';' // true_false_expression ';' // postop_statement ')' compound_statement ; // // preop_statement: statement (for now) // postop_statement: statement (for now) // // Parse a FOR statement and return its AST static struct ASTnode *for_statement(void) { struct ASTnode *condAST, *bodyAST; struct ASTnode *preopAST, *postopAST; struct ASTnode *tree; // Ensure we have 'for' '(' match(T_FOR, "for"); lparen(); // Get the pre_op statement and the ';' preopAST = single_statement(); semi(); // Get the condition and the ';'. // Force a non-comparison to be boolean // the tree's operation is a comparison. 
condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); semi(); // Get the post_op statement and the ')' postopAST = single_statement(); rparen(); // Get the compound statement which is the body // Update the loop depth in the process Looplevel++; bodyAST = compound_statement(); Looplevel--; // For now, all four sub-trees have to be non-NULL. // Later on, we'll change the semantics for when some are missing // Glue the compound statement and the postop tree tree = mkastnode(A_GLUE, P_NONE, bodyAST, NULL, postopAST, NULL, 0); // Make a WHILE loop with the condition and this new body tree = mkastnode(A_WHILE, P_NONE, condAST, NULL, tree, NULL, 0); // And glue the preop tree to the A_WHILE tree return (mkastnode(A_GLUE, P_NONE, preopAST, NULL, tree, NULL, 0)); } // return_statement: 'return' '(' expression ')' ; // // Parse a return statement and return its AST static struct ASTnode *return_statement(void) { struct ASTnode *tree; // Can't return a value if function returns P_VOID if (Functionid->type == P_VOID) fatal("Can't return from a void function"); // Ensure we have 'return' '(' match(T_RETURN, "return"); lparen(); // Parse the following expression tree = binexpr(0); // Ensure this is compatible with the function's type tree = modify_type(tree, Functionid->type, 0); if (tree == NULL) fatal("Incompatible type to return"); // Add on the A_RETURN node tree = mkastunary(A_RETURN, P_NONE, tree, NULL, 0); // Get the ')' rparen(); return (tree); } // break_statement: 'break' ; // // Parse a break statement and return its AST static struct ASTnode *break_statement(void) { if (Looplevel == 0 && Switchlevel == 0) fatal("no loop or switch to break out from"); scan(&Token); return (mkastleaf(A_BREAK, 0, NULL, 0)); } // continue_statement: 'continue' ; // // Parse a continue statement and return its AST static struct ASTnode *continue_statement(void) { if (Looplevel == 0) fatal("no loop to continue 
to"); scan(&Token); return (mkastleaf(A_CONTINUE, 0, NULL, 0)); } // Parse a switch statement and return its AST static struct ASTnode *switch_statement(void) { struct ASTnode *left, *n, *c, *casetree= NULL, *casetail; int inloop=1, casecount=0; int seendefault=0; int ASTop, casevalue; // Skip the 'switch' and '(' scan(&Token); lparen(); // Get the switch expression, the ')' and the '{' left= binexpr(0); rparen(); lbrace(); // Ensure that this is of int type if (!inttype(left->type)) fatal("Switch expression is not of integer type"); // Build an A_SWITCH subtree with the expression as // the child n= mkastunary(A_SWITCH, 0, left, NULL, 0); // Now parse the cases Switchlevel++; while (inloop) { switch(Token.token) { // Leave the loop when we hit a '}' case T_RBRACE: if (casecount==0) fatal("No cases in switch"); inloop=0; break; case T_CASE: case T_DEFAULT: // Ensure this isn't after a previous 'default' if (seendefault) fatal("case or default after existing default"); // Set the AST operation. 
Scan the case value if required if (Token.token==T_DEFAULT) { ASTop= A_DEFAULT; seendefault= 1; scan(&Token); } else { ASTop= A_CASE; scan(&Token); left= binexpr(0); // Ensure the case value is an integer literal if (left->op != A_INTLIT) fatal("Expecting integer literal for case value"); casevalue= left->intvalue; // Walk the list of existing case values to ensure // that there isn't a duplicate case value for (c= casetree; c != NULL; c= c -> right) if (casevalue == c->intvalue) fatal("Duplicate case value"); } // Scan the ':' and get the compound expression match(T_COLON, ":"); left= compound_statement(); casecount++; // Build a sub-tree with the compound statement as the left child // and link it in to the growing A_CASE tree if (casetree==NULL) { casetree= casetail= mkastunary(ASTop, 0, left, NULL, casevalue); } else { casetail->right= mkastunary(ASTop, 0, left, NULL, casevalue); casetail= casetail->right; } break; default: fatald("Unexpected token in switch", Token.token); } } Switchlevel--; // We have a sub-tree with the cases and any default. Put the // case count into the A_SWITCH node and attach the case tree. n->intvalue= casecount; n->right= casetree; rbrace(); return(n); } // Parse a single statement and return its AST static struct ASTnode *single_statement(void) { int type, class = C_LOCAL; struct symtable *ctype; switch (Token.token) { case T_IDENT: // We have to see if the identifier matches a typedef. // If not do the default code in this switch statement. // Otherwise, fall down to the parse_type() call. if (findtypedef(Text) == NULL) return (binexpr(0)); case T_CHAR: case T_INT: case T_LONG: case T_STRUCT: case T_UNION: case T_ENUM: case T_TYPEDEF: // The beginning of a variable declaration. // Parse the type and get the identifier. 
// Then parse the rest of the declaration // and skip over the semicolon type = parse_type(&ctype, &class); ident(); var_declaration(type, ctype, class); semi(); return (NULL); // No AST generated here case T_IF: return (if_statement()); case T_WHILE: return (while_statement()); case T_FOR: return (for_statement()); case T_RETURN: return (return_statement()); case T_BREAK: return (break_statement()); case T_CONTINUE: return (continue_statement()); case T_SWITCH: return (switch_statement()); default: // For now, see if this is an expression. // This catches assignment statements. return (binexpr(0)); } return (NULL); // Keep -Wall happy } // Parse a compound statement // and return its AST struct ASTnode *compound_statement(void) { struct ASTnode *left = NULL; struct ASTnode *tree; // Require a left curly bracket lbrace(); while (1) { // Parse a single statement tree = single_statement(); // Some statements must be followed by a semicolon if (tree != NULL && (tree->op == A_ASSIGN || tree->op == A_RETURN || tree->op == A_FUNCCALL || tree->op == A_BREAK || tree->op == A_CONTINUE)) semi(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, P_NONE, left, NULL, tree, NULL, 0); } // When we hit a right curly bracket, // skip past it and return the AST if (Token.token == T_RBRACE) { rbrace(); return (left); } } } ================================================ FILE: 37_Switch/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 // Append a node to the singly-linked list pointed to by head or tail void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node) { // Check for valid pointers if (head == NULL || tail == NULL || node == NULL) fatal("Either head, tail or node is NULL in 
appendsym"); // Append to the list if (*tail) { (*tail)->next = node; *tail = node; } else *head = *tail = node; node->next = NULL; } // Create a symbol node to be added to a symbol table list. // Set up the node's: // + type: char, int etc. // + ctype: composite type pointer for struct/union // + structural type: var, function, array etc. // + size: number of elements, or endlabel: end label for a function // + posn: Position information for local symbols // Return a pointer to the new node struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int size, int posn) { // Get a new node struct symtable *node = (struct symtable *) malloc(sizeof(struct symtable)); if (node == NULL) fatal("Unable to malloc a symbol table node in newsym"); // Fill in the values node->name = strdup(name); node->type = type; node->ctype = ctype; node->stype = stype; node->class = class; node->size = size; node->posn = posn; node->next = NULL; node->member = NULL; // Generate any global space if (class == C_GLOBAL) genglobsym(node); return (node); } // Add a symbol to the global symbol list struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int size) { struct symtable *sym = newsym(name, type, ctype, stype, class, size, 0); appendsym(&Globhead, &Globtail, sym); return (sym); } // Add a symbol to the local symbol list struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_LOCAL, size, 0); appendsym(&Loclhead, &Locltail, sym); return (sym); } // Add a symbol to the parameter list struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_PARAM, size, 0); appendsym(&Parmhead, &Parmtail, sym); return (sym); } // Add a symbol to the temporary member list struct symtable *addmemb(char *name, int type, struct symtable *ctype, int 
stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_MEMBER, size, 0); appendsym(&Membhead, &Membtail, sym); return (sym); } // Add a struct to the struct list struct symtable *addstruct(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_STRUCT, size, 0); appendsym(&Structhead, &Structtail, sym); return (sym); } // Add a struct to the union list struct symtable *addunion(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_UNION, size, 0); appendsym(&Unionhead, &Uniontail, sym); return (sym); } // Add an enum type or value to the enum list. // Class is C_ENUMTYPE or C_ENUMVAL. // Use posn to store the int value. struct symtable *addenum(char *name, int class, int value) { struct symtable *sym = newsym(name, P_INT, NULL, 0, class, 0, value); appendsym(&Enumhead, &Enumtail, sym); return (sym); } // Add a typedef to the typedef list struct symtable *addtypedef(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_TYPEDEF, size, 0); appendsym(&Typehead, &Typetail, sym); return (sym); } // Search for a symbol in a specific list. // Return a pointer to the found node or NULL if not found. // If class is not zero, also match on the given class static struct symtable *findsyminlist(char *s, struct symtable *list, int class) { for (; list != NULL; list = list->next) if ((list->name != NULL) && !strcmp(s, list->name)) if (class==0 || class== list->class) return (list); return (NULL); } // Determine if the symbol s is in the global symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findglob(char *s) { return (findsyminlist(s, Globhead, 0)); } // Determine if the symbol s is in the local symbol table. // Return a pointer to the found node or NULL if not found. 
struct symtable *findlocl(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } return (findsyminlist(s, Loclhead, 0)); } // Determine if the symbol s is in the symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findsymbol(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } // Otherwise, try the local and global symbol lists node = findsyminlist(s, Loclhead, 0); if (node) return (node); return (findsyminlist(s, Globhead, 0)); } // Find a member in the member list // Return a pointer to the found node or NULL if not found. struct symtable *findmember(char *s) { return (findsyminlist(s, Membhead, 0)); } // Find a struct in the struct list // Return a pointer to the found node or NULL if not found. struct symtable *findstruct(char *s) { return (findsyminlist(s, Structhead, 0)); } // Find a struct in the union list // Return a pointer to the found node or NULL if not found. struct symtable *findunion(char *s) { return (findsyminlist(s, Unionhead, 0)); } // Find an enum type in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumtype(char *s) { return (findsyminlist(s, Enumhead, C_ENUMTYPE)); } // Find an enum value in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumval(char *s) { return (findsyminlist(s, Enumhead, C_ENUMVAL)); } // Find a type in the tyedef list // Return a pointer to the found node or NULL if not found. 
struct symtable *findtypedef(char *s) { return (findsyminlist(s, Typehead, 0)); } // Reset the contents of the symbol table void clear_symtable(void) { Globhead = Globtail = NULL; Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Membhead = Membtail = NULL; Structhead = Structtail = NULL; Unionhead = Uniontail = NULL; Enumhead = Enumtail = NULL; Typehead = Typetail = NULL; } // Clear all the entries in the local symbol table void freeloclsyms(void) { Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Functionid = NULL; } ================================================ FILE: 37_Switch/tests/err.input31.c ================================================ Expecting a primary expression, got token:15 on line 5 of input31.c ================================================ FILE: 37_Switch/tests/err.input32.c ================================================ Unknown variable:cow on line 4 of input32.c ================================================ FILE: 37_Switch/tests/err.input33.c ================================================ Incompatible type to return on line 4 of input33.c ================================================ FILE: 37_Switch/tests/err.input34.c ================================================ For now, declaration of non-global arrays is not implemented on line 4 of input34.c ================================================ FILE: 37_Switch/tests/err.input35.c ================================================ Duplicate local variable declaration:a on line 4 of input35.c ================================================ FILE: 37_Switch/tests/err.input36.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input36.c ================================================ FILE: 37_Switch/tests/err.input37.c ================================================ Unexpected token in parameter list:15 on line 3 of input37.c ================================================ FILE: 37_Switch/tests/err.input38.c 
================================================ Type doesn't match prototype for parameter:2 on line 4 of input38.c ================================================ FILE: 37_Switch/tests/err.input39.c ================================================ No statements in function with non-void type on line 4 of input39.c ================================================ FILE: 37_Switch/tests/err.input40.c ================================================ No return for function with non-void type on line 4 of input40.c ================================================ FILE: 37_Switch/tests/err.input41.c ================================================ Can't return from a void function on line 3 of input41.c ================================================ FILE: 37_Switch/tests/err.input42.c ================================================ Undeclared function:fred on line 3 of input42.c ================================================ FILE: 37_Switch/tests/err.input43.c ================================================ Undeclared array:b on line 3 of input43.c ================================================ FILE: 37_Switch/tests/err.input44.c ================================================ Unknown variable:z on line 3 of input44.c ================================================ FILE: 37_Switch/tests/err.input45.c ================================================ & operator must be followed by an identifier on line 3 of input45.c ================================================ FILE: 37_Switch/tests/err.input46.c ================================================ * operator must be followed by an identifier or * on line 3 of input46.c ================================================ FILE: 37_Switch/tests/err.input47.c ================================================ ++ operator must be followed by an identifier on line 3 of input47.c ================================================ FILE: 37_Switch/tests/err.input48.c ================================================ -- 
operator must be followed by an identifier on line 3 of input48.c ================================================ FILE: 37_Switch/tests/err.input49.c ================================================ Incompatible expression in assignment on line 6 of input49.c ================================================ FILE: 37_Switch/tests/err.input50.c ================================================ Incompatible types in binary expression on line 6 of input50.c ================================================ FILE: 37_Switch/tests/err.input51.c ================================================ Expected '\'' at end of char literal on line 4 of input51.c ================================================ FILE: 37_Switch/tests/err.input52.c ================================================ Unrecognised character:$ on line 5 of input52.c ================================================ FILE: 37_Switch/tests/err.input56.c ================================================ unknown struct/union type:var1 on line 2 of input56.c ================================================ FILE: 37_Switch/tests/err.input57.c ================================================ previously defined struct/union:fred on line 2 of input57.c ================================================ FILE: 37_Switch/tests/err.input59.c ================================================ Undeclared variable:y on line 3 of input59.c ================================================ FILE: 37_Switch/tests/err.input60.c ================================================ Undeclared variable:x on line 3 of input60.c ================================================ FILE: 37_Switch/tests/err.input61.c ================================================ Undeclared variable:x on line 3 of input61.c ================================================ FILE: 37_Switch/tests/err.input64.c ================================================ undeclared enum type::fred on line 1 of input64.c ================================================ FILE: 
37_Switch/tests/err.input65.c ================================================ enum type redeclared::fred on line 2 of input65.c ================================================ FILE: 37_Switch/tests/err.input66.c ================================================ enum value redeclared::z on line 2 of input66.c ================================================ FILE: 37_Switch/tests/err.input68.c ================================================ redefinition of typedef:FOO on line 2 of input68.c ================================================ FILE: 37_Switch/tests/err.input69.c ================================================ unknown type:FLOO on line 2 of input69.c ================================================ FILE: 37_Switch/tests/err.input72.c ================================================ no loop or switch to break out from on line 1 of input72.c ================================================ FILE: 37_Switch/tests/err.input73.c ================================================ no loop to continue to on line 1 of input73.c ================================================ FILE: 37_Switch/tests/err.input75.c ================================================ Unexpected token in switch:27 on line 4 of input75.c ================================================ FILE: 37_Switch/tests/err.input76.c ================================================ No cases in switch on line 3 of input76.c ================================================ FILE: 37_Switch/tests/err.input77.c ================================================ case or default after existing default on line 6 of input77.c ================================================ FILE: 37_Switch/tests/err.input78.c ================================================ case or default after existing default on line 6 of input78.c ================================================ FILE: 37_Switch/tests/err.input79.c ================================================ Duplicate case value on line 6 of input79.c 
================================================ FILE: 37_Switch/tests/input01.c ================================================ int printf(char *fmt); void main() { printf("%d\n", 12 * 3); printf("%d\n", 18 - 2 * 4); printf("%d\n", 1 + 2 + 9 - 5/2 + 3*5); } ================================================ FILE: 37_Switch/tests/input02.c ================================================ int printf(char *fmt); void main() { int fred; int jim; fred= 5; jim= 12; printf("%d\n", fred + jim); } ================================================ FILE: 37_Switch/tests/input03.c ================================================ int printf(char *fmt); void main() { int x; x= 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); } ================================================ FILE: 37_Switch/tests/input04.c ================================================ int printf(char *fmt); void main() { int x; x= 7 < 9; printf("%d\n", x); x= 7 <= 9; printf("%d\n", x); x= 7 != 9; printf("%d\n", x); x= 7 == 7; printf("%d\n", x); x= 7 >= 7; printf("%d\n", x); x= 7 <= 7; printf("%d\n", x); x= 9 > 7; printf("%d\n", x); x= 9 >= 7; printf("%d\n", x); x= 9 != 7; printf("%d\n", x); } ================================================ FILE: 37_Switch/tests/input05.c ================================================ int printf(char *fmt); void main() { int i; int j; i=6; j=12; if (i < j) { printf("%d\n", i); } else { printf("%d\n", j); } } ================================================ FILE: 37_Switch/tests/input06.c ================================================ int printf(char *fmt); void main() { int i; i=1; while (i <= 10) { printf("%d\n", i); i= i + 1; } } ================================================ FILE: 37_Switch/tests/input07.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } 
================================================ FILE: 37_Switch/tests/input08.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 37_Switch/tests/input09.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { printf("%d\n", 2 * b - a); } } ================================================ FILE: 37_Switch/tests/input10.c ================================================ int printf(char *fmt); void main() { int i; char j; j= 20; printf("%d\n", j); i= 10; printf("%d\n", i); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 2; j= j + 1) { printf("%d\n", j); } } ================================================ FILE: 37_Switch/tests/input11.c ================================================ int printf(char *fmt); int main() { int i; char j; long k; i= 10; printf("%d\n", i); j= 20; printf("%d\n", j); k= 30; printf("%d\n", k); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 4; j= j + 1) { printf("%d\n", j); } for (k= 1; k <= 5; k= k + 1) { printf("%d\n", k); } return(i); printf("%d\n", 12345); return(3); } ================================================ FILE: 37_Switch/tests/input12.c ================================================ int printf(char *fmt); int fred() { return(5); } void main() { int x; x= fred(2); printf("%d\n", x); } ================================================ FILE: 37_Switch/tests/input13.c ================================================ int printf(char *fmt); int fred() { return(56); } void main() { int dummy; int result; dummy= printf("%d\n", 23); result= fred(10); dummy= printf("%d\n", result); } ================================================ FILE: 37_Switch/tests/input14.c 
================================================ int printf(char *fmt); int fred() { return(20); } int main() { int result; printf("%d\n", 10); result= fred(15); printf("%d\n", result); printf("%d\n", fred(15)+10); return(0); } ================================================ FILE: 37_Switch/tests/input15.c ================================================ int printf(char *fmt); int main() { char a; char *b; char c; int d; int *e; int f; a= 18; printf("%d\n", a); b= &a; c= *b; printf("%d\n", c); d= 12; printf("%d\n", d); e= &d; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 37_Switch/tests/input16.c ================================================ int printf(char *fmt); int c; int d; int *e; int f; int main() { c= 12; d=18; printf("%d\n", c); e= &c + 1; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 37_Switch/tests/input17.c ================================================ int printf(char *fmt); int main() { char a; char *b; int d; int *e; b= &a; *b= 19; printf("%d\n", a); e= &d; *e= 12; printf("%d\n", d); return(0); } ================================================ FILE: 37_Switch/tests/input18.c ================================================ int printf(char *fmt); int main() { int a; int b; a= b= 34; printf("%d\n", a); printf("%d\n", b); return(0); } ================================================ FILE: 37_Switch/tests/input18a.c ================================================ int printf(char *fmt); int a; int *b; char c; char *d; int main() { b= &a; *b= 15; printf("%d\n", a); d= &c; *d= 16; printf("%d\n", c); return(0); } ================================================ FILE: 37_Switch/tests/input19.c ================================================ int printf(char *fmt); int a; int b; int c; int d; int e; int main() { a= 2; b= 4; c= 3; d= 2; e= (a+b) * (c+d); printf("%d\n", e); return(0); } ================================================ FILE: 
37_Switch/tests/input20.c ================================================ int printf(char *fmt); int a; int b[25]; int main() { b[3]= 12; a= b[3]; printf("%d\n", a); return(0); } ================================================ FILE: 37_Switch/tests/input21.c ================================================ int printf(char *fmt); char c; char *str; int main() { c= '\n'; printf("%d\n", c); for (str= "Hello world\n"; *str != 0; str= str + 1) { printf("%c", *str); } return(0); } ================================================ FILE: 37_Switch/tests/input22.c ================================================ int printf(char *fmt); char a; char b; char c; int d; int e; int f; long g; long h; long i; int main() { b= 5; c= 7; a= b + c++; printf("%d\n", a); e= 5; f= 7; d= e + f++; printf("%d\n", d); h= 5; i= 7; g= h + i++; printf("%d\n", g); a= b-- + c; printf("%d\n", a); d= e-- + f; printf("%d\n", d); g= h-- + i; printf("%d\n", g); a= ++b + c; printf("%d\n", a); d= ++e + f; printf("%d\n", d); g= ++h + i; printf("%d\n", g); a= b * --c; printf("%d\n", a); d= e * --f; printf("%d\n", d); g= h * --i; printf("%d\n", g); return(0); } ================================================ FILE: 37_Switch/tests/input23.c ================================================ int printf(char *fmt); char *str; int x; int main() { x= -23; printf("%d\n", x); printf("%d\n", -10 * -10); x= 1; x= ~x; printf("%d\n", x); x= 2 > 5; printf("%d\n", x); x= !x; printf("%d\n", x); x= !x; printf("%d\n", x); x= 13; if (x) { printf("%d\n", 13); } x= 0; if (!x) { printf("%d\n", 14); } for (str= "Hello world\n"; *str; str++) { printf("%c", *str); } return(0); } ================================================ FILE: 37_Switch/tests/input24.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { a= 42; b= 19; printf("%d\n", a & b); printf("%d\n", a | b); printf("%d\n", a ^ b); printf("%d\n", 1 << 3); printf("%d\n", 63 >> 3); return(0); } 
================================================ FILE: 37_Switch/tests/input25.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { char z; int y; int x; x= 10; y= 20; z= 30; printf("%d\n", x); printf("%d\n", y); printf("%d\n", z); a= 5; b= 15; c= 25; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); return(0); } ================================================ FILE: 37_Switch/tests/input26.c ================================================ int printf(char *fmt); int main(int a, char b, long c, int d, int e, int f, int g, int h) { int i; int j; int k; a= 13; printf("%d\n", a); b= 23; printf("%d\n", b); c= 34; printf("%d\n", c); d= 44; printf("%d\n", d); e= 54; printf("%d\n", e); f= 64; printf("%d\n", f); g= 74; printf("%d\n", g); h= 84; printf("%d\n", h); i= 94; printf("%d\n", i); j= 95; printf("%d\n", j); k= 96; printf("%d\n", k); return(0); } ================================================ FILE: 37_Switch/tests/input27.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int param5(int a, int b, int c, int d, int e) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param2(int a, int b) { int c; int d; int e; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param0() { int a; int b; int c; int d; int e; a= 1; b= 2; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int main() { param8(1,2,3,4,5,6,7,8); param5(1,2,3,4,5); param2(1,2); param0(); return(0); } ================================================ FILE: 37_Switch/tests/input28.c 
================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 37_Switch/tests/input29.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h); int fred(int a, int b, int c); int main(); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 37_Switch/tests/input30.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input30.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 37_Switch/tests/input31.c ================================================ int printf(char *fmt); int main() { int x; x= 2 + + 3 - * / ; } ================================================ FILE: 37_Switch/tests/input32.c ================================================ int printf(char *fmt); int main() { pizza cow llama sausage; } 
================================================ FILE: 37_Switch/tests/input33.c ================================================ int printf(char *fmt); int main() { char *z; return(z); } ================================================ FILE: 37_Switch/tests/input34.c ================================================ int printf(char *fmt); int main() { int a[12]; return(0); } ================================================ FILE: 37_Switch/tests/input35.c ================================================ int printf(char *fmt); int fred(int a, int b) { int a; return(a); } ================================================ FILE: 37_Switch/tests/input36.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c); ================================================ FILE: 37_Switch/tests/input37.c ================================================ int printf(char *fmt); int fred(int a, char b +, int z); ================================================ FILE: 37_Switch/tests/input38.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c, int g); ================================================ FILE: 37_Switch/tests/input39.c ================================================ int printf(char *fmt); int main() { int a; } ================================================ FILE: 37_Switch/tests/input40.c ================================================ int printf(char *fmt); int main() { int a; a= 5; } ================================================ FILE: 37_Switch/tests/input41.c ================================================ int printf(char *fmt); void fred() { return(5); } ================================================ FILE: 37_Switch/tests/input42.c ================================================ int printf(char *fmt); int main() { fred(5); } ================================================ FILE: 37_Switch/tests/input43.c 
================================================ int printf(char *fmt); int main() { int a; a= b[4]; } ================================================ FILE: 37_Switch/tests/input44.c ================================================ int printf(char *fmt); int main() { int a; a= z; } ================================================ FILE: 37_Switch/tests/input45.c ================================================ int printf(char *fmt); int main() { int a; a= &5; } ================================================ FILE: 37_Switch/tests/input46.c ================================================ int printf(char *fmt); int main() { int a; a= *5; } ================================================ FILE: 37_Switch/tests/input47.c ================================================ int printf(char *fmt); int main() { int a; a= ++5; } ================================================ FILE: 37_Switch/tests/input48.c ================================================ int printf(char *fmt); int main() { int a; a= --5; } ================================================ FILE: 37_Switch/tests/input49.c ================================================ int printf(char *fmt); int main() { int x; char y; y= x; } ================================================ FILE: 37_Switch/tests/input50.c ================================================ int printf(char *fmt); int main() { char *a; char *b; a= a + b; } ================================================ FILE: 37_Switch/tests/input51.c ================================================ int printf(char *fmt); int main() { char a; a= 'fred'; } ================================================ FILE: 37_Switch/tests/input52.c ================================================ int printf(char *fmt); int main() { int a; a= $5.00; } ================================================ FILE: 37_Switch/tests/input53.c ================================================ int printf(char *fmt); int main() { printf("Hello world, %d\n", 23); return(0); } 
================================================ FILE: 37_Switch/tests/input54.c ================================================ int printf(char *fmt); int main() { int i; for (i=0; i < 20; i++) { printf("Hello world, %d\n", i); } return(0); } ================================================ FILE: 37_Switch/tests/input55.c ================================================ int printf(char *fmt); int main(int argc, char **argv) { int i; char *argument; printf("Hello world\n"); for (i=0; i < argc; i++) { argument= *argv; argv= argv + 1; printf("Argument %d is %s\n", i, argument); } return(0); } ================================================ FILE: 37_Switch/tests/input56.c ================================================ struct foo { int x; }; struct mary var1; ================================================ FILE: 37_Switch/tests/input57.c ================================================ struct fred { int x; } ; struct fred { char y; } ; ================================================ FILE: 37_Switch/tests/input58.c ================================================ int printf(char *fmt); struct fred { int x; char y; long z; }; struct fred var2; struct fred *varptr; int main() { long result; var2.x= 12; printf("%d\n", var2.x); var2.y= 'c'; printf("%d\n", var2.y); var2.z= 4005; printf("%d\n", var2.z); result= var2.x + var2.y + var2.z; printf("%d\n", result); varptr= &var2; result= varptr->x + varptr->y + varptr->z; printf("%d\n", result); return(0); } ================================================ FILE: 37_Switch/tests/input59.c ================================================ int main() { int x; x= y.foo; } ================================================ FILE: 37_Switch/tests/input60.c ================================================ int main() { int x; x= x.foo; } ================================================ FILE: 37_Switch/tests/input61.c ================================================ int main() { int x; x= x->foo; } 
================================================ FILE: 37_Switch/tests/input62.c ================================================ int printf(char *fmt); union fred { char w; int x; int y; long z; }; union fred var1; union fred *varptr; int main() { var1.x= 65; printf("%d\n", var1.x); var1.x= 66; printf("%d\n", var1.x); printf("%d\n", var1.y); printf("The next two depend on the endian of the platform\n"); printf("%d\n", var1.w); printf("%d\n", var1.z); varptr= &var1; varptr->x= 67; printf("%d\n", varptr->x); printf("%d\n", varptr->y); return(0); } ================================================ FILE: 37_Switch/tests/input63.c ================================================ int printf(char *fmt); enum fred { apple=1, banana, carrot, pear=10, peach, mango, papaya }; enum jane { aple=1, bnana, crrot, par=10, pech, mago, paaya }; enum fred var1; enum jane var2; enum fred var3; int main() { var1= carrot + pear + mango; printf("%d\n", var1); return(0); } ================================================ FILE: 37_Switch/tests/input64.c ================================================ enum fred var3; ================================================ FILE: 37_Switch/tests/input65.c ================================================ enum fred { x, y, z }; enum fred { a, b }; ================================================ FILE: 37_Switch/tests/input66.c ================================================ enum fred { x, y, z }; enum mary { a, b, z }; ================================================ FILE: 37_Switch/tests/input67.c ================================================ int printf(char *fmt); typedef int FOO; FOO var1; struct bar { int x; int y} ; typedef struct bar BAR; BAR var2; int main() { var1= 5; printf("%d\n", var1); var2.x= 7; var2.y= 10; printf("%d\n", var2.x + var2.y); return(0); } ================================================ FILE: 37_Switch/tests/input68.c ================================================ typedef int FOO; typedef char FOO; 
================================================ FILE: 37_Switch/tests/input69.c ================================================ typedef int FOO; FLOO y; ================================================ FILE: 37_Switch/tests/input70.c ================================================ #include <stdio.h> typedef int FOO; int main() { FOO x; x= 56; printf("%d\n", x); return(0); } ================================================ FILE: 37_Switch/tests/input71.c ================================================ #include <stdio.h> int main() { int x; x = 0; while (x < 100) { if (x == 5) { x = x + 2; continue; } printf("%d\n", x); if (x == 14) { break; } x = x + 1; } printf("Done\n"); return (0); } ================================================ FILE: 37_Switch/tests/input72.c ================================================ int main() { break; } ================================================ FILE: 37_Switch/tests/input73.c ================================================ int main() { continue; } ================================================ FILE: 37_Switch/tests/input74.c ================================================ #include <stdio.h> int main() { int x; int y; y= 0; for (x=0; x < 5; x++) { switch(x) { case 1: { y= 5; break; } case 2: { y= 7; break; } case 3: { y= 9; } default: { y= 100; } } printf("%d\n", y); } return(0); } ================================================ FILE: 37_Switch/tests/input75.c ================================================ int main() { int x; switch(x) { if (x<5); } } ================================================ FILE: 37_Switch/tests/input76.c ================================================ int main() { int x; switch(x) { } } ================================================ FILE: 37_Switch/tests/input77.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } case 4: { x= 2; } } } ================================================ FILE: 37_Switch/tests/input78.c
================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } default: { x= 2; } } } ================================================ FILE: 37_Switch/tests/input79.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } case 2: { x= 2; } case 1: { x= 2; } default: { x= 2; } } } ================================================ FILE: 37_Switch/tests/mktests ================================================ #!/bin/sh # Make the output files for each test # Build our compiler if needed if [ ! -f ../cwj ] then (cd ..; make install) fi for i in input*c do if [ ! -f "out.$i" -a ! -f "err.$i" ] then ../cwj -o out $i 2> "err.$i" # If the err file is empty if [ ! -s "err.$i" ] then rm -f "err.$i" cc -o out $i ../lib/printint.c ./out > "out.$i" fi fi rm -f out out.s done ================================================ FILE: 37_Switch/tests/out.input01.c ================================================ 36 10 25 ================================================ FILE: 37_Switch/tests/out.input02.c ================================================ 17 ================================================ FILE: 37_Switch/tests/out.input03.c ================================================ 1 2 3 4 5 ================================================ FILE: 37_Switch/tests/out.input04.c ================================================ 1 1 1 1 1 1 1 1 1 ================================================ FILE: 37_Switch/tests/out.input05.c ================================================ 6 ================================================ FILE: 37_Switch/tests/out.input06.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 37_Switch/tests/out.input07.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 37_Switch/tests/out.input08.c 
================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 37_Switch/tests/out.input09.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 37_Switch/tests/out.input10.c ================================================ 20 10 1 2 3 4 5 253 254 255 0 1 ================================================ FILE: 37_Switch/tests/out.input11.c ================================================ 10 20 30 1 2 3 4 5 253 254 255 0 1 2 3 1 2 3 4 5 ================================================ FILE: 37_Switch/tests/out.input12.c ================================================ 5 ================================================ FILE: 37_Switch/tests/out.input13.c ================================================ 23 56 ================================================ FILE: 37_Switch/tests/out.input14.c ================================================ 10 20 30 ================================================ FILE: 37_Switch/tests/out.input15.c ================================================ 18 18 12 12 ================================================ FILE: 37_Switch/tests/out.input16.c ================================================ 12 18 ================================================ FILE: 37_Switch/tests/out.input17.c ================================================ 19 12 ================================================ FILE: 37_Switch/tests/out.input18.c ================================================ 34 34 ================================================ FILE: 37_Switch/tests/out.input18a.c ================================================ 15 16 ================================================ FILE: 37_Switch/tests/out.input19.c ================================================ 30 ================================================ FILE: 37_Switch/tests/out.input20.c ================================================ 12 
================================================ FILE: 37_Switch/tests/out.input21.c ================================================ 10 Hello world ================================================ FILE: 37_Switch/tests/out.input22.c ================================================ 12 12 12 13 13 13 13 13 13 35 35 35 ================================================ FILE: 37_Switch/tests/out.input23.c ================================================ -23 100 -2 0 1 0 13 14 Hello world ================================================ FILE: 37_Switch/tests/out.input24.c ================================================ 2 59 57 8 7 ================================================ FILE: 37_Switch/tests/out.input25.c ================================================ 10 20 30 5 15 25 ================================================ FILE: 37_Switch/tests/out.input26.c ================================================ 13 23 34 44 54 64 74 84 94 95 96 ================================================ FILE: 37_Switch/tests/out.input27.c ================================================ 1 2 3 4 5 6 7 8 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 ================================================ FILE: 37_Switch/tests/out.input28.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 37_Switch/tests/out.input29.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 37_Switch/tests/out.input30.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input30.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 
37_Switch/tests/out.input53.c ================================================ Hello world, 23 ================================================ FILE: 37_Switch/tests/out.input54.c ================================================ Hello world, 0 Hello world, 1 Hello world, 2 Hello world, 3 Hello world, 4 Hello world, 5 Hello world, 6 Hello world, 7 Hello world, 8 Hello world, 9 Hello world, 10 Hello world, 11 Hello world, 12 Hello world, 13 Hello world, 14 Hello world, 15 Hello world, 16 Hello world, 17 Hello world, 18 Hello world, 19 ================================================ FILE: 37_Switch/tests/out.input55.c ================================================ Hello world Argument 0 is ./out ================================================ FILE: 37_Switch/tests/out.input58.c ================================================ 12 99 4005 4116 4116 ================================================ FILE: 37_Switch/tests/out.input62.c ================================================ 65 66 66 The next two depend on the endian of the platform 66 66 67 67 ================================================ FILE: 37_Switch/tests/out.input63.c ================================================ 25 ================================================ FILE: 37_Switch/tests/out.input67.c ================================================ 5 17 ================================================ FILE: 37_Switch/tests/out.input70.c ================================================ 56 ================================================ FILE: 37_Switch/tests/out.input71.c ================================================ 0 1 2 3 4 7 8 9 10 11 12 13 14 Done ================================================ FILE: 37_Switch/tests/out.input74.c ================================================ 100 5 7 100 100 ================================================ FILE: 37_Switch/tests/runtests ================================================ #!/bin/sh # Run each test and compare # against known good output # 
Build our compiler if needed if [ ! -f ../cwj ] then (cd ..; make install) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" # Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../cwj -o out $i ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../cwj $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.s "trial.$i" done ================================================ FILE: 37_Switch/tests/runtestsn ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! -f ../compn ] then (cd ..; make install) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" 
# Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../compn -o out $i ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../compn $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.o out.s "trial.$i" done ================================================ FILE: 37_Switch/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->type = type; n->left = left; n->mid = mid; n->right = right; n->sym = sym; n->intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, struct symtable *sym, int intvalue) { return (mkastnode(op, type, NULL, NULL, NULL, sym, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, struct symtable *sym, int intvalue) { return (mkastnode(op, type, left, NULL, NULL, sym, 
intvalue)); } // Generate and return a new label number // just for AST dumping purposes static int gendumplabel(void) { static int id = 1; return (id++); } // Given an AST tree, print it out and follow the // traversal of the tree that genAST() follows void dumpAST(struct ASTnode *n, int label, int level) { int Lfalse, Lstart, Lend; switch (n->op) { case A_IF: Lfalse = gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_IF"); if (n->right) { Lend = gendumplabel(); fprintf(stdout, ", end L%d", Lend); } fprintf(stdout, "\n"); dumpAST(n->left, Lfalse, level + 2); dumpAST(n->mid, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); return; case A_WHILE: Lstart = gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_WHILE, start L%d\n", Lstart); Lend = gendumplabel(); dumpAST(n->left, Lend, level + 2); dumpAST(n->right, NOLABEL, level + 2); return; } // Reset level to -2 for A_GLUE if (n->op == A_GLUE) level = -2; // General AST node handling if (n->left) dumpAST(n->left, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); for (int i = 0; i < level; i++) fprintf(stdout, " "); switch (n->op) { case A_GLUE: fprintf(stdout, "\n\n"); return; case A_FUNCTION: fprintf(stdout, "A_FUNCTION %s\n", n->sym->name); return; case A_ADD: fprintf(stdout, "A_ADD\n"); return; case A_SUBTRACT: fprintf(stdout, "A_SUBTRACT\n"); return; case A_MULTIPLY: fprintf(stdout, "A_MULTIPLY\n"); return; case A_DIVIDE: fprintf(stdout, "A_DIVIDE\n"); return; case A_EQ: fprintf(stdout, "A_EQ\n"); return; case A_NE: fprintf(stdout, "A_NE\n"); return; case A_LT: fprintf(stdout, "A_LE\n"); return; case A_GT: fprintf(stdout, "A_GT\n"); return; case A_LE: fprintf(stdout, "A_LE\n"); return; case A_GE: fprintf(stdout, "A_GE\n"); return; case A_INTLIT: fprintf(stdout, "A_INTLIT %d\n", n->intvalue); return; case A_STRLIT: fprintf(stdout, "A_STRLIT rval label L%d\n", n->intvalue); return; case 
A_IDENT: if (n->rvalue) fprintf(stdout, "A_IDENT rval %s\n", n->sym->name); else fprintf(stdout, "A_IDENT %s\n", n->sym->name); return; case A_ASSIGN: fprintf(stdout, "A_ASSIGN\n"); return; case A_WIDEN: fprintf(stdout, "A_WIDEN\n"); return; case A_RETURN: fprintf(stdout, "A_RETURN\n"); return; case A_FUNCCALL: fprintf(stdout, "A_FUNCCALL %s\n", n->sym->name); return; case A_ADDR: fprintf(stdout, "A_ADDR %s\n", n->sym->name); return; case A_DEREF: if (n->rvalue) fprintf(stdout, "A_DEREF rval\n"); else fprintf(stdout, "A_DEREF\n"); return; case A_SCALE: fprintf(stdout, "A_SCALE %d\n", n->size); return; case A_PREINC: fprintf(stdout, "A_PREINC %s\n", n->sym->name); return; case A_PREDEC: fprintf(stdout, "A_PREDEC %s\n", n->sym->name); return; case A_POSTINC: fprintf(stdout, "A_POSTINC\n"); return; case A_POSTDEC: fprintf(stdout, "A_POSTDEC\n"); return; case A_NEGATE: fprintf(stdout, "A_NEGATE\n"); return; case A_BREAK: fprintf(stdout, "A_BREAK\n"); return; case A_CONTINUE: fprintf(stdout, "A_CONTINUE\n"); return; case A_CASE: fprintf(stdout, "A_CASE %d\n", n->intvalue); return; case A_DEFAULT: fprintf(stdout, "A_DEFAULT\n"); return; case A_SWITCH: fprintf(stdout, "A_SWITCH\n"); return; default: fatald("Unknown dumpAST operator", n->op); } } ================================================ FILE: 37_Switch/types.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Types and type handling // Copyright (c) 2019 Warren Toomey, GPL3 // Return true if a type is an int type // of any size, false otherwise int inttype(int type) { return (((type & 0xf) == 0) && (type <= P_LONG)); } // Return true if a type is of pointer type int ptrtype(int type) { return ((type & 0xf) != 0); } // Given a primitive type, return // the type which is a pointer to it int pointer_to(int type) { if ((type & 0xf) == 0xf) fatald("Unrecognised in pointer_to: type", type); return (type + 1); } // Given a primitive pointer type, return // the type 
which it points to int value_at(int type) { if ((type & 0xf) == 0x0) fatald("Unrecognised in value_at: type", type); return (type - 1); } // Given a type and a composite type pointer, return // the size of this type in bytes int typesize(int type, struct symtable *ctype) { if (type == P_STRUCT || type == P_UNION) return (ctype->size); return (genprimsize(type)); } // Given an AST tree and a type which we want it to become, // possibly modify the tree by widening or scaling so that // it is compatible with this type. Return the original tree // if no changes occurred, a modified tree, or NULL if the // tree is not compatible with the given type. // If this will be part of a binary operation, the AST op is not zero. struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op) { int ltype; int lsize, rsize; ltype = tree->type; // XXX No idea on these yet if (ltype == P_STRUCT || ltype == P_UNION) fatal("Don't know how to do this yet"); if (rtype == P_STRUCT || rtype == P_UNION) fatal("Don't know how to do this yet"); // Compare scalar int types if (inttype(ltype) && inttype(rtype)) { // Both types same, nothing to do if (ltype == rtype) return (tree); // Get the sizes for each type lsize = typesize(ltype, NULL); // XXX Fix soon rsize = typesize(rtype, NULL); // XXX Fix soon // Tree's size is too big if (lsize > rsize) return (NULL); // Widen to the right if (rsize > lsize) return (mkastunary(A_WIDEN, rtype, tree, NULL, 0)); } // For pointers on the left if (ptrtype(ltype)) { // OK is same type on right and not doing a binary op if (op == 0 && ltype == rtype) return (tree); } // We can scale only on A_ADD or A_SUBTRACT operation if (op == A_ADD || op == A_SUBTRACT) { // Left is int type, right is pointer type and the size // of the original type is >1: scale the left if (inttype(ltype) && ptrtype(rtype)) { rsize = genprimsize(value_at(rtype)); if (rsize > 1) return (mkastunary(A_SCALE, rtype, tree, NULL, rsize)); else return (tree); // Size 1, no need to scale 
} } // If we get here, the types are not compatible return (NULL); } ================================================ FILE: 38_Dangling_Else/Makefile ================================================ # Define the location of the include directory # and the location to install the compiler binary INCDIR=/tmp/include BINDIR=/tmp HSRCS= data.h decl.h defs.h SRCS= cg.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c ARMSRCS= cg_arm.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c cwj: $(SRCS) $(HSRCS) cc -o cwj -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCS) compn: $(SRCN) $(HSRCS) cc -D__NASM__ -o compn -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCN) cwjarm: $(ARMSRCS) $(HSRCS) cc -o cwjarm -g -Wall $(ARMSRCS) cp cwjarm cwj install: cwj mkdir -p $(INCDIR) rsync -a include/. $(INCDIR) cp cwj $(BINDIR) chmod +x $(BINDIR)/cwj installn: compn mkdir -p $(INCDIR) rsync -a include/. $(INCDIR) cp compn $(BINDIR) chmod +x $(BINDIR)/compn clean: rm -f cwj cwjarm compn *.o *.s out a.out test: install tests/runtests (cd tests; chmod +x runtests; ./runtests) armtest: cwjarm tests/runtests (cd tests; chmod +x runtests; ./runtests) testn: installn tests/runtestsn (cd tests; chmod +x runtestsn; ./runtestsn) ================================================ FILE: 38_Dangling_Else/Readme.md ================================================ # Part 38: Dangling Else and More I started this part of our compiler writing journey hoping to fix up the [dangling else problem](https://en.wikipedia.org/wiki/Dangling_else). It turns out that what I actually had to do was restructure the way we parse a few things because I had the parsing wrong in the first place. This probably happened because I was keen to add functionality, but in the process I didn't step back enough and consider what we had been building. So, let's see what mistakes in the compiler need fixing. 
## Fixing Up the For Grammar We'll start with our FOR loop structure. Yes it works, but it isn't as general as it should be. Up to now, the BNF grammar for our FOR loop has been: ``` for_statement: 'for' '(' preop_statement ';' true_false_expression ';' postop_statement ')' compound_statement ; ``` However, the [BNF Grammar for C](https://www.lysator.liu.se/c/ANSI-C-grammar-y.html) has this: ``` for_statement: | FOR '(' expression_statement expression_statement ')' statement | FOR '(' expression_statement expression_statement expression ')' statement ; expression_statement : ';' | expression ';' ; ``` and an `expression` is actually an expression list where expressions are separated by commas. This means that all three clauses of a FOR loop can actually be expression lists. If we were writing a "full" C compiler, this would end up being tricky. However, we are only writing a compiler for a *subset* of C, and therefore I don't have to make our compiler deal with the full grammar for C. So, I have changed the parser for the FOR loop to recognise this: ``` for_statement: 'for' '(' expression_list ';' true_false_expression ';' expression_list ')' compound_statement ; ``` The middle clause is a single expression that must provide a true or false result. The first and last clauses can be expression lists. This allows a FOR loop like the one now in `tests/input80.c`: ```c for (x=0, y=1; x < 6; x++, y=y+2) ``` ## Changes to `expression_list()` To do the above, I need to modify the `for_statement()` parsing function to call `expression_list()` to parse the list of expressions in the first and third clause. But, in the existing compiler, `expression_list()` only allows the ')' token to end an expression list. Therefore, I've modified `expression_list()` in `expr.c` to get the end token as an argument. And in `for_statement()` in `stmt.c`, we now have this code: ```c // Parse a FOR statement and return its AST static struct ASTnode *for_statement(void) { ... 
// Get the pre_op expression and the ';' preopAST = expression_list(T_SEMI); semi(); ... // Get the condition and the ';'. condAST = binexpr(0); semi(); ... // Get the post_op expression and the ')' postopAST = expression_list(T_RPAREN); rparen(); } ``` And the code in `expression_list()` now looks like this: ```c struct ASTnode *expression_list(int endtoken) { ... // Loop until the end token while (Token.token != endtoken) { // Parse the next expression child = binexpr(0); // Build an A_GLUE AST node ... tree = mkastnode(A_GLUE, P_NONE, tree, NULL, child, NULL, exprcount); // Stop when we reach the end token if (Token.token == endtoken) break; // Must have a ',' at this point match(T_COMMA, ","); } // Return the tree of expressions return (tree); } ``` ## Single and Compound Statements Up to now, I've forced the programmer using our compiler to always put code in '{' ... '}' for: + the true body of an IF statement + the false body of an IF statement + the body of a WHILE statement + the body of a FOR statement + the body after a 'case' clause + the body after a 'default' clause For the first four statements in this list, we don't need curly brackets when there is only a single statement, e.g. ```c if (x>5) x= x - 16; else x++; ``` But when there are multiple statements in the body, we *do* need a compound statement which is a set of single statements surrounded by curly brackets, e.g. ```c if (x>5) { x= x - 16; printf("not again!\n"); } else x++; ``` But, for some unknown reason, the code after a 'case' or 'default' clause in a 'switch' statement can be a set of single statements and we don't need curly brackets!! Who was the crazy person who thought that was OK? An example: ```c switch (x) { case 1: printf("statement 1\n"); printf("statement 2\n"); break; default: ... } ``` Even worse, this is also legal: ```c switch (x) { case 1: { printf("statement 1\n"); printf("statement 2\n"); break; } default: ... 
} ``` Therefore, we need to be able to parse: + single statements + a set of statements which are surrounded by curly brackets + a set of statements which don't start with a '{', but end with one of 'case', 'default', or '}' if they started with '{' To this end, I've modified the `compound_statement()` in `stmt.c` to take an argument: ```c // Parse a compound statement // and return its AST. If inswitch is true, // we look for a '}', 'case' or 'default' token // to end the parsing. Otherwise, look for // just a '}' to end the parsing. struct ASTnode *compound_statement(int inswitch) { struct ASTnode *left = NULL; struct ASTnode *tree; while (1) { // Parse a single statement tree = single_statement(); ... // Leave if we've hit the end token if (Token.token == T_RBRACE) return(left); if (inswitch && (Token.token == T_CASE || Token.token == T_DEFAULT)) return(left); } } ``` If this function gets called with `inswitch` set to 1, then we have been called during the parsing of a 'switch' statement, so look for 'case', 'default' or '}' to end the compound statement. Otherwise, we are in a more typical '{' ... '}' situation. Now, we also need to allow: + a single statement inside the body of an IF statement + a single statement inside the body of a WHILE statement + a single statement inside the body of a FOR statement All of these are, at present, calling `compound_statement(0)`, but this enforces the parsing of a closing '}', and we won't have one of these for a single statement. The answer is to get the IF, WHILE and FOR parsing code to call `single_statement()` to parse one statement. And, get `single_statement()` to call `compound_statement()` if it sees an opening curly bracket. Thus, I've also made these changes in `stmt.c`: ```c // Parse a single statement and return its AST. static struct ASTnode *single_statement(void) { ... 
switch (Token.token) { case T_LBRACE: // We have a '{', so this is a compound statement lbrace(); stmt = compound_statement(0); rbrace(); return(stmt); } ... static struct ASTnode *if_statement(void) { ... // Get the AST for the statement trueAST = single_statement(); ... // If we have an 'else', skip it // and get the AST for the statement if (Token.token == T_ELSE) { scan(&Token); falseAST = single_statement(); } ... } ... static struct ASTnode *while_statement(void) { ... // Get the AST for the statement. // Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; ... } ... static struct ASTnode *for_statement(void) { ... // Get the statement which is the body // Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; ... } ``` This now means the compiler will accept code which looks like this: ```c if (x>5) x= x - 16; else x++; ``` ## Yes, But "Dangling Else?" I still haven't solved the "dangling else" problem, which after all is why I started this part of the journey. Well, it turns out that this problem was solved due to the way that we already parse our input. Consider this program: ```c // Dangling else test. // We should not print anything for x<= 5 for (x=0; x < 12; x++) if (x > 5) if (x > 10) printf("10 < %2d\n", x); else printf(" 5 < %2d <= 10\n", x); ``` We want the 'else' code to pair up with the nearest 'if' statement. Therefore, the last `printf` statement above should only print when `x` is between 5 and 10. The 'else' code should *not* be invoked due to the opposite of `x > 5`. 
Luckily, in our `if_statement()` parser, we greedily scan for any 'else' token after the body of the IF statement: ```c // Get the AST for the statement trueAST = single_statement(); // If we have an 'else', skip it // and get the AST for the statement if (Token.token == T_ELSE) { scan(&Token); falseAST = single_statement(); } ``` This forces the 'else' to pair up with the nearest 'if' and solves the dangling else problem. So, all this time, I was forcing the use of '{' ... '}' when I'd already solved the problem I was worrying about! Sigh. ## Some Better Debug Output Finally, I've made a change to our scanner to improve debugging. Or, more exactly, to improve the debug messages that we print out. Up to now, we have been printing the token numeric value in our error messages, e.g. + Unexpected token in parameter list: 23 + Expecting a primary expression, got token: 19 + Syntax error, token: 44 For the programmer who receives these error messages, they are essentially unusable. In `scan.c`, I've added this list of token strings: ```c // List of token strings, for debugging purposes char *Tstring[] = { "EOF", "=", "||", "&&", "|", "^", "&", "==", "!=", "<", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", "default", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":" }; ``` In `defs.h`, I've added another field to the Token structure: ```c // Token structure struct token { int token; // Token type, from the enum list above char *tokstr; // String version of the token int intvalue; // For T_INTLIT, the integer value }; ``` In `scan()` in `scan.c`, just before we return a token, we set up its string equivalent: ```c t->tokstr = Tstring[t->token]; ``` And, finally, I've modified a bunch of `fatalXX()` calls to print out the `tokstr` field of the current 
token instead of the `intvalue` field. This means we now see: + Unexpected token in parameter list: == + Expecting a primary expression, got token: ] + Syntax error, token: >> which is much better. ## Conclusion and What's Next I set out to solve the "dangling else" misfeature in our compiler and ended up fixing a bunch of other misfeatures. In the process, I found out that there was no "dangling else" problem to solve. We have reached a stage in the development of the compiler where all the essential elements we need to self-compile the compiler are implemented, but now we need to find and fix a bunch of small issues. This is the "mop up" phase. What this means is, from now on, there will be less and less on how to write a compiler, and more and more on how to fix a broken compiler. I won't be disappointed if you choose to bail out on the future parts of our journey. If you do, I hope that you found all the parts of the journey so far useful. In the next part of our compiler writing journey, I will pick something that currently doesn't work but we need to work to self-compile our compiler, and fix it. [Next step](../39_Var_Initialisation_pt1/Readme.md) ================================================ FILE: 38_Dangling_Else/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section we are outputting into enum { no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\t.text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\t.data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. 
int cgprimsize(int type) {
  // Every pointer type occupies 8 bytes
  if (ptrtype(type))
    return (8);
  switch (type) {
    case P_CHAR:
      return (1);
    case P_INT:
      return (4);
    case P_LONG:
      return (8);
    default:
      fatald("Bad type in cgprimsize:", type);
  }
  return (0);			// Keep -Wall happy
}

// Given a scalar type, an existing memory offset
// (which hasn't been allocated to anything yet)
// and a direction (1 is up, -1 is down), calculate
// and return a suitably aligned memory offset
// for this scalar type. This could be the original
// offset, or it could be above/below the original.
// Chars are left on any offset; ints, longs and
// pointers are moved to a 4-byte boundary. Any other
// type is reported through fatald().
int cgalign(int type, int offset, int direction) {
  int alignment;

  // We don't need to do this on x86-64, but let's
  // align chars on any offset and align ints/pointers
  // on a 4-byte alignment
  if (type == P_CHAR)
    return (offset);

  // Pointers take the same path as ints and longs. Previously
  // every pointer type fell into the error case below, even
  // though the comment above promises to align them.
  if (!ptrtype(type) && type != P_INT && type != P_LONG)
    fatald("Bad type in cgalign:", type);

  // Here we have an int, a long or a pointer. Align it on a
  // 4-byte offset: step (alignment - 1) bytes in the requested
  // direction, then clear the low bits.
  // I put the generic code here so it can be reused elsewhere.
  alignment = 4;
  offset = (offset + direction * (alignment - 1)) & ~(alignment - 1);
  return (offset);
}

// Position of next local variable relative to stack base pointer.
// We store the offset as positive to make aligning the stack pointer easier
static int localOffset;
// Size of the current function's stack frame, rounded up to a
// multiple of 16 when the function preamble is generated
static int stackOffset;

// Create the position of a new local variable.
// Returns the (negative) offset of the variable
// from the stack base pointer.
static int newlocaloffset(int type) {
  // Decrement the offset by a minimum of 4 bytes
  // and allocate on the stack
  localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4;
  return (-localOffset);
}

// List of available registers and their names.
// We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "%r10", "%r11", "%r12", "%r13", "%r9", "%r8", "%rcx", "%rdx", "%rsi", "%rdi" }; static char *breglist[] = { "%r10b", "%r11b", "%r12b", "%r13b", "%r9b", "%r8b", "%cl", "%dl", "%sil", "%dil" }; static char *dreglist[] = { "%r10d", "%r11d", "%r12d", "%r13d", "%r9d", "%r8d", "%ecx", "%edx", "%esi", "%edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); cgtextseg(); fprintf(Outfile, "# internal switch(expr) routine\n" "# %%rsi = switch table, %%rax = expr\n" "# from SubC: http://www.t3x.org/subc/\n" "\n" "switch:\n" " pushq %%rsi\n" " movq %%rdx, %%rsi\n" " movq %%rax, %%rbx\n" " cld\n" " lodsq\n" " movq %%rax, %%rcx\n" "next:\n" " lodsq\n" " movq %%rax, %%rdx\n" " lodsq\n" " cmpq %%rdx, %%rbx\n" " jnz no\n" " popq %%rsi\n" " jmp *%%rax\n" "no:\n" " loop next\n" " lodsq\n" " popq %%rsi\n" " jmp *%%rax\n" "\n"); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(struct symtable *sym) { char *name = sym->name; struct symtable *parm, *locvar; int cnt; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the %rsp and %rsp fprintf(Outfile, "\t.globl\t%s\n" "\t.type\t%s, @function\n" "%s:\n" "\tpushq\t%%rbp\n" "\tmovq\t%%rsp, %%rbp\n", name, name, name); // Copy any in-register parameters to the stack, up to six of them // The remaining parameters are already on the stack for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) { if (cnt > 6) { parm->posn = paramOffset; paramOffset += 8; } else { parm->posn = newlocaloffset(parm->type); cgstorlocal(paramReg--, parm); } } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. 
for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) { locvar->posn = newlocaloffset(locvar->type); } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\taddq\t$%d,%%rsp\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->endlabel); fprintf(Outfile, "\taddq\t$%d,%%rsp\n", stackOffset); fputs("\tpopq %rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadglob(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name); } else // Print out the code to initialise it switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovzbq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name); if (op == 
A_PREDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovslq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadlocal(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->posn); fprintf(Outfile, "\tmovq\t%d(%%rbp), %s\n", sym->posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->posn); } else switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->posn); fprintf(Outfile, "\tmovzbq\t%d(%%rbp), %s\n", sym->posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->posn); fprintf(Outfile, "\tmovslq\t%d(%%rbp), %s\n", sym->posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->posn); break; default: fatald("Bad type in cgloadlocal:", sym->type); } return (r); } // Given the label number of a global string, // load its address into a new 
register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tleaq\tL%d(%%rip), %s\n", label, reglist[r]); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tandq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\torq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txorq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshlq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshrq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tnegq\t%s\n", reglist[r]); return (r); } 
// Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnotq\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); return (r); } // Convert an integer value to a boolean value. Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s@PLT\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\taddq\t$%d, %%rsp\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpushq\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmovq\t%s, %s\n", reglist[r], reglist[FIRSTPARAMREG - argposn + 1]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsalq\t$%d, %s\n", val, reglist[r]); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %s(%%rip)\n", reglist[r], sym->name); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %s(%%rip)\n", breglist[r], sym->name); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %s(%%rip)\n", dreglist[r], sym->name); break; default: fatald("Bad type in cgstorglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %d(%%rbp)\n", reglist[r], sym->posn); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %d(%%rbp)\n", breglist[r], sym->posn); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %d(%%rbp)\n", dreglist[r], sym->posn); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the type size = typesize(node->type, node->ctype); // Generate the global identity and the label cgdataseg(); fprintf(Outfile, "\t.globl\t%s\n", node->name); fprintf(Outfile, "%s:", node->name); // Generate the space for this type switch (size) { case 1: fprintf(Outfile, "\t.byte\t0\n"); break; case 4: fprintf(Outfile, "\t.long\t0\n"); break; case 8: fprintf(Outfile, "\t.quad\t0\n"); 
break; default: for (int i = 0; i < size; i++) fprintf(Outfile, "\t.byte\t0\n"); } } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\t.byte\t%d\n", *cptr); } fprintf(Outfile, "\t.byte\t0\n"); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzbl\t%s, %%eax\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %%eax\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } cgjump(sym->endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL) fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", sym->name, reglist[r]); else fprintf(Outfile, "\tleaq\t%d(%%rbp), %s\n", sym->posn, reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzbq\t(%s), %s\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovslq\t(%s), %s\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { // Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmovb\t%s, (%s)\n", breglist[r1], reglist[r2]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } // Generate a switch jump table and the code to // load the registers and call the switch() code void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; // Get a label for the switch table label = genlabel(); cglabel(label); // Heuristic. If we have no cases, create one case // which points to the default case if (casecount == 0) { caseval[0] = 0; caselabel[0] = defaultlabel; casecount = 1; } // Generate the switch jump table. 
fprintf(Outfile, "\t.quad\t%d\n", casecount); for (i = 0; i < casecount; i++) fprintf(Outfile, "\t.quad\t%d, L%d\n", caseval[i], caselabel[i]); fprintf(Outfile, "\t.quad\tL%d\n", defaultlabel); // Load the specific registers cglabel(toplabel); fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); fprintf(Outfile, "\tleaq\tL%d(%%rip), %%rdx\n", label); fprintf(Outfile, "\tjmp\tswitch\n"); } ================================================ FILE: 38_Dangling_Else/cg_arm.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for ARMv6 on Raspberry Pi // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. static int freereg[4]; static char *reglist[4] = { "r4", "r5", "r6", "r7" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // We have to store large integer literal values in memory. // Keep a list of them which will be output in the postamble #define MAXINTS 1024 int Intlist[MAXINTS]; static int Intslot = 0; // Determine the offset of a large integer // literal from the .L3 label. If the integer // isn't in the list, add it. 
static void set_int_offset(int val) { int offset = -1; // See if it is already there for (int i = 0; i < Intslot; i++) { if (Intlist[i] == val) { offset = 4 * i; break; } } // Not in the list, so add it if (offset == -1) { offset = 4 * Intslot; if (Intslot == MAXINTS) fatal("Out of int slots in set_int_offset()"); Intlist[Intslot++] = val; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L3+%d\n", offset); } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Print out the assembly postamble void cgpostamble() { // Print out the global variables fprintf(Outfile, ".L2:\n"); for (int i = 0; i < Globs; i++) { if (Symtable[i].stype == S_VARIABLE) fprintf(Outfile, "\t.word %s\n", Symtable[i].name); } // Print out the integer literals fprintf(Outfile, ".L3:\n"); for (int i = 0; i < Intslot; i++) { fprintf(Outfile, "\t.word %d\n", Intlist[i]); } } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, \%%function\n" "%s:\n" "\tpush\t{fp, lr}\n" "\tadd\tfp, sp, #4\n" "\tsub\tsp, sp, #8\n" "\tstr\tr0, [fp, #-8]\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Symtable[id].endlabel); fputs("\tsub\tsp, fp, #4\n" "\tpop\t{fp, pc}\n" "\t.align\t2\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); // If the literal value is small, do it with one instruction if (value <= 1000) fprintf(Outfile, "\tmov\t%s, #%d\n", reglist[r], value); else { set_int_offset(value); fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); } return (r); } // Determine the offset of a variable from the .L2 // label. Yes, this is inefficient code. static void set_var_offset(int id) { int offset = 0; // Walk the symbol table up to id. 
// Find S_VARIABLEs and add on 4 until // we get to our variable for (int i = 0; i < id; i++) { if (Symtable[i].stype == S_VARIABLE) offset += 4; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L2+%d\n", offset); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tldrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s, %s\n", reglist[r1], reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\tmul\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { // To do a divide: r0 holds the dividend, r1 holds the divisor. // The quotient is in r0. 
fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r1]); fprintf(Outfile, "\tmov\tr1, %s\n", reglist[r2]); fprintf(Outfile, "\tbl\t__aeabi_idiv\n"); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r1]); free_register(r2); return (r1); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]); fprintf(Outfile, "\tbl\t%s\n", Symtable[id].name); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r]); return (r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tlsl\t%s, %s, #%d\n", reglist[r], reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tstr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgstorglob:", Symtable[id].type); } return (r); } // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { if (ptrtype(type)) return (4); switch (type) { case P_CHAR: return (1); case P_INT: case P_LONG: return (4); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Symtable[id].type); fprintf(Outfile, "\t.data\n" "\t.globl\t%s\n", Symtable[id].name); switch (typesize) { case 1: fprintf(Outfile, "%s:\t.byte\t0\n", Symtable[id].name); break; case 4: fprintf(Outfile, "%s:\t.long\t0\n", Symtable[id].name); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "moveq", "movne", "movlt", "movgt", "movle", "movge" }; // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "movne", "moveq", "movge", "movle", "movgt", "movlt" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #1\n", cmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #0\n", invcmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\tuxtb\t%s, %s\n", reglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tb\tL%d\n", l); } // List of inverted branch instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *brlist[] = { "bne", "beq", "bge", "ble", "bgt", "blt" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", brlist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[reg]); cgjump(Symtable[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. Return a new register int cgaddress(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); fprintf(Outfile, "\tmov\t%s, r3\n", reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tldrb\t%s, [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [%s]\n", reglist[r1], reglist[r2]); break; case P_INT: case P_LONG: fprintf(Outfile, "\tstr\t%s, [%s]\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 38_Dangling_Else/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { 
no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\tsection .text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\tsection .data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch (type) { case P_CHAR: return (offset); case P_INT: case P_LONG: break; default: fatald("Bad type in calc_aligned_offset:", type); } // Here we have an int or a long. Align it on a 4-byte offset // I put the generic code here so it can be reused elsewhere. alignment = 4; offset = (offset + direction * (alignment - 1)) & ~(alignment - 1); return (offset); } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. 
// We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "r10", "r11", "r12", "r13", "r9", "r8", "rcx", "rdx", "rsi", "rdi" }; static char *breglist[] = { "r10b", "r11b", "r12b", "r13b", "r9b", "r8b", "cl", "dl", "sil", "dil" }; static char *dreglist[] = { "r10d", "r11d", "r12d", "r13d", "r9d", "r8d", "ecx", "edx", "esi", "edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\textern\tprintint\n", Outfile); fputs("\textern\tprintchar\n", Outfile); fputs("\textern\topen\n", Outfile); fputs("\textern\tclose\n", Outfile); fputs("\textern\tread\n", Outfile); fputs("\textern\twrite\n", Outfile); fputs("\textern\tprintf\n", Outfile); cgtextseg(); fprintf(Outfile, "; internal switch(expr) routine\n" "; rsi = switch table, rax = expr\n" "; from SubC: http://www.t3x.org/subc/\n" "\n" "switch:\n" " push rsi\n" " mov rsi, rdx\n" " mov rbx, rax\n" " cld\n" " lodsq\n" " mov rcx, rax\n" "next:\n" " lodsq\n" " mov rdx, rax\n" " lodsq\n" " cmp rbx, rdx\n" " jnz no\n" " pop rsi\n" " jmp rax\n" "no:\n" " loop next\n" " lodsq\n" " pop rsi\n" " jmp rax\n" "\n"); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(struct symtable *sym) { char *name = sym->name; struct symtable *parm, *locvar; int cnt; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the rsp and rbp fprintf(Outfile, "\tglobal\t%s\n" "%s:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n", name, name); // Copy any in-register parameters to the stack, up to six of them // The remaining parameters are already on the stack for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) { if (cnt > 6) { parm->posn = paramOffset; paramOffset += 8; } else { parm->posn = newlocaloffset(parm->type); cgstorlocal(paramReg--, parm); } } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. 
for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) { locvar->posn = newlocaloffset(locvar->type); } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\tadd\trsp, %d\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->endlabel); fprintf(Outfile, "\tadd\trsp, %d\n", stackOffset); fputs("\tpop rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadglob(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); } else // Print out the code to initialise it switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, 
"\tdec\tdword [%s]\n", sym->name); fprintf(Outfile, "\tmovsx\t%s, word [%s]\n", dreglist[r], sym->name); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword [%s]\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadlocal(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->posn); fprintf(Outfile, "\tmov\t%s, [rbp+%d]\n", reglist[r], sym->posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->posn); } else switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->posn); fprintf(Outfile, "\tmovzx\t%s, byte [rbp+%d]\n", reglist[r], sym->posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", sym->posn); fprintf(Outfile, "\tmovsx\t%s, dword [rbp+%d]\n", reglist[r], sym->posn); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, 
"\tdec\tdword\t[rbp+%d]\n", sym->posn); break; default: fatald("Bad type in cgloadlocal:", sym->type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, L%d\n", reglist[r], label); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tand\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\tor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshl\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshr\t%s, 
cl\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tneg\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnot\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r], breglist[r]); return (r); } // Convert an integer value to a boolean value. Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, byte %s\n", reglist[r], breglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\tadd\trsp, %d\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmov\t%s, rax\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpush\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmov\t%s, %s\n", reglist[FIRSTPARAMREG - argposn + 1], reglist[r]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsal\t%s, %d\n", reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, dreglist[r]); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\tqword\t[rbp+%d], %s\n", sym->posn, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\tbyte\t[rbp+%d], %s\n", sym->posn, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\tdword\t[rbp+%d], %s\n", sym->posn, dreglist[r]); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the type size = typesize(node->type, node->ctype); // Generate the global identity and the label cgdataseg(); fprintf(Outfile, "\tsection\t.data\n" "\tglobal\t%s\n", node->name); fprintf(Outfile, "%s:", node->name); // Generate the space for this type // original version for (int i = 0; i < node->size; i++) { switch(size) { case 1: fprintf(Outfile, "\tdb\t0\n"); break; case 4: fprintf(Outfile, 
"\tdd\t0\n"); break; case 8: fprintf(Outfile, "\tdq\t0\n"); break; default: for (int i = 0; i < size; i++) fprintf(Outfile, "\tdb\t0\n"); } } /* compact version using times instead of loop switch(size) { case 1: fprintf(Outfile, "\ttimes\t%d\tdb\t0\n", node->size); break; case 4: fprintf(Outfile, "\ttimes\t%d\tdd\t0\n", node->size); break; case 8: fprintf(Outfile, "\ttimes\t%d\tdq\t0\n", node->size); break; default: fprintf(Outfile, "\ttimes\t%d\tdb\t0\n", size); } */ } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\tdb\t%d\n", *cptr); } fprintf(Outfile, "\tdb\t0\n"); /* put string in readable format on a single line // probably went overboard with error checking int comma = 0, quote = 0, start = 1; fprintf(Outfile, "\tdb\t"); for (cptr=strvalue; *cptr; cptr++) { if ( ! isprint(*cptr) ) if (comma || start) { fprintf(Outfile, "%d, ", *cptr); start = 0; comma = 1; } else if (quote) { fprintf(Outfile, "\', %d, ", *cptr); comma = 1; quote = 0; } else { fprintf(Outfile, "%d, ", *cptr); comma = 1; quote = 0; } else if (start || comma) { fprintf(Outfile, "\'%c", *cptr); start = comma = 0; quote = 1; } else { fprintf(Outfile, "%c", *cptr); comma = 0; quote = 1; } } if (comma || start) fprintf(Outfile, "0\n"); else fprintf(Outfile, "\', 0\n"); */ } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzx\teax, %s\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmov\teax, %s\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } cgjump(sym->endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL) fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r], sym->name); else fprintf(Outfile, "\tlea\t%s, [rbp+%d]\n", reglist[r], sym->posn); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovsx\t%s, dword [%s]\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { //Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmov\t[%s], byte %s\n", reglist[r2], breglist[r1]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } // Generate a switch jump table and the code to // load the registers and call the switch() code void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; // Get a label for the switch table label = genlabel(); cglabel(label); // Heuristic. If we have no cases, create one case // which points to the default case if (casecount == 0) { caseval[0] = 0; caselabel[0] = defaultlabel; casecount = 1; } // Generate the switch jump table. 
fprintf(Outfile, "\tdq\t%d\n", casecount); for (i = 0; i < casecount; i++) fprintf(Outfile, "\tdq\t%d, L%d\n", caseval[i], caselabel[i]); fprintf(Outfile, "\tdq\tL%d\n", defaultlabel); // Load the specific registers cglabel(toplabel); fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); fprintf(Outfile, "\tmov\trdx, L%d\n", label); fprintf(Outfile, "\tjmp\tswitch\n"); } ================================================ FILE: 38_Dangling_Else/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Putback; // Character put back by scanner extern_ struct symtable *Functionid; // Symbol ptr of the current function extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ char *Infilename; // Name of file we are parsing extern_ char *Outfilename; // Name of file we opened as Outfile extern_ struct token Token; // Last token scanned extern_ char Text[TEXTLEN + 1]; // Last identifier scanned extern_ int Looplevel; // Depth of nested loops extern_ int Switchlevel; // Depth of nested switches // Symbol table lists extern_ struct symtable *Globhead, *Globtail; // Global variables and functions extern_ struct symtable *Loclhead, *Locltail; // Local variables extern_ struct symtable *Parmhead, *Parmtail; // Local parameters extern_ struct symtable *Membhead, *Membtail; // Temp list of struct/union members extern_ struct symtable *Structhead, *Structtail; // List of struct types extern_ struct symtable *Unionhead, *Uniontail; // List of union types extern_ struct symtable *Enumhead, *Enumtail; // List of enum types and values extern_ struct symtable *Typehead, *Typetail; // List of typedefs // Command-line flags extern_ int O_dumpAST; // If true, dump the AST trees extern_ int O_keepasm; // If true, keep any assembly files extern_ int O_assemble; // If true, assemble the assembly files extern_ int 
O_dolink; // If true, link the object files extern_ int O_verbose; // If true, print info on compilation stages ================================================ FILE: 38_Dangling_Else/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 static struct symtable *composite_declaration(int type); static void enum_declaration(void); int typedef_declaration(struct symtable **ctype); int type_of_typedef(char *name, struct symtable **ctype); // Parse the current token and return a // primitive type enum value, a pointer // to any composite type and possibly // modify the class of the type. // Also scan in the next token. int parse_type(struct symtable **ctype, int *class) { int type, exstatic=1; // See if the class has been changed to extern (later, static) while (exstatic) { switch (Token.token) { case T_EXTERN: *class= C_EXTERN; scan(&Token); break; default: exstatic= 0; } } // Now work on the actual type keyword switch (Token.token) { case T_VOID: type = P_VOID; scan(&Token); break; case T_CHAR: type = P_CHAR; scan(&Token); break; case T_INT: type = P_INT; scan(&Token); break; case T_LONG: type = P_LONG; scan(&Token); break; // For the following, if we have a ';' after the // parsing then there is no type, so return -1 case T_STRUCT: type = P_STRUCT; *ctype = composite_declaration(P_STRUCT); if (Token.token == T_SEMI) type = -1; break; case T_UNION: type = P_UNION; *ctype = composite_declaration(P_UNION); if (Token.token == T_SEMI) type = -1; break; case T_ENUM: type = P_INT; // Enums are really ints enum_declaration(); if (Token.token == T_SEMI) type = -1; break; case T_TYPEDEF: type = typedef_declaration(ctype); if (Token.token == T_SEMI) type = -1; break; case T_IDENT: type = type_of_typedef(Text, ctype); break; default: fatals("Illegal type, token", Token.tokstr); } // Scan in one or more further '*' tokens // and determine the correct 
pointer type while (1) { if (Token.token != T_STAR) break; type = pointer_to(type); scan(&Token); } // We leave with the next token already scanned return (type); } // variable_declaration: type identifier ';' // | type identifier '[' INTLIT ']' ';' // ; // // Parse the declaration of a scalar variable or an array // with a given size. // The identifier has been scanned & we have the type. // class is the variable's class // Return the pointer to variable's entry in the symbol table struct symtable *var_declaration(int type, struct symtable *ctype, int class) { struct symtable *sym = NULL; // See if this has already been declared switch (class) { case C_EXTERN: case C_GLOBAL: if (findglob(Text) != NULL) fatals("Duplicate global variable declaration", Text); case C_LOCAL: case C_PARAM: if (findlocl(Text) != NULL) fatals("Duplicate local variable declaration", Text); case C_MEMBER: if (findmember(Text) != NULL) fatals("Duplicate struct/union member declaration", Text); } // Text now has the identifier's name. // If the next token is a '[' if (Token.token == T_LBRACKET) { // Skip past the '[' scan(&Token); // Check we have an array size if (Token.token == T_INTLIT) { // Add this as a known array and generate its space in assembly. 
// We treat the array as a pointer to its elements' type switch (class) { case C_EXTERN: case C_GLOBAL: sym = addglob(Text, pointer_to(type), ctype, S_ARRAY, class, Token.intvalue); break; case C_LOCAL: case C_PARAM: case C_MEMBER: fatal ("For now, declaration of non-global arrays is not implemented"); } } // Ensure we have a following ']' scan(&Token); match(T_RBRACKET, "]"); } else { // Add this as a known scalar // and generate its space in assembly switch (class) { case C_EXTERN: case C_GLOBAL: sym = addglob(Text, type, ctype, S_VARIABLE, class, 1); break; case C_LOCAL: sym = addlocl(Text, type, ctype, S_VARIABLE, 1); break; case C_PARAM: sym = addparm(Text, type, ctype, S_VARIABLE, 1); break; case C_MEMBER: sym = addmemb(Text, type, ctype, S_VARIABLE, 1); break; } } return (sym); } // var_declaration_list: // | variable_declaration // | variable_declaration separate_token var_declaration_list ; // // When called to parse function parameters, separate_token is ','. // When called to parse members of a struct/union, separate_token is ';'. // // Parse a list of variables. // Add them as symbols to one of the symbol table lists, and return the // number of variables. If funcsym is not NULL, there is an existing function // prototype, so compare each variable's type against this prototype. 
static int var_declaration_list(struct symtable *funcsym, int class, int separate_token, int end_token) { int type; int paramcnt = 0; struct symtable *protoptr = NULL; struct symtable *ctype; // If there is a prototype, get the pointer // to the first prototype parameter if (funcsym != NULL) protoptr = funcsym->member; // Loop until the final end token while (Token.token != end_token) { // Get the type and identifier type = parse_type(&ctype, &class); ident(); // Check that this type matches the prototype if there is one if (protoptr != NULL) { if (type != protoptr->type) fatald("Type doesn't match prototype for parameter", paramcnt + 1); protoptr = protoptr->next; } else { // Add a new parameter to the right symbol table list, based on the class var_declaration(type, ctype, class); } paramcnt++; // Must have a separate_token or ')' at this point if ((Token.token != separate_token) && (Token.token != end_token)) fatals("Unexpected token in parameter list", Token.tokstr); if (Token.token == separate_token) scan(&Token); } // Check that the number of parameters in this list matches // any existing prototype if ((funcsym != NULL) && (paramcnt != funcsym->nelems)) fatals("Parameter count mismatch for function", funcsym->name); // Return the count of parameters return (paramcnt); } // // function_declaration: type identifier '(' parameter_list ')' ; // | type identifier '(' parameter_list ')' compound_statement ; // // Parse the declaration of function. // The identifier has been scanned & we have the type. struct ASTnode *function_declaration(int type) { struct ASTnode *tree, *finalstmt; struct symtable *oldfuncsym, *newfuncsym = NULL; int endlabel, paramcnt; // Text has the identifier's name. If this exists and is a // function, get the id. Otherwise, set oldfuncsym to NULL. 
if ((oldfuncsym = findsymbol(Text)) != NULL) if (oldfuncsym->stype != S_FUNCTION) oldfuncsym = NULL; // If this is a new function declaration, get a // label-id for the end label, and add the function // to the symbol table, if (oldfuncsym == NULL) { endlabel = genlabel(); // Assumtion: functions only return scalar types, so NULL below newfuncsym = addglob(Text, type, NULL, S_FUNCTION, C_GLOBAL, endlabel); } // Scan in the '(', any parameters and the ')'. // Pass in any existing function prototype pointer lparen(); paramcnt = var_declaration_list(oldfuncsym, C_PARAM, T_COMMA, T_RPAREN); rparen(); // If this is a new function declaration, update the // function symbol entry with the number of parameters. // Also copy the parameter list into the function's node. if (newfuncsym) { newfuncsym->nelems = paramcnt; newfuncsym->member = Parmhead; oldfuncsym = newfuncsym; } // Clear out the parameter list Parmhead = Parmtail = NULL; // Declaration ends in a semicolon, only a prototype. if (Token.token == T_SEMI) { scan(&Token); return (NULL); } // This is not just a prototype. // Set the Functionid global to the function's symbol pointer Functionid = oldfuncsym; // Get the AST tree for the compound statement and mark // that we have parsed no loops or switches yet Looplevel= 0; Switchlevel= 0; lbrace(); tree = compound_statement(0); rbrace(); // If the function type isn't P_VOID .. if (type != P_VOID) { // Error if no statements in the function if (tree == NULL) fatal("No statements in function with non-void type"); // Check that the last AST operation in the // compound statement was a return statement finalstmt = (tree->op == A_GLUE) ? 
tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); } // Return an A_FUNCTION node which has the function's symbol pointer // and the compound statement sub-tree return (mkastunary(A_FUNCTION, type, tree, oldfuncsym, endlabel)); } // Parse composite type declarations: structs or unions. // Either find an existing struct/union declaration, or build // a struct/union symbol table entry and return its pointer. static struct symtable *composite_declaration(int type) { struct symtable *ctype = NULL; struct symtable *m; int offset; // Skip the struct/union keyword scan(&Token); // See if there is a following struct/union name if (Token.token == T_IDENT) { // Find any matching composite type if (type == P_STRUCT) ctype = findstruct(Text); else ctype = findunion(Text); scan(&Token); } // If the next token isn't an LBRACE , this is // the usage of an existing struct/union type. // Return the pointer to the type. if (Token.token != T_LBRACE) { if (ctype == NULL) fatals("unknown struct/union type", Text); return (ctype); } // Ensure this struct/union type hasn't been // previously defined if (ctype) fatals("previously defined struct/union", Text); // Build the composite type and skip the left brace if (type == P_STRUCT) ctype = addstruct(Text, P_STRUCT, NULL, 0, 0); else ctype = addunion(Text, P_UNION, NULL, 0, 0); scan(&Token); // Scan in the list of members and attach // to the struct type's node var_declaration_list(NULL, C_MEMBER, T_SEMI, T_RBRACE); rbrace(); ctype->member = Membhead; Membhead = Membtail = NULL; // Set the offset of the initial member // and find the first free byte after it m = ctype->member; m->posn = 0; offset = typesize(m->type, m->ctype); // Set the position of each successive member in the composite type // Unions are easy. 
For structs, align the member and find the next free byte for (m = m->next; m != NULL; m = m->next) { // Set the offset for this member if (type == P_STRUCT) m->posn = genalign(m->type, offset, 1); else m->posn = 0; // Get the offset of the next free byte after this member offset += typesize(m->type, m->ctype); } // Set the overall size of the composite type ctype->size = offset; return (ctype); } // Parse an enum declaration static void enum_declaration(void) { struct symtable *etype = NULL; char *name; int intval = 0; // Skip the enum keyword. scan(&Token); // If there's a following enum type name, get a // pointer to any existing enum type node. if (Token.token == T_IDENT) { etype = findenumtype(Text); name = strdup(Text); // As it gets tromped soon scan(&Token); } // If the next token isn't a LBRACE, check // that we have an enum type name, then return if (Token.token != T_LBRACE) { if (etype == NULL) fatals("undeclared enum type:", name); return; } // We do have an LBRACE. Skip it scan(&Token); // If we have an enum type name, ensure that it // hasn't been declared before. if (etype != NULL) fatals("enum type redeclared:", etype->name); else // Build an enum type node for this identifier etype = addenum(name, C_ENUMTYPE, 0); // Loop to get all the enum values while (1) { // Ensure we have an identifier // Copy it in case there's an int literal coming up ident(); name = strdup(Text); // Ensure this enum value hasn't been declared before etype = findenumval(name); if (etype != NULL) fatals("enum value redeclared:", Text); // If the next token is an '=', skip it and // get the following int literal if (Token.token == T_ASSIGN) { scan(&Token); if (Token.token != T_INTLIT) fatal("Expected int literal after '='"); intval = Token.intvalue; scan(&Token); } // Build an enum value node for this identifier. // Increment the value for the next enum identifier. 
etype = addenum(name, C_ENUMVAL, intval++); // Bail out on a right curly bracket, else get a comma if (Token.token == T_RBRACE) break; comma(); } scan(&Token); // Skip over the right curly bracket } // Parse a typedef declaration and return the type // and ctype that it represents int typedef_declaration(struct symtable **ctype) { int type, class=0; // Skip the typedef keyword. scan(&Token); // Get the actual type following the keyword type = parse_type(ctype, &class); if (class != 0) fatal("Can't have extern in a typedef declaration"); // See if the typedef identifier already exists if (findtypedef(Text) != NULL) fatals("redefinition of typedef", Text); // It doesn't exist so add it to the typedef list addtypedef(Text, type, *ctype, 0, 0); scan(&Token); return (type); } // Given a typedef name, return the type it represents int type_of_typedef(char *name, struct symtable **ctype) { struct symtable *t; // Look up the typedef in the list t = findtypedef(name); if (t == NULL) fatals("unknown type", name); scan(&Token); *ctype = t->ctype; return (t->type); } // Parse one or more global declarations, either // variables, functions or structs void global_declarations(void) { struct ASTnode *tree; struct symtable *ctype; int type, class= C_GLOBAL; while (1) { // Stop when we have reached EOF if (Token.token == T_EOF) break; // Get the type type = parse_type(&ctype, &class); // We might have just parsed a struct, union or enum // declaration with no associated variable. // The next token might be a ';'. Loop back if it is. // XXX: I'm not happy with this as it allows // "struct fred;" as an accepted statement if (type == -1) { semi(); continue; } // We have to read past the identifier // to see either a '(' for a function declaration // or a ',' or ';' for a variable declaration. // Text is filled in by the ident() call. 
ident(); if (Token.token == T_LPAREN) { // Parse the function declaration tree = function_declaration(type); // Only a function prototype, no code if (tree == NULL) continue; // A real function, generate the assembly code for it if (O_dumpAST) { dumpAST(tree, NOLABEL, 0); fprintf(stdout, "\n\n"); } genAST(tree, NOLABEL, NOLABEL, NOLABEL, 0); // Now free the symbols associated // with this function freeloclsyms(); } else { // Parse the global variable declaration // and skip past the trailing semicolon var_declaration(type, ctype, class); semi(); } } } ================================================ FILE: 38_Dangling_Else/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c void reject_token(struct token *t); int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue); struct ASTnode *mkastleaf(int op, int type, struct symtable *sym, int intvalue); struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, struct symtable *sym, int intvalue); void dumpAST(struct ASTnode *n, int label, int parentASTop); // gen.c int genlabel(void); int genAST(struct ASTnode *n, int iflabel, int looptoplabel, int loopendlabel, int parentASTop); void genpreamble(); void genpostamble(); void genfreeregs(); void genglobsym(struct symtable *node); int genglobstr(char *strvalue); int genprimsize(int type); int genalign(int type, int offset, int direction); void genreturn(int reg, int id); // cg.c int cgprimsize(int type); int cgalign(int type, int offset, int direction); void cgtextseg(); void cgdataseg(); void freeall_registers(void); void cgpreamble(); void cgpostamble(); void cgfuncpreamble(struct symtable *sym); void cgfuncpostamble(struct symtable *sym); int cgloadint(int value, int type); int cgloadglob(struct symtable *sym, int op); int 
cgloadlocal(struct symtable *sym, int op); int cgloadglobstr(int label); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdiv(int r1, int r2); int cgshlconst(int r, int val); int cgcall(struct symtable *sym, int numargs); void cgcopyarg(int r, int argposn); int cgstorglob(int r, struct symtable *sym); int cgstorlocal(int r, struct symtable *sym); void cgglobsym(struct symtable *node); void cgglobstr(int l, char *strvalue); int cgcompare_and_set(int ASTop, int r1, int r2); int cgcompare_and_jump(int ASTop, int r1, int r2, int label); void cglabel(int l); void cgjump(int l); int cgwiden(int r, int oldtype, int newtype); void cgreturn(int reg, struct symtable *sym); int cgaddress(struct symtable *sym); int cgderef(int r, int type); int cgstorderef(int r1, int r2, int type); int cgnegate(int r); int cginvert(int r); int cglognot(int r); int cgboolean(int r, int op, int label); int cgand(int r1, int r2); int cgor(int r1, int r2); int cgxor(int r1, int r2); int cgshl(int r1, int r2); int cgshr(int r1, int r2); void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel); // expr.c struct ASTnode *expression_list(int endtoken); struct ASTnode *binexpr(int ptp); // stmt.c struct ASTnode *compound_statement(int inswitch); // misc.c void match(int t, char *what); void semi(void); void lbrace(void); void rbrace(void); void lparen(void); void rparen(void); void ident(void); void comma(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node); struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int size, int posn); struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int size); struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int size); 
struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype, int size); struct symtable *addstruct(char *name, int type, struct symtable *ctype, int stype, int size); struct symtable *addunion(char *name, int type, struct symtable *ctype, int stype, int size); struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int size); struct symtable *addenum(char *name, int class, int value); struct symtable *addtypedef(char *name, int type, struct symtable *ctype, int stype, int size); struct symtable *findglob(char *s); struct symtable *findlocl(char *s); struct symtable *findsymbol(char *s); struct symtable *findmember(char *s); struct symtable *findstruct(char *s); struct symtable *findunion(char *s); struct symtable *findenumtype(char *s); struct symtable *findenumval(char *s); struct symtable *findtypedef(char *s); void clear_symtable(void); void freeloclsyms(void); // decl.c struct symtable *var_declaration(int type, struct symtable *ctype, int class); struct ASTnode *function_declaration(int type); void global_declarations(void); // types.c int inttype(int type); int ptrtype(int type); int parse_type(struct symtable **ctype, int *class); int pointer_to(int type); int value_at(int type); int typesize(int type, struct symtable *ctype); struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op); ================================================ FILE: 38_Dangling_Else/defs.h ================================================ #include #include #include #include // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 enum { TEXTLEN = 512 // Length of identifiers in input }; // Commands and default filenames #define AOUT "a.out" #ifdef __NASM__ #define ASCMD "nasm -f elf64 -o " #define LDCMD "cc -no-pie -fno-plt -Wall -o " #else #define ASCMD "as -o " #define LDCMD "cc -o " #endif #define CPPCMD "cpp -nostdinc -isystem " // Token types enum { T_EOF, // Binary operators T_ASSIGN, T_LOGOR, T_LOGAND, 
T_OR, T_XOR, T_AMPER, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, T_LSHIFT, T_RSHIFT, T_PLUS, T_MINUS, T_STAR, T_SLASH, // Other operators T_INC, T_DEC, T_INVERT, T_LOGNOT, // Type keywords T_VOID, T_CHAR, T_INT, T_LONG, // Other keywords T_IF, T_ELSE, T_WHILE, T_FOR, T_RETURN, T_STRUCT, T_UNION, T_ENUM, T_TYPEDEF, T_EXTERN, T_BREAK, T_CONTINUE, T_SWITCH, T_CASE, T_DEFAULT, // Structural tokens T_INTLIT, T_STRLIT, T_SEMI, T_IDENT, T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, T_LBRACKET, T_RBRACKET, T_COMMA, T_DOT, T_ARROW, T_COLON }; // Token structure struct token { int token; // Token type, from the enum list above char *tokstr; // String version of the token int intvalue; // For T_INTLIT, the integer value }; // AST node types. The first few line up // with the related tokens enum { A_ASSIGN = 1, A_LOGOR, A_LOGAND, A_OR, A_XOR, A_AND, A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_LSHIFT, A_RSHIFT, A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_INTLIT, A_STRLIT, A_IDENT, A_GLUE, A_IF, A_WHILE, A_FUNCTION, A_WIDEN, A_RETURN, A_FUNCCALL, A_DEREF, A_ADDR, A_SCALE, A_PREINC, A_PREDEC, A_POSTINC, A_POSTDEC, A_NEGATE, A_INVERT, A_LOGNOT, A_TOBOOL, A_BREAK, A_CONTINUE, A_SWITCH, A_CASE, A_DEFAULT }; // Primitive types. The bottom 4 bits is an integer // value that represents the level of indirection, // e.g. 0= no pointer, 1= pointer, 2= pointer pointer etc. 
enum { P_NONE, P_VOID = 16, P_CHAR = 32, P_INT = 48, P_LONG = 64, P_STRUCT=80, P_UNION=96 }; // Structural types enum { S_VARIABLE, S_FUNCTION, S_ARRAY }; // Storage classes enum { C_GLOBAL = 1, // Globally visible symbol C_LOCAL, // Locally visible symbol C_PARAM, // Locally visible function parameter C_EXTERN, // External globally visible symbol C_STRUCT, // A struct C_UNION, // A union C_MEMBER, // Member of a struct or union C_ENUMTYPE, // A named enumeration type C_ENUMVAL, // A named enumeration value C_TYPEDEF // A named typedef }; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol struct symtable *ctype; // If struct/union, ptr to that type int stype; // Structural type for the symbol int class; // Storage class for the symbol union { int size; // Number of elements in the symbol int endlabel; // For functions, the end label }; union { int nelems; // For functions, # of params int posn; // For locals, the negative offset // from the stack base pointer }; struct symtable *next; // Next symbol in one list struct symtable *member; // First member of a function, struct, }; // union or enum // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates int rvalue; // True if the node is an rvalue struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; struct symtable *sym; // For many AST nodes, the pointer to union { // the symbol in the symbol table int intvalue; // For A_INTLIT, the integer value int size; // For A_SCALE, the size to scale by }; }; enum { NOREG = -1, // Use NOREG when the AST generation // functions have no register to return NOLABEL = 0 // Use NOLABEL when we have no label to // pass to genAST() }; ================================================ FILE: 38_Dangling_Else/expr.c ================================================ #include 
"defs.h" #include "data.h" #include "decl.h" // Parsing of expressions // Copyright (c) 2019 Warren Toomey, GPL3 // expression_list: // | expression // | expression ',' expression_list // ; // Parse a list of zero or more comma-separated expressions and // return an AST composed of A_GLUE nodes with the left-hand child // being the sub-tree of previous expressions (or NULL) and the right-hand // child being the next expression. Each A_GLUE node will have size field // set to the number of expressions in the tree at this point. If no // expressions are parsed, NULL is returned struct ASTnode *expression_list(int endtoken) { struct ASTnode *tree = NULL; struct ASTnode *child = NULL; int exprcount = 0; // Loop until the end token while (Token.token != endtoken) { // Parse the next expression and increment the expression count child = binexpr(0); exprcount++; // Build an A_GLUE AST node with the previous tree as the left child // and the new expression as the right child. Store the expression count. tree = mkastnode(A_GLUE, P_NONE, tree, NULL, child, NULL, exprcount); // Stop when we reach the end token if (Token.token == endtoken) break; // Must have a ',' at this point match(T_COMMA, ","); } // Return the tree of expressions return (tree); } // Parse a function call and return its AST static struct ASTnode *funccall(void) { struct ASTnode *tree; struct symtable *funcptr; // Check that the identifier has been defined as a function, // then make a leaf node for it. if ((funcptr = findsymbol(Text)) == NULL || funcptr->stype != S_FUNCTION) { fatals("Undeclared function", Text); } // Get the '(' lparen(); // Parse the argument expression list tree = expression_list(T_RPAREN); // XXX Check type of each argument against the function's prototype // Build the function call AST node. Store the // function's return type as this node's type. 
// Also record the function's symbol-id tree = mkastunary(A_FUNCCALL, funcptr->type, tree, funcptr, 0); // Get the ')' rparen(); return (tree); } // Parse the index into an array and return an AST tree for it static struct ASTnode *array_access(void) { struct ASTnode *left, *right; struct symtable *aryptr; // Check that the identifier has been defined as an array // then make a leaf node for it that points at the base if ((aryptr = findsymbol(Text)) == NULL || aryptr->stype != S_ARRAY) { fatals("Undeclared array", Text); } left = mkastleaf(A_ADDR, aryptr->type, aryptr, 0); // Get the '[' scan(&Token); // Parse the following expression right = binexpr(0); // Get the ']' match(T_RBRACKET, "]"); // Ensure that this is of int type if (!inttype(right->type)) fatal("Array index is not of integer type"); // Scale the index by the size of the element's type right = modify_type(right, left->type, A_ADD); // Return an AST tree where the array's base has the offset // added to it, and dereference the element. Still an lvalue // at this point. left = mkastnode(A_ADD, aryptr->type, left, NULL, right, NULL, 0); left = mkastunary(A_DEREF, value_at(left->type), left, NULL, 0); return (left); } // Parse the member reference of a struct or union // and return an AST tree for it. If withpointer is true, // the access is through a pointer to the member. static struct ASTnode *member_access(int withpointer) { struct ASTnode *left, *right; struct symtable *compvar; struct symtable *typeptr; struct symtable *m; // Check that the identifier has been declared as a // struct/union or a struct/union pointer if ((compvar = findsymbol(Text)) == NULL) fatals("Undeclared variable", Text); if (withpointer && compvar->type != pointer_to(P_STRUCT) && compvar->type != pointer_to(P_UNION)) fatals("Undeclared variable", Text); if (!withpointer && compvar->type != P_STRUCT && compvar->type != P_UNION) fatals("Undeclared variable", Text); // If a pointer to a struct/union, get the pointer's value. 
// Otherwise, make a leaf node that points at the base // Either way, it's an rvalue if (withpointer) { left = mkastleaf(A_IDENT, pointer_to(compvar->type), compvar, 0); } else left = mkastleaf(A_ADDR, compvar->type, compvar, 0); left->rvalue = 1; // Get the details of the composite type typeptr = compvar->ctype; // Skip the '.' or '->' token and get the member's name scan(&Token); ident(); // Find the matching member's name in the type // Die if we can't find it for (m = typeptr->member; m != NULL; m = m->next) if (!strcmp(m->name, Text)) break; if (m == NULL) fatals("No member found in struct/union: ", Text); // Build an A_INTLIT node with the offset right = mkastleaf(A_INTLIT, P_INT, NULL, m->posn); // Add the member's offset to the base of the struct/union // and dereference it. Still an lvalue at this point left = mkastnode(A_ADD, pointer_to(m->type), left, NULL, right, NULL, 0); left = mkastunary(A_DEREF, m->type, left, NULL, 0); return (left); } // Parse a postfix expression and return // an AST node representing it. The // identifier is already in Text. static struct ASTnode *postfix(void) { struct ASTnode *n; struct symtable *varptr; struct symtable *enumptr; // If the identifier matches an enum value, // return an A_INTLIT node if ((enumptr = findenumval(Text)) != NULL) { scan(&Token); return (mkastleaf(A_INTLIT, P_INT, NULL, enumptr->posn)); } // Scan in the next token to see if we have a postfix expression scan(&Token); // Function call if (Token.token == T_LPAREN) return (funccall()); // An array reference if (Token.token == T_LBRACKET) return (array_access()); // Access into a struct or union if (Token.token == T_DOT) return (member_access(0)); if (Token.token == T_ARROW) return (member_access(1)); // A variable. Check that the variable exists. 
if ((varptr = findsymbol(Text)) == NULL || varptr->stype != S_VARIABLE) fatals("Unknown variable", Text); switch (Token.token) { // Post-increment: skip over the token case T_INC: scan(&Token); n = mkastleaf(A_POSTINC, varptr->type, varptr, 0); break; // Post-decrement: skip over the token case T_DEC: scan(&Token); n = mkastleaf(A_POSTDEC, varptr->type, varptr, 0); break; // Just a variable reference default: n = mkastleaf(A_IDENT, varptr->type, varptr, 0); } return (n); } // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; int id; switch (Token.token) { case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. // Make it a P_CHAR if it's within the P_CHAR range if ((Token.intvalue) >= 0 && (Token.intvalue < 256)) n = mkastleaf(A_INTLIT, P_CHAR, NULL, Token.intvalue); else n = mkastleaf(A_INTLIT, P_INT, NULL, Token.intvalue); break; case T_STRLIT: // For a STRLIT token, generate the assembly for it. // Then make a leaf AST node for it. id is the string's label. id = genglobstr(Text); n = mkastleaf(A_STRLIT, pointer_to(P_CHAR), NULL, id); break; case T_IDENT: return (postfix()); case T_LPAREN: // Beginning of a parenthesised expression, skip the '('. // Scan in the expression and the right parenthesis scan(&Token); n = binexpr(0); rparen(); return (n); default: fatals("Expecting a primary expression, got token", Token.tokstr); } // Scan in the next token and return the leaf node scan(&Token); return (n); } // Convert a binary operator token into a binary AST operation. // We rely on a 1:1 mapping from token to AST operation static int binastop(int tokentype) { if (tokentype > T_EOF && tokentype <= T_SLASH) return (tokentype); fatald("Syntax error, token", tokentype); return (0); // Keep -Wall happy } // Return true if a token is right-associative, // false otherwise. 
static int rightassoc(int tokentype) { if (tokentype == T_ASSIGN) return (1); return (0); } // Operator precedence for each token. Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 20, 30, // T_EOF, T_ASSIGN, T_LOGOR, T_LOGAND 40, 50, 60, // T_OR, T_XOR, T_AMPER 70, 70, // T_EQ, T_NE 80, 80, 80, 80, // T_LT, T_GT, T_LE, T_GE 90, 90, // T_LSHIFT, T_RSHIFT 100, 100, // T_PLUS, T_MINUS 110, 110 // T_STAR, T_SLASH }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec; if (tokentype > T_SLASH) fatald("Token with no precedence in op_precedence:", tokentype); prec = OpPrec[tokentype]; if (prec == 0) fatald("Syntax error, token", tokentype); return (prec); } // prefix_expression: primary // | '*' prefix_expression // | '&' prefix_expression // | '-' prefix_expression // | '++' prefix_expression // | '--' prefix_expression // ; // Parse a prefix expression and return // a sub-tree representing it. 
struct ASTnode *prefix(void) { struct ASTnode *tree; switch (Token.token) { case T_AMPER: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Ensure that it's an identifier if (tree->op != A_IDENT) fatal("& operator must be followed by an identifier"); // Now change the operator to A_ADDR and the type to // a pointer to the original type tree->op = A_ADDR; tree->type = pointer_to(tree->type); break; case T_STAR: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's either another deref or an // identifier if (tree->op != A_IDENT && tree->op != A_DEREF) fatal("* operator must be followed by an identifier or *"); // Prepend an A_DEREF operation to the tree tree = mkastunary(A_DEREF, value_at(tree->type), tree, NULL, 0); break; case T_MINUS: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_NEGATE operation to the tree and // make the child an rvalue. Because chars are unsigned, // also widen this to int so that it's signed tree->rvalue = 1; tree = modify_type(tree, P_INT, 0); tree = mkastunary(A_NEGATE, tree->type, tree, NULL, 0); break; case T_INVERT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_INVERT operation to the tree and // make the child an rvalue. tree->rvalue = 1; tree = mkastunary(A_INVERT, tree->type, tree, NULL, 0); break; case T_LOGNOT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_LOGNOT operation to the tree and // make the child an rvalue. 
tree->rvalue = 1; tree = mkastunary(A_LOGNOT, tree->type, tree, NULL, 0); break; case T_INC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("++ operator must be followed by an identifier"); // Prepend an A_PREINC operation to the tree tree = mkastunary(A_PREINC, tree->type, tree, NULL, 0); break; case T_DEC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("-- operator must be followed by an identifier"); // Prepend an A_PREDEC operation to the tree tree = mkastunary(A_PREDEC, tree->type, tree, NULL, 0); break; default: tree = primary(); } return (tree); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; struct ASTnode *ltemp, *rtemp; int ASTop; int tokentype; // Get the tree on the left. // Fetch the next token at the same time. 
left = prefix(); // If we hit one of several terminating tokens, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON) { left->rvalue = 1; return (left); } // While the precedence of this token is more than that of the // previous token precedence, or it's right associative and // equal to the previous token's precedence while ((op_precedence(tokentype) > ptp) || (rightassoc(tokentype) && op_precedence(tokentype) == ptp)) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Determine the operation to be performed on the sub-trees ASTop = binastop(tokentype); if (ASTop == A_ASSIGN) { // Assignment // Make the right tree into an rvalue right->rvalue = 1; // Ensure the right's type matches the left right = modify_type(right, left->type, 0); if (right == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree. However, switch // left and right around, so that the right expression's // code will be generated before the left expression ltemp = left; left = right; right = ltemp; } else { // We are not doing an assignment, so both trees should be rvalues // Convert both trees into rvalue if they are lvalue trees left->rvalue = 1; right->rvalue = 1; // Ensure the two types are compatible by trying // to modify each tree to match the other's type. ltemp = modify_type(left, right->type, ASTop); rtemp = modify_type(right, left->type, ASTop); if (ltemp == NULL && rtemp == NULL) fatal("Incompatible types in binary expression"); if (ltemp != NULL) left = ltemp; if (rtemp != NULL) right = rtemp; } // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. left = mkastnode(binastop(tokentype), left->type, left, NULL, right, NULL, 0); // Update the details of the current token. 
// If we hit a terminating token, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON) { left->rvalue = 1; return (left); } } // Return the tree we have when the precedence // is the same or lower left->rvalue = 1; return (left); } ================================================ FILE: 38_Dangling_Else/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number int genlabel(void) { static int id = 1; return (id++); } // Generate the code for an IF statement // and an optional ELSE clause static int genIF(struct ASTnode *n, int looptoplabel, int loopendlabel) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. // When there is no ELSE clause, Lfalse _is_ // the ending label! Lfalse = genlabel(); if (n->right) Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. 
genAST(n->left, Lfalse, NOLABEL, NOLABEL, n->op); genfreeregs(); // Generate the true compound statement genAST(n->mid, NOLABEL, looptoplabel, loopendlabel, n->op); genfreeregs(); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); genfreeregs(); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = genlabel(); Lend = genlabel(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. genAST(n->left, Lend, Lstart, Lend, n->op); genfreeregs(); // Generate the compound statement for the body genAST(n->right, NOLABEL, Lstart, Lend, n->op); genfreeregs(); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Generate the code for a SWITCH statement static int genSWITCH(struct ASTnode *n) { int *caseval, *caselabel; int Ljumptop, Lend; int i, reg, defaultlabel = 0, casecount = 0; struct ASTnode *c; // Create arrays for the case values and associated labels. // Ensure that we have at least one position in each array. caseval = (int *) malloc((n->intvalue + 1) * sizeof(int)); caselabel = (int *) malloc((n->intvalue + 1) * sizeof(int)); // Generate labels for the top of the jump table, and the // end of the switch statement. Set a default label for // the end of the switch, in case we don't have a default. 
Ljumptop = genlabel(); Lend = genlabel(); defaultlabel = Lend; // Output the code to calculate the switch condition reg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, 0); cgjump(Ljumptop); genfreeregs(); // Walk the right-child linked list to // generate the code for each case for (i = 0, c = n->right; c != NULL; i++, c = c->right) { // Get a label for this case. Store it // and the case value in the arrays. // Record if it is the default case. caselabel[i] = genlabel(); caseval[i] = c->intvalue; cglabel(caselabel[i]); if (c->op == A_DEFAULT) defaultlabel = caselabel[i]; else casecount++; // Generate the case code. Pass in the end label for the breaks genAST(c->left, NOLABEL, NOLABEL, Lend, 0); genfreeregs(); } // Ensure the last case jumps past the switch table cgjump(Lend); // Now output the switch table and the end label. cgswitch(reg, casecount, Ljumptop, caselabel, caseval, defaultlabel); cglabel(Lend); return (NOREG); } // Generate the code to copy the arguments of a // function call to its parameters, then call the // function itself. Return the register that holds // the function's return value. static int gen_funccall(struct ASTnode *n) { struct ASTnode *gluetree = n->left; int reg; int numargs = 0; // If there is a list of arguments, walk this list // from the last argument (right-hand child) to the // first while (gluetree) { // Calculate the expression's value reg = genAST(gluetree->right, NOLABEL, NOLABEL, NOLABEL, gluetree->op); // Copy this into the n'th function parameter: size is 1, 2, 3, ... cgcopyarg(reg, gluetree->size); // Keep the first (highest) number of arguments if (numargs == 0) numargs = gluetree->size; genfreeregs(); gluetree = gluetree->left; } // Call the function, clean up the stack (based on numargs), // and return its result return (cgcall(n->sym, numargs)); } // Given an AST, an optional label, and the AST op // of the parent, generate assembly code recursively. // Return the register id with the tree's final value. 
int genAST(struct ASTnode *n, int iflabel, int looptoplabel, int loopendlabel, int parentASTop) { int leftreg, rightreg; // We have some specific AST node handling at the top // so that we don't evaluate the child sub-trees immediately switch (n->op) { case A_IF: return (genIF(n, looptoplabel, loopendlabel)); case A_WHILE: return (genWHILE(n)); case A_SWITCH: return (genSWITCH(n)); case A_FUNCCALL: return (gen_funccall(n)); case A_GLUE: // Do each child statement, and free the // registers after each child if (n->left != NULL) genAST(n->left, iflabel, looptoplabel, loopendlabel, n->op); genfreeregs(); if (n->right != NULL) genAST(n->right, iflabel, looptoplabel, loopendlabel, n->op); genfreeregs(); return (NOREG); case A_FUNCTION: // Generate the function's preamble before the code // in the child sub-tree cgfuncpreamble(n->sym); genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op); cgfuncpostamble(n->sym); return (NOREG); } // General AST node handling below // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op); if (n->right) rightreg = genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdiv(leftreg, rightreg)); case A_AND: return (cgand(leftreg, rightreg)); case A_OR: return (cgor(leftreg, rightreg)); case A_XOR: return (cgxor(leftreg, rightreg)); case A_LSHIFT: return (cgshl(leftreg, rightreg)); case A_RSHIFT: return (cgshr(leftreg, rightreg)); case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, compare registers // and set one to 1 or 0 based on the comparison. 
if (parentASTop == A_IF || parentASTop == A_WHILE) return (cgcompare_and_jump(n->op, leftreg, rightreg, iflabel)); else return (cgcompare_and_set(n->op, leftreg, rightreg)); case A_INTLIT: return (cgloadint(n->intvalue, n->type)); case A_STRLIT: return (cgloadglobstr(n->intvalue)); case A_IDENT: // Load our value if we are an rvalue // or we are being dereferenced if (n->rvalue || parentASTop == A_DEREF) { if (n->sym->class == C_GLOBAL) { return (cgloadglob(n->sym, n->op)); } else { return (cgloadlocal(n->sym, n->op)); } } else return (NOREG); case A_ASSIGN: // Are we assigning to an identifier or through a pointer? switch (n->right->op) { case A_IDENT: if (n->right->sym->class == C_GLOBAL) return (cgstorglob(leftreg, n->right->sym)); else return (cgstorlocal(leftreg, n->right->sym)); case A_DEREF: return (cgstorderef(leftreg, rightreg, n->right->type)); default: fatald("Can't A_ASSIGN in genAST(), op", n->op); } case A_WIDEN: // Widen the child's type to the parent's type return (cgwiden(leftreg, n->left->type, n->type)); case A_RETURN: cgreturn(leftreg, Functionid); return (NOREG); case A_ADDR: return (cgaddress(n->sym)); case A_DEREF: // If we are an rvalue, dereference to get the value we point at, // otherwise leave it for A_ASSIGN to store through the pointer if (n->rvalue) return (cgderef(leftreg, n->left->type)); else return (leftreg); case A_SCALE: // Small optimisation: use shift if the // scale value is a known power of two switch (n->size) { case 2: return (cgshlconst(leftreg, 1)); case 4: return (cgshlconst(leftreg, 2)); case 8: return (cgshlconst(leftreg, 3)); default: // Load a register with the size and // multiply the leftreg by this size rightreg = cgloadint(n->size, P_INT); return (cgmul(leftreg, rightreg)); } case A_POSTINC: case A_POSTDEC: // Load and decrement the variable's value into a register // and post increment/decrement it if (n->sym->class == C_GLOBAL) return (cgloadglob(n->sym, n->op)); else return (cgloadlocal(n->sym, n->op)); case 
A_PREINC: case A_PREDEC: // Load and decrement the variable's value into a register // and pre increment/decrement it if (n->left->sym->class == C_GLOBAL) return (cgloadglob(n->left->sym, n->op)); else return (cgloadlocal(n->left->sym, n->op)); case A_NEGATE: return (cgnegate(leftreg)); case A_INVERT: return (cginvert(leftreg)); case A_LOGNOT: return (cglognot(leftreg)); case A_TOBOOL: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, set the register // to 0 or 1 based on it's zeroeness or non-zeroeness return (cgboolean(leftreg, parentASTop, iflabel)); case A_BREAK: cgjump(loopendlabel); return (NOREG); case A_CONTINUE: cgjump(looptoplabel); return (NOREG); default: fatald("Unknown AST operator", n->op); } return (NOREG); // Keep -Wall happy } void genpreamble() { cgpreamble(); } void genpostamble() { cgpostamble(); } void genfreeregs() { freeall_registers(); } void genglobsym(struct symtable *node) { cgglobsym(node); } int genglobstr(char *strvalue) { int l = genlabel(); cgglobstr(l, strvalue); return (l); } int genprimsize(int type) { return (cgprimsize(type)); } int genalign(int type, int offset, int direction) { return (cgalign(type, offset, direction)); } ================================================ FILE: 38_Dangling_Else/include/fcntl.h ================================================ #ifndef _FCNTL_H_ # define _FCNTL_H_ #define O_RDONLY 00 #define O_WRONLY 01 #define O_RDWR 02 int open(char *pathname, int flags); #endif // _FCNTL_H_ ================================================ FILE: 38_Dangling_Else/include/stddef.h ================================================ #ifndef _STDDEF_H_ # define _STDDEF_H_ typedef long size_t; #endif //_STDDEF_H_ ================================================ FILE: 38_Dangling_Else/include/stdio.h ================================================ #ifndef _STDIO_H_ # define _STDIO_H_ #include // This FILE definition will do for now typedef char * FILE; FILE 
*fopen(char *pathname, char *mode); size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream); size_t fwrite(void *ptr, size_t size, size_t nmemb, FILE *stream); int fclose(FILE *stream); int printf(char *format); int fprintf(FILE *stream, char *format); #endif // _STDIO_H_ ================================================ FILE: 38_Dangling_Else/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Given a string with a '.' and at least a 1-character suffix // after the '.', change the suffix to be the given character. // Return the new string or NULL if the original string could // not be modified char *alter_suffix(char *str, char suffix) { char *posn; char *newstr; // Clone the string if ((newstr = strdup(str)) == NULL) return (NULL); // Find the '.' if ((posn = strrchr(newstr, '.')) == NULL) return (NULL); // Ensure there is a suffix posn++; if (*posn == '\0') return (NULL); // Change the suffix and NUL-terminate the string *posn++ = suffix; *posn = '\0'; return (newstr); } // Given an input filename, compile that file // down to assembly code. 
Return the new file's name static char *do_compile(char *filename) { char cmd[TEXTLEN]; // Change the input file's suffix to .s Outfilename = alter_suffix(filename, 's'); if (Outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .c on the end\n", filename); exit(1); } // Generate the pre-processor command snprintf(cmd, TEXTLEN, "%s %s %s", CPPCMD, INCDIR, filename); // Open up the pre-processor pipe if ((Infile = popen(cmd, "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", filename, strerror(errno)); exit(1); } Infilename = filename; // Create the output file if ((Outfile = fopen(Outfilename, "w")) == NULL) { fprintf(stderr, "Unable to create %s: %s\n", Outfilename, strerror(errno)); exit(1); } Line = 1; // Reset the scanner Putback = '\n'; clear_symtable(); // Clear the symbol table if (O_verbose) printf("compiling %s\n", filename); scan(&Token); // Get the first token from the input genpreamble(); // Output the preamble global_declarations(); // Parse the global declarations genpostamble(); // Output the postamble fclose(Outfile); // Close the output file return (Outfilename); } // Given an input filename, assemble that file // down to object code. Return the object filename char *do_assemble(char *filename) { char cmd[TEXTLEN]; int err; char *outfilename = alter_suffix(filename, 'o'); if (outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .s on the end\n", filename); exit(1); } // Build the assembly command and run it snprintf(cmd, TEXTLEN, "%s %s %s", ASCMD, outfilename, filename); if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Assembly of %s failed\n", filename); exit(1); } return (outfilename); } // Given a list of object files and an output filename, // link all of the object filenames together. 
void do_link(char *outfilename, char *objlist[]) { int cnt, size = TEXTLEN; char cmd[TEXTLEN], *cptr; int err; // Start with the linker command and the output file cptr = cmd; cnt = snprintf(cptr, size, "%s %s ", LDCMD, outfilename); cptr += cnt; size -= cnt; // Now append each object file while (*objlist != NULL) { cnt = snprintf(cptr, size, "%s ", *objlist); cptr += cnt; size -= cnt; objlist++; } if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Linking failed\n"); exit(1); } } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s [-vcST] [-o outfile] file [file ...]\n", prog); fprintf(stderr, " -v give verbose output of the compilation stages\n"); fprintf(stderr, " -c generate object files but don't link them\n"); fprintf(stderr, " -S generate assembly files but don't link them\n"); fprintf(stderr, " -T dump the AST trees for each input file\n"); fprintf(stderr, " -o outfile, produce the outfile executable file\n"); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. 
enum { MAXOBJ = 100 }; int main(int argc, char *argv[]) { char *outfilename = AOUT; char *asmfile, *objfile; char *objlist[MAXOBJ]; int i, objcnt = 0; // Initialise our variables O_dumpAST = 0; O_keepasm = 0; O_assemble = 0; O_verbose = 0; O_dolink = 1; // Scan for command-line options for (i = 1; i < argc; i++) { // No leading '-', stop scanning for options if (*argv[i] != '-') break; // For each option in this argument for (int j = 1; (*argv[i] == '-') && argv[i][j]; j++) { switch (argv[i][j]) { case 'o': outfilename = argv[++i]; // Save & skip to next argument break; case 'T': O_dumpAST = 1; break; case 'c': O_assemble = 1; O_keepasm = 0; O_dolink = 0; break; case 'S': O_keepasm = 1; O_assemble = 0; O_dolink = 0; break; case 'v': O_verbose = 1; break; default: usage(argv[0]); } } } // Ensure we have at lease one input file argument if (i >= argc) usage(argv[0]); // Work on each input file in turn while (i < argc) { asmfile = do_compile(argv[i]); // Compile the source file if (O_dolink || O_assemble) { objfile = do_assemble(asmfile); // Assemble it to object forma if (objcnt == (MAXOBJ - 2)) { fprintf(stderr, "Too many object files for the compiler to handle\n"); exit(1); } objlist[objcnt++] = objfile; // Add the object file's name objlist[objcnt] = NULL; // to the list of object files } if (!O_keepasm) // Remove the assembly file if unlink(asmfile); // we don't need to keep it i++; } // Now link all the object files together if (O_dolink) { do_link(outfilename, objlist); // If we don't need to keep the object // files, then remove them if (!O_assemble) { for (i = 0; objlist[i] != NULL; i++) unlink(objlist[i]); } } return (0); } ================================================ FILE: 38_Dangling_Else/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" #include // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current token is t, // and fetch the next token. 
Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Match a comma and fetch the next token void comma(void) { match(T_COMMA, "comma"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d of %s\n", s, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d of %s\n", s1, s2, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d of %s\n", s, d, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d of %s\n", s, c, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } ================================================ FILE: 38_Dangling_Else/scan.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { char *p; p = strchr(s, c); return (p ? p - s : -1); } // Get the next character from the input file. 
static int next(void) { int c, l; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return (c); } c = fgetc(Infile); // Read from input file while (c == '#') { // We've hit a pre-processor statement scan(&Token); // Get the line number into l if (Token.token != T_INTLIT) fatals("Expecting pre-processor line number, got:", Text); l = Token.intvalue; scan(&Token); // Get the filename in Text if (Token.token != T_STRLIT) fatals("Expecting pre-processor file name, got:", Text); if (Text[0] != '<') { // If this is a real filename if (strcmp(Text, Infilename)) // and not the one we have now Infilename = strdup(Text); // save it. Then update the line num Line = l; } while ((c = fgetc(Infile)) != '\n'); // Skip to the end of the line c = fgetc(Infile); // and get the next character } if ('\n' == c) Line++; // Increment line count return (c); } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. static int skip(void) { int c; c = next(); while (' ' == c || '\t' == c || '\n' == c || '\r' == c || '\f' == c) { c = next(); } return (c); } // Return the next character from a character // or string literal static int scanch(void) { int c; // Get the next input character and interpret // metacharacters that start with a backslash c = next(); if (c == '\\') { switch (c = next()) { case 'a': return '\a'; case 'b': return '\b'; case 'f': return '\f'; case 'n': return '\n'; case 'r': return '\r'; case 't': return '\t'; case 'v': return '\v'; case '\\': return '\\'; case '"': return '"'; case '\'': return '\''; default: fatalc("unknown escape sequence", c); } } return (c); // Just an ordinary old character! } // Scan and return an integer literal // value from the input file. 
static int scanint(int c) { int k, val = 0; // Convert each character into an int value while ((k = chrpos("0123456789", c)) >= 0) { val = val * 10 + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan in a string literal from the input file, // and store it in buf[]. Return the length of // the string. static int scanstr(char *buf) { int i, c; // Loop while we have enough buffer space for (i = 0; i < TEXTLEN - 1; i++) { // Get the next char and append to buf // Return when we hit the ending double quote if ((c = scanch()) == '"') { buf[i] = 0; return (i); } buf[i] = c; } // Ran out of buf[] space fatal("String literal too long"); return (0); } // Scan an identifier from the input file and // store it in buf[]. Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { fatal("Identifier too long"); } else if (i < lim - 1) { buf[i++] = c; } c = next(); } // We hit a non-valid character, put it back. // NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. 
static int keyword(char *s) { switch (*s) { case 'b': if (!strcmp(s, "break")) return (T_BREAK); break; case 'c': if (!strcmp(s, "case")) return (T_CASE); if (!strcmp(s, "char")) return (T_CHAR); if (!strcmp(s, "continue")) return (T_CONTINUE); break; case 'd': if (!strcmp(s, "default")) return (T_DEFAULT); break; case 'e': if (!strcmp(s, "else")) return (T_ELSE); if (!strcmp(s, "enum")) return (T_ENUM); if (!strcmp(s, "extern")) return (T_EXTERN); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'l': if (!strcmp(s, "long")) return (T_LONG); break; case 'r': if (!strcmp(s, "return")) return (T_RETURN); break; case 's': if (!strcmp(s, "struct")) return (T_STRUCT); if (!strcmp(s, "switch")) return (T_SWITCH); break; case 't': if (!strcmp(s, "typedef")) return (T_TYPEDEF); break; case 'u': if (!strcmp(s, "union")) return (T_UNION); break; case 'v': if (!strcmp(s, "void")) return (T_VOID); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; } return (0); } // A pointer to a rejected token static struct token *Rejtoken = NULL; // Reject the token that we just scanned void reject_token(struct token *t) { if (Rejtoken != NULL) fatal("Can't reject token twice"); Rejtoken = t; } // List of token strings, for debugging purposes char *Tstring[] = { "EOF", "=", "||", "&&", "|", "^", "&", "==", "!=", ",", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", "default", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":" }; // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. 
int scan(struct token *t) { int c, tokentype; // If we have any rejected token, return it if (Rejtoken != NULL) { t = Rejtoken; Rejtoken = NULL; return (1); } // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': if ((c = next()) == '+') { t->token = T_INC; } else { putback(c); t->token = T_PLUS; } break; case '-': if ((c = next()) == '-') { t->token = T_DEC; } else if (c == '>') { t->token = T_ARROW; } else { putback(c); t->token = T_MINUS; } break; case '*': t->token = T_STAR; break; case '/': t->token = T_SLASH; break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case '[': t->token = T_LBRACKET; break; case ']': t->token = T_RBRACKET; break; case '~': t->token = T_INVERT; break; case '^': t->token = T_XOR; break; case ',': t->token = T_COMMA; break; case '.': t->token = T_DOT; break; case ':': t->token = T_COLON; break; case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { putback(c); t->token = T_LOGNOT; } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else if (c == '<') { t->token = T_LSHIFT; } else { putback(c); t->token = T_LT; } break; case '>': if ((c = next()) == '=') { t->token = T_GE; } else if (c == '>') { t->token = T_RSHIFT; } else { putback(c); t->token = T_GT; } break; case '&': if ((c = next()) == '&') { t->token = T_LOGAND; } else { putback(c); t->token = T_AMPER; } break; case '|': if ((c = next()) == '|') { t->token = T_LOGOR; } else { putback(c); t->token = T_OR; } break; case '\'': // If it's a quote, scan in the // literal character value and // the trailing quote t->intvalue = scanch(); t->token = T_INTLIT; if (next() != '\'') fatal("Expected '\\'' at end of char literal"); 
break; case '"': // Scan in a literal string scanstr(Text); t->token = T_STRLIT; break; default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if ((tokentype = keyword(Text)) != 0) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token t->tokstr = Tstring[t->token]; return (1); } ================================================ FILE: 38_Dangling_Else/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // Prototypes static struct ASTnode *single_statement(void); // compound_statement: // empty, i.e. no statement // | statement // | statement statements // ; // // statement: declaration // | expression_statement // | function_call // | if_statement // | while_statement // | for_statement // | return_statement // ; // if_statement: if_head // | if_head 'else' statement // ; // // if_head: 'if' '(' true_false_expression ')' statement ; // // Parse an IF statement including any // optional ELSE clause and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. 
condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); rparen(); // Get the AST for the statement trueAST = single_statement(); // If we have an 'else', skip it // and get the AST for the statement if (Token.token == T_ELSE) { scan(&Token); falseAST = single_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, P_NONE, condAST, trueAST, falseAST, NULL, 0)); } // while_statement: 'while' '(' true_false_expression ')' statement ; // // Parse a WHILE statement and return its AST static struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); rparen(); // Get the AST for the statement. // Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Build and return the AST for this statement return (mkastnode(A_WHILE, P_NONE, condAST, NULL, bodyAST, NULL, 0)); } // for_statement: 'for' '(' expression_list ';' // true_false_expression ';' // expression_list ')' statement ; // // Parse a FOR statement and return its AST static struct ASTnode *for_statement(void) { struct ASTnode *condAST, *bodyAST; struct ASTnode *preopAST, *postopAST; struct ASTnode *tree; // Ensure we have 'for' '(' match(T_FOR, "for"); lparen(); // Get the pre_op expression and the ';' preopAST = expression_list(T_SEMI); semi(); // Get the condition and the ';'. // Force a non-comparison to be boolean // the tree's operation is a comparison. 
condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); semi(); // Get the post_op expression and the ')' postopAST = expression_list(T_RPAREN); rparen(); // Get the statement which is the body // Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Glue the statement and the postop tree tree = mkastnode(A_GLUE, P_NONE, bodyAST, NULL, postopAST, NULL, 0); // Make a WHILE loop with the condition and this new body tree = mkastnode(A_WHILE, P_NONE, condAST, NULL, tree, NULL, 0); // And glue the preop tree to the A_WHILE tree return (mkastnode(A_GLUE, P_NONE, preopAST, NULL, tree, NULL, 0)); } // return_statement: 'return' '(' expression ')' ; // // Parse a return statement and return its AST static struct ASTnode *return_statement(void) { struct ASTnode *tree; // Can't return a value if function returns P_VOID if (Functionid->type == P_VOID) fatal("Can't return from a void function"); // Ensure we have 'return' '(' match(T_RETURN, "return"); lparen(); // Parse the following expression tree = binexpr(0); // Ensure this is compatible with the function's type tree = modify_type(tree, Functionid->type, 0); if (tree == NULL) fatal("Incompatible type to return"); // Add on the A_RETURN node tree = mkastunary(A_RETURN, P_NONE, tree, NULL, 0); // Get the ')' and ';' rparen(); semi(); return (tree); } // break_statement: 'break' ; // // Parse a break statement and return its AST static struct ASTnode *break_statement(void) { if (Looplevel == 0 && Switchlevel == 0) fatal("no loop or switch to break out from"); scan(&Token); semi(); return (mkastleaf(A_BREAK, 0, NULL, 0)); } // continue_statement: 'continue' ; // // Parse a continue statement and return its AST static struct ASTnode *continue_statement(void) { if (Looplevel == 0) fatal("no loop to continue to"); scan(&Token); semi(); return (mkastleaf(A_CONTINUE, 0, NULL, 0)); } // Parse a switch statement and 
return its AST static struct ASTnode *switch_statement(void) { struct ASTnode *left, *n, *c, *casetree= NULL, *casetail; int inloop=1, casecount=0; int seendefault=0; int ASTop, casevalue; // Skip the 'switch' and '(' scan(&Token); lparen(); // Get the switch expression, the ')' and the '{' left= binexpr(0); rparen(); lbrace(); // Ensure that this is of int type if (!inttype(left->type)) fatal("Switch expression is not of integer type"); // Build an A_SWITCH subtree with the expression as // the child n= mkastunary(A_SWITCH, 0, left, NULL, 0); // Now parse the cases Switchlevel++; while (inloop) { switch(Token.token) { // Leave the loop when we hit a '}' case T_RBRACE: if (casecount==0) fatal("No cases in switch"); inloop=0; break; case T_CASE: case T_DEFAULT: // Ensure this isn't after a previous 'default' if (seendefault) fatal("case or default after existing default"); // Set the AST operation. Scan the case value if required if (Token.token==T_DEFAULT) { ASTop= A_DEFAULT; seendefault= 1; scan(&Token); } else { ASTop= A_CASE; scan(&Token); left= binexpr(0); // Ensure the case value is an integer literal if (left->op != A_INTLIT) fatal("Expecting integer literal for case value"); casevalue= left->intvalue; // Walk the list of existing case values to ensure // that there isn't a duplicate case value for (c= casetree; c != NULL; c= c -> right) if (casevalue == c->intvalue) fatal("Duplicate case value"); } // Scan the ':' and get the compound expression match(T_COLON, ":"); left= compound_statement(1); casecount++; // Build a sub-tree with the compound statement as the left child // and link it in to the growing A_CASE tree if (casetree==NULL) { casetree= casetail= mkastunary(ASTop, 0, left, NULL, casevalue); } else { casetail->right= mkastunary(ASTop, 0, left, NULL, casevalue); casetail= casetail->right; } break; default: fatals("Unexpected token in switch", Token.tokstr); } } Switchlevel--; // We have a sub-tree with the cases and any default. 
Put the // case count into the A_SWITCH node and attach the case tree. n->intvalue= casecount; n->right= casetree; rbrace(); return(n); } // Parse a single statement and return its AST. static struct ASTnode *single_statement(void) { int type, class = C_LOCAL; struct symtable *ctype; struct ASTnode *stmt; switch (Token.token) { case T_LBRACE: // We have a '{', so this is a compound statement lbrace(); stmt = compound_statement(0); rbrace(); return(stmt); case T_IDENT: // We have to see if the identifier matches a typedef. // If not, treat it as an expression. // Otherwise, fall down to the parse_type() call. if (findtypedef(Text) == NULL) { stmt= binexpr(0); semi(); return(stmt); } case T_CHAR: case T_INT: case T_LONG: case T_STRUCT: case T_UNION: case T_ENUM: case T_TYPEDEF: // The beginning of a variable declaration. // Parse the type and get the identifier. // Then parse the rest of the declaration // and skip over the semicolon type = parse_type(&ctype, &class); ident(); var_declaration(type, ctype, class); semi(); return (NULL); // No AST generated here case T_IF: return (if_statement()); case T_WHILE: return (while_statement()); case T_FOR: return (for_statement()); case T_RETURN: return (return_statement()); case T_BREAK: return (break_statement()); case T_CONTINUE: return (continue_statement()); case T_SWITCH: return (switch_statement()); default: // For now, see if this is an expression. // This catches assignment statements. stmt= binexpr(0); semi(); return(stmt); } return (NULL); // Keep -Wall happy } // Parse a compound statement // and return its AST. If inswitch is true, // we look for a '}', 'case' or 'default' token // to end the parsing. Otherwise, look for // just a '}' to end the parsing. 
struct ASTnode *compound_statement(int inswitch) { struct ASTnode *left = NULL; struct ASTnode *tree; while (1) { // Parse a single statement tree = single_statement(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, P_NONE, left, NULL, tree, NULL, 0); } // Leave if we've hit the end token if (Token.token == T_RBRACE) return(left); if (inswitch && (Token.token == T_CASE || Token.token == T_DEFAULT)) return(left); } } ================================================ FILE: 38_Dangling_Else/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 // Append a node to the singly-linked list pointed to by head or tail void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node) { // Check for valid pointers if (head == NULL || tail == NULL || node == NULL) fatal("Either head, tail or node is NULL in appendsym"); // Append to the list if (*tail) { (*tail)->next = node; *tail = node; } else *head = *tail = node; node->next = NULL; } // Create a symbol node to be added to a symbol table list. // Set up the node's: // + type: char, int etc. // + ctype: composite type pointer for struct/union // + structural type: var, function, array etc. 
// + size: number of elements, or endlabel: end label for a function // + posn: Position information for local symbols // Return a pointer to the new node struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int size, int posn) { // Get a new node struct symtable *node = (struct symtable *) malloc(sizeof(struct symtable)); if (node == NULL) fatal("Unable to malloc a symbol table node in newsym"); // Fill in the values node->name = strdup(name); node->type = type; node->ctype = ctype; node->stype = stype; node->class = class; node->size = size; node->posn = posn; node->next = NULL; node->member = NULL; // Generate any global space if (class == C_GLOBAL) genglobsym(node); return (node); } // Add a symbol to the global symbol list struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int size) { struct symtable *sym = newsym(name, type, ctype, stype, class, size, 0); appendsym(&Globhead, &Globtail, sym); return (sym); } // Add a symbol to the local symbol list struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_LOCAL, size, 0); appendsym(&Loclhead, &Locltail, sym); return (sym); } // Add a symbol to the parameter list struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_PARAM, size, 0); appendsym(&Parmhead, &Parmtail, sym); return (sym); } // Add a symbol to the temporary member list struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_MEMBER, size, 0); appendsym(&Membhead, &Membtail, sym); return (sym); } // Add a struct to the struct list struct symtable *addstruct(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_STRUCT, 
size, 0); appendsym(&Structhead, &Structtail, sym); return (sym); } // Add a struct to the union list struct symtable *addunion(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_UNION, size, 0); appendsym(&Unionhead, &Uniontail, sym); return (sym); } // Add an enum type or value to the enum list. // Class is C_ENUMTYPE or C_ENUMVAL. // Use posn to store the int value. struct symtable *addenum(char *name, int class, int value) { struct symtable *sym = newsym(name, P_INT, NULL, 0, class, 0, value); appendsym(&Enumhead, &Enumtail, sym); return (sym); } // Add a typedef to the typedef list struct symtable *addtypedef(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_TYPEDEF, size, 0); appendsym(&Typehead, &Typetail, sym); return (sym); } // Search for a symbol in a specific list. // Return a pointer to the found node or NULL if not found. // If class is not zero, also match on the given class static struct symtable *findsyminlist(char *s, struct symtable *list, int class) { for (; list != NULL; list = list->next) if ((list->name != NULL) && !strcmp(s, list->name)) if (class==0 || class== list->class) return (list); return (NULL); } // Determine if the symbol s is in the global symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findglob(char *s) { return (findsyminlist(s, Globhead, 0)); } // Determine if the symbol s is in the local symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findlocl(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } return (findsyminlist(s, Loclhead, 0)); } // Determine if the symbol s is in the symbol table. // Return a pointer to the found node or NULL if not found. 
struct symtable *findsymbol(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } // Otherwise, try the local and global symbol lists node = findsyminlist(s, Loclhead, 0); if (node) return (node); return (findsyminlist(s, Globhead, 0)); } // Find a member in the member list // Return a pointer to the found node or NULL if not found. struct symtable *findmember(char *s) { return (findsyminlist(s, Membhead, 0)); } // Find a struct in the struct list // Return a pointer to the found node or NULL if not found. struct symtable *findstruct(char *s) { return (findsyminlist(s, Structhead, 0)); } // Find a struct in the union list // Return a pointer to the found node or NULL if not found. struct symtable *findunion(char *s) { return (findsyminlist(s, Unionhead, 0)); } // Find an enum type in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumtype(char *s) { return (findsyminlist(s, Enumhead, C_ENUMTYPE)); } // Find an enum value in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumval(char *s) { return (findsyminlist(s, Enumhead, C_ENUMVAL)); } // Find a type in the tyedef list // Return a pointer to the found node or NULL if not found. 
struct symtable *findtypedef(char *s) { return (findsyminlist(s, Typehead, 0)); } // Reset the contents of the symbol table void clear_symtable(void) { Globhead = Globtail = NULL; Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Membhead = Membtail = NULL; Structhead = Structtail = NULL; Unionhead = Uniontail = NULL; Enumhead = Enumtail = NULL; Typehead = Typetail = NULL; } // Clear all the entries in the local symbol table void freeloclsyms(void) { Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Functionid = NULL; } ================================================ FILE: 38_Dangling_Else/tests/err.input31.c ================================================ Expecting a primary expression, got token:+ on line 5 of input31.c ================================================ FILE: 38_Dangling_Else/tests/err.input32.c ================================================ Unknown variable:cow on line 4 of input32.c ================================================ FILE: 38_Dangling_Else/tests/err.input33.c ================================================ Incompatible type to return on line 4 of input33.c ================================================ FILE: 38_Dangling_Else/tests/err.input34.c ================================================ For now, declaration of non-global arrays is not implemented on line 4 of input34.c ================================================ FILE: 38_Dangling_Else/tests/err.input35.c ================================================ Duplicate local variable declaration:a on line 4 of input35.c ================================================ FILE: 38_Dangling_Else/tests/err.input36.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input36.c ================================================ FILE: 38_Dangling_Else/tests/err.input37.c ================================================ Unexpected token in parameter list:+ on line 3 of input37.c 
================================================ FILE: 38_Dangling_Else/tests/err.input38.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input38.c ================================================ FILE: 38_Dangling_Else/tests/err.input39.c ================================================ No statements in function with non-void type on line 4 of input39.c ================================================ FILE: 38_Dangling_Else/tests/err.input40.c ================================================ No return for function with non-void type on line 4 of input40.c ================================================ FILE: 38_Dangling_Else/tests/err.input41.c ================================================ Can't return from a void function on line 3 of input41.c ================================================ FILE: 38_Dangling_Else/tests/err.input42.c ================================================ Undeclared function:fred on line 3 of input42.c ================================================ FILE: 38_Dangling_Else/tests/err.input43.c ================================================ Undeclared array:b on line 3 of input43.c ================================================ FILE: 38_Dangling_Else/tests/err.input44.c ================================================ Unknown variable:z on line 3 of input44.c ================================================ FILE: 38_Dangling_Else/tests/err.input45.c ================================================ & operator must be followed by an identifier on line 3 of input45.c ================================================ FILE: 38_Dangling_Else/tests/err.input46.c ================================================ * operator must be followed by an identifier or * on line 3 of input46.c ================================================ FILE: 38_Dangling_Else/tests/err.input47.c ================================================ ++ operator must be followed by an identifier on line 3 of 
input47.c ================================================ FILE: 38_Dangling_Else/tests/err.input48.c ================================================ -- operator must be followed by an identifier on line 3 of input48.c ================================================ FILE: 38_Dangling_Else/tests/err.input49.c ================================================ Incompatible expression in assignment on line 6 of input49.c ================================================ FILE: 38_Dangling_Else/tests/err.input50.c ================================================ Incompatible types in binary expression on line 6 of input50.c ================================================ FILE: 38_Dangling_Else/tests/err.input51.c ================================================ Expected '\'' at end of char literal on line 4 of input51.c ================================================ FILE: 38_Dangling_Else/tests/err.input52.c ================================================ Unrecognised character:$ on line 5 of input52.c ================================================ FILE: 38_Dangling_Else/tests/err.input56.c ================================================ unknown struct/union type:var1 on line 2 of input56.c ================================================ FILE: 38_Dangling_Else/tests/err.input57.c ================================================ previously defined struct/union:fred on line 2 of input57.c ================================================ FILE: 38_Dangling_Else/tests/err.input59.c ================================================ Undeclared variable:y on line 3 of input59.c ================================================ FILE: 38_Dangling_Else/tests/err.input60.c ================================================ Undeclared variable:x on line 3 of input60.c ================================================ FILE: 38_Dangling_Else/tests/err.input61.c ================================================ Undeclared variable:x on line 3 of input61.c 
================================================ FILE: 38_Dangling_Else/tests/err.input64.c ================================================ undeclared enum type::fred on line 1 of input64.c ================================================ FILE: 38_Dangling_Else/tests/err.input65.c ================================================ enum type redeclared::fred on line 2 of input65.c ================================================ FILE: 38_Dangling_Else/tests/err.input66.c ================================================ enum value redeclared::z on line 2 of input66.c ================================================ FILE: 38_Dangling_Else/tests/err.input68.c ================================================ redefinition of typedef:FOO on line 2 of input68.c ================================================ FILE: 38_Dangling_Else/tests/err.input69.c ================================================ unknown type:FLOO on line 2 of input69.c ================================================ FILE: 38_Dangling_Else/tests/err.input72.c ================================================ no loop or switch to break out from on line 1 of input72.c ================================================ FILE: 38_Dangling_Else/tests/err.input73.c ================================================ no loop to continue to on line 1 of input73.c ================================================ FILE: 38_Dangling_Else/tests/err.input75.c ================================================ Unexpected token in switch:if on line 4 of input75.c ================================================ FILE: 38_Dangling_Else/tests/err.input76.c ================================================ No cases in switch on line 3 of input76.c ================================================ FILE: 38_Dangling_Else/tests/err.input77.c ================================================ case or default after existing default on line 6 of input77.c ================================================ FILE: 
38_Dangling_Else/tests/err.input78.c ================================================ case or default after existing default on line 6 of input78.c ================================================ FILE: 38_Dangling_Else/tests/err.input79.c ================================================ Duplicate case value on line 6 of input79.c ================================================ FILE: 38_Dangling_Else/tests/input01.c ================================================ int printf(char *fmt); void main() { printf("%d\n", 12 * 3); printf("%d\n", 18 - 2 * 4); printf("%d\n", 1 + 2 + 9 - 5/2 + 3*5); } ================================================ FILE: 38_Dangling_Else/tests/input02.c ================================================ int printf(char *fmt); void main() { int fred; int jim; fred= 5; jim= 12; printf("%d\n", fred + jim); } ================================================ FILE: 38_Dangling_Else/tests/input03.c ================================================ int printf(char *fmt); void main() { int x; x= 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); } ================================================ FILE: 38_Dangling_Else/tests/input04.c ================================================ int printf(char *fmt); void main() { int x; x= 7 < 9; printf("%d\n", x); x= 7 <= 9; printf("%d\n", x); x= 7 != 9; printf("%d\n", x); x= 7 == 7; printf("%d\n", x); x= 7 >= 7; printf("%d\n", x); x= 7 <= 7; printf("%d\n", x); x= 9 > 7; printf("%d\n", x); x= 9 >= 7; printf("%d\n", x); x= 9 != 7; printf("%d\n", x); } ================================================ FILE: 38_Dangling_Else/tests/input05.c ================================================ int printf(char *fmt); void main() { int i; int j; i=6; j=12; if (i < j) { printf("%d\n", i); } else { printf("%d\n", j); } } ================================================ FILE: 38_Dangling_Else/tests/input06.c 
================================================ int printf(char *fmt); void main() { int i; i=1; while (i <= 10) { printf("%d\n", i); i= i + 1; } } ================================================ FILE: 38_Dangling_Else/tests/input07.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 38_Dangling_Else/tests/input08.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 38_Dangling_Else/tests/input09.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { printf("%d\n", 2 * b - a); } } ================================================ FILE: 38_Dangling_Else/tests/input10.c ================================================ int printf(char *fmt); void main() { int i; char j; j= 20; printf("%d\n", j); i= 10; printf("%d\n", i); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 2; j= j + 1) { printf("%d\n", j); } } ================================================ FILE: 38_Dangling_Else/tests/input11.c ================================================ int printf(char *fmt); int main() { int i; char j; long k; i= 10; printf("%d\n", i); j= 20; printf("%d\n", j); k= 30; printf("%d\n", k); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 4; j= j + 1) { printf("%d\n", j); } for (k= 1; k <= 5; k= k + 1) { printf("%d\n", k); } return(i); printf("%d\n", 12345); return(3); } ================================================ FILE: 38_Dangling_Else/tests/input12.c ================================================ int printf(char *fmt); int fred() { return(5); } void main() { int x; x= fred(2); 
printf("%d\n", x); } ================================================ FILE: 38_Dangling_Else/tests/input13.c ================================================ int printf(char *fmt); int fred() { return(56); } void main() { int dummy; int result; dummy= printf("%d\n", 23); result= fred(10); dummy= printf("%d\n", result); } ================================================ FILE: 38_Dangling_Else/tests/input14.c ================================================ int printf(char *fmt); int fred() { return(20); } int main() { int result; printf("%d\n", 10); result= fred(15); printf("%d\n", result); printf("%d\n", fred(15)+10); return(0); } ================================================ FILE: 38_Dangling_Else/tests/input15.c ================================================ int printf(char *fmt); int main() { char a; char *b; char c; int d; int *e; int f; a= 18; printf("%d\n", a); b= &a; c= *b; printf("%d\n", c); d= 12; printf("%d\n", d); e= &d; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 38_Dangling_Else/tests/input16.c ================================================ int printf(char *fmt); int c; int d; int *e; int f; int main() { c= 12; d=18; printf("%d\n", c); e= &c + 1; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 38_Dangling_Else/tests/input17.c ================================================ int printf(char *fmt); int main() { char a; char *b; int d; int *e; b= &a; *b= 19; printf("%d\n", a); e= &d; *e= 12; printf("%d\n", d); return(0); } ================================================ FILE: 38_Dangling_Else/tests/input18.c ================================================ int printf(char *fmt); int main() { int a; int b; a= b= 34; printf("%d\n", a); printf("%d\n", b); return(0); } ================================================ FILE: 38_Dangling_Else/tests/input18a.c ================================================ int printf(char *fmt); int a; int *b; char c; char 
*d; int main() { b= &a; *b= 15; printf("%d\n", a); d= &c; *d= 16; printf("%d\n", c); return(0); } ================================================ FILE: 38_Dangling_Else/tests/input19.c ================================================ int printf(char *fmt); int a; int b; int c; int d; int e; int main() { a= 2; b= 4; c= 3; d= 2; e= (a+b) * (c+d); printf("%d\n", e); return(0); } ================================================ FILE: 38_Dangling_Else/tests/input20.c ================================================ int printf(char *fmt); int a; int b[25]; int main() { b[3]= 12; a= b[3]; printf("%d\n", a); return(0); } ================================================ FILE: 38_Dangling_Else/tests/input21.c ================================================ int printf(char *fmt); char c; char *str; int main() { c= '\n'; printf("%d\n", c); for (str= "Hello world\n"; *str != 0; str= str + 1) { printf("%c", *str); } return(0); } ================================================ FILE: 38_Dangling_Else/tests/input22.c ================================================ int printf(char *fmt); char a; char b; char c; int d; int e; int f; long g; long h; long i; int main() { b= 5; c= 7; a= b + c++; printf("%d\n", a); e= 5; f= 7; d= e + f++; printf("%d\n", d); h= 5; i= 7; g= h + i++; printf("%d\n", g); a= b-- + c; printf("%d\n", a); d= e-- + f; printf("%d\n", d); g= h-- + i; printf("%d\n", g); a= ++b + c; printf("%d\n", a); d= ++e + f; printf("%d\n", d); g= ++h + i; printf("%d\n", g); a= b * --c; printf("%d\n", a); d= e * --f; printf("%d\n", d); g= h * --i; printf("%d\n", g); return(0); } ================================================ FILE: 38_Dangling_Else/tests/input23.c ================================================ int printf(char *fmt); char *str; int x; int main() { x= -23; printf("%d\n", x); printf("%d\n", -10 * -10); x= 1; x= ~x; printf("%d\n", x); x= 2 > 5; printf("%d\n", x); x= !x; printf("%d\n", x); x= !x; printf("%d\n", x); x= 13; if (x) { printf("%d\n", 13); } x= 0; if 
(!x) { printf("%d\n", 14); } for (str= "Hello world\n"; *str; str++) { printf("%c", *str); } return(0); } ================================================ FILE: 38_Dangling_Else/tests/input24.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { a= 42; b= 19; printf("%d\n", a & b); printf("%d\n", a | b); printf("%d\n", a ^ b); printf("%d\n", 1 << 3); printf("%d\n", 63 >> 3); return(0); } ================================================ FILE: 38_Dangling_Else/tests/input25.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { char z; int y; int x; x= 10; y= 20; z= 30; printf("%d\n", x); printf("%d\n", y); printf("%d\n", z); a= 5; b= 15; c= 25; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); return(0); } ================================================ FILE: 38_Dangling_Else/tests/input26.c ================================================ int printf(char *fmt); int main(int a, char b, long c, int d, int e, int f, int g, int h) { int i; int j; int k; a= 13; printf("%d\n", a); b= 23; printf("%d\n", b); c= 34; printf("%d\n", c); d= 44; printf("%d\n", d); e= 54; printf("%d\n", e); f= 64; printf("%d\n", f); g= 74; printf("%d\n", g); h= 84; printf("%d\n", h); i= 94; printf("%d\n", i); j= 95; printf("%d\n", j); k= 96; printf("%d\n", k); return(0); } ================================================ FILE: 38_Dangling_Else/tests/input27.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int param5(int a, int b, int c, int d, int e) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param2(int a, int b) { int c; int d; int e; c= 3; d= 4; e= 5; 
printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param0() { int a; int b; int c; int d; int e; a= 1; b= 2; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int main() { param8(1,2,3,4,5,6,7,8); param5(1,2,3,4,5); param2(1,2); param0(); return(0); } ================================================ FILE: 38_Dangling_Else/tests/input28.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 38_Dangling_Else/tests/input29.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h); int fred(int a, int b, int c); int main(); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 38_Dangling_Else/tests/input30.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input30.c", 0); if (zin == -1) { return 
(1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 38_Dangling_Else/tests/input31.c ================================================ int printf(char *fmt); int main() { int x; x= 2 + + 3 - * / ; } ================================================ FILE: 38_Dangling_Else/tests/input32.c ================================================ int printf(char *fmt); int main() { pizza cow llama sausage; } ================================================ FILE: 38_Dangling_Else/tests/input33.c ================================================ int printf(char *fmt); int main() { char *z; return(z); } ================================================ FILE: 38_Dangling_Else/tests/input34.c ================================================ int printf(char *fmt); int main() { int a[12]; return(0); } ================================================ FILE: 38_Dangling_Else/tests/input35.c ================================================ int printf(char *fmt); int fred(int a, int b) { int a; return(a); } ================================================ FILE: 38_Dangling_Else/tests/input36.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c); ================================================ FILE: 38_Dangling_Else/tests/input37.c ================================================ int printf(char *fmt); int fred(int a, char b +, int z); ================================================ FILE: 38_Dangling_Else/tests/input38.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c, int g); ================================================ FILE: 38_Dangling_Else/tests/input39.c ================================================ int printf(char *fmt); int main() { int a; } ================================================ FILE: 
38_Dangling_Else/tests/input40.c ================================================ int printf(char *fmt); int main() { int a; a= 5; } ================================================ FILE: 38_Dangling_Else/tests/input41.c ================================================ int printf(char *fmt); void fred() { return(5); } ================================================ FILE: 38_Dangling_Else/tests/input42.c ================================================ int printf(char *fmt); int main() { fred(5); } ================================================ FILE: 38_Dangling_Else/tests/input43.c ================================================ int printf(char *fmt); int main() { int a; a= b[4]; } ================================================ FILE: 38_Dangling_Else/tests/input44.c ================================================ int printf(char *fmt); int main() { int a; a= z; } ================================================ FILE: 38_Dangling_Else/tests/input45.c ================================================ int printf(char *fmt); int main() { int a; a= &5; } ================================================ FILE: 38_Dangling_Else/tests/input46.c ================================================ int printf(char *fmt); int main() { int a; a= *5; } ================================================ FILE: 38_Dangling_Else/tests/input47.c ================================================ int printf(char *fmt); int main() { int a; a= ++5; } ================================================ FILE: 38_Dangling_Else/tests/input48.c ================================================ int printf(char *fmt); int main() { int a; a= --5; } ================================================ FILE: 38_Dangling_Else/tests/input49.c ================================================ int printf(char *fmt); int main() { int x; char y; y= x; } ================================================ FILE: 38_Dangling_Else/tests/input50.c ================================================ int printf(char *fmt); 
int main() { char *a; char *b; a= a + b; } ================================================ FILE: 38_Dangling_Else/tests/input51.c ================================================ int printf(char *fmt); int main() { char a; a= 'fred'; } ================================================ FILE: 38_Dangling_Else/tests/input52.c ================================================ int printf(char *fmt); int main() { int a; a= $5.00; } ================================================ FILE: 38_Dangling_Else/tests/input53.c ================================================ int printf(char *fmt); int main() { printf("Hello world, %d\n", 23); return(0); } ================================================ FILE: 38_Dangling_Else/tests/input54.c ================================================ int printf(char *fmt); int main() { int i; for (i=0; i < 20; i++) { printf("Hello world, %d\n", i); } return(0); } ================================================ FILE: 38_Dangling_Else/tests/input55.c ================================================ int printf(char *fmt); int main(int argc, char **argv) { int i; char *argument; printf("Hello world\n"); for (i=0; i < argc; i++) { argument= *argv; argv= argv + 1; printf("Argument %d is %s\n", i, argument); } return(0); } ================================================ FILE: 38_Dangling_Else/tests/input56.c ================================================ struct foo { int x; }; struct mary var1; ================================================ FILE: 38_Dangling_Else/tests/input57.c ================================================ struct fred { int x; } ; struct fred { char y; } ; ================================================ FILE: 38_Dangling_Else/tests/input58.c ================================================ int printf(char *fmt); struct fred { int x; char y; long z; }; struct fred var2; struct fred *varptr; int main() { long result; var2.x= 12; printf("%d\n", var2.x); var2.y= 'c'; printf("%d\n", var2.y); var2.z= 4005; printf("%d\n", 
var2.z); result= var2.x + var2.y + var2.z; printf("%d\n", result); varptr= &var2; result= varptr->x + varptr->y + varptr->z; printf("%d\n", result); return(0); } ================================================ FILE: 38_Dangling_Else/tests/input59.c ================================================ int main() { int x; x= y.foo; } ================================================ FILE: 38_Dangling_Else/tests/input60.c ================================================ int main() { int x; x= x.foo; } ================================================ FILE: 38_Dangling_Else/tests/input61.c ================================================ int main() { int x; x= x->foo; } ================================================ FILE: 38_Dangling_Else/tests/input62.c ================================================ int printf(char *fmt); union fred { char w; int x; int y; long z; }; union fred var1; union fred *varptr; int main() { var1.x= 65; printf("%d\n", var1.x); var1.x= 66; printf("%d\n", var1.x); printf("%d\n", var1.y); printf("The next two depend on the endian of the platform\n"); printf("%d\n", var1.w); printf("%d\n", var1.z); varptr= &var1; varptr->x= 67; printf("%d\n", varptr->x); printf("%d\n", varptr->y); return(0); } ================================================ FILE: 38_Dangling_Else/tests/input63.c ================================================ int printf(char *fmt); enum fred { apple=1, banana, carrot, pear=10, peach, mango, papaya }; enum jane { aple=1, bnana, crrot, par=10, pech, mago, paaya }; enum fred var1; enum jane var2; enum fred var3; int main() { var1= carrot + pear + mango; printf("%d\n", var1); return(0); } ================================================ FILE: 38_Dangling_Else/tests/input64.c ================================================ enum fred var3; ================================================ FILE: 38_Dangling_Else/tests/input65.c ================================================ enum fred { x, y, z }; enum fred { a, b }; 
================================================ FILE: 38_Dangling_Else/tests/input66.c ================================================ enum fred { x, y, z }; enum mary { a, b, z }; ================================================ FILE: 38_Dangling_Else/tests/input67.c ================================================ int printf(char *fmt); typedef int FOO; FOO var1; struct bar { int x; int y} ; typedef struct bar BAR; BAR var2; int main() { var1= 5; printf("%d\n", var1); var2.x= 7; var2.y= 10; printf("%d\n", var2.x + var2.y); return(0); } ================================================ FILE: 38_Dangling_Else/tests/input68.c ================================================ typedef int FOO; typedef char FOO; ================================================ FILE: 38_Dangling_Else/tests/input69.c ================================================ typedef int FOO; FLOO y; ================================================ FILE: 38_Dangling_Else/tests/input70.c ================================================ #include typedef int FOO; int main() { FOO x; x= 56; printf("%d\n", x); return(0); } ================================================ FILE: 38_Dangling_Else/tests/input71.c ================================================ #include int main() { int x; x = 0; while (x < 100) { if (x == 5) { x = x + 2; continue; } printf("%d\n", x); if (x == 14) { break; } x = x + 1; } printf("Done\n"); return (0); } ================================================ FILE: 38_Dangling_Else/tests/input72.c ================================================ int main() { break; } ================================================ FILE: 38_Dangling_Else/tests/input73.c ================================================ int main() { continue; } ================================================ FILE: 38_Dangling_Else/tests/input74.c ================================================ #include int main() { int x; int y; y= 0; for (x=0; x < 5; x++) { switch(x) { case 1: { y= 5; break; } case 2: { y= 7; 
break; } case 3: { y= 9; } default: { y= 100; } } printf("%d\n", y); } return(0); } ================================================ FILE: 38_Dangling_Else/tests/input75.c ================================================ int main() { int x; switch(x) { if (x<5); } } ================================================ FILE: 38_Dangling_Else/tests/input76.c ================================================ int main() { int x; switch(x) { } } ================================================ FILE: 38_Dangling_Else/tests/input77.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } case 4: { x= 2; } } } ================================================ FILE: 38_Dangling_Else/tests/input78.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } default: { x= 2; } } } ================================================ FILE: 38_Dangling_Else/tests/input79.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } case 2: { x= 2; } case 1: { x= 2; } default: { x= 2; } } } ================================================ FILE: 38_Dangling_Else/tests/input80.c ================================================ #include int x; int y; int main() { for (x=0, y=1; x < 6; x++, y=y+2) { printf("%d %d\n", x, y); } return(0); } ================================================ FILE: 38_Dangling_Else/tests/input81.c ================================================ #include int x; int y; int main() { x= 0; y=1; for (;x<5;) { printf("%d %d\n", x, y); x=x+1; y=y+2; } return(0); } ================================================ FILE: 38_Dangling_Else/tests/input82.c ================================================ #include int main() { int x; x= 10; // Dangling else test if (x > 5) if (x > 15) printf("x > 15\n"); else printf("15 >= x > 5\n"); else printf("5 >= x\n"); return(0); } ================================================ 
#!/bin/sh
# Make the output files for each test.
#
# For every input*c file that has neither an "out." nor an "err." file yet:
#   - compile it with our compiler, capturing stderr in err.<file>;
#   - if nothing was written to stderr, discard the err file, build the
#     same source with the system C compiler and record that program's
#     standard output in out.<file>.

# Build our compiler if needed
if [ ! -f ../cwj ]
then (cd .. && make install)
fi

for i in input*c
do
  # Only generate results for tests that have none recorded yet.
  # Two separate tests joined by && replace the obsolescent -a operator.
  if [ ! -f "out.$i" ] && [ ! -f "err.$i" ]
  then
    ../cwj -o out "$i" 2> "err.$i"

    # If the err file is empty
    if [ ! -s "err.$i" ]
    then
      rm -f "err.$i"
      cc -o out "$i"
      ./out > "out.$i"
    fi
  fi
  rm -f out out.s
done
4 5 6 7 8 9 10 ================================================ FILE: 38_Dangling_Else/tests/out.input09.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 38_Dangling_Else/tests/out.input10.c ================================================ 20 10 1 2 3 4 5 253 254 255 0 1 ================================================ FILE: 38_Dangling_Else/tests/out.input11.c ================================================ 10 20 30 1 2 3 4 5 253 254 255 0 1 2 3 1 2 3 4 5 ================================================ FILE: 38_Dangling_Else/tests/out.input12.c ================================================ 5 ================================================ FILE: 38_Dangling_Else/tests/out.input13.c ================================================ 23 56 ================================================ FILE: 38_Dangling_Else/tests/out.input14.c ================================================ 10 20 30 ================================================ FILE: 38_Dangling_Else/tests/out.input15.c ================================================ 18 18 12 12 ================================================ FILE: 38_Dangling_Else/tests/out.input16.c ================================================ 12 18 ================================================ FILE: 38_Dangling_Else/tests/out.input17.c ================================================ 19 12 ================================================ FILE: 38_Dangling_Else/tests/out.input18.c ================================================ 34 34 ================================================ FILE: 38_Dangling_Else/tests/out.input18a.c ================================================ 15 16 ================================================ FILE: 38_Dangling_Else/tests/out.input19.c ================================================ 30 ================================================ FILE: 38_Dangling_Else/tests/out.input20.c 
================================================ 12 ================================================ FILE: 38_Dangling_Else/tests/out.input21.c ================================================ 10 Hello world ================================================ FILE: 38_Dangling_Else/tests/out.input22.c ================================================ 12 12 12 13 13 13 13 13 13 35 35 35 ================================================ FILE: 38_Dangling_Else/tests/out.input23.c ================================================ -23 100 -2 0 1 0 13 14 Hello world ================================================ FILE: 38_Dangling_Else/tests/out.input24.c ================================================ 2 59 57 8 7 ================================================ FILE: 38_Dangling_Else/tests/out.input25.c ================================================ 10 20 30 5 15 25 ================================================ FILE: 38_Dangling_Else/tests/out.input26.c ================================================ 13 23 34 44 54 64 74 84 94 95 96 ================================================ FILE: 38_Dangling_Else/tests/out.input27.c ================================================ 1 2 3 4 5 6 7 8 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 ================================================ FILE: 38_Dangling_Else/tests/out.input28.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 38_Dangling_Else/tests/out.input29.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 38_Dangling_Else/tests/out.input30.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input30.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 
60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 38_Dangling_Else/tests/out.input53.c ================================================ Hello world, 23 ================================================ FILE: 38_Dangling_Else/tests/out.input54.c ================================================ Hello world, 0 Hello world, 1 Hello world, 2 Hello world, 3 Hello world, 4 Hello world, 5 Hello world, 6 Hello world, 7 Hello world, 8 Hello world, 9 Hello world, 10 Hello world, 11 Hello world, 12 Hello world, 13 Hello world, 14 Hello world, 15 Hello world, 16 Hello world, 17 Hello world, 18 Hello world, 19 ================================================ FILE: 38_Dangling_Else/tests/out.input55.c ================================================ Hello world Argument 0 is ./out ================================================ FILE: 38_Dangling_Else/tests/out.input58.c ================================================ 12 99 4005 4116 4116 ================================================ FILE: 38_Dangling_Else/tests/out.input62.c ================================================ 65 66 66 The next two depend on the endian of the platform 66 66 67 67 ================================================ FILE: 38_Dangling_Else/tests/out.input63.c ================================================ 25 ================================================ FILE: 38_Dangling_Else/tests/out.input67.c ================================================ 5 17 ================================================ FILE: 38_Dangling_Else/tests/out.input70.c ================================================ 56 ================================================ FILE: 38_Dangling_Else/tests/out.input71.c ================================================ 0 1 2 3 4 7 8 9 10 11 12 13 14 Done ================================================ FILE: 38_Dangling_Else/tests/out.input74.c ================================================ 100 5 7 100 100 
#!/bin/sh
# Run each test and compare
# against known good output

# Build our compiler if needed
if [ ! -f ../cwj ]
then (cd .. && make install)
fi

# Finish the report line for one test.
#   $1 = known-good file, $2 = file produced by this run
# Any non-zero status from cmp counts as a failure, so a cmp error
# (status > 1) is no longer reported as ": OK".
check_result() {
  if cmp -s "$1" "$2"
  then echo ": OK"
  else
    echo ": failed"
    diff -c "$1" "$2"
    echo
  fi
}

# Try to use each input source file
for i in input*c
do
  # We can't do anything if there's no file to test against
  if [ ! -f "out.$i" ] && [ ! -f "err.$i" ]
  then echo "Can't run test on $i, no output file!"

  # Output file: compile the source, run it and
  # capture the output, and compare it against
  # the known-good output
  elif [ -f "out.$i" ]
  then
    # Print the test name (no newline), compile it
    # with our compiler. printf is used because the
    # behaviour of "echo -n" is not portable.
    printf '%s' "$i"
    ../cwj -o out "$i"
    ./out > "trial.$i"
    check_result "out.$i" "trial.$i"

  # Error file: compile the source and
  # capture the error messages. Compare
  # against the known-bad output.
  else
    printf '%s' "$i"
    ../cwj "$i" 2> "trial.$i"
    check_result "err.$i" "trial.$i"
  fi
  rm -f out out.s "trial.$i"
done
-eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.o out.s "trial.$i" done ================================================ FILE: 38_Dangling_Else/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->type = type; n->left = left; n->mid = mid; n->right = right; n->sym = sym; n->intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, struct symtable *sym, int intvalue) { return (mkastnode(op, type, NULL, NULL, NULL, sym, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, struct symtable *sym, int intvalue) { return (mkastnode(op, type, left, NULL, NULL, sym, intvalue)); } // Generate and return a new label number // just for AST dumping purposes static int gendumplabel(void) { static int id = 1; return (id++); } // Given an AST tree, print it out and follow the // traversal of the tree that genAST() follows void dumpAST(struct ASTnode *n, int label, int level) { int Lfalse, Lstart, Lend; switch (n->op) { case A_IF: Lfalse = gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_IF"); if (n->right) { Lend = gendumplabel(); fprintf(stdout, ", end L%d", Lend); } fprintf(stdout, "\n"); dumpAST(n->left, Lfalse, level + 2); dumpAST(n->mid, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); return; case A_WHILE: Lstart = gendumplabel(); 
for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_WHILE, start L%d\n", Lstart); Lend = gendumplabel(); dumpAST(n->left, Lend, level + 2); dumpAST(n->right, NOLABEL, level + 2); return; } // Reset level to -2 for A_GLUE if (n->op == A_GLUE) level = -2; // General AST node handling if (n->left) dumpAST(n->left, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); for (int i = 0; i < level; i++) fprintf(stdout, " "); switch (n->op) { case A_GLUE: fprintf(stdout, "\n\n"); return; case A_FUNCTION: fprintf(stdout, "A_FUNCTION %s\n", n->sym->name); return; case A_ADD: fprintf(stdout, "A_ADD\n"); return; case A_SUBTRACT: fprintf(stdout, "A_SUBTRACT\n"); return; case A_MULTIPLY: fprintf(stdout, "A_MULTIPLY\n"); return; case A_DIVIDE: fprintf(stdout, "A_DIVIDE\n"); return; case A_EQ: fprintf(stdout, "A_EQ\n"); return; case A_NE: fprintf(stdout, "A_NE\n"); return; case A_LT: fprintf(stdout, "A_LE\n"); return; case A_GT: fprintf(stdout, "A_GT\n"); return; case A_LE: fprintf(stdout, "A_LE\n"); return; case A_GE: fprintf(stdout, "A_GE\n"); return; case A_INTLIT: fprintf(stdout, "A_INTLIT %d\n", n->intvalue); return; case A_STRLIT: fprintf(stdout, "A_STRLIT rval label L%d\n", n->intvalue); return; case A_IDENT: if (n->rvalue) fprintf(stdout, "A_IDENT rval %s\n", n->sym->name); else fprintf(stdout, "A_IDENT %s\n", n->sym->name); return; case A_ASSIGN: fprintf(stdout, "A_ASSIGN\n"); return; case A_WIDEN: fprintf(stdout, "A_WIDEN\n"); return; case A_RETURN: fprintf(stdout, "A_RETURN\n"); return; case A_FUNCCALL: fprintf(stdout, "A_FUNCCALL %s\n", n->sym->name); return; case A_ADDR: fprintf(stdout, "A_ADDR %s\n", n->sym->name); return; case A_DEREF: if (n->rvalue) fprintf(stdout, "A_DEREF rval\n"); else fprintf(stdout, "A_DEREF\n"); return; case A_SCALE: fprintf(stdout, "A_SCALE %d\n", n->size); return; case A_PREINC: fprintf(stdout, "A_PREINC %s\n", n->sym->name); return; case A_PREDEC: fprintf(stdout, "A_PREDEC %s\n", n->sym->name); 
return; case A_POSTINC: fprintf(stdout, "A_POSTINC\n"); return; case A_POSTDEC: fprintf(stdout, "A_POSTDEC\n"); return; case A_NEGATE: fprintf(stdout, "A_NEGATE\n"); return; case A_BREAK: fprintf(stdout, "A_BREAK\n"); return; case A_CONTINUE: fprintf(stdout, "A_CONTINUE\n"); return; case A_CASE: fprintf(stdout, "A_CASE %d\n", n->intvalue); return; case A_DEFAULT: fprintf(stdout, "A_DEFAULT\n"); return; case A_SWITCH: fprintf(stdout, "A_SWITCH\n"); return; default: fatald("Unknown dumpAST operator", n->op); } } ================================================ FILE: 38_Dangling_Else/types.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Types and type handling // Copyright (c) 2019 Warren Toomey, GPL3 // Return true if a type is an int type // of any size, false otherwise int inttype(int type) { return (((type & 0xf) == 0) && (type <= P_LONG)); } // Return true if a type is of pointer type int ptrtype(int type) { return ((type & 0xf) != 0); } // Given a primitive type, return // the type which is a pointer to it int pointer_to(int type) { if ((type & 0xf) == 0xf) fatald("Unrecognised in pointer_to: type", type); return (type + 1); } // Given a primitive pointer type, return // the type which it points to int value_at(int type) { if ((type & 0xf) == 0x0) fatald("Unrecognised in value_at: type", type); return (type - 1); } // Given a type and a composite type pointer, return // the size of this type in bytes int typesize(int type, struct symtable *ctype) { if (type == P_STRUCT || type == P_UNION) return (ctype->size); return (genprimsize(type)); } // Given an AST tree and a type which we want it to become, // possibly modify the tree by widening or scaling so that // it is compatible with this type. Return the original tree // if no changes occurred, a modified tree, or NULL if the // tree is not compatible with the given type. // If this will be part of a binary operation, the AST op is not zero. 
struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op) { int ltype; int lsize, rsize; ltype = tree->type; // XXX No idea on these yet if (ltype == P_STRUCT || ltype == P_UNION) fatal("Don't know how to do this yet"); if (rtype == P_STRUCT || rtype == P_UNION) fatal("Don't know how to do this yet"); // Compare scalar int types if (inttype(ltype) && inttype(rtype)) { // Both types same, nothing to do if (ltype == rtype) return (tree); // Get the sizes for each type lsize = typesize(ltype, NULL); // XXX Fix soon rsize = typesize(rtype, NULL); // XXX Fix soon // Tree's size is too big if (lsize > rsize) return (NULL); // Widen to the right if (rsize > lsize) return (mkastunary(A_WIDEN, rtype, tree, NULL, 0)); } // For pointers on the left if (ptrtype(ltype)) { // OK is same type on right and not doing a binary op if (op == 0 && ltype == rtype) return (tree); } // We can scale only on A_ADD or A_SUBTRACT operation if (op == A_ADD || op == A_SUBTRACT) { // Left is int type, right is pointer type and the size // of the original type is >1: scale the left if (inttype(ltype) && ptrtype(rtype)) { rsize = genprimsize(value_at(rtype)); if (rsize > 1) return (mkastunary(A_SCALE, rtype, tree, NULL, rsize)); else return (tree); // Size 1, no need to scale } } // If we get here, the types are not compatible return (NULL); } ================================================ FILE: 39_Var_Initialisation_pt1/Makefile ================================================ # Define the location of the include directory # and the location to install the compiler binary INCDIR=/tmp/include BINDIR=/tmp HSRCS= data.h decl.h defs.h SRCS= cg.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c ARMSRCS= cg_arm.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c cwj: $(SRCS) $(HSRCS) cc -o cwj -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCS) compn: $(SRCN) $(HSRCS) cc 
-D__NASM__ -o compn -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCN) cwjarm: $(ARMSRCS) $(HSRCS) cc -o cwjarm -g -Wall $(ARMSRCS) cp cwjarm cwj install: cwj mkdir -p $(INCDIR) rsync -a include/. $(INCDIR) cp cwj $(BINDIR) chmod +x $(BINDIR)/cwj installn: compn mkdir -p $(INCDIR) rsync -a include/. $(INCDIR) cp compn $(BINDIR) chmod +x $(BINDIR)/compn clean: rm -f cwj cwjarm compn *.o *.s out a.out test: install tests/runtests (cd tests; chmod +x runtests; ./runtests) armtest: cwjarm tests/runtests (cd tests; chmod +x runtests; ./runtests) testn: installn tests/runtestsn (cd tests; chmod +x runtestsn; ./runtestsn) ================================================ FILE: 39_Var_Initialisation_pt1/Readme.md ================================================ # Part 39: Variable Initialisation, part 1 We can declare variables in the language that our compiler accepts, but we can't initialise them at the same time. So in this part (and the following parts) I will work on fixing this. It's worth thinking about this now before we do any actual implementation because, hopefully, I can devise a way to share some of the code. So I might do a bit of a "brain dump" below to help me think about the problem. Right now, we can declare variables in three places: + Global variables are declared outside of any function + Function parameters are declared in a parameter list + Local variables are declared inside of a function Each declaration includes a description of the variable's type and its name. In terms of initialisation: + We can't initialise function parameters, as they will get values copied in from the function caller's arguments. + Global variables cannot be initialised with an expression, as there is no function in which the expression assembly code can run. + Local variables can be initialised with an expression. We also want to have a list of variable names after the type definition. This means that there will be some similarities and some differences to deal with. 
In semi-BNF syntax: ``` global_declaration: type_definition global_var_list ';' ; global_var_list: global_var | global_var ',' global_var_list ; global_var: variable_name | variable_name '=' literal_value ; local_declaration: type_definition local_var_list ';' ; local_var_list: local_var | local_var ',' local_var_list ; local_var: variable_name | variable_name '=' expression ; parameter_list: parameter | parameter ',' parameter_list ; parameter: type_definition variable_name ; ``` Here is a set of examples that I do want to support in our compiler. ### Global Declarations ```c int x= 5; int a, b= 7, c, d= 6; char *e, f; // e is a pointer, f isn't! char g[]= "Hello", *h= "foo"; int j[]= { 1, 2, 3, 4, 5 }; char *k[]= { "fish", "cat", "ball" }; int l[70]; ``` The comment I added has deep implications. We will have to parse the type at the front and, for *each* following variable, parse any prefix '*' or postfix '[ ]' to decide if it's a pointer or an array. I will only deal with a single dimensional list of initialisation values as shown in the examples above. ### Local Declarations The above examples also apply, but we should also be able to do these local declarations: ```c int u= x + 3; char *v= k[0]; char *w= k[b-6]; int y= 2*b+c, z= l[d] + j[2*x+5]; ``` I was going to offer to parse `int list[]= { x+2, a+b, c*d, u+j[3], j[x] + j[a] };` but that looks like an absolute nightmare to deal with, so I think I will stick with either a list of literal values, or not even allow array initialisation in local scope. ## Now What? Right now, after looking at the above examples, I'm kind of terrified! I think I can do the global variable initialisation, but I'll have to rewrite how I parse the types of each individual variable in a list. Then I can parse the '='. If we are in global scope, I'll call a function to parse the literal values. 
If in local scope, I can't use the existing `binexpr()` function because it parses the variable name on the left and makes an lvalue AST node for it internally. Perhaps I can hand-build this lvalue AST node and pass the pointer to it into `binexpr()`. Then I can add code to `binexpr()` that says: ``` if we got an lvalue pointer { set left to this pointer } else { left = prefix(); deal with the operator token } rest of the existing code ``` Ok, so I have a sort-of plan. I'll do some refactoring first. And the first task is to work out how to rewrite the parsing of types and variable names so that we can parse lists of them. ## A Look at the Refactoring So I've just done the refactoring of the code and it feels like I've just rearranged code but that's not entirely true. So what I'll do is show you how all the new functions call each other, and then outline what each one does. I've drawn a call graph of the code in the new `decl.c`: ![](Figs/decl_call_graph.png) At the top, `global_declarations()` is called to parse anything which is global. It simply loops and calls `declaration_list()`. Alternatively, we are in a function and we've hit a type token (`int`, `char` etc.). We call `declaration_list()` to parse what should be a variable. `declaration_list()` is new. It calls `parse_type()` to get the type (e.g. `int`, `char`, a struct, union or typedef etc.). This is the *base type* of a list, but each thing in the list can modify this type. As an example: ```c int a, *b, c[40], *d[100]; ``` So in `declaration_list()` we loop for each declaration in the list. For each declaration, we call `parse_stars()` to see how the base type is modified. At this point we can parse the identifier of the individual declaration, and this is done in `symbol_declaration()`. 
Based on what token follows, we call: + `function_declaration()` for functions, + `array_declaration()` for arrays, or + `scalar_declaration()` for scalar variables In a function declaration, there can be parameters, so `parameter_declaration_list()` is invoked to do this. Of course, the parameter list is a declaration, so we call `declaration_list()` to deal with this! Over on the left we have `parse_type()`. This gets ordinary types like `int` and `char`, but this is where new types such as structs, unions, enums and typedefs are also parsed. Parsing a typedef in `typedef_declaration()` should be easy because there is an existing type which we are aliasing. However, we can also write this: ```c typedef char * charptr; ``` Because `parse_type()` doesn't deal with any `*` tokens, `typedef_declaration()` has to manually call `parse_stars()` to see how the base type is modified before creating the alias. Any enum declaration is handled by `enum_declaration()`. For structs and unions, we call `composite_declaration()`. And guess what?! The members inside a new struct or union form a list of member declarations, so we call `declaration_list()` to parse them! ## Regression Testing I'm so glad that I now have about eighty individual tests, because there is no way I could safely refactor `decl.c` without being able to confirm that the new code still produces the same errors or assembly output as before. ## New Functionality Although this part of the journey is mostly a redesign to get ready for variable initialisation, we now support lists in global and local variable declarations. Therefore, I have new tests: ```c // tests/input84.c, locals int main() { int x, y; x=2; y=3; .. } //input88.c, globals struct foo { int x; int y; } fred, mary; ``` ## Conclusion and What's Next I feel a bit happier now that I've got the compiler to parse a list of variables following a type, e.g. `int a, *b, **c;`.
I've also put comments into the code where I will have to write the assignment functionality to go with declarations. In the next part of our compiler writing journey, we will try to add global variable declarations with assignments to our compiler. [Next step](../40_Var_Initialisation_pt2/Readme.md) ================================================ FILE: 39_Var_Initialisation_pt1/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\t.text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\t.data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch (type) { case P_CHAR: return (offset); case P_INT: case P_LONG: break; default: fatald("Bad type in calc_aligned_offset:", type); } // Here we have an int or a long. Align it on a 4-byte offset // I put the generic code here so it can be reused elsewhere. 
alignment = 4; offset = (offset + direction * (alignment - 1)) & ~(alignment - 1); return (offset); } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. static int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. // We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "%r10", "%r11", "%r12", "%r13", "%r9", "%r8", "%rcx", "%rdx", "%rsi", "%rdi" }; static char *breglist[] = { "%r10b", "%r11b", "%r12b", "%r13b", "%r9b", "%r8b", "%cl", "%dl", "%sil", "%dil" }; static char *dreglist[] = { "%r10d", "%r11d", "%r12d", "%r13d", "%r9d", "%r8d", "%ecx", "%edx", "%esi", "%edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); cgtextseg(); fprintf(Outfile, "# internal switch(expr) routine\n" "# %%rsi = switch table, %%rax = expr\n" "# from SubC: http://www.t3x.org/subc/\n" "\n" "switch:\n" " pushq %%rsi\n" " movq %%rdx, %%rsi\n" " movq %%rax, %%rbx\n" " cld\n" " lodsq\n" " movq %%rax, %%rcx\n" "next:\n" " lodsq\n" " movq %%rax, %%rdx\n" " lodsq\n" " cmpq %%rdx, %%rbx\n" " jnz no\n" " popq %%rsi\n" " jmp *%%rax\n" "no:\n" " loop next\n" " lodsq\n" " popq %%rsi\n" " jmp *%%rax\n" "\n"); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(struct symtable *sym) { char *name = sym->name; struct symtable *parm, *locvar; int cnt; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the %rsp and %rsp fprintf(Outfile, "\t.globl\t%s\n" "\t.type\t%s, @function\n" "%s:\n" "\tpushq\t%%rbp\n" "\tmovq\t%%rsp, %%rbp\n", name, name, name); // Copy any in-register parameters to the stack, up to six of them // The remaining parameters are already on the stack for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) { if (cnt > 6) { parm->posn = paramOffset; paramOffset += 8; } else { parm->posn = newlocaloffset(parm->type); cgstorlocal(paramReg--, parm); } } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. 
for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) { locvar->posn = newlocaloffset(locvar->type); } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\taddq\t$%d,%%rsp\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->endlabel); fprintf(Outfile, "\taddq\t$%d,%%rsp\n", stackOffset); fputs("\tpopq %rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadglob(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name); } else // Print out the code to initialise it switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovzbq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name); if (op == 
A_PREDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovslq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadlocal(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->posn); fprintf(Outfile, "\tmovq\t%d(%%rbp), %s\n", sym->posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->posn); } else switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->posn); fprintf(Outfile, "\tmovzbq\t%d(%%rbp), %s\n", sym->posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->posn); fprintf(Outfile, "\tmovslq\t%d(%%rbp), %s\n", sym->posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->posn); break; default: fatald("Bad type in cgloadlocal:", sym->type); } return (r); } // Given the label number of a global string, // load its address into a new 
register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tleaq\tL%d(%%rip), %s\n", label, reglist[r]); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tandq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\torq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txorq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshlq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshrq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tnegq\t%s\n", reglist[r]); return (r); } 
// Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnotq\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); return (r); } // Convert an integer value to a boolean value. Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s@PLT\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\taddq\t$%d, %%rsp\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpushq\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmovq\t%s, %s\n", reglist[r], reglist[FIRSTPARAMREG - argposn + 1]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsalq\t$%d, %s\n", val, reglist[r]); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %s(%%rip)\n", reglist[r], sym->name); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %s(%%rip)\n", breglist[r], sym->name); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %s(%%rip)\n", dreglist[r], sym->name); break; default: fatald("Bad type in cgstorglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %d(%%rbp)\n", reglist[r], sym->posn); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %d(%%rbp)\n", breglist[r], sym->posn); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %d(%%rbp)\n", dreglist[r], sym->posn); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the type size = typesize(node->type, node->ctype); // Generate the global identity and the label cgdataseg(); fprintf(Outfile, "\t.globl\t%s\n", node->name); fprintf(Outfile, "%s:", node->name); // Generate the space for this type switch (size) { case 1: fprintf(Outfile, "\t.byte\t0\n"); break; case 4: fprintf(Outfile, "\t.long\t0\n"); break; case 8: fprintf(Outfile, "\t.quad\t0\n"); 
break; default: for (int i = 0; i < size; i++) fprintf(Outfile, "\t.byte\t0\n"); } } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\t.byte\t%d\n", *cptr); } fprintf(Outfile, "\t.byte\t0\n"); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzbl\t%s, %%eax\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %%eax\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } cgjump(sym->endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL) fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", sym->name, reglist[r]); else fprintf(Outfile, "\tleaq\t%d(%%rbp), %s\n", sym->posn, reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzbq\t(%s), %s\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovslq\t(%s), %s\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { // Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmovb\t%s, (%s)\n", breglist[r1], reglist[r2]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } // Generate a switch jump table and the code to // load the registers and call the switch() code void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; // Get a label for the switch table label = genlabel(); cglabel(label); // Heuristic. If we have no cases, create one case // which points to the default case if (casecount == 0) { caseval[0] = 0; caselabel[0] = defaultlabel; casecount = 1; } // Generate the switch jump table. 
fprintf(Outfile, "\t.quad\t%d\n", casecount); for (i = 0; i < casecount; i++) fprintf(Outfile, "\t.quad\t%d, L%d\n", caseval[i], caselabel[i]); fprintf(Outfile, "\t.quad\tL%d\n", defaultlabel); // Load the specific registers cglabel(toplabel); fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); fprintf(Outfile, "\tleaq\tL%d(%%rip), %%rdx\n", label); fprintf(Outfile, "\tjmp\tswitch\n"); } ================================================ FILE: 39_Var_Initialisation_pt1/cg_arm.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for ARMv6 on Raspberry Pi // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. static int freereg[4]; static char *reglist[4] = { "r4", "r5", "r6", "r7" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // We have to store large integer literal values in memory. // Keep a list of them which will be output in the postamble #define MAXINTS 1024 int Intlist[MAXINTS]; static int Intslot = 0; // Determine the offset of a large integer // literal from the .L3 label. If the integer // isn't in the list, add it. 
static void set_int_offset(int val) { int offset = -1; // See if it is already there for (int i = 0; i < Intslot; i++) { if (Intlist[i] == val) { offset = 4 * i; break; } } // Not in the list, so add it if (offset == -1) { offset = 4 * Intslot; if (Intslot == MAXINTS) fatal("Out of int slots in set_int_offset()"); Intlist[Intslot++] = val; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L3+%d\n", offset); } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Print out the assembly postamble void cgpostamble() { // Print out the global variables fprintf(Outfile, ".L2:\n"); for (int i = 0; i < Globs; i++) { if (Symtable[i].stype == S_VARIABLE) fprintf(Outfile, "\t.word %s\n", Symtable[i].name); } // Print out the integer literals fprintf(Outfile, ".L3:\n"); for (int i = 0; i < Intslot; i++) { fprintf(Outfile, "\t.word %d\n", Intlist[i]); } } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, \%%function\n" "%s:\n" "\tpush\t{fp, lr}\n" "\tadd\tfp, sp, #4\n" "\tsub\tsp, sp, #8\n" "\tstr\tr0, [fp, #-8]\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Symtable[id].endlabel); fputs("\tsub\tsp, fp, #4\n" "\tpop\t{fp, pc}\n" "\t.align\t2\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); // If the literal value is small, do it with one instruction if (value <= 1000) fprintf(Outfile, "\tmov\t%s, #%d\n", reglist[r], value); else { set_int_offset(value); fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); } return (r); } // Determine the offset of a variable from the .L2 // label. Yes, this is inefficient code. static void set_var_offset(int id) { int offset = 0; // Walk the symbol table up to id. 
// Find S_VARIABLEs and add on 4 until // we get to our variable for (int i = 0; i < id; i++) { if (Symtable[i].stype == S_VARIABLE) offset += 4; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L2+%d\n", offset); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tldrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s, %s\n", reglist[r1], reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\tmul\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { // To do a divide: r0 holds the dividend, r1 holds the divisor. // The quotient is in r0. 
fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r1]); fprintf(Outfile, "\tmov\tr1, %s\n", reglist[r2]); fprintf(Outfile, "\tbl\t__aeabi_idiv\n"); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r1]); free_register(r2); return (r1); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]); fprintf(Outfile, "\tbl\t%s\n", Symtable[id].name); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r]); return (r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tlsl\t%s, %s, #%d\n", reglist[r], reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tstr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgstorglob:", Symtable[id].type); } return (r); } // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { if (ptrtype(type)) return (4); switch (type) { case P_CHAR: return (1); case P_INT: case P_LONG: return (4); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Symtable[id].type); fprintf(Outfile, "\t.data\n" "\t.globl\t%s\n", Symtable[id].name); switch (typesize) { case 1: fprintf(Outfile, "%s:\t.byte\t0\n", Symtable[id].name); break; case 4: fprintf(Outfile, "%s:\t.long\t0\n", Symtable[id].name); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "moveq", "movne", "movlt", "movgt", "movle", "movge" }; // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "movne", "moveq", "movge", "movle", "movgt", "movlt" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #1\n", cmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #0\n", invcmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\tuxtb\t%s, %s\n", reglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tb\tL%d\n", l); } // List of inverted branch instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *brlist[] = { "bne", "beq", "bge", "ble", "bgt", "blt" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", brlist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[reg]); cgjump(Symtable[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. Return a new register int cgaddress(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); fprintf(Outfile, "\tmov\t%s, r3\n", reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tldrb\t%s, [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [%s]\n", reglist[r1], reglist[r2]); break; case P_INT: case P_LONG: fprintf(Outfile, "\tstr\t%s, [%s]\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 39_Var_Initialisation_pt1/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to 
enum { no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\tsection .text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\tsection .data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch (type) { case P_CHAR: return (offset); case P_INT: case P_LONG: break; default: fatald("Bad type in calc_aligned_offset:", type); } // Here we have an int or a long. Align it on a 4-byte offset // I put the generic code here so it can be reused elsewhere. alignment = 4; offset = (offset + direction * (alignment - 1)) & ~(alignment - 1); return (offset); } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. 
// We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "r10", "r11", "r12", "r13", "r9", "r8", "rcx", "rdx", "rsi", "rdi" }; static char *breglist[] = { "r10b", "r11b", "r12b", "r13b", "r9b", "r8b", "cl", "dl", "sil", "dil" }; static char *dreglist[] = { "r10d", "r11d", "r12d", "r13d", "r9d", "r8d", "ecx", "edx", "esi", "edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\textern\tprintint\n", Outfile); fputs("\textern\tprintchar\n", Outfile); fputs("\textern\topen\n", Outfile); fputs("\textern\tclose\n", Outfile); fputs("\textern\tread\n", Outfile); fputs("\textern\twrite\n", Outfile); fputs("\textern\tprintf\n", Outfile); cgtextseg(); fprintf(Outfile, "; internal switch(expr) routine\n" "; rsi = switch table, rax = expr\n" "; from SubC: http://www.t3x.org/subc/\n" "\n" "switch:\n" " push rsi\n" " mov rsi, rdx\n" " mov rbx, rax\n" " cld\n" " lodsq\n" " mov rcx, rax\n" "next:\n" " lodsq\n" " mov rdx, rax\n" " lodsq\n" " cmp rbx, rdx\n" " jnz no\n" " pop rsi\n" " jmp rax\n" "no:\n" " loop next\n" " lodsq\n" " pop rsi\n" " jmp rax\n" "\n"); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(struct symtable *sym) { char *name = sym->name; struct symtable *parm, *locvar; int cnt; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the rsp and rbp fprintf(Outfile, "\tglobal\t%s\n" "%s:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n", name, name); // Copy any in-register parameters to the stack, up to six of them // The remaining parameters are already on the stack for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) { if (cnt > 6) { parm->posn = paramOffset; paramOffset += 8; } else { parm->posn = newlocaloffset(parm->type); cgstorlocal(paramReg--, parm); } } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. 
for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) { locvar->posn = newlocaloffset(locvar->type); } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\tadd\trsp, %d\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->endlabel); fprintf(Outfile, "\tadd\trsp, %d\n", stackOffset); fputs("\tpop rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadglob(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); } else // Print out the code to initialise it switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, 
"\tdec\tdword [%s]\n", sym->name); fprintf(Outfile, "\tmovsx\t%s, word [%s]\n", dreglist[r], sym->name); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword [%s]\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadlocal(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->posn); fprintf(Outfile, "\tmov\t%s, [rbp+%d]\n", reglist[r], sym->posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->posn); } else switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->posn); fprintf(Outfile, "\tmovzx\t%s, byte [rbp+%d]\n", reglist[r], sym->posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", sym->posn); fprintf(Outfile, "\tmovsx\t%s, dword [rbp+%d]\n", reglist[r], sym->posn); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, 
"\tdec\tdword\t[rbp+%d]\n", sym->posn); break; default: fatald("Bad type in cgloadlocal:", sym->type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, L%d\n", reglist[r], label); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tand\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\tor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshl\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshr\t%s, 
cl\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tneg\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnot\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r], breglist[r]); return (r); } // Convert an integer value to a boolean value. Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, byte %s\n", reglist[r], breglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\tadd\trsp, %d\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmov\t%s, rax\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpush\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmov\t%s, %s\n", reglist[FIRSTPARAMREG - argposn + 1], reglist[r]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsal\t%s, %d\n", reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, dreglist[r]); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\tqword\t[rbp+%d], %s\n", sym->posn, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\tbyte\t[rbp+%d], %s\n", sym->posn, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\tdword\t[rbp+%d], %s\n", sym->posn, dreglist[r]); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the type size = typesize(node->type, node->ctype); // Generate the global identity and the label cgdataseg(); fprintf(Outfile, "\tsection\t.data\n" "\tglobal\t%s\n", node->name); fprintf(Outfile, "%s:", node->name); // Generate the space for this type // original version for (int i = 0; i < node->size; i++) { switch(size) { case 1: fprintf(Outfile, "\tdb\t0\n"); break; case 4: fprintf(Outfile, 
"\tdd\t0\n"); break; case 8: fprintf(Outfile, "\tdq\t0\n"); break; default: for (int i = 0; i < size; i++) fprintf(Outfile, "\tdb\t0\n"); } } /* compact version using times instead of loop switch(size) { case 1: fprintf(Outfile, "\ttimes\t%d\tdb\t0\n", node->size); break; case 4: fprintf(Outfile, "\ttimes\t%d\tdd\t0\n", node->size); break; case 8: fprintf(Outfile, "\ttimes\t%d\tdq\t0\n", node->size); break; default: fprintf(Outfile, "\ttimes\t%d\tdb\t0\n", size); } */ } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\tdb\t%d\n", *cptr); } fprintf(Outfile, "\tdb\t0\n"); /* put string in readable format on a single line // probably went overboard with error checking int comma = 0, quote = 0, start = 1; fprintf(Outfile, "\tdb\t"); for (cptr=strvalue; *cptr; cptr++) { if ( ! isprint(*cptr) ) if (comma || start) { fprintf(Outfile, "%d, ", *cptr); start = 0; comma = 1; } else if (quote) { fprintf(Outfile, "\', %d, ", *cptr); comma = 1; quote = 0; } else { fprintf(Outfile, "%d, ", *cptr); comma = 1; quote = 0; } else if (start || comma) { fprintf(Outfile, "\'%c", *cptr); start = comma = 0; quote = 1; } else { fprintf(Outfile, "%c", *cptr); comma = 0; quote = 1; } } if (comma || start) fprintf(Outfile, "0\n"); else fprintf(Outfile, "\', 0\n"); */ } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzx\teax, %s\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmov\teax, %s\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } cgjump(sym->endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL) fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r], sym->name); else fprintf(Outfile, "\tlea\t%s, [rbp+%d]\n", reglist[r], sym->posn); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovsx\t%s, dword [%s]\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { //Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmov\t[%s], byte %s\n", reglist[r2], breglist[r1]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } // Generate a switch jump table and the code to // load the registers and call the switch() code void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; // Get a label for the switch table label = genlabel(); cglabel(label); // Heuristic. If we have no cases, create one case // which points to the default case if (casecount == 0) { caseval[0] = 0; caselabel[0] = defaultlabel; casecount = 1; } // Generate the switch jump table. 
fprintf(Outfile, "\tdq\t%d\n", casecount); for (i = 0; i < casecount; i++) fprintf(Outfile, "\tdq\t%d, L%d\n", caseval[i], caselabel[i]); fprintf(Outfile, "\tdq\tL%d\n", defaultlabel); // Load the specific registers cglabel(toplabel); fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); fprintf(Outfile, "\tmov\trdx, L%d\n", label); fprintf(Outfile, "\tjmp\tswitch\n"); } ================================================ FILE: 39_Var_Initialisation_pt1/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Putback; // Character put back by scanner extern_ struct symtable *Functionid; // Symbol ptr of the current function extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ char *Infilename; // Name of file we are parsing extern_ char *Outfilename; // Name of file we opened as Outfile extern_ struct token Token; // Last token scanned extern_ char Text[TEXTLEN + 1]; // Last identifier scanned extern_ int Looplevel; // Depth of nested loops extern_ int Switchlevel; // Depth of nested switches // Symbol table lists extern_ struct symtable *Globhead, *Globtail; // Global variables and functions extern_ struct symtable *Loclhead, *Locltail; // Local variables extern_ struct symtable *Parmhead, *Parmtail; // Local parameters extern_ struct symtable *Membhead, *Membtail; // Temp list of struct/union members extern_ struct symtable *Structhead, *Structtail; // List of struct types extern_ struct symtable *Unionhead, *Uniontail; // List of union types extern_ struct symtable *Enumhead, *Enumtail; // List of enum types and values extern_ struct symtable *Typehead, *Typetail; // List of typedefs // Command-line flags extern_ int O_dumpAST; // If true, dump the AST trees extern_ int O_keepasm; // If true, keep any assembly files extern_ int O_assemble; // If true, assemble the assembly files 
extern_ int O_dolink; // If true, link the object files extern_ int O_verbose; // If true, print info on compilation stages ================================================ FILE: 39_Var_Initialisation_pt1/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 static struct symtable *composite_declaration(int type); static int typedef_declaration(struct symtable **ctype); static int type_of_typedef(char *name, struct symtable **ctype); static void enum_declaration(void); // Parse the current token and return a primitive type enum value, // a pointer to any composite type and possibly modify // the class of the type. static int parse_type(struct symtable **ctype, int *class) { int type, exstatic = 1; // See if the class has been changed to extern (later, static) while (exstatic) { switch (Token.token) { case T_EXTERN: *class = C_EXTERN; scan(&Token); break; default: exstatic = 0; } } // Now work on the actual type keyword switch (Token.token) { case T_VOID: type = P_VOID; scan(&Token); break; case T_CHAR: type = P_CHAR; scan(&Token); break; case T_INT: type = P_INT; scan(&Token); break; case T_LONG: type = P_LONG; scan(&Token); break; // For the following, if we have a ';' after the // parsing then there is no type, so return -1. 
// Example: struct x {int y; int z}; case T_STRUCT: type = P_STRUCT; *ctype = composite_declaration(P_STRUCT); if (Token.token == T_SEMI) type = -1; break; case T_UNION: type = P_UNION; *ctype = composite_declaration(P_UNION); if (Token.token == T_SEMI) type = -1; break; case T_ENUM: type = P_INT; // Enums are really ints enum_declaration(); if (Token.token == T_SEMI) type = -1; break; case T_TYPEDEF: type = typedef_declaration(ctype); if (Token.token == T_SEMI) type = -1; break; case T_IDENT: type = type_of_typedef(Text, ctype); break; default: fatals("Illegal type, token", Token.tokstr); } return (type); } // Given a type parsed by parse_type(), scan in any following // '*' tokens and return the new type static int parse_stars(int type) { while (1) { if (Token.token != T_STAR) break; type = pointer_to(type); scan(&Token); } return (type); } static struct symtable *scalar_declaration(char *varname, int type, struct symtable *ctype, int class) { // Add this as a known scalar switch (class) { case C_EXTERN: case C_GLOBAL: return (addglob(varname, type, ctype, S_VARIABLE, class, 1)); break; case C_LOCAL: return (addlocl(varname, type, ctype, S_VARIABLE, 1)); break; case C_PARAM: return (addparm(varname, type, ctype, S_VARIABLE, 1)); break; case C_MEMBER: return (addmemb(varname, type, ctype, S_VARIABLE, 1)); break; } return (NULL); // Keep -Wall happy } static struct symtable *array_declaration(char *varname, int type, struct symtable *ctype, int class) { struct symtable *sym; // Skip past the '[' scan(&Token); // Check we have an array size if (Token.token == T_INTLIT) { // Add this as a known array // We treat the array as a pointer to its elements' type switch (class) { case C_EXTERN: case C_GLOBAL: sym = addglob(varname, pointer_to(type), ctype, S_ARRAY, class, Token.intvalue); break; case C_LOCAL: case C_PARAM: case C_MEMBER: fatal("For now, declaration of non-global arrays is not implemented"); } } // Ensure we have a following ']' scan(&Token); 
match(T_RBRACKET, "]"); return (sym); } static int param_declaration_list(struct symtable *oldfuncsym, struct symtable *newfuncsym) { int type, paramcnt = 0; struct symtable *ctype; struct symtable *protoptr = NULL; // Get the pointer to the first prototype parameter if (oldfuncsym != NULL) protoptr = oldfuncsym->member; // Loop getting any parameters while (Token.token != T_RPAREN) { // Get the type of the next parameter type = declaration_list(&ctype, C_PARAM, T_COMMA, T_RPAREN); if (type == -1) fatal("Bad type in parameter list"); // Ensure the type of this parameter matches the prototype if (protoptr != NULL) { if (type != protoptr->type) fatald("Type doesn't match prototype for parameter", paramcnt + 1); protoptr = protoptr->next; } paramcnt++; // Stop when we hit the right parenthesis if (Token.token == T_RPAREN) break; // We need a comma as separator comma(); } if (oldfuncsym != NULL && paramcnt != oldfuncsym->nelems) fatals("Parameter count mismatch for function", oldfuncsym->name); // Return the count of parameters return (paramcnt); } // // function_declaration: type identifier '(' parameter_list ')' ; // | type identifier '(' parameter_list ')' compound_statement ; // // Parse the declaration of function. static struct symtable *function_declaration(char *funcname, int type, struct symtable *ctype, int class) { struct ASTnode *tree, *finalstmt; struct symtable *oldfuncsym, *newfuncsym = NULL; int endlabel, paramcnt; // Text has the identifier's name. If this exists and is a // function, get the id. Otherwise, set oldfuncsym to NULL. 
if ((oldfuncsym = findsymbol(funcname)) != NULL) if (oldfuncsym->stype != S_FUNCTION) oldfuncsym = NULL; // If this is a new function declaration, get a // label-id for the end label, and add the function // to the symbol table, if (oldfuncsym == NULL) { endlabel = genlabel(); // Assumtion: functions only return scalar types, so NULL below newfuncsym = addglob(funcname, type, NULL, S_FUNCTION, C_GLOBAL, endlabel); } // Scan in the '(', any parameters and the ')'. // Pass in any existing function prototype pointer lparen(); paramcnt = param_declaration_list(oldfuncsym, newfuncsym); rparen(); // If this is a new function declaration, update the // function symbol entry with the number of parameters. // Also copy the parameter list into the function's node. if (newfuncsym) { newfuncsym->nelems = paramcnt; newfuncsym->member = Parmhead; oldfuncsym = newfuncsym; } // Clear out the parameter list Parmhead = Parmtail = NULL; // Declaration ends in a semicolon, only a prototype. if (Token.token == T_SEMI) return (oldfuncsym); // This is not just a prototype. // Set the Functionid global to the function's symbol pointer Functionid = oldfuncsym; // Get the AST tree for the compound statement and mark // that we have parsed no loops or switches yet Looplevel = 0; Switchlevel = 0; lbrace(); tree = compound_statement(0); rbrace(); // If the function type isn't P_VOID .. if (type != P_VOID) { // Error if no statements in the function if (tree == NULL) fatal("No statements in function with non-void type"); // Check that the last AST operation in the // compound statement was a return statement finalstmt = (tree->op == A_GLUE) ? 
tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); } // Build the A_FUNCTION node which has the function's symbol pointer // and the compound statement sub-tree tree = mkastunary(A_FUNCTION, type, tree, oldfuncsym, endlabel); // Generate the assembly code for it if (O_dumpAST) { dumpAST(tree, NOLABEL, 0); fprintf(stdout, "\n\n"); } genAST(tree, NOLABEL, NOLABEL, NOLABEL, 0); // Now free the symbols associated // with this function freeloclsyms(); return (oldfuncsym); } // Parse composite type declarations: structs or unions. // Either find an existing struct/union declaration, or build // a struct/union symbol table entry and return its pointer. static struct symtable *composite_declaration(int type) { struct symtable *ctype = NULL; struct symtable *m; int offset; int t; // Skip the struct/union keyword scan(&Token); // See if there is a following struct/union name if (Token.token == T_IDENT) { // Find any matching composite type if (type == P_STRUCT) ctype = findstruct(Text); else ctype = findunion(Text); scan(&Token); } // If the next token isn't an LBRACE , this is // the usage of an existing struct/union type. // Return the pointer to the type. if (Token.token != T_LBRACE) { if (ctype == NULL) fatals("unknown struct/union type", Text); return (ctype); } // Ensure this struct/union type hasn't been // previously defined if (ctype) fatals("previously defined struct/union", Text); // Build the composite type and skip the left brace if (type == P_STRUCT) ctype = addstruct(Text, P_STRUCT, NULL, 0, 0); else ctype = addunion(Text, P_UNION, NULL, 0, 0); scan(&Token); // Scan in the list of members while (1) { // Get the next member. 
m is used as a dummy t= declaration_list(&m, C_MEMBER, T_SEMI, T_RBRACE); if (t== -1) fatal("Bad type in member list"); if (Token.token == T_SEMI) scan(&Token); if (Token.token == T_RBRACE) break; } // Attach to the struct type's node rbrace(); if (Membhead==NULL) fatals("No members in struct", ctype->name); ctype->member = Membhead; Membhead = Membtail = NULL; // Set the offset of the initial member // and find the first free byte after it m = ctype->member; m->posn = 0; offset = typesize(m->type, m->ctype); // Set the position of each successive member in the composite type // Unions are easy. For structs, align the member and find the next free byte for (m = m->next; m != NULL; m = m->next) { // Set the offset for this member if (type == P_STRUCT) m->posn = genalign(m->type, offset, 1); else m->posn = 0; // Get the offset of the next free byte after this member offset += typesize(m->type, m->ctype); } // Set the overall size of the composite type ctype->size = offset; return (ctype); } // Parse an enum declaration static void enum_declaration(void) { struct symtable *etype = NULL; char *name; int intval = 0; // Skip the enum keyword. scan(&Token); // If there's a following enum type name, get a // pointer to any existing enum type node. if (Token.token == T_IDENT) { etype = findenumtype(Text); name = strdup(Text); // As it gets tromped soon scan(&Token); } // If the next token isn't a LBRACE, check // that we have an enum type name, then return if (Token.token != T_LBRACE) { if (etype == NULL) fatals("undeclared enum type:", name); return; } // We do have an LBRACE. Skip it scan(&Token); // If we have an enum type name, ensure that it // hasn't been declared before. 
if (etype != NULL) fatals("enum type redeclared:", etype->name); else // Build an enum type node for this identifier etype = addenum(name, C_ENUMTYPE, 0); // Loop to get all the enum values while (1) { // Ensure we have an identifier // Copy it in case there's an int literal coming up ident(); name = strdup(Text); // Ensure this enum value hasn't been declared before etype = findenumval(name); if (etype != NULL) fatals("enum value redeclared:", Text); // If the next token is an '=', skip it and // get the following int literal if (Token.token == T_ASSIGN) { scan(&Token); if (Token.token != T_INTLIT) fatal("Expected int literal after '='"); intval = Token.intvalue; scan(&Token); } // Build an enum value node for this identifier. // Increment the value for the next enum identifier. etype = addenum(name, C_ENUMVAL, intval++); // Bail out on a right curly bracket, else get a comma if (Token.token == T_RBRACE) break; comma(); } scan(&Token); // Skip over the right curly bracket } // Parse a typedef declaration and return the type // and ctype that it represents static int typedef_declaration(struct symtable **ctype) { int type, class = 0; // Skip the typedef keyword. 
scan(&Token); // Get the actual type following the keyword type = parse_type(ctype, &class); if (class != 0) fatal("Can't have extern in a typedef declaration"); // See if the typedef identifier already exists if (findtypedef(Text) != NULL) fatals("redefinition of typedef", Text); // Get any following '*' tokens type = parse_stars(type); // It doesn't exist so add it to the typedef list addtypedef(Text, type, *ctype, 0, 0); scan(&Token); return (type); } // Given a typedef name, return the type it represents static int type_of_typedef(char *name, struct symtable **ctype) { struct symtable *t; // Look up the typedef in the list t = findtypedef(name); if (t == NULL) fatals("unknown type", name); scan(&Token); *ctype = t->ctype; return (t->type); } static void array_initialisation(struct symtable *sym, int type, struct symtable *ctype, int class) { fatal("No array initialisation yet!"); } // Parse the declaration of a variable or function. // The type and any following '*'s have been scanned, and we // have the identifier in the Token variable. // The class argument is the variable's class. // Return a pointer to the symbol's entry in the symbol table static struct symtable *symbol_declaration(int type, struct symtable *ctype, int class) { struct symtable *sym = NULL; char *varname = strdup(Text); int stype = S_VARIABLE; // struct ASTnode *expr = NULL; // Assume it will be a scalar variable. // Ensure that we have an identifier. // We copied it above so we can scan more tokens in, e.g. // an assignment expression for a local variable. 
ident(); // Deal with function declarations if (Token.token == T_LPAREN) { return (function_declaration(varname, type, ctype, class)); } // See if this array or scalar variable has already been declared switch (class) { case C_EXTERN: case C_GLOBAL: if (findglob(varname) != NULL) fatals("Duplicate global variable declaration", varname); case C_LOCAL: case C_PARAM: if (findlocl(varname) != NULL) fatals("Duplicate local variable declaration", varname); case C_MEMBER: if (findmember(varname) != NULL) fatals("Duplicate struct/union member declaration", varname); } // Add the array or scalar variable to the symbol table if (Token.token == T_LBRACKET) { sym = array_declaration(varname, type, ctype, class); stype = S_ARRAY; } else sym = scalar_declaration(varname, type, ctype, class); // The array or scalar variable is being initialised if (Token.token == T_ASSIGN) { // Not possible for a parameter or member if (class == C_PARAM) fatals("Initialisation of a parameter not permitted", varname); if (class == C_MEMBER) fatals("Initialisation of a member not permitted", varname); scan(&Token); // Array initialisation if (stype == S_ARRAY) array_initialisation(sym, type, ctype, class); else { fatal("Scalar variable initialisation not done yet"); // Variable initialisation // if (class== C_LOCAL) // Local variable, parse the expression // expr= binexpr(0); // else write more code! } } // Generate the storage for the array or scalar variable. SOON. // genstorage(sym, expr); return (sym); } // Parse a list of symbols where there is an initial type. // Return the type of the symbols. et1 and et2 are end tokens. int declaration_list(struct symtable **ctype, int class, int et1, int et2) { int inittype, type; struct symtable *sym; // Get the initial type. 
If -1, it was // a composite type definition, return this if ((inittype = parse_type(ctype, &class)) == -1) return (inittype); // Now parse the list of symbols while (1) { // See if this symbol is a pointer type = parse_stars(inittype); // Parse this symbol sym = symbol_declaration(type, *ctype, class); // We parsed a function, there is no list so leave if (sym->stype == S_FUNCTION) { if (class != C_GLOBAL) fatal("Function definition not at global level"); return (type); } // We are at the end of the list, leave if (Token.token == et1 || Token.token == et2) return (type); // Otherwise, we need a comma as separator comma(); } } // Parse one or more global declarations, either // variables, functions or structs void global_declarations(void) { struct symtable *ctype; while (Token.token != T_EOF) { declaration_list(&ctype, C_GLOBAL, T_SEMI, T_EOF); // Skip any semicolons and right curly brackets if (Token.token == T_SEMI) scan(&Token); } } ================================================ FILE: 39_Var_Initialisation_pt1/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c void reject_token(struct token *t); int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue); struct ASTnode *mkastleaf(int op, int type, struct symtable *sym, int intvalue); struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, struct symtable *sym, int intvalue); void dumpAST(struct ASTnode *n, int label, int parentASTop); // gen.c int genlabel(void); int genAST(struct ASTnode *n, int iflabel, int looptoplabel, int loopendlabel, int parentASTop); void genpreamble(); void genpostamble(); void genfreeregs(); void genglobsym(struct symtable *node); int genglobstr(char *strvalue); int genprimsize(int type); int genalign(int type, int offset, int direction); void 
genreturn(int reg, int id); // cg.c int cgprimsize(int type); int cgalign(int type, int offset, int direction); void cgtextseg(); void cgdataseg(); void freeall_registers(void); void cgpreamble(); void cgpostamble(); void cgfuncpreamble(struct symtable *sym); void cgfuncpostamble(struct symtable *sym); int cgloadint(int value, int type); int cgloadglob(struct symtable *sym, int op); int cgloadlocal(struct symtable *sym, int op); int cgloadglobstr(int label); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdiv(int r1, int r2); int cgshlconst(int r, int val); int cgcall(struct symtable *sym, int numargs); void cgcopyarg(int r, int argposn); int cgstorglob(int r, struct symtable *sym); int cgstorlocal(int r, struct symtable *sym); void cgglobsym(struct symtable *node); void cgglobstr(int l, char *strvalue); int cgcompare_and_set(int ASTop, int r1, int r2); int cgcompare_and_jump(int ASTop, int r1, int r2, int label); void cglabel(int l); void cgjump(int l); int cgwiden(int r, int oldtype, int newtype); void cgreturn(int reg, struct symtable *sym); int cgaddress(struct symtable *sym); int cgderef(int r, int type); int cgstorderef(int r1, int r2, int type); int cgnegate(int r); int cginvert(int r); int cglognot(int r); int cgboolean(int r, int op, int label); int cgand(int r1, int r2); int cgor(int r1, int r2); int cgxor(int r1, int r2); int cgshl(int r1, int r2); int cgshr(int r1, int r2); void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel); // expr.c struct ASTnode *expression_list(int endtoken); struct ASTnode *binexpr(int ptp); // stmt.c struct ASTnode *compound_statement(int inswitch); // misc.c void match(int t, char *what); void semi(void); void lbrace(void); void rbrace(void); void lparen(void); void rparen(void); void ident(void); void comma(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c void 
appendsym(struct symtable **head, struct symtable **tail, struct symtable *node); struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int size, int posn); struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int size); struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int size); struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype, int size); struct symtable *addstruct(char *name, int type, struct symtable *ctype, int stype, int size); struct symtable *addunion(char *name, int type, struct symtable *ctype, int stype, int size); struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int size); struct symtable *addenum(char *name, int class, int value); struct symtable *addtypedef(char *name, int type, struct symtable *ctype, int stype, int size); struct symtable *findglob(char *s); struct symtable *findlocl(char *s); struct symtable *findsymbol(char *s); struct symtable *findmember(char *s); struct symtable *findstruct(char *s); struct symtable *findunion(char *s); struct symtable *findenumtype(char *s); struct symtable *findenumval(char *s); struct symtable *findtypedef(char *s); void clear_symtable(void); void freeloclsyms(void); // decl.c int declaration_list(struct symtable **ctype, int class, int et1, int et2); void global_declarations(void); // types.c int inttype(int type); int ptrtype(int type); int pointer_to(int type); int value_at(int type); int typesize(int type, struct symtable *ctype); struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op); ================================================ FILE: 39_Var_Initialisation_pt1/defs.h ================================================ #include #include #include #include // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 enum { TEXTLEN = 512 // Length of identifiers in input }; // Commands and default filenames 
#define AOUT "a.out" #ifdef __NASM__ #define ASCMD "nasm -f elf64 -o " #define LDCMD "cc -no-pie -fno-plt -Wall -o " #else #define ASCMD "as -o " #define LDCMD "cc -o " #endif #define CPPCMD "cpp -nostdinc -isystem " // Token types enum { T_EOF, // Binary operators T_ASSIGN, T_LOGOR, T_LOGAND, T_OR, T_XOR, T_AMPER, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, T_LSHIFT, T_RSHIFT, T_PLUS, T_MINUS, T_STAR, T_SLASH, // Other operators T_INC, T_DEC, T_INVERT, T_LOGNOT, // Type keywords T_VOID, T_CHAR, T_INT, T_LONG, // Other keywords T_IF, T_ELSE, T_WHILE, T_FOR, T_RETURN, T_STRUCT, T_UNION, T_ENUM, T_TYPEDEF, T_EXTERN, T_BREAK, T_CONTINUE, T_SWITCH, T_CASE, T_DEFAULT, // Structural tokens T_INTLIT, T_STRLIT, T_SEMI, T_IDENT, T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, T_LBRACKET, T_RBRACKET, T_COMMA, T_DOT, T_ARROW, T_COLON }; // Token structure struct token { int token; // Token type, from the enum list above char *tokstr; // String version of the token int intvalue; // For T_INTLIT, the integer value }; // AST node types. The first few line up // with the related tokens enum { A_ASSIGN = 1, A_LOGOR, A_LOGAND, A_OR, A_XOR, A_AND, A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_LSHIFT, A_RSHIFT, A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_INTLIT, A_STRLIT, A_IDENT, A_GLUE, A_IF, A_WHILE, A_FUNCTION, A_WIDEN, A_RETURN, A_FUNCCALL, A_DEREF, A_ADDR, A_SCALE, A_PREINC, A_PREDEC, A_POSTINC, A_POSTDEC, A_NEGATE, A_INVERT, A_LOGNOT, A_TOBOOL, A_BREAK, A_CONTINUE, A_SWITCH, A_CASE, A_DEFAULT }; // Primitive types. The bottom 4 bits is an integer // value that represents the level of indirection, // e.g. 0= no pointer, 1= pointer, 2= pointer pointer etc. 
enum { P_NONE, P_VOID = 16, P_CHAR = 32, P_INT = 48, P_LONG = 64, P_STRUCT=80, P_UNION=96 }; // Structural types enum { S_VARIABLE, S_FUNCTION, S_ARRAY }; // Storage classes enum { C_GLOBAL = 1, // Globally visible symbol C_LOCAL, // Locally visible symbol C_PARAM, // Locally visible function parameter C_EXTERN, // External globally visible symbol C_STRUCT, // A struct C_UNION, // A union C_MEMBER, // Member of a struct or union C_ENUMTYPE, // A named enumeration type C_ENUMVAL, // A named enumeration value C_TYPEDEF // A named typedef }; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol struct symtable *ctype; // If struct/union, ptr to that type int stype; // Structural type for the symbol int class; // Storage class for the symbol union { int size; // Number of elements in the symbol int endlabel; // For functions, the end label }; union { int nelems; // For functions, # of params int posn; // For locals, the negative offset // from the stack base pointer }; struct symtable *next; // Next symbol in one list struct symtable *member; // First member of a function, struct, }; // union or enum // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates int rvalue; // True if the node is an rvalue struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; struct symtable *sym; // For many AST nodes, the pointer to union { // the symbol in the symbol table int intvalue; // For A_INTLIT, the integer value int size; // For A_SCALE, the size to scale by }; }; enum { NOREG = -1, // Use NOREG when the AST generation // functions have no register to return NOLABEL = 0 // Use NOLABEL when we have no label to // pass to genAST() }; ================================================ FILE: 39_Var_Initialisation_pt1/expr.c ================================================ 
#include "defs.h"
#include "data.h"
#include "decl.h"

// Parsing of expressions
// Copyright (c) 2019 Warren Toomey, GPL3

// expression_list: <null>
//        | expression
//        | expression ',' expression_list
//        ;

// Parse a list of zero or more comma-separated expressions and
// return an AST composed of A_GLUE nodes with the left-hand child
// being the sub-tree of previous expressions (or NULL) and the right-hand
// child being the next expression. Each A_GLUE node will have size field
// set to the number of expressions in the tree at this point. If no
// expressions are parsed, NULL is returned.
// The end token itself is left unconsumed for the caller to match.
struct ASTnode *expression_list(int endtoken) {
  struct ASTnode *tree = NULL;
  struct ASTnode *child = NULL;
  int exprcount = 0;

  // Loop until the end token
  while (Token.token != endtoken) {

    // Parse the next expression and increment the expression count
    child = binexpr(0);
    exprcount++;

    // Build an A_GLUE AST node with the previous tree as the left child
    // and the new expression as the right child. Store the expression count.
    tree = mkastnode(A_GLUE, P_NONE, tree, NULL, child, NULL, exprcount);

    // Stop when we reach the end token
    if (Token.token == endtoken)
      break;

    // Must have a ',' at this point
    match(T_COMMA, ",");
  }

  // Return the tree of expressions
  return (tree);
}

// Parse a function call and return its AST.
// The function's name is expected in Text; the current token
// must be the '(' that opens the argument list.
static struct ASTnode *funccall(void) {
  struct ASTnode *tree;
  struct symtable *funcptr;

  // Check that the identifier has been defined as a function,
  // then make a leaf node for it.
  if ((funcptr = findsymbol(Text)) == NULL || funcptr->stype != S_FUNCTION) {
    fatals("Undeclared function", Text);
  }
  // Get the '('
  lparen();

  // Parse the argument expression list
  tree = expression_list(T_RPAREN);

  // XXX Check type of each argument against the function's prototype

  // Build the function call AST node. Store the
  // function's return type as this node's type.
  // Also record the function's symbol-id
  tree = mkastunary(A_FUNCCALL, funcptr->type, tree, funcptr, 0);

  // Get the ')'
  rparen();
  return (tree);
}

// Parse the index into an array and return an AST tree for it.
// The array's name is expected in Text; the current token is the '['.
static struct ASTnode *array_access(void) {
  struct ASTnode *left, *right;
  struct symtable *aryptr;

  // Check that the identifier has been defined as an array
  // then make a leaf node for it that points at the base
  if ((aryptr = findsymbol(Text)) == NULL || aryptr->stype != S_ARRAY) {
    fatals("Undeclared array", Text);
  }
  left = mkastleaf(A_ADDR, aryptr->type, aryptr, 0);

  // Get the '['
  scan(&Token);

  // Parse the following expression
  right = binexpr(0);

  // Get the ']'
  match(T_RBRACKET, "]");

  // Ensure that this is of int type
  if (!inttype(right->type))
    fatal("Array index is not of integer type");

  // Scale the index by the size of the element's type
  right = modify_type(right, left->type, A_ADD);

  // Return an AST tree where the array's base has the offset
  // added to it, and dereference the element. Still an lvalue
  // at this point.
  left = mkastnode(A_ADD, aryptr->type, left, NULL, right, NULL, 0);
  left = mkastunary(A_DEREF, value_at(left->type), left, NULL, 0);
  return (left);
}

// Parse the member reference of a struct or union
// and return an AST tree for it. If withpointer is true,
// the access is through a pointer to the member.
// The variable's name is expected in Text; the current token is
// the '.' or '->'. On return the member's name is the current token.
static struct ASTnode *member_access(int withpointer) {
  struct ASTnode *left, *right;
  struct symtable *compvar;
  struct symtable *typeptr;
  struct symtable *m;

  // Check that the identifier has been declared as a
  // struct/union or a struct/union pointer
  if ((compvar = findsymbol(Text)) == NULL)
    fatals("Undeclared variable", Text);
  if (withpointer && compvar->type != pointer_to(P_STRUCT)
      && compvar->type != pointer_to(P_UNION))
    fatals("Undeclared variable", Text);
  if (!withpointer && compvar->type != P_STRUCT && compvar->type != P_UNION)
    fatals("Undeclared variable", Text);

  // If a pointer to a struct/union, get the pointer's value.
  // Otherwise, make a leaf node that points at the base
  // Either way, it's an rvalue
  if (withpointer) {
    left = mkastleaf(A_IDENT, pointer_to(compvar->type), compvar, 0);
  } else
    left = mkastleaf(A_ADDR, compvar->type, compvar, 0);
  left->rvalue = 1;

  // Get the details of the composite type
  typeptr = compvar->ctype;

  // Skip the '.' or '->' token and get the member's name
  scan(&Token);
  ident();

  // Find the matching member's name in the type
  // Die if we can't find it
  for (m = typeptr->member; m != NULL; m = m->next)
    if (!strcmp(m->name, Text))
      break;

  if (m == NULL)
    fatals("No member found in struct/union: ", Text);

  // Build an A_INTLIT node with the offset
  right = mkastleaf(A_INTLIT, P_INT, NULL, m->posn);

  // Add the member's offset to the base of the struct/union
  // and dereference it. Still an lvalue at this point
  left = mkastnode(A_ADD, pointer_to(m->type), left, NULL, right, NULL, 0);
  left = mkastunary(A_DEREF, m->type, left, NULL, 0);
  return (left);
}

// Parse a postfix expression and return
// an AST node representing it. The
// identifier is already in Text.
static struct ASTnode *postfix(void) {
  struct ASTnode *n;
  struct symtable *varptr;
  struct symtable *enumptr;

  // If the identifier matches an enum value,
  // return an A_INTLIT node
  if ((enumptr = findenumval(Text)) != NULL) {
    scan(&Token);
    return (mkastleaf(A_INTLIT, P_INT, NULL, enumptr->posn));
  }
  // Scan in the next token to see if we have a postfix expression
  scan(&Token);

  // Function call
  if (Token.token == T_LPAREN)
    return (funccall());

  // An array reference
  if (Token.token == T_LBRACKET)
    return (array_access());

  // Access into a struct or union
  if (Token.token == T_DOT)
    return (member_access(0));
  if (Token.token == T_ARROW)
    return (member_access(1));

  // A variable. Check that the variable exists.
  if ((varptr = findsymbol(Text)) == NULL || varptr->stype != S_VARIABLE)
    fatals("Unknown variable", Text);

  switch (Token.token) {
      // Post-increment: skip over the token
    case T_INC:
      scan(&Token);
      n = mkastleaf(A_POSTINC, varptr->type, varptr, 0);
      break;

      // Post-decrement: skip over the token
    case T_DEC:
      scan(&Token);
      n = mkastleaf(A_POSTDEC, varptr->type, varptr, 0);
      break;

      // Just a variable reference
    default:
      n = mkastleaf(A_IDENT, varptr->type, varptr, 0);
  }
  return (n);
}

// Parse a primary factor and return an
// AST node representing it.
// Literals fall through to the scan() at the bottom; identifiers and
// parenthesised expressions return early because their own parsing
// has already moved past them.
static struct ASTnode *primary(void) {
  struct ASTnode *n;
  int id;

  switch (Token.token) {
    case T_INTLIT:
      // For an INTLIT token, make a leaf AST node for it.
      // Make it a P_CHAR if it's within the P_CHAR range
      if ((Token.intvalue) >= 0 && (Token.intvalue < 256))
        n = mkastleaf(A_INTLIT, P_CHAR, NULL, Token.intvalue);
      else
        n = mkastleaf(A_INTLIT, P_INT, NULL, Token.intvalue);
      break;

    case T_STRLIT:
      // For a STRLIT token, generate the assembly for it.
      // Then make a leaf AST node for it. id is the string's label.
      id = genglobstr(Text);
      n = mkastleaf(A_STRLIT, pointer_to(P_CHAR), NULL, id);
      break;

    case T_IDENT:
      return (postfix());

    case T_LPAREN:
      // Beginning of a parenthesised expression, skip the '('.
      // Scan in the expression and the right parenthesis
      scan(&Token);
      n = binexpr(0);
      rparen();
      return (n);

    default:
      fatals("Expecting a primary expression, got token", Token.tokstr);
  }

  // Scan in the next token and return the leaf node
  scan(&Token);
  return (n);
}

// Convert a binary operator token into a binary AST operation.
// We rely on a 1:1 mapping from token to AST operation,
// i.e. T_ASSIGN .. T_SLASH have the same values as A_ASSIGN .. A_DIVIDE.
static int binastop(int tokentype) {
  if (tokentype > T_EOF && tokentype <= T_SLASH)
    return (tokentype);
  fatald("Syntax error, token", tokentype);
  return (0);                   // Keep -Wall happy
}

// Return true if a token is right-associative,
// false otherwise.
static int rightassoc(int tokentype) { if (tokentype == T_ASSIGN) return (1); return (0); } // Operator precedence for each token. Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 20, 30, // T_EOF, T_ASSIGN, T_LOGOR, T_LOGAND 40, 50, 60, // T_OR, T_XOR, T_AMPER 70, 70, // T_EQ, T_NE 80, 80, 80, 80, // T_LT, T_GT, T_LE, T_GE 90, 90, // T_LSHIFT, T_RSHIFT 100, 100, // T_PLUS, T_MINUS 110, 110 // T_STAR, T_SLASH }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec; if (tokentype > T_SLASH) fatald("Token with no precedence in op_precedence:", tokentype); prec = OpPrec[tokentype]; if (prec == 0) fatald("Syntax error, token", tokentype); return (prec); } // prefix_expression: primary // | '*' prefix_expression // | '&' prefix_expression // | '-' prefix_expression // | '++' prefix_expression // | '--' prefix_expression // ; // Parse a prefix expression and return // a sub-tree representing it. 
struct ASTnode *prefix(void) { struct ASTnode *tree; switch (Token.token) { case T_AMPER: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Ensure that it's an identifier if (tree->op != A_IDENT) fatal("& operator must be followed by an identifier"); // Now change the operator to A_ADDR and the type to // a pointer to the original type tree->op = A_ADDR; tree->type = pointer_to(tree->type); break; case T_STAR: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's either another deref or an // identifier if (tree->op != A_IDENT && tree->op != A_DEREF) fatal("* operator must be followed by an identifier or *"); // Prepend an A_DEREF operation to the tree tree = mkastunary(A_DEREF, value_at(tree->type), tree, NULL, 0); break; case T_MINUS: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_NEGATE operation to the tree and // make the child an rvalue. Because chars are unsigned, // also widen this to int so that it's signed tree->rvalue = 1; tree = modify_type(tree, P_INT, 0); tree = mkastunary(A_NEGATE, tree->type, tree, NULL, 0); break; case T_INVERT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_INVERT operation to the tree and // make the child an rvalue. tree->rvalue = 1; tree = mkastunary(A_INVERT, tree->type, tree, NULL, 0); break; case T_LOGNOT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_LOGNOT operation to the tree and // make the child an rvalue. 
tree->rvalue = 1; tree = mkastunary(A_LOGNOT, tree->type, tree, NULL, 0); break; case T_INC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("++ operator must be followed by an identifier"); // Prepend an A_PREINC operation to the tree tree = mkastunary(A_PREINC, tree->type, tree, NULL, 0); break; case T_DEC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("-- operator must be followed by an identifier"); // Prepend an A_PREDEC operation to the tree tree = mkastunary(A_PREDEC, tree->type, tree, NULL, 0); break; default: tree = primary(); } return (tree); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; struct ASTnode *ltemp, *rtemp; int ASTop; int tokentype; // Get the tree on the left. // Fetch the next token at the same time. 
left = prefix(); // If we hit one of several terminating tokens, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON) { left->rvalue = 1; return (left); } // While the precedence of this token is more than that of the // previous token precedence, or it's right associative and // equal to the previous token's precedence while ((op_precedence(tokentype) > ptp) || (rightassoc(tokentype) && op_precedence(tokentype) == ptp)) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Determine the operation to be performed on the sub-trees ASTop = binastop(tokentype); if (ASTop == A_ASSIGN) { // Assignment // Make the right tree into an rvalue right->rvalue = 1; // Ensure the right's type matches the left right = modify_type(right, left->type, 0); if (right == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree. However, switch // left and right around, so that the right expression's // code will be generated before the left expression ltemp = left; left = right; right = ltemp; } else { // We are not doing an assignment, so both trees should be rvalues // Convert both trees into rvalue if they are lvalue trees left->rvalue = 1; right->rvalue = 1; // Ensure the two types are compatible by trying // to modify each tree to match the other's type. ltemp = modify_type(left, right->type, ASTop); rtemp = modify_type(right, left->type, ASTop); if (ltemp == NULL && rtemp == NULL) fatal("Incompatible types in binary expression"); if (ltemp != NULL) left = ltemp; if (rtemp != NULL) right = rtemp; } // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. left = mkastnode(binastop(tokentype), left->type, left, NULL, right, NULL, 0); // Update the details of the current token. 
// If we hit a terminating token, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON) { left->rvalue = 1; return (left); } } // Return the tree we have when the precedence // is the same or lower left->rvalue = 1; return (left); } ================================================ FILE: 39_Var_Initialisation_pt1/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number int genlabel(void) { static int id = 1; return (id++); } // Generate the code for an IF statement // and an optional ELSE clause static int genIF(struct ASTnode *n, int looptoplabel, int loopendlabel) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. // When there is no ELSE clause, Lfalse _is_ // the ending label! Lfalse = genlabel(); if (n->right) Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. 
genAST(n->left, Lfalse, NOLABEL, NOLABEL, n->op); genfreeregs(); // Generate the true compound statement genAST(n->mid, NOLABEL, looptoplabel, loopendlabel, n->op); genfreeregs(); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); genfreeregs(); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = genlabel(); Lend = genlabel(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. genAST(n->left, Lend, Lstart, Lend, n->op); genfreeregs(); // Generate the compound statement for the body genAST(n->right, NOLABEL, Lstart, Lend, n->op); genfreeregs(); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Generate the code for a SWITCH statement static int genSWITCH(struct ASTnode *n) { int *caseval, *caselabel; int Ljumptop, Lend; int i, reg, defaultlabel = 0, casecount = 0; struct ASTnode *c; // Create arrays for the case values and associated labels. // Ensure that we have at least one position in each array. caseval = (int *) malloc((n->intvalue + 1) * sizeof(int)); caselabel = (int *) malloc((n->intvalue + 1) * sizeof(int)); // Generate labels for the top of the jump table, and the // end of the switch statement. Set a default label for // the end of the switch, in case we don't have a default. 
Ljumptop = genlabel(); Lend = genlabel(); defaultlabel = Lend; // Output the code to calculate the switch condition reg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, 0); cgjump(Ljumptop); genfreeregs(); // Walk the right-child linked list to // generate the code for each case for (i = 0, c = n->right; c != NULL; i++, c = c->right) { // Get a label for this case. Store it // and the case value in the arrays. // Record if it is the default case. caselabel[i] = genlabel(); caseval[i] = c->intvalue; cglabel(caselabel[i]); if (c->op == A_DEFAULT) defaultlabel = caselabel[i]; else casecount++; // Generate the case code. Pass in the end label for the breaks genAST(c->left, NOLABEL, NOLABEL, Lend, 0); genfreeregs(); } // Ensure the last case jumps past the switch table cgjump(Lend); // Now output the switch table and the end label. cgswitch(reg, casecount, Ljumptop, caselabel, caseval, defaultlabel); cglabel(Lend); return (NOREG); } // Generate the code to copy the arguments of a // function call to its parameters, then call the // function itself. Return the register that holds // the function's return value. static int gen_funccall(struct ASTnode *n) { struct ASTnode *gluetree = n->left; int reg; int numargs = 0; // If there is a list of arguments, walk this list // from the last argument (right-hand child) to the // first while (gluetree) { // Calculate the expression's value reg = genAST(gluetree->right, NOLABEL, NOLABEL, NOLABEL, gluetree->op); // Copy this into the n'th function parameter: size is 1, 2, 3, ... cgcopyarg(reg, gluetree->size); // Keep the first (highest) number of arguments if (numargs == 0) numargs = gluetree->size; genfreeregs(); gluetree = gluetree->left; } // Call the function, clean up the stack (based on numargs), // and return its result return (cgcall(n->sym, numargs)); } // Given an AST, an optional label, and the AST op // of the parent, generate assembly code recursively. // Return the register id with the tree's final value. 
int genAST(struct ASTnode *n, int iflabel, int looptoplabel, int loopendlabel, int parentASTop) { int leftreg, rightreg; // We have some specific AST node handling at the top // so that we don't evaluate the child sub-trees immediately switch (n->op) { case A_IF: return (genIF(n, looptoplabel, loopendlabel)); case A_WHILE: return (genWHILE(n)); case A_SWITCH: return (genSWITCH(n)); case A_FUNCCALL: return (gen_funccall(n)); case A_GLUE: // Do each child statement, and free the // registers after each child if (n->left != NULL) genAST(n->left, iflabel, looptoplabel, loopendlabel, n->op); genfreeregs(); if (n->right != NULL) genAST(n->right, iflabel, looptoplabel, loopendlabel, n->op); genfreeregs(); return (NOREG); case A_FUNCTION: // Generate the function's preamble before the code // in the child sub-tree cgfuncpreamble(n->sym); genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op); cgfuncpostamble(n->sym); return (NOREG); } // General AST node handling below // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op); if (n->right) rightreg = genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdiv(leftreg, rightreg)); case A_AND: return (cgand(leftreg, rightreg)); case A_OR: return (cgor(leftreg, rightreg)); case A_XOR: return (cgxor(leftreg, rightreg)); case A_LSHIFT: return (cgshl(leftreg, rightreg)); case A_RSHIFT: return (cgshr(leftreg, rightreg)); case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, compare registers // and set one to 1 or 0 based on the comparison. 
if (parentASTop == A_IF || parentASTop == A_WHILE) return (cgcompare_and_jump(n->op, leftreg, rightreg, iflabel)); else return (cgcompare_and_set(n->op, leftreg, rightreg)); case A_INTLIT: return (cgloadint(n->intvalue, n->type)); case A_STRLIT: return (cgloadglobstr(n->intvalue)); case A_IDENT: // Load our value if we are an rvalue // or we are being dereferenced if (n->rvalue || parentASTop == A_DEREF) { if (n->sym->class == C_GLOBAL) { return (cgloadglob(n->sym, n->op)); } else { return (cgloadlocal(n->sym, n->op)); } } else return (NOREG); case A_ASSIGN: // Are we assigning to an identifier or through a pointer? switch (n->right->op) { case A_IDENT: if (n->right->sym->class == C_GLOBAL) return (cgstorglob(leftreg, n->right->sym)); else return (cgstorlocal(leftreg, n->right->sym)); case A_DEREF: return (cgstorderef(leftreg, rightreg, n->right->type)); default: fatald("Can't A_ASSIGN in genAST(), op", n->op); } case A_WIDEN: // Widen the child's type to the parent's type return (cgwiden(leftreg, n->left->type, n->type)); case A_RETURN: cgreturn(leftreg, Functionid); return (NOREG); case A_ADDR: return (cgaddress(n->sym)); case A_DEREF: // If we are an rvalue, dereference to get the value we point at, // otherwise leave it for A_ASSIGN to store through the pointer if (n->rvalue) return (cgderef(leftreg, n->left->type)); else return (leftreg); case A_SCALE: // Small optimisation: use shift if the // scale value is a known power of two switch (n->size) { case 2: return (cgshlconst(leftreg, 1)); case 4: return (cgshlconst(leftreg, 2)); case 8: return (cgshlconst(leftreg, 3)); default: // Load a register with the size and // multiply the leftreg by this size rightreg = cgloadint(n->size, P_INT); return (cgmul(leftreg, rightreg)); } case A_POSTINC: case A_POSTDEC: // Load and decrement the variable's value into a register // and post increment/decrement it if (n->sym->class == C_GLOBAL) return (cgloadglob(n->sym, n->op)); else return (cgloadlocal(n->sym, n->op)); case 
A_PREINC: case A_PREDEC: // Load and decrement the variable's value into a register // and pre increment/decrement it if (n->left->sym->class == C_GLOBAL) return (cgloadglob(n->left->sym, n->op)); else return (cgloadlocal(n->left->sym, n->op)); case A_NEGATE: return (cgnegate(leftreg)); case A_INVERT: return (cginvert(leftreg)); case A_LOGNOT: return (cglognot(leftreg)); case A_TOBOOL: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, set the register // to 0 or 1 based on it's zeroeness or non-zeroeness return (cgboolean(leftreg, parentASTop, iflabel)); case A_BREAK: cgjump(loopendlabel); return (NOREG); case A_CONTINUE: cgjump(looptoplabel); return (NOREG); default: fatald("Unknown AST operator", n->op); } return (NOREG); // Keep -Wall happy } void genpreamble() { cgpreamble(); } void genpostamble() { cgpostamble(); } void genfreeregs() { freeall_registers(); } void genglobsym(struct symtable *node) { cgglobsym(node); } int genglobstr(char *strvalue) { int l = genlabel(); cgglobstr(l, strvalue); return (l); } int genprimsize(int type) { return (cgprimsize(type)); } int genalign(int type, int offset, int direction) { return (cgalign(type, offset, direction)); } ================================================ FILE: 39_Var_Initialisation_pt1/include/fcntl.h ================================================ #ifndef _FCNTL_H_ # define _FCNTL_H_ #define O_RDONLY 00 #define O_WRONLY 01 #define O_RDWR 02 int open(char *pathname, int flags); #endif // _FCNTL_H_ ================================================ FILE: 39_Var_Initialisation_pt1/include/stddef.h ================================================ #ifndef _STDDEF_H_ # define _STDDEF_H_ typedef long size_t; #endif //_STDDEF_H_ ================================================ FILE: 39_Var_Initialisation_pt1/include/stdio.h ================================================ #ifndef _STDIO_H_ # define _STDIO_H_ #include // This FILE definition will do for now typedef 
char * FILE; FILE *fopen(char *pathname, char *mode); size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream); size_t fwrite(void *ptr, size_t size, size_t nmemb, FILE *stream); int fclose(FILE *stream); int printf(char *format); int fprintf(FILE *stream, char *format); #endif // _STDIO_H_ ================================================ FILE: 39_Var_Initialisation_pt1/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Given a string with a '.' and at least a 1-character suffix // after the '.', change the suffix to be the given character. // Return the new string or NULL if the original string could // not be modified char *alter_suffix(char *str, char suffix) { char *posn; char *newstr; // Clone the string if ((newstr = strdup(str)) == NULL) return (NULL); // Find the '.' if ((posn = strrchr(newstr, '.')) == NULL) return (NULL); // Ensure there is a suffix posn++; if (*posn == '\0') return (NULL); // Change the suffix and NUL-terminate the string *posn++ = suffix; *posn = '\0'; return (newstr); } // Given an input filename, compile that file // down to assembly code. 
Return the new file's name static char *do_compile(char *filename) { char cmd[TEXTLEN]; // Change the input file's suffix to .s Outfilename = alter_suffix(filename, 's'); if (Outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .c on the end\n", filename); exit(1); } // Generate the pre-processor command snprintf(cmd, TEXTLEN, "%s %s %s", CPPCMD, INCDIR, filename); // Open up the pre-processor pipe if ((Infile = popen(cmd, "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", filename, strerror(errno)); exit(1); } Infilename = filename; // Create the output file if ((Outfile = fopen(Outfilename, "w")) == NULL) { fprintf(stderr, "Unable to create %s: %s\n", Outfilename, strerror(errno)); exit(1); } Line = 1; // Reset the scanner Putback = '\n'; clear_symtable(); // Clear the symbol table if (O_verbose) printf("compiling %s\n", filename); scan(&Token); // Get the first token from the input genpreamble(); // Output the preamble global_declarations(); // Parse the global declarations genpostamble(); // Output the postamble fclose(Outfile); // Close the output file return (Outfilename); } // Given an input filename, assemble that file // down to object code. Return the object filename char *do_assemble(char *filename) { char cmd[TEXTLEN]; int err; char *outfilename = alter_suffix(filename, 'o'); if (outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .s on the end\n", filename); exit(1); } // Build the assembly command and run it snprintf(cmd, TEXTLEN, "%s %s %s", ASCMD, outfilename, filename); if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Assembly of %s failed\n", filename); exit(1); } return (outfilename); } // Given a list of object files and an output filename, // link all of the object filenames together. 
void do_link(char *outfilename, char *objlist[]) { int cnt, size = TEXTLEN; char cmd[TEXTLEN], *cptr; int err; // Start with the linker command and the output file cptr = cmd; cnt = snprintf(cptr, size, "%s %s ", LDCMD, outfilename); cptr += cnt; size -= cnt; // Now append each object file while (*objlist != NULL) { cnt = snprintf(cptr, size, "%s ", *objlist); cptr += cnt; size -= cnt; objlist++; } if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Linking failed\n"); exit(1); } } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s [-vcST] [-o outfile] file [file ...]\n", prog); fprintf(stderr, " -v give verbose output of the compilation stages\n"); fprintf(stderr, " -c generate object files but don't link them\n"); fprintf(stderr, " -S generate assembly files but don't link them\n"); fprintf(stderr, " -T dump the AST trees for each input file\n"); fprintf(stderr, " -o outfile, produce the outfile executable file\n"); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. 
enum { MAXOBJ = 100 }; int main(int argc, char *argv[]) { char *outfilename = AOUT; char *asmfile, *objfile; char *objlist[MAXOBJ]; int i, objcnt = 0; // Initialise our variables O_dumpAST = 0; O_keepasm = 0; O_assemble = 0; O_verbose = 0; O_dolink = 1; // Scan for command-line options for (i = 1; i < argc; i++) { // No leading '-', stop scanning for options if (*argv[i] != '-') break; // For each option in this argument for (int j = 1; (*argv[i] == '-') && argv[i][j]; j++) { switch (argv[i][j]) { case 'o': outfilename = argv[++i]; // Save & skip to next argument break; case 'T': O_dumpAST = 1; break; case 'c': O_assemble = 1; O_keepasm = 0; O_dolink = 0; break; case 'S': O_keepasm = 1; O_assemble = 0; O_dolink = 0; break; case 'v': O_verbose = 1; break; default: usage(argv[0]); } } } // Ensure we have at lease one input file argument if (i >= argc) usage(argv[0]); // Work on each input file in turn while (i < argc) { asmfile = do_compile(argv[i]); // Compile the source file if (O_dolink || O_assemble) { objfile = do_assemble(asmfile); // Assemble it to object forma if (objcnt == (MAXOBJ - 2)) { fprintf(stderr, "Too many object files for the compiler to handle\n"); exit(1); } objlist[objcnt++] = objfile; // Add the object file's name objlist[objcnt] = NULL; // to the list of object files } if (!O_keepasm) // Remove the assembly file if unlink(asmfile); // we don't need to keep it i++; } // Now link all the object files together if (O_dolink) { do_link(outfilename, objlist); // If we don't need to keep the object // files, then remove them if (!O_assemble) { for (i = 0; objlist[i] != NULL; i++) unlink(objlist[i]); } } return (0); } ================================================ FILE: 39_Var_Initialisation_pt1/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" #include // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current token is t, // and fetch the next token. 
Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Match a comma and fetch the next token void comma(void) { match(T_COMMA, "comma"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d of %s\n", s, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d of %s\n", s1, s2, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d of %s\n", s, d, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d of %s\n", s, c, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } ================================================ FILE: 39_Var_Initialisation_pt1/scan.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { char *p; p = strchr(s, c); return (p ? p - s : -1); } // Get the next character from the input file. 
static int next(void) { int c, l; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return (c); } c = fgetc(Infile); // Read from input file while (c == '#') { // We've hit a pre-processor statement scan(&Token); // Get the line number into l if (Token.token != T_INTLIT) fatals("Expecting pre-processor line number, got:", Text); l = Token.intvalue; scan(&Token); // Get the filename in Text if (Token.token != T_STRLIT) fatals("Expecting pre-processor file name, got:", Text); if (Text[0] != '<') { // If this is a real filename if (strcmp(Text, Infilename)) // and not the one we have now Infilename = strdup(Text); // save it. Then update the line num Line = l; } while ((c = fgetc(Infile)) != '\n'); // Skip to the end of the line c = fgetc(Infile); // and get the next character } if ('\n' == c) Line++; // Increment line count return (c); } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. static int skip(void) { int c; c = next(); while (' ' == c || '\t' == c || '\n' == c || '\r' == c || '\f' == c) { c = next(); } return (c); } // Return the next character from a character // or string literal static int scanch(void) { int c; // Get the next input character and interpret // metacharacters that start with a backslash c = next(); if (c == '\\') { switch (c = next()) { case 'a': return '\a'; case 'b': return '\b'; case 'f': return '\f'; case 'n': return '\n'; case 'r': return '\r'; case 't': return '\t'; case 'v': return '\v'; case '\\': return '\\'; case '"': return '"'; case '\'': return '\''; default: fatalc("unknown escape sequence", c); } } return (c); // Just an ordinary old character! } // Scan and return an integer literal // value from the input file. 
static int scanint(int c) { int k, val = 0; // Convert each character into an int value while ((k = chrpos("0123456789", c)) >= 0) { val = val * 10 + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan in a string literal from the input file, // and store it in buf[]. Return the length of // the string. static int scanstr(char *buf) { int i, c; // Loop while we have enough buffer space for (i = 0; i < TEXTLEN - 1; i++) { // Get the next char and append to buf // Return when we hit the ending double quote if ((c = scanch()) == '"') { buf[i] = 0; return (i); } buf[i] = c; } // Ran out of buf[] space fatal("String literal too long"); return (0); } // Scan an identifier from the input file and // store it in buf[]. Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { fatal("Identifier too long"); } else if (i < lim - 1) { buf[i++] = c; } c = next(); } // We hit a non-valid character, put it back. // NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. 
static int keyword(char *s) { switch (*s) { case 'b': if (!strcmp(s, "break")) return (T_BREAK); break; case 'c': if (!strcmp(s, "case")) return (T_CASE); if (!strcmp(s, "char")) return (T_CHAR); if (!strcmp(s, "continue")) return (T_CONTINUE); break; case 'd': if (!strcmp(s, "default")) return (T_DEFAULT); break; case 'e': if (!strcmp(s, "else")) return (T_ELSE); if (!strcmp(s, "enum")) return (T_ENUM); if (!strcmp(s, "extern")) return (T_EXTERN); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'l': if (!strcmp(s, "long")) return (T_LONG); break; case 'r': if (!strcmp(s, "return")) return (T_RETURN); break; case 's': if (!strcmp(s, "struct")) return (T_STRUCT); if (!strcmp(s, "switch")) return (T_SWITCH); break; case 't': if (!strcmp(s, "typedef")) return (T_TYPEDEF); break; case 'u': if (!strcmp(s, "union")) return (T_UNION); break; case 'v': if (!strcmp(s, "void")) return (T_VOID); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; } return (0); } // A pointer to a rejected token static struct token *Rejtoken = NULL; // Reject the token that we just scanned void reject_token(struct token *t) { if (Rejtoken != NULL) fatal("Can't reject token twice"); Rejtoken = t; } // List of token strings, for debugging purposes char *Tstring[] = { "EOF", "=", "||", "&&", "|", "^", "&", "==", "!=", ",", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", "default", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":" }; // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. 
int scan(struct token *t) { int c, tokentype; // If we have any rejected token, return it if (Rejtoken != NULL) { t = Rejtoken; Rejtoken = NULL; return (1); } // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': if ((c = next()) == '+') { t->token = T_INC; } else { putback(c); t->token = T_PLUS; } break; case '-': if ((c = next()) == '-') { t->token = T_DEC; } else if (c == '>') { t->token = T_ARROW; } else { putback(c); t->token = T_MINUS; } break; case '*': t->token = T_STAR; break; case '/': t->token = T_SLASH; break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case '[': t->token = T_LBRACKET; break; case ']': t->token = T_RBRACKET; break; case '~': t->token = T_INVERT; break; case '^': t->token = T_XOR; break; case ',': t->token = T_COMMA; break; case '.': t->token = T_DOT; break; case ':': t->token = T_COLON; break; case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { putback(c); t->token = T_LOGNOT; } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else if (c == '<') { t->token = T_LSHIFT; } else { putback(c); t->token = T_LT; } break; case '>': if ((c = next()) == '=') { t->token = T_GE; } else if (c == '>') { t->token = T_RSHIFT; } else { putback(c); t->token = T_GT; } break; case '&': if ((c = next()) == '&') { t->token = T_LOGAND; } else { putback(c); t->token = T_AMPER; } break; case '|': if ((c = next()) == '|') { t->token = T_LOGOR; } else { putback(c); t->token = T_OR; } break; case '\'': // If it's a quote, scan in the // literal character value and // the trailing quote t->intvalue = scanch(); t->token = T_INTLIT; if (next() != '\'') fatal("Expected '\\'' at end of char literal"); 
break; case '"': // Scan in a literal string scanstr(Text); t->token = T_STRLIT; break; default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if ((tokentype = keyword(Text)) != 0) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token t->tokstr = Tstring[t->token]; return (1); } ================================================ FILE: 39_Var_Initialisation_pt1/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // Prototypes static struct ASTnode *single_statement(void); // compound_statement: // empty, i.e. no statement // | statement // | statement statements // ; // // statement: declaration // | expression_statement // | function_call // | if_statement // | while_statement // | for_statement // | return_statement // ; // if_statement: if_head // | if_head 'else' statement // ; // // if_head: 'if' '(' true_false_expression ')' statement ; // // Parse an IF statement including any // optional ELSE clause and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. 
condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); rparen(); // Get the AST for the statement trueAST = single_statement(); // If we have an 'else', skip it // and get the AST for the statement if (Token.token == T_ELSE) { scan(&Token); falseAST = single_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, P_NONE, condAST, trueAST, falseAST, NULL, 0)); } // while_statement: 'while' '(' true_false_expression ')' statement ; // // Parse a WHILE statement and return its AST static struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); rparen(); // Get the AST for the statement. // Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Build and return the AST for this statement return (mkastnode(A_WHILE, P_NONE, condAST, NULL, bodyAST, NULL, 0)); } // for_statement: 'for' '(' expression_list ';' // true_false_expression ';' // expression_list ')' statement ; // // Parse a FOR statement and return its AST static struct ASTnode *for_statement(void) { struct ASTnode *condAST, *bodyAST; struct ASTnode *preopAST, *postopAST; struct ASTnode *tree; // Ensure we have 'for' '(' match(T_FOR, "for"); lparen(); // Get the pre_op expression and the ';' preopAST = expression_list(T_SEMI); semi(); // Get the condition and the ';'. // Force a non-comparison to be boolean // the tree's operation is a comparison. 
condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); semi(); // Get the post_op expression and the ')' postopAST = expression_list(T_RPAREN); rparen(); // Get the statement which is the body // Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Glue the statement and the postop tree tree = mkastnode(A_GLUE, P_NONE, bodyAST, NULL, postopAST, NULL, 0); // Make a WHILE loop with the condition and this new body tree = mkastnode(A_WHILE, P_NONE, condAST, NULL, tree, NULL, 0); // And glue the preop tree to the A_WHILE tree return (mkastnode(A_GLUE, P_NONE, preopAST, NULL, tree, NULL, 0)); } // return_statement: 'return' '(' expression ')' ; // // Parse a return statement and return its AST static struct ASTnode *return_statement(void) { struct ASTnode *tree; // Can't return a value if function returns P_VOID if (Functionid->type == P_VOID) fatal("Can't return from a void function"); // Ensure we have 'return' '(' match(T_RETURN, "return"); lparen(); // Parse the following expression tree = binexpr(0); // Ensure this is compatible with the function's type tree = modify_type(tree, Functionid->type, 0); if (tree == NULL) fatal("Incompatible type to return"); // Add on the A_RETURN node tree = mkastunary(A_RETURN, P_NONE, tree, NULL, 0); // Get the ')' and ';' rparen(); semi(); return (tree); } // break_statement: 'break' ; // // Parse a break statement and return its AST static struct ASTnode *break_statement(void) { if (Looplevel == 0 && Switchlevel == 0) fatal("no loop or switch to break out from"); scan(&Token); semi(); return (mkastleaf(A_BREAK, 0, NULL, 0)); } // continue_statement: 'continue' ; // // Parse a continue statement and return its AST static struct ASTnode *continue_statement(void) { if (Looplevel == 0) fatal("no loop to continue to"); scan(&Token); semi(); return (mkastleaf(A_CONTINUE, 0, NULL, 0)); } // Parse a switch statement and 
return its AST static struct ASTnode *switch_statement(void) { struct ASTnode *left, *n, *c, *casetree= NULL, *casetail; int inloop=1, casecount=0; int seendefault=0; int ASTop, casevalue; // Skip the 'switch' and '(' scan(&Token); lparen(); // Get the switch expression, the ')' and the '{' left= binexpr(0); rparen(); lbrace(); // Ensure that this is of int type if (!inttype(left->type)) fatal("Switch expression is not of integer type"); // Build an A_SWITCH subtree with the expression as // the child n= mkastunary(A_SWITCH, 0, left, NULL, 0); // Now parse the cases Switchlevel++; while (inloop) { switch(Token.token) { // Leave the loop when we hit a '}' case T_RBRACE: if (casecount==0) fatal("No cases in switch"); inloop=0; break; case T_CASE: case T_DEFAULT: // Ensure this isn't after a previous 'default' if (seendefault) fatal("case or default after existing default"); // Set the AST operation. Scan the case value if required if (Token.token==T_DEFAULT) { ASTop= A_DEFAULT; seendefault= 1; scan(&Token); } else { ASTop= A_CASE; scan(&Token); left= binexpr(0); // Ensure the case value is an integer literal if (left->op != A_INTLIT) fatal("Expecting integer literal for case value"); casevalue= left->intvalue; // Walk the list of existing case values to ensure // that there isn't a duplicate case value for (c= casetree; c != NULL; c= c -> right) if (casevalue == c->intvalue) fatal("Duplicate case value"); } // Scan the ':' and get the compound expression match(T_COLON, ":"); left= compound_statement(1); casecount++; // Build a sub-tree with the compound statement as the left child // and link it in to the growing A_CASE tree if (casetree==NULL) { casetree= casetail= mkastunary(ASTop, 0, left, NULL, casevalue); } else { casetail->right= mkastunary(ASTop, 0, left, NULL, casevalue); casetail= casetail->right; } break; default: fatals("Unexpected token in switch", Token.tokstr); } } Switchlevel--; // We have a sub-tree with the cases and any default. 
Put the // case count into the A_SWITCH node and attach the case tree. n->intvalue= casecount; n->right= casetree; rbrace(); return(n); } // Parse a single statement and return its AST. static struct ASTnode *single_statement(void) { struct ASTnode *stmt; struct symtable *ctype; switch (Token.token) { case T_LBRACE: // We have a '{', so this is a compound statement lbrace(); stmt = compound_statement(0); rbrace(); return(stmt); case T_IDENT: // We have to see if the identifier matches a typedef. // If not, treat it as an expression. // Otherwise, fall down to the parse_type() call. if (findtypedef(Text) == NULL) { stmt= binexpr(0); semi(); return(stmt); } case T_CHAR: case T_INT: case T_LONG: case T_STRUCT: case T_UNION: case T_ENUM: case T_TYPEDEF: // The beginning of a variable declaration list. declaration_list(&ctype, C_LOCAL, T_SEMI, T_EOF); semi(); return (NULL); // No AST generated here case T_IF: return (if_statement()); case T_WHILE: return (while_statement()); case T_FOR: return (for_statement()); case T_RETURN: return (return_statement()); case T_BREAK: return (break_statement()); case T_CONTINUE: return (continue_statement()); case T_SWITCH: return (switch_statement()); default: // For now, see if this is an expression. // This catches assignment statements. stmt= binexpr(0); semi(); return(stmt); } return (NULL); // Keep -Wall happy } // Parse a compound statement // and return its AST. If inswitch is true, // we look for a '}', 'case' or 'default' token // to end the parsing. Otherwise, look for // just a '}' to end the parsing. 
struct ASTnode *compound_statement(int inswitch) { struct ASTnode *left = NULL; struct ASTnode *tree; while (1) { // Parse a single statement tree = single_statement(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, P_NONE, left, NULL, tree, NULL, 0); } // Leave if we've hit the end token if (Token.token == T_RBRACE) return(left); if (inswitch && (Token.token == T_CASE || Token.token == T_DEFAULT)) return(left); } } ================================================ FILE: 39_Var_Initialisation_pt1/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 // Append a node to the singly-linked list pointed to by head or tail void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node) { // Check for valid pointers if (head == NULL || tail == NULL || node == NULL) fatal("Either head, tail or node is NULL in appendsym"); // Append to the list if (*tail) { (*tail)->next = node; *tail = node; } else *head = *tail = node; node->next = NULL; } // Create a symbol node to be added to a symbol table list. // Set up the node's: // + type: char, int etc. // + ctype: composite type pointer for struct/union // + structural type: var, function, array etc. 
// + size: number of elements, or endlabel: end label for a function // + posn: Position information for local symbols // Return a pointer to the new node struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int size, int posn) { // Get a new node struct symtable *node = (struct symtable *) malloc(sizeof(struct symtable)); if (node == NULL) fatal("Unable to malloc a symbol table node in newsym"); // Fill in the values node->name = strdup(name); node->type = type; node->ctype = ctype; node->stype = stype; node->class = class; node->size = size; node->posn = posn; node->next = NULL; node->member = NULL; // Generate any global space if (class == C_GLOBAL) genglobsym(node); return (node); } // Add a symbol to the global symbol list struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int size) { struct symtable *sym = newsym(name, type, ctype, stype, class, size, 0); appendsym(&Globhead, &Globtail, sym); return (sym); } // Add a symbol to the local symbol list struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_LOCAL, size, 0); appendsym(&Loclhead, &Locltail, sym); return (sym); } // Add a symbol to the parameter list struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_PARAM, size, 0); appendsym(&Parmhead, &Parmtail, sym); return (sym); } // Add a symbol to the temporary member list struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_MEMBER, size, 0); appendsym(&Membhead, &Membtail, sym); return (sym); } // Add a struct to the struct list struct symtable *addstruct(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_STRUCT, 
size, 0); appendsym(&Structhead, &Structtail, sym); return (sym); } // Add a struct to the union list struct symtable *addunion(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_UNION, size, 0); appendsym(&Unionhead, &Uniontail, sym); return (sym); } // Add an enum type or value to the enum list. // Class is C_ENUMTYPE or C_ENUMVAL. // Use posn to store the int value. struct symtable *addenum(char *name, int class, int value) { struct symtable *sym = newsym(name, P_INT, NULL, 0, class, 0, value); appendsym(&Enumhead, &Enumtail, sym); return (sym); } // Add a typedef to the typedef list struct symtable *addtypedef(char *name, int type, struct symtable *ctype, int stype, int size) { struct symtable *sym = newsym(name, type, ctype, stype, C_TYPEDEF, size, 0); appendsym(&Typehead, &Typetail, sym); return (sym); } // Search for a symbol in a specific list. // Return a pointer to the found node or NULL if not found. // If class is not zero, also match on the given class static struct symtable *findsyminlist(char *s, struct symtable *list, int class) { for (; list != NULL; list = list->next) if ((list->name != NULL) && !strcmp(s, list->name)) if (class==0 || class== list->class) return (list); return (NULL); } // Determine if the symbol s is in the global symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findglob(char *s) { return (findsyminlist(s, Globhead, 0)); } // Determine if the symbol s is in the local symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findlocl(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } return (findsyminlist(s, Loclhead, 0)); } // Determine if the symbol s is in the symbol table. // Return a pointer to the found node or NULL if not found. 
struct symtable *findsymbol(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } // Otherwise, try the local and global symbol lists node = findsyminlist(s, Loclhead, 0); if (node) return (node); return (findsyminlist(s, Globhead, 0)); } // Find a member in the member list // Return a pointer to the found node or NULL if not found. struct symtable *findmember(char *s) { return (findsyminlist(s, Membhead, 0)); } // Find a struct in the struct list // Return a pointer to the found node or NULL if not found. struct symtable *findstruct(char *s) { return (findsyminlist(s, Structhead, 0)); } // Find a struct in the union list // Return a pointer to the found node or NULL if not found. struct symtable *findunion(char *s) { return (findsyminlist(s, Unionhead, 0)); } // Find an enum type in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumtype(char *s) { return (findsyminlist(s, Enumhead, C_ENUMTYPE)); } // Find an enum value in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumval(char *s) { return (findsyminlist(s, Enumhead, C_ENUMVAL)); } // Find a type in the tyedef list // Return a pointer to the found node or NULL if not found. 
struct symtable *findtypedef(char *s) { return (findsyminlist(s, Typehead, 0)); } // Reset the contents of the symbol table void clear_symtable(void) { Globhead = Globtail = NULL; Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Membhead = Membtail = NULL; Structhead = Structtail = NULL; Unionhead = Uniontail = NULL; Enumhead = Enumtail = NULL; Typehead = Typetail = NULL; } // Clear all the entries in the local symbol table void freeloclsyms(void) { Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Functionid = NULL; } ================================================ FILE: 39_Var_Initialisation_pt1/tests/err.input31.c ================================================ Expecting a primary expression, got token:+ on line 5 of input31.c ================================================ FILE: 39_Var_Initialisation_pt1/tests/err.input32.c ================================================ Unknown variable:cow on line 4 of input32.c ================================================ FILE: 39_Var_Initialisation_pt1/tests/err.input33.c ================================================ Incompatible type to return on line 4 of input33.c ================================================ FILE: 39_Var_Initialisation_pt1/tests/err.input34.c ================================================ For now, declaration of non-global arrays is not implemented on line 4 of input34.c ================================================ FILE: 39_Var_Initialisation_pt1/tests/err.input35.c ================================================ Duplicate local variable declaration:a on line 4 of input35.c ================================================ FILE: 39_Var_Initialisation_pt1/tests/err.input36.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input36.c ================================================ FILE: 39_Var_Initialisation_pt1/tests/err.input37.c ================================================ Expected:comma on line 3 of 
input37.c ================================================ FILE: 39_Var_Initialisation_pt1/tests/err.input38.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input38.c ================================================ FILE: 39_Var_Initialisation_pt1/tests/err.input39.c ================================================ No statements in function with non-void type on line 4 of input39.c ================================================ FILE: 39_Var_Initialisation_pt1/tests/err.input40.c ================================================ No return for function with non-void type on line 4 of input40.c ================================================ FILE: 39_Var_Initialisation_pt1/tests/err.input41.c ================================================ Can't return from a void function on line 3 of input41.c ================================================ FILE: 39_Var_Initialisation_pt1/tests/err.input42.c ================================================ Undeclared function:fred on line 3 of input42.c ================================================ FILE: 39_Var_Initialisation_pt1/tests/err.input43.c ================================================ Undeclared array:b on line 3 of input43.c ================================================ FILE: 39_Var_Initialisation_pt1/tests/err.input44.c ================================================ Unknown variable:z on line 3 of input44.c ================================================ FILE: 39_Var_Initialisation_pt1/tests/err.input45.c ================================================ & operator must be followed by an identifier on line 3 of input45.c ================================================ FILE: 39_Var_Initialisation_pt1/tests/err.input46.c ================================================ * operator must be followed by an identifier or * on line 3 of input46.c ================================================ FILE: 39_Var_Initialisation_pt1/tests/err.input47.c 
================================================ ++ operator must be followed by an identifier on line 3 of input47.c ================================================ FILE: 39_Var_Initialisation_pt1/tests/err.input48.c ================================================ -- operator must be followed by an identifier on line 3 of input48.c ================================================ FILE: 39_Var_Initialisation_pt1/tests/err.input49.c ================================================ Incompatible expression in assignment on line 6 of input49.c ================================================ FILE: 39_Var_Initialisation_pt1/tests/err.input50.c ================================================ Incompatible types in binary expression on line 6 of input50.c ================================================ FILE: 39_Var_Initialisation_pt1/tests/err.input51.c ================================================ Expected '\'' at end of char literal on line 4 of input51.c ================================================ FILE: 39_Var_Initialisation_pt1/tests/err.input52.c ================================================ Unrecognised character:$ on line 5 of input52.c ================================================ FILE: 39_Var_Initialisation_pt1/tests/err.input56.c ================================================ unknown struct/union type:var1 on line 2 of input56.c ================================================ FILE: 39_Var_Initialisation_pt1/tests/err.input57.c ================================================ previously defined struct/union:fred on line 2 of input57.c ================================================ FILE: 39_Var_Initialisation_pt1/tests/err.input59.c ================================================ Undeclared variable:y on line 3 of input59.c ================================================ FILE: 39_Var_Initialisation_pt1/tests/err.input60.c ================================================ Undeclared variable:x on line 3 of input60.c 
================================================ FILE: 39_Var_Initialisation_pt1/tests/err.input61.c ================================================ Undeclared variable:x on line 3 of input61.c ================================================ FILE: 39_Var_Initialisation_pt1/tests/err.input64.c ================================================ undeclared enum type::fred on line 1 of input64.c ================================================ FILE: 39_Var_Initialisation_pt1/tests/err.input65.c ================================================ enum type redeclared::fred on line 2 of input65.c ================================================ FILE: 39_Var_Initialisation_pt1/tests/err.input66.c ================================================ enum value redeclared::z on line 2 of input66.c ================================================ FILE: 39_Var_Initialisation_pt1/tests/err.input68.c ================================================ redefinition of typedef:FOO on line 2 of input68.c ================================================ FILE: 39_Var_Initialisation_pt1/tests/err.input69.c ================================================ unknown type:FLOO on line 2 of input69.c ================================================ FILE: 39_Var_Initialisation_pt1/tests/err.input72.c ================================================ no loop or switch to break out from on line 1 of input72.c ================================================ FILE: 39_Var_Initialisation_pt1/tests/err.input73.c ================================================ no loop to continue to on line 1 of input73.c ================================================ FILE: 39_Var_Initialisation_pt1/tests/err.input75.c ================================================ Unexpected token in switch:if on line 4 of input75.c ================================================ FILE: 39_Var_Initialisation_pt1/tests/err.input76.c ================================================ No cases in switch on line 3 of input76.c 
================================================ FILE: 39_Var_Initialisation_pt1/tests/err.input77.c ================================================ case or default after existing default on line 6 of input77.c ================================================ FILE: 39_Var_Initialisation_pt1/tests/err.input78.c ================================================ case or default after existing default on line 6 of input78.c ================================================ FILE: 39_Var_Initialisation_pt1/tests/err.input79.c ================================================ Duplicate case value on line 6 of input79.c ================================================ FILE: 39_Var_Initialisation_pt1/tests/err.input85.c ================================================ Bad type in parameter list on line 1 of input85.c ================================================ FILE: 39_Var_Initialisation_pt1/tests/err.input86.c ================================================ Function definition not at global level on line 3 of input86.c ================================================ FILE: 39_Var_Initialisation_pt1/tests/err.input87.c ================================================ Bad type in member list on line 4 of input87.c ================================================ FILE: 39_Var_Initialisation_pt1/tests/input01.c ================================================ int printf(char *fmt); void main() { printf("%d\n", 12 * 3); printf("%d\n", 18 - 2 * 4); printf("%d\n", 1 + 2 + 9 - 5/2 + 3*5); } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input02.c ================================================ int printf(char *fmt); void main() { int fred; int jim; fred= 5; jim= 12; printf("%d\n", fred + jim); } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input03.c ================================================ int printf(char *fmt); void main() { int x; x= 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); 
x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input04.c ================================================ int printf(char *fmt); void main() { int x; x= 7 < 9; printf("%d\n", x); x= 7 <= 9; printf("%d\n", x); x= 7 != 9; printf("%d\n", x); x= 7 == 7; printf("%d\n", x); x= 7 >= 7; printf("%d\n", x); x= 7 <= 7; printf("%d\n", x); x= 9 > 7; printf("%d\n", x); x= 9 >= 7; printf("%d\n", x); x= 9 != 7; printf("%d\n", x); } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input05.c ================================================ int printf(char *fmt); void main() { int i; int j; i=6; j=12; if (i < j) { printf("%d\n", i); } else { printf("%d\n", j); } } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input06.c ================================================ int printf(char *fmt); void main() { int i; i=1; while (i <= 10) { printf("%d\n", i); i= i + 1; } } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input07.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input08.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input09.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { printf("%d\n", 2 * b - a); } } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input10.c 
================================================ int printf(char *fmt); void main() { int i; char j; j= 20; printf("%d\n", j); i= 10; printf("%d\n", i); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 2; j= j + 1) { printf("%d\n", j); } } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input11.c ================================================ int printf(char *fmt); int main() { int i; char j; long k; i= 10; printf("%d\n", i); j= 20; printf("%d\n", j); k= 30; printf("%d\n", k); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 4; j= j + 1) { printf("%d\n", j); } for (k= 1; k <= 5; k= k + 1) { printf("%d\n", k); } return(i); printf("%d\n", 12345); return(3); } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input12.c ================================================ int printf(char *fmt); int fred() { return(5); } void main() { int x; x= fred(2); printf("%d\n", x); } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input13.c ================================================ int printf(char *fmt); int fred() { return(56); } void main() { int dummy; int result; dummy= printf("%d\n", 23); result= fred(10); dummy= printf("%d\n", result); } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input14.c ================================================ int printf(char *fmt); int fred() { return(20); } int main() { int result; printf("%d\n", 10); result= fred(15); printf("%d\n", result); printf("%d\n", fred(15)+10); return(0); } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input15.c ================================================ int printf(char *fmt); int main() { char a; char *b; char c; int d; int *e; int f; a= 18; printf("%d\n", a); b= &a; c= *b; printf("%d\n", c); d= 12; printf("%d\n", d); e= &d; f= *e; printf("%d\n", f); return(0); } 
================================================ FILE: 39_Var_Initialisation_pt1/tests/input16.c ================================================ int printf(char *fmt); int c; int d; int *e; int f; int main() { c= 12; d=18; printf("%d\n", c); e= &c + 1; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input17.c ================================================ int printf(char *fmt); int main() { char a; char *b; int d; int *e; b= &a; *b= 19; printf("%d\n", a); e= &d; *e= 12; printf("%d\n", d); return(0); } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input18.c ================================================ int printf(char *fmt); int main() { int a; int b; a= b= 34; printf("%d\n", a); printf("%d\n", b); return(0); } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input18a.c ================================================ int printf(char *fmt); int a; int *b; char c; char *d; int main() { b= &a; *b= 15; printf("%d\n", a); d= &c; *d= 16; printf("%d\n", c); return(0); } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input19.c ================================================ int printf(char *fmt); int a; int b; int c; int d; int e; int main() { a= 2; b= 4; c= 3; d= 2; e= (a+b) * (c+d); printf("%d\n", e); return(0); } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input20.c ================================================ int printf(char *fmt); int a; int b[25]; int main() { b[3]= 12; a= b[3]; printf("%d\n", a); return(0); } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input21.c ================================================ int printf(char *fmt); char c; char *str; int main() { c= '\n'; printf("%d\n", c); for (str= "Hello world\n"; *str != 0; str= str + 1) { printf("%c", *str); } return(0); 
} ================================================ FILE: 39_Var_Initialisation_pt1/tests/input22.c ================================================ int printf(char *fmt); char a; char b; char c; int d; int e; int f; long g; long h; long i; int main() { b= 5; c= 7; a= b + c++; printf("%d\n", a); e= 5; f= 7; d= e + f++; printf("%d\n", d); h= 5; i= 7; g= h + i++; printf("%d\n", g); a= b-- + c; printf("%d\n", a); d= e-- + f; printf("%d\n", d); g= h-- + i; printf("%d\n", g); a= ++b + c; printf("%d\n", a); d= ++e + f; printf("%d\n", d); g= ++h + i; printf("%d\n", g); a= b * --c; printf("%d\n", a); d= e * --f; printf("%d\n", d); g= h * --i; printf("%d\n", g); return(0); } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input23.c ================================================ int printf(char *fmt); char *str; int x; int main() { x= -23; printf("%d\n", x); printf("%d\n", -10 * -10); x= 1; x= ~x; printf("%d\n", x); x= 2 > 5; printf("%d\n", x); x= !x; printf("%d\n", x); x= !x; printf("%d\n", x); x= 13; if (x) { printf("%d\n", 13); } x= 0; if (!x) { printf("%d\n", 14); } for (str= "Hello world\n"; *str; str++) { printf("%c", *str); } return(0); } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input24.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { a= 42; b= 19; printf("%d\n", a & b); printf("%d\n", a | b); printf("%d\n", a ^ b); printf("%d\n", 1 << 3); printf("%d\n", 63 >> 3); return(0); } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input25.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { char z; int y; int x; x= 10; y= 20; z= 30; printf("%d\n", x); printf("%d\n", y); printf("%d\n", z); a= 5; b= 15; c= 25; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); return(0); } ================================================ FILE: 
39_Var_Initialisation_pt1/tests/input26.c ================================================ int printf(char *fmt); int main(int a, char b, long c, int d, int e, int f, int g, int h) { int i; int j; int k; a= 13; printf("%d\n", a); b= 23; printf("%d\n", b); c= 34; printf("%d\n", c); d= 44; printf("%d\n", d); e= 54; printf("%d\n", e); f= 64; printf("%d\n", f); g= 74; printf("%d\n", g); h= 84; printf("%d\n", h); i= 94; printf("%d\n", i); j= 95; printf("%d\n", j); k= 96; printf("%d\n", k); return(0); } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input27.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int param5(int a, int b, int c, int d, int e) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param2(int a, int b) { int c; int d; int e; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param0() { int a; int b; int c; int d; int e; a= 1; b= 2; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int main() { param8(1,2,3,4,5,6,7,8); param5(1,2,3,4,5); param2(1,2); param0(); return(0); } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input28.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 
2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input29.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h); int fred(int a, int b, int c); int main(); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input30.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input30.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input31.c ================================================ int printf(char *fmt); int main() { int x; x= 2 + + 3 - * / ; } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input32.c ================================================ int printf(char *fmt); int main() { pizza cow llama sausage; } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input33.c ================================================ int printf(char *fmt); int main() { char *z; return(z); } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input34.c 
================================================ int printf(char *fmt); int main() { int a[12]; return(0); } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input35.c ================================================ int printf(char *fmt); int fred(int a, int b) { int a; return(a); } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input36.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c); ================================================ FILE: 39_Var_Initialisation_pt1/tests/input37.c ================================================ int printf(char *fmt); int fred(int a, char b +, int z); ================================================ FILE: 39_Var_Initialisation_pt1/tests/input38.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c, int g); ================================================ FILE: 39_Var_Initialisation_pt1/tests/input39.c ================================================ int printf(char *fmt); int main() { int a; } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input40.c ================================================ int printf(char *fmt); int main() { int a; a= 5; } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input41.c ================================================ int printf(char *fmt); void fred() { return(5); } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input42.c ================================================ int printf(char *fmt); int main() { fred(5); } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input43.c ================================================ int printf(char *fmt); int main() { int a; a= b[4]; } 
================================================ FILE: 39_Var_Initialisation_pt1/tests/input44.c ================================================ int printf(char *fmt); int main() { int a; a= z; } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input45.c ================================================ int printf(char *fmt); int main() { int a; a= &5; } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input46.c ================================================ int printf(char *fmt); int main() { int a; a= *5; } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input47.c ================================================ int printf(char *fmt); int main() { int a; a= ++5; } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input48.c ================================================ int printf(char *fmt); int main() { int a; a= --5; } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input49.c ================================================ int printf(char *fmt); int main() { int x; char y; y= x; } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input50.c ================================================ int printf(char *fmt); int main() { char *a; char *b; a= a + b; } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input51.c ================================================ int printf(char *fmt); int main() { char a; a= 'fred'; } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input52.c ================================================ int printf(char *fmt); int main() { int a; a= $5.00; } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input53.c ================================================ int printf(char *fmt); int main() { 
printf("Hello world, %d\n", 23); return(0); } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input54.c ================================================ int printf(char *fmt); int main() { int i; for (i=0; i < 20; i++) { printf("Hello world, %d\n", i); } return(0); } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input55.c ================================================ int printf(char *fmt); int main(int argc, char **argv) { int i; char *argument; printf("Hello world\n"); for (i=0; i < argc; i++) { argument= *argv; argv= argv + 1; printf("Argument %d is %s\n", i, argument); } return(0); } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input56.c ================================================ struct foo { int x; }; struct mary var1; ================================================ FILE: 39_Var_Initialisation_pt1/tests/input57.c ================================================ struct fred { int x; } ; struct fred { char y; } ; ================================================ FILE: 39_Var_Initialisation_pt1/tests/input58.c ================================================ int printf(char *fmt); struct fred { int x; char y; long z; }; struct fred var2; struct fred *varptr; int main() { long result; var2.x= 12; printf("%d\n", var2.x); var2.y= 'c'; printf("%d\n", var2.y); var2.z= 4005; printf("%d\n", var2.z); result= var2.x + var2.y + var2.z; printf("%d\n", result); varptr= &var2; result= varptr->x + varptr->y + varptr->z; printf("%d\n", result); return(0); } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input59.c ================================================ int main() { int x; x= y.foo; } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input60.c ================================================ int main() { int x; x= x.foo; } 
================================================ FILE: 39_Var_Initialisation_pt1/tests/input61.c ================================================ int main() { int x; x= x->foo; } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input62.c ================================================ int printf(char *fmt); union fred { char w; int x; int y; long z; }; union fred var1; union fred *varptr; int main() { var1.x= 65; printf("%d\n", var1.x); var1.x= 66; printf("%d\n", var1.x); printf("%d\n", var1.y); printf("The next two depend on the endian of the platform\n"); printf("%d\n", var1.w); printf("%d\n", var1.z); varptr= &var1; varptr->x= 67; printf("%d\n", varptr->x); printf("%d\n", varptr->y); return(0); } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input63.c ================================================ int printf(char *fmt); enum fred { apple=1, banana, carrot, pear=10, peach, mango, papaya }; enum jane { aple=1, bnana, crrot, par=10, pech, mago, paaya }; enum fred var1; enum jane var2; enum fred var3; int main() { var1= carrot + pear + mango; printf("%d\n", var1); return(0); } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input64.c ================================================ enum fred var3; ================================================ FILE: 39_Var_Initialisation_pt1/tests/input65.c ================================================ enum fred { x, y, z }; enum fred { a, b }; ================================================ FILE: 39_Var_Initialisation_pt1/tests/input66.c ================================================ enum fred { x, y, z }; enum mary { a, b, z }; ================================================ FILE: 39_Var_Initialisation_pt1/tests/input67.c ================================================ int printf(char *fmt); typedef int FOO; FOO var1; struct bar { int x; int y} ; typedef struct bar BAR; BAR var2; int main() { var1= 5; 
printf("%d\n", var1); var2.x= 7; var2.y= 10; printf("%d\n", var2.x + var2.y); return(0); } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input68.c ================================================ typedef int FOO; typedef char FOO; ================================================ FILE: 39_Var_Initialisation_pt1/tests/input69.c ================================================ typedef int FOO; FLOO y; ================================================ FILE: 39_Var_Initialisation_pt1/tests/input70.c ================================================ #include typedef int FOO; int main() { FOO x; x= 56; printf("%d\n", x); return(0); } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input71.c ================================================ #include int main() { int x; x = 0; while (x < 100) { if (x == 5) { x = x + 2; continue; } printf("%d\n", x); if (x == 14) { break; } x = x + 1; } printf("Done\n"); return (0); } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input72.c ================================================ int main() { break; } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input73.c ================================================ int main() { continue; } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input74.c ================================================ #include int main() { int x; int y; y= 0; for (x=0; x < 5; x++) { switch(x) { case 1: { y= 5; break; } case 2: { y= 7; break; } case 3: { y= 9; } default: { y= 100; } } printf("%d\n", y); } return(0); } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input75.c ================================================ int main() { int x; switch(x) { if (x<5); } } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input76.c 
================================================ int main() { int x; switch(x) { } } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input77.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } case 4: { x= 2; } } } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input78.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } default: { x= 2; } } } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input79.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } case 2: { x= 2; } case 1: { x= 2; } default: { x= 2; } } } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input80.c ================================================ #include int x; int y; int main() { for (x=0, y=1; x < 6; x++, y=y+2) { printf("%d %d\n", x, y); } return(0); } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input81.c ================================================ #include int x; int y; int main() { x= 0; y=1; for (;x<5;) { printf("%d %d\n", x, y); x=x+1; y=y+2; } return(0); } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input82.c ================================================ #include int main() { int x; x= 10; // Dangling else test if (x > 5) if (x > 15) printf("x > 15\n"); else printf("15 >= x > 5\n"); else printf("5 >= x\n"); return(0); } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input83.c ================================================ #include int main() { int x; // Dangling else test. 
// We should not print anything for x<= 5 for (x=0; x < 12; x++) if (x > 5) if (x > 10) printf("10 < %2d\n", x); else printf(" 5 < %2d <= 10\n", x); return(0); } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input84.c ================================================ #include int main() { int x, y; x=2; y=3; printf("%d %d\n", x, y); char a, *b; a= 'f'; b= &a; printf("%c %c\n", a, *b); return(0); } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input85.c ================================================ int main(struct foo { int x; }; , int a) { return(0); } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input86.c ================================================ int main() { int fred() { return(5); } int x; x=2; return(x); } ================================================ FILE: 39_Var_Initialisation_pt1/tests/input87.c ================================================ struct foo { int x; int y; struct blah { int g; }; }; ================================================ FILE: 39_Var_Initialisation_pt1/tests/input88.c ================================================ #include struct foo { int x; int y; } fred, mary; struct foo james; int main() { fred.x= 5; mary.y= 6; printf("%d %d\n", fred.x, mary.y); return(0); } ================================================ FILE: 39_Var_Initialisation_pt1/tests/mktests ================================================ #!/bin/sh # Make the output files for each test # Build our compiler if needed if [ ! -f ../cwj ] then (cd ..; make install) fi for i in input*c do if [ ! -f "out.$i" -a ! -f "err.$i" ] then ../cwj -o out $i 2> "err.$i" # If the err file is empty if [ ! 
-s "err.$i" ] then rm -f "err.$i" cc -o out $i ./out > "out.$i" fi fi rm -f out out.s done ================================================ FILE: 39_Var_Initialisation_pt1/tests/out.input01.c ================================================ 36 10 25 ================================================ FILE: 39_Var_Initialisation_pt1/tests/out.input02.c ================================================ 17 ================================================ FILE: 39_Var_Initialisation_pt1/tests/out.input03.c ================================================ 1 2 3 4 5 ================================================ FILE: 39_Var_Initialisation_pt1/tests/out.input04.c ================================================ 1 1 1 1 1 1 1 1 1 ================================================ FILE: 39_Var_Initialisation_pt1/tests/out.input05.c ================================================ 6 ================================================ FILE: 39_Var_Initialisation_pt1/tests/out.input06.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 39_Var_Initialisation_pt1/tests/out.input07.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 39_Var_Initialisation_pt1/tests/out.input08.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 39_Var_Initialisation_pt1/tests/out.input09.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 39_Var_Initialisation_pt1/tests/out.input10.c ================================================ 20 10 1 2 3 4 5 253 254 255 0 1 ================================================ FILE: 39_Var_Initialisation_pt1/tests/out.input11.c ================================================ 10 20 30 1 2 3 4 5 253 254 255 0 1 2 3 1 2 3 4 5 ================================================ 
FILE: 39_Var_Initialisation_pt1/tests/out.input12.c ================================================ 5 ================================================ FILE: 39_Var_Initialisation_pt1/tests/out.input13.c ================================================ 23 56 ================================================ FILE: 39_Var_Initialisation_pt1/tests/out.input14.c ================================================ 10 20 30 ================================================ FILE: 39_Var_Initialisation_pt1/tests/out.input15.c ================================================ 18 18 12 12 ================================================ FILE: 39_Var_Initialisation_pt1/tests/out.input16.c ================================================ 12 18 ================================================ FILE: 39_Var_Initialisation_pt1/tests/out.input17.c ================================================ 19 12 ================================================ FILE: 39_Var_Initialisation_pt1/tests/out.input18.c ================================================ 34 34 ================================================ FILE: 39_Var_Initialisation_pt1/tests/out.input18a.c ================================================ 15 16 ================================================ FILE: 39_Var_Initialisation_pt1/tests/out.input19.c ================================================ 30 ================================================ FILE: 39_Var_Initialisation_pt1/tests/out.input20.c ================================================ 12 ================================================ FILE: 39_Var_Initialisation_pt1/tests/out.input21.c ================================================ 10 Hello world ================================================ FILE: 39_Var_Initialisation_pt1/tests/out.input22.c ================================================ 12 12 12 13 13 13 13 13 13 35 35 35 ================================================ FILE: 39_Var_Initialisation_pt1/tests/out.input23.c 
================================================ -23 100 -2 0 1 0 13 14 Hello world ================================================ FILE: 39_Var_Initialisation_pt1/tests/out.input24.c ================================================ 2 59 57 8 7 ================================================ FILE: 39_Var_Initialisation_pt1/tests/out.input25.c ================================================ 10 20 30 5 15 25 ================================================ FILE: 39_Var_Initialisation_pt1/tests/out.input26.c ================================================ 13 23 34 44 54 64 74 84 94 95 96 ================================================ FILE: 39_Var_Initialisation_pt1/tests/out.input27.c ================================================ 1 2 3 4 5 6 7 8 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 ================================================ FILE: 39_Var_Initialisation_pt1/tests/out.input28.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 39_Var_Initialisation_pt1/tests/out.input29.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 39_Var_Initialisation_pt1/tests/out.input30.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input30.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 39_Var_Initialisation_pt1/tests/out.input53.c ================================================ Hello world, 23 ================================================ FILE: 39_Var_Initialisation_pt1/tests/out.input54.c ================================================ Hello world, 0 Hello world, 1 Hello 
world, 2 Hello world, 3 Hello world, 4 Hello world, 5 Hello world, 6 Hello world, 7 Hello world, 8 Hello world, 9 Hello world, 10 Hello world, 11 Hello world, 12 Hello world, 13 Hello world, 14 Hello world, 15 Hello world, 16 Hello world, 17 Hello world, 18 Hello world, 19 ================================================ FILE: 39_Var_Initialisation_pt1/tests/out.input55.c ================================================ Hello world Argument 0 is ./out ================================================ FILE: 39_Var_Initialisation_pt1/tests/out.input58.c ================================================ 12 99 4005 4116 4116 ================================================ FILE: 39_Var_Initialisation_pt1/tests/out.input62.c ================================================ 65 66 66 The next two depend on the endian of the platform 66 66 67 67 ================================================ FILE: 39_Var_Initialisation_pt1/tests/out.input63.c ================================================ 25 ================================================ FILE: 39_Var_Initialisation_pt1/tests/out.input67.c ================================================ 5 17 ================================================ FILE: 39_Var_Initialisation_pt1/tests/out.input70.c ================================================ 56 ================================================ FILE: 39_Var_Initialisation_pt1/tests/out.input71.c ================================================ 0 1 2 3 4 7 8 9 10 11 12 13 14 Done ================================================ FILE: 39_Var_Initialisation_pt1/tests/out.input74.c ================================================ 100 5 7 100 100 ================================================ FILE: 39_Var_Initialisation_pt1/tests/out.input80.c ================================================ 0 1 1 3 2 5 3 7 4 9 5 11 ================================================ FILE: 39_Var_Initialisation_pt1/tests/out.input81.c ================================================ 0 1 1 3 2 5 3 
7 4 9 ================================================ FILE: 39_Var_Initialisation_pt1/tests/out.input82.c ================================================ 15 >= x > 5 ================================================ FILE: 39_Var_Initialisation_pt1/tests/out.input83.c ================================================ 5 < 6 <= 10 5 < 7 <= 10 5 < 8 <= 10 5 < 9 <= 10 5 < 10 <= 10 10 < 11 ================================================ FILE: 39_Var_Initialisation_pt1/tests/out.input84.c ================================================ 2 3 f f ================================================ FILE: 39_Var_Initialisation_pt1/tests/out.input88.c ================================================ 5 6 ================================================ FILE: 39_Var_Initialisation_pt1/tests/runtests ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! -f ../cwj ] then (cd ..; make install) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" # Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../cwj -o out $i ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../cwj $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" 
-eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.s "trial.$i" done ================================================ FILE: 39_Var_Initialisation_pt1/tests/runtestsn ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! -f ../compn ] then (cd ..; make install) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" # Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../compn -o out $i ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../compn $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" 
-eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.o out.s "trial.$i" done ================================================ FILE: 39_Var_Initialisation_pt1/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->type = type; n->left = left; n->mid = mid; n->right = right; n->sym = sym; n->intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, struct symtable *sym, int intvalue) { return (mkastnode(op, type, NULL, NULL, NULL, sym, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, struct symtable *sym, int intvalue) { return (mkastnode(op, type, left, NULL, NULL, sym, intvalue)); } // Generate and return a new label number // just for AST dumping purposes static int gendumplabel(void) { static int id = 1; return (id++); } // Given an AST tree, print it out and follow the // traversal of the tree that genAST() follows void dumpAST(struct ASTnode *n, int label, int level) { int Lfalse, Lstart, Lend; switch (n->op) { case A_IF: Lfalse = gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_IF"); if (n->right) { Lend = gendumplabel(); fprintf(stdout, ", end L%d", Lend); } fprintf(stdout, "\n"); dumpAST(n->left, Lfalse, level + 2); dumpAST(n->mid, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); return; case A_WHILE: Lstart = 
gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_WHILE, start L%d\n", Lstart); Lend = gendumplabel(); dumpAST(n->left, Lend, level + 2); dumpAST(n->right, NOLABEL, level + 2); return; } // Reset level to -2 for A_GLUE if (n->op == A_GLUE) level = -2; // General AST node handling if (n->left) dumpAST(n->left, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); for (int i = 0; i < level; i++) fprintf(stdout, " "); switch (n->op) { case A_GLUE: fprintf(stdout, "\n\n"); return; case A_FUNCTION: fprintf(stdout, "A_FUNCTION %s\n", n->sym->name); return; case A_ADD: fprintf(stdout, "A_ADD\n"); return; case A_SUBTRACT: fprintf(stdout, "A_SUBTRACT\n"); return; case A_MULTIPLY: fprintf(stdout, "A_MULTIPLY\n"); return; case A_DIVIDE: fprintf(stdout, "A_DIVIDE\n"); return; case A_EQ: fprintf(stdout, "A_EQ\n"); return; case A_NE: fprintf(stdout, "A_NE\n"); return; case A_LT: fprintf(stdout, "A_LT\n"); return; case A_GT: fprintf(stdout, "A_GT\n"); return; case A_LE: fprintf(stdout, "A_LE\n"); return; case A_GE: fprintf(stdout, "A_GE\n"); return; case A_INTLIT: fprintf(stdout, "A_INTLIT %d\n", n->intvalue); return; case A_STRLIT: fprintf(stdout, "A_STRLIT rval label L%d\n", n->intvalue); return; case A_IDENT: if (n->rvalue) fprintf(stdout, "A_IDENT rval %s\n", n->sym->name); else fprintf(stdout, "A_IDENT %s\n", n->sym->name); return; case A_ASSIGN: fprintf(stdout, "A_ASSIGN\n"); return; case A_WIDEN: fprintf(stdout, "A_WIDEN\n"); return; case A_RETURN: fprintf(stdout, "A_RETURN\n"); return; case A_FUNCCALL: fprintf(stdout, "A_FUNCCALL %s\n", n->sym->name); return; case A_ADDR: fprintf(stdout, "A_ADDR %s\n", n->sym->name); return; case A_DEREF: if (n->rvalue) fprintf(stdout, "A_DEREF rval\n"); else fprintf(stdout, "A_DEREF\n"); return; case A_SCALE: fprintf(stdout, "A_SCALE %d\n", n->size); return; case A_PREINC: fprintf(stdout, "A_PREINC %s\n", n->sym->name); return; case A_PREDEC: fprintf(stdout, "A_PREDEC %s\n", 
n->sym->name); return; case A_POSTINC: fprintf(stdout, "A_POSTINC\n"); return; case A_POSTDEC: fprintf(stdout, "A_POSTDEC\n"); return; case A_NEGATE: fprintf(stdout, "A_NEGATE\n"); return; case A_BREAK: fprintf(stdout, "A_BREAK\n"); return; case A_CONTINUE: fprintf(stdout, "A_CONTINUE\n"); return; case A_CASE: fprintf(stdout, "A_CASE %d\n", n->intvalue); return; case A_DEFAULT: fprintf(stdout, "A_DEFAULT\n"); return; case A_SWITCH: fprintf(stdout, "A_SWITCH\n"); return; default: fatald("Unknown dumpAST operator", n->op); } } ================================================ FILE: 39_Var_Initialisation_pt1/types.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Types and type handling // Copyright (c) 2019 Warren Toomey, GPL3 // Return true if a type is an int type // of any size, false otherwise int inttype(int type) { return (((type & 0xf) == 0) && (type >= P_CHAR && type <= P_LONG)); } // Return true if a type is of pointer type int ptrtype(int type) { return ((type & 0xf) != 0); } // Given a primitive type, return // the type which is a pointer to it int pointer_to(int type) { if ((type & 0xf) == 0xf) fatald("Unrecognised in pointer_to: type", type); return (type + 1); } // Given a primitive pointer type, return // the type which it points to int value_at(int type) { if ((type & 0xf) == 0x0) fatald("Unrecognised in value_at: type", type); return (type - 1); } // Given a type and a composite type pointer, return // the size of this type in bytes int typesize(int type, struct symtable *ctype) { if (type == P_STRUCT || type == P_UNION) return (ctype->size); return (genprimsize(type)); } // Given an AST tree and a type which we want it to become, // possibly modify the tree by widening or scaling so that // it is compatible with this type. Return the original tree // if no changes occurred, a modified tree, or NULL if the // tree is not compatible with the given type. 
// If this will be part of a binary operation, the AST op is not zero. struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op) { int ltype; int lsize, rsize; ltype = tree->type; // XXX No idea on these yet if (ltype == P_STRUCT || ltype == P_UNION) fatal("Don't know how to do this yet"); if (rtype == P_STRUCT || rtype == P_UNION) fatal("Don't know how to do this yet"); // Compare scalar int types if (inttype(ltype) && inttype(rtype)) { // Both types same, nothing to do if (ltype == rtype) return (tree); // Get the sizes for each type lsize = typesize(ltype, NULL); // XXX Fix soon rsize = typesize(rtype, NULL); // XXX Fix soon // Tree's size is too big if (lsize > rsize) return (NULL); // Widen to the right if (rsize > lsize) return (mkastunary(A_WIDEN, rtype, tree, NULL, 0)); } // For pointers on the left if (ptrtype(ltype)) { // OK is same type on right and not doing a binary op if (op == 0 && ltype == rtype) return (tree); } // We can scale only on A_ADD or A_SUBTRACT operation if (op == A_ADD || op == A_SUBTRACT) { // Left is int type, right is pointer type and the size // of the original type is >1: scale the left if (inttype(ltype) && ptrtype(rtype)) { rsize = genprimsize(value_at(rtype)); if (rsize > 1) return (mkastunary(A_SCALE, rtype, tree, NULL, rsize)); else return (tree); // Size 1, no need to scale } } // If we get here, the types are not compatible return (NULL); } ================================================ FILE: 40_Var_Initialisation_pt2/Makefile ================================================ # Define the location of the include directory # and the location to install the compiler binary INCDIR=/tmp/include BINDIR=/tmp HSRCS= data.h decl.h defs.h SRCS= cg.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c ARMSRCS= cg_arm.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c cwj: $(SRCS) $(HSRCS) cc -o cwj -g 
-Wall -DINCDIR=\"$(INCDIR)\" $(SRCS) compn: $(SRCN) $(HSRCS) cc -D__NASM__ -o compn -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCN) cwjarm: $(ARMSRCS) $(HSRCS) cc -o cwjarm -g -Wall $(ARMSRCS) cp cwjarm cwj install: cwj mkdir -p $(INCDIR) rsync -a include/. $(INCDIR) cp cwj $(BINDIR) chmod +x $(BINDIR)/cwj installn: compn mkdir -p $(INCDIR) rsync -a include/. $(INCDIR) cp compn $(BINDIR) chmod +x $(BINDIR)/compn clean: rm -f cwj cwjarm compn *.o *.s out a.out test: install tests/runtests (cd tests; chmod +x runtests; ./runtests) armtest: cwjarm tests/runtests (cd tests; chmod +x runtests; ./runtests) testn: installn tests/runtestsn (cd tests; chmod +x runtestsn; ./runtestsn) ================================================ FILE: 40_Var_Initialisation_pt2/Readme.md ================================================ # Part 40: Global Variable Initialisation In the previous part of our compiler writing journey, I started the groundwork to add variable declarations to our language. I've been able to implement this for global scalar and array variables in this part of our compiler writing journey. At the same time, I realised that I hadn't designed the symbol table structure to properly deal with the size of a variable and the number of elements in an array variable. So half of this part is going to be a rewrite of some of the code that deals with the symbol table. ## A Quick Recap for Global Variable Assignments As a quick recap, below are a set of example global variable assignments that I want to support: ```c int x= 2; char y= 'a'; char *str= "Hello world"; int a[10]; char b[]= { 'q', 'w', 'e', 'r', 't', 'y' }; char c[10]= { 'q', 'w', 'e', 'r', 't', 'y' }; // Zero padded char *d[]= { "apple", "banana", "peach", "pear" }; ``` I'm not going to deal with initialisation of global structs or unions. Also, for now, I'm not going to deal with putting NULL into `char *` variables. I'll come back to that later, if we need it. 
## Where We Got To In the last part of the journey, I'd written this in `decl.c`: ```c static struct symtable *symbol_declaration(...) { ... // The array or scalar variable is being initialised if (Token.token == T_ASSIGN) { ... // Array initialisation if (stype == S_ARRAY) array_initialisation(sym, type, ctype, class); else { fatal("Scalar variable initialisation not done yet"); // Variable initialisation // if (class== C_LOCAL) // Local variable, parse the expression // expr= binexpr(0); // else write more code! } } ... } ``` i.e. I knew where to put the code but I didn't know what code to write. First up, we need to parse some literal values... ## Scalar Variable Initialisation We are going to need to parse integer and string literals, as these are the only things which we can assign to global variables. We need to ensure that the type of each literal is compatible with the variable type that we are assigning. To this end, there's a new function in `decl.c`: ```c // Given a type, check that the latest token is a literal // of that type. If an integer literal, return this value. // If a string literal, return the label number of the string. // Do not scan the next token. int parse_literal(int type) { // We have a string literal. Store in memory and return the label if ((type == pointer_to(P_CHAR)) && (Token.token == T_STRLIT)) return(genglobstr(Text)); if (Token.token == T_INTLIT) { switch(type) { case P_CHAR: if (Token.intvalue < 0 || Token.intvalue > 255) fatal("Integer literal value too big for char type"); case P_INT: case P_LONG: break; default: fatal("Type mismatch: integer literal vs. variable"); } } else fatal("Expecting an integer literal value"); return(Token.intvalue); } ``` The first IF statement ensures that we can do: ```c char *str= "Hello world"; ``` and it returns the label number of the address where the string is stored. For integer literals, we check the range when we are assigning to a `char` variable. 
And for any other token type, we have a fatal error. ## Changes to the Symbol Table Structure The above function always returns an integer, regardless of what type of literal it parses. Now we need a location in each variable's symbol entry to store this. So, I've added (and/or modified) these fields in the symbol entry structure in `defs.h`: ```c // Symbol table structure struct symtable { ... int size; // Total size in bytes of this symbol int nelems; // Functions: # params. Arrays: # elements ... int *initlist; // List of initial values ... }; ``` For a scalar with one initial value, or for an array with several initial values, we store a count of elements in `nelems` and attach a list of integer values to `initlist`. Let's look at assignment to a scalar variable. ## Assignment to Scalar Variables The `scalar_declaration()` function is modified as follows: ```c static struct symtable *scalar_declaration(...) { ... // The variable is being initialised if (Token.token == T_ASSIGN) { // Only possible for a global or local if (class != C_GLOBAL && class != C_LOCAL) fatals("Variable can not be initialised", varname); scan(&Token); // Globals must be assigned a literal value if (class == C_GLOBAL) { // Create one initial value for the variable and // parse this value sym->initlist= (int *)malloc(sizeof(int)); sym->initlist[0]= parse_literal(type); scan(&Token); } // No else code yet, soon } // Generate any global space if (class == C_GLOBAL) genglobsym(sym); return (sym); } ``` We ensure that the assignment can only occur in global or local context, and we skip over the '=' token. We set up an `initlist` of exactly one and call `parse_literal()` with the type of this variable to get the literal value (or the label number of a string). Then we skip the literal value to get to the following token (either a ',' or a ';'). Previously, the `sym` symbol table entry was created with `addglob()` and the number of elements was set to one. I'll cover this change soon. 
We now move the call to `genglobsym()` (which previously was in `addglob()`) to here, and we wait until the initial value is stored in the `sym` entry. This ensures that the literal we just parsed will be put into the storage for the variable in memory. ### Scalar Initialisation Examples As a quick example: ```c int x= 5; char *y= "Hello"; ``` generates: ``` .globl x x: .long 5 L1: .byte 72 .byte 101 .byte 108 .byte 108 .byte 111 .byte 0 .globl y y: .quad L1 ``` ## Changes to the Symbol Table Code Before we get to the parsing of array initialisation, we need to detour over to the changes to the symbol table code. As I highlighted before, my original code didn't properly handle the storage of the size of a variable nor the number of elements in an array. Let's look at the changes I've made to do this. Firstly, we have a bug fix. In `types.c`: ```c // Return true if a type is an int type // of any size, false otherwise int inttype(int type) { return (((type & 0xf) == 0) && (type >= P_CHAR && type <= P_LONG)); } ``` Previously, there was no test against P_CHAR, so a `void` type was treated as an integer type. Oops! In `sym.c` we now deal with the fact that each variable now has a: ```c int size; // Total size in bytes of this symbol int nelems; // Functions: # params. Arrays: # elements ``` Later, we will use the `size` field for the `sizeof()` operator. We now need to set up both fields when we add a symbol to the global or local symbol table. The `newsym()` function and all of the `addXX()` functions in `sym.c` now take an `nelems` argument instead of a `size` argument. For scalar variables, this is set to one. For arrays, this is set to the number of elements in the list. For functions, this is set to the number of function parameters. And for all other symbol tables, the value is unused. We now calculate the `size` value in `newsym()`: ```c // For pointers and integer types, set the size // of the symbol. 
structs and union declarations // manually set this up themselves. if (ptrtype(type) || inttype(type)) node->size = nelems * typesize(type, ctype); ``` `typesize()` consults the `ctype` pointer to get the size of a struct or union, or calls `genprimsize()` (which calls `cgprimsize()`) to get the size of a pointer or an integer type. Note the comment about structs and unions. We can't call `addstruct()` (which calls `newsym()`) with the details of a struct's size, because: ```c struct foo { // We call addglob() here int x; int y; // before we know the size of the structure int z; }; ``` So the code in `composite_declaration()` in `decl.c` now does this: ```c static struct symtable *composite_declaration(...) { ... // Build the composite type if (type == P_STRUCT) ctype = addstruct(Text); else ctype = addunion(Text); ... // Scan in the list of members while (1) { ... } // Attach to the struct type's node ctype->member = Membhead; ... // Set the overall size of the composite type ctype->size = offset; return (ctype); } ``` So, in summary, the `size` field in a symbol table entry now holds the size of all of the elements in the variable, and `nelems` is the count of elements in the variable: one for scalars, some non-zero positive number for arrays. ## Array Variable Initialisation We can finally get to array initialisation. I want to allow three forms: ```c int a[10]; // Ten zeroed elements char b[]= { 'q', 'w', 'e', 'r', 't', 'y' }; // Six elements char c[10]= { 'q', 'w', 'e', 'r', 't', 'y' }; // Ten elements, zero padded ``` but prevent an array declared with size *N* and more than *N* initialisation values. Let's look at the changes to `array_declaration()`. Previously, I was going to call an `array_initialisation()` function, but I decided to move all of the initialisation code into `array_declaration()` in `decl.c`. We will take it in stages. ```c // Given the type, name and class of an variable, parse // the size of the array, if any. 
Then parse any initialisation // value and allocate storage for it. // Return the variable's symbol table entry. static struct symtable *array_declaration(...) { int nelems= -1; // Assume the number of elements won't be given ... // Skip past the '[' scan(&Token); // See we have an array size if (Token.token == T_INTLIT) { if (Token.intvalue <= 0) fatald("Array size is illegal", Token.intvalue); nelems= Token.intvalue; scan(&Token); } // Ensure we have a following ']' match(T_RBRACKET, "]"); ``` If there's a number between the '[' ']' tokens, parse it and set `nelems` to this value. If there is no number, we leave it set to -1 to indicate this. We also check that the number is positive and non-zero. ```c // Array initialisation if (Token.token == T_ASSIGN) { if (class != C_GLOBAL) fatals("Variable can not be initialised", varname); scan(&Token); // Get the following left curly bracket match(T_LBRACE, "{"); ``` Right now I'm only dealing with global arrays. ```c #define TABLE_INCREMENT 10 // If the array already has nelems, allocate that many elements // in the list. Otherwise, start with TABLE_INCREMENT. if (nelems != -1) maxelems= nelems; else maxelems= TABLE_INCREMENT; initlist= (int *)malloc(maxelems *sizeof(int)); ``` We create an initial list of either 10 integers, or `nelems` if the array was given a fixed size. However, for arrays with no fixed size, we cannot predict how big the initialisation list will be. So we must be prepared to grow the list. ```c // Loop getting a new literal value from the list while (1) { // Check we can add the next value, then parse and add it if (nelems != -1 && i == maxelems) fatal("Too many values in initialisation list"); initlist[i++]= parse_literal(type); scan(&Token); ``` Get the next literal value and ensure we don't have more initial values than the array size if it was specified. 
```c // Increase the list size if the original size was // not set and we have hit the end of the current list if (nelems == -1 && i == maxelems) { maxelems += TABLE_INCREMENT; initlist= (int *)realloc(initlist, maxelems *sizeof(int)); } ``` Here is where we increase the initialisation list size as necessary. ```c // Leave when we hit the right curly bracket if (Token.token == T_RBRACE) { scan(&Token); break; } // Next token must be a comma, then comma(); } ``` Parse the closing right curly bracket or a comma that separates values. Once out of the loop, we now have an `initlist` with values in it. ```c // Zero any unused elements in the initlist. // Attach the list to the symbol table entry for (j=i; j < sym->nelems; j++) initlist[j]=0; if (i > nelems) nelems = i; sym->initlist= initlist; } ``` We may not have been given enough initialisation values to meet the specified size of the initialisation list, so zero out all the ones that were not initialised. It is here that we attach the initialisation list to the symbol table entry. ```c // Set the size of the array and the number of elements sym->nelems= nelems; sym->size= sym->nelems * typesize(type, ctype); // Generate any global space if (class == C_GLOBAL) genglobsym(sym); return (sym); } ``` We can finally update `nelems` and `size` in the symbol table entry. Once this is done, we can call `genglobsym()` to create the memory storage for the array. ## Changes to `cgglobsym()` Before we look at the assembly output of an example array initialisation, we need to see how the changes of `nelems` and `size` have affected the code that generates the assembly for the memory storage. `genglobsym()` is the front-end function which simply calls `cgglobsym()`.
Let's look at this function in `cg.c`: ```c // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size, type; int initvalue; int i; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the variable (or its elements if an array) // and the type of the variable if (node->stype == S_ARRAY) { size= typesize(value_at(node->type), node->ctype); type= value_at(node->type); } else { size = node->size; type= node->type; } ``` Right now, arrays have their `type` set to be a pointer to the underlying element type. This allows us to do: ```c char a[45]; char *b; b= a; // as they are of same type ``` In terms of generating storage, we need to know the size of the elements, so we call `value_at()` to do this. For scalars, `size` and `type` are stored as-is in the symbol table entry. ```c // Generate the global identity and the label cgdataseg(); fprintf(Outfile, "\t.globl\t%s\n", node->name); fprintf(Outfile, "%s:\n", node->name); ``` As before. But now the code is different: ```c // Output space for one or more elements for (i=0; i < node->nelems; i++) { // Get any initial value initvalue= 0; if (node->initlist != NULL) initvalue= node->initlist[i]; // Generate the space for this type switch (size) { case 1: fprintf(Outfile, "\t.byte\t%d\n", initvalue); break; case 4: fprintf(Outfile, "\t.long\t%d\n", initvalue); break; case 8: // Generate the pointer to a string literal if (node->initlist != NULL && type== pointer_to(P_CHAR)) fprintf(Outfile, "\t.quad\tL%d\n", initvalue); else fprintf(Outfile, "\t.quad\t%d\n", initvalue); break; default: for (int i = 0; i < size; i++) fprintf(Outfile, "\t.byte\t0\n"); } } } ``` For every element, get its initial value from the `initlist` or use zero if no initialisation list. Based on the size of each element, output either a byte, a long or a quad. For `char *` elements, we have the label of the string literal's base in the initialisation list, so output "L%d" (i.e.
the label) instead of the integer literal value. ### Array Initialisation Examples Here is a small example of an array initialisation: ```c int x[4]= { 1, 4, 17 }; ``` generates: ``` .globl x x: .long 1 .long 4 .long 17 .long 0 ``` ## Test Programs I won't go through the test programs, but the programs `tests/input89.c` through to `tests/input99.c` check that the compiler is generating sensible initialisation code as well as catching suitable fatal errors. ## Conclusion and What's Next So that was a lot of work! Three steps forward and one step back, as they say. I'm happy, though, because the changes to the symbol table make much more sense than what I had before. In the next part of our compiler writing journey, we will try to add local variable initialisation to the compiler. [Next step](../41_Local_Var_Init/Readme.md) ================================================ FILE: 40_Var_Initialisation_pt2/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\t.text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\t.data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. 
This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch (type) { case P_CHAR: return (offset); case P_INT: case P_LONG: break; default: fatald("Bad type in calc_aligned_offset:", type); } // Here we have an int or a long. Align it on a 4-byte offset // I put the generic code here so it can be reused elsewhere. alignment = 4; offset = (offset + direction * (alignment - 1)) & ~(alignment - 1); return (offset); } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. static int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. // We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "%r10", "%r11", "%r12", "%r13", "%r9", "%r8", "%rcx", "%rdx", "%rsi", "%rdi" }; static char *breglist[] = { "%r10b", "%r11b", "%r12b", "%r13b", "%r9b", "%r8b", "%cl", "%dl", "%sil", "%dil" }; static char *dreglist[] = { "%r10d", "%r11d", "%r12d", "%r13d", "%r9d", "%r8d", "%ecx", "%edx", "%esi", "%edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. 
static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); cgtextseg(); fprintf(Outfile, "# internal switch(expr) routine\n" "# %%rsi = switch table, %%rax = expr\n" "# from SubC: http://www.t3x.org/subc/\n" "\n" "switch:\n" " pushq %%rsi\n" " movq %%rdx, %%rsi\n" " movq %%rax, %%rbx\n" " cld\n" " lodsq\n" " movq %%rax, %%rcx\n" "next:\n" " lodsq\n" " movq %%rax, %%rdx\n" " lodsq\n" " cmpq %%rdx, %%rbx\n" " jnz no\n" " popq %%rsi\n" " jmp *%%rax\n" "no:\n" " loop next\n" " lodsq\n" " popq %%rsi\n" " jmp *%%rax\n" "\n"); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(struct symtable *sym) { char *name = sym->name; struct symtable *parm, *locvar; int cnt; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the %rsp and %rsp fprintf(Outfile, "\t.globl\t%s\n" "\t.type\t%s, @function\n" "%s:\n" "\tpushq\t%%rbp\n" "\tmovq\t%%rsp, %%rbp\n", name, name, name); // Copy any in-register parameters to the stack, up to six of them // The remaining parameters are already on the stack for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) { if (cnt > 6) { parm->posn = paramOffset; paramOffset += 8; } else { parm->posn = newlocaloffset(parm->type); cgstorlocal(paramReg--, parm); } } // For the remainder, if they are a parameter then they are // already on the stack. 
If only a local, make a stack position. for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) { locvar->posn = newlocaloffset(locvar->type); } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\taddq\t$%d,%%rsp\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->endlabel); fprintf(Outfile, "\taddq\t$%d,%%rsp\n", stackOffset); fputs("\tpopq %rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. 
int cgloadglob(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name); } else // Print out the code to initialise it switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovzbq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovslq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. 
int cgloadlocal(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->posn); fprintf(Outfile, "\tmovq\t%d(%%rbp), %s\n", sym->posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->posn); } else switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->posn); fprintf(Outfile, "\tmovzbq\t%d(%%rbp), %s\n", sym->posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->posn); fprintf(Outfile, "\tmovslq\t%d(%%rbp), %s\n", sym->posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->posn); break; default: fatald("Bad type in cgloadlocal:", sym->type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tleaq\tL%d(%%rip), %s\n", label, reglist[r]); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, 
"\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tandq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\torq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txorq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshlq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshrq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tnegq\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnotq\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); return (r); } // Convert an integer value to a boolean value. 
Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s@PLT\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\taddq\t$%d, %%rsp\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpushq\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmovq\t%s, %s\n", reglist[r], reglist[FIRSTPARAMREG - argposn + 1]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsalq\t$%d, %s\n", val, reglist[r]); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %s(%%rip)\n", reglist[r], sym->name); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %s(%%rip)\n", breglist[r], sym->name); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %s(%%rip)\n", dreglist[r], sym->name); break; default: fatald("Bad type in cgstorglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %d(%%rbp)\n", reglist[r], sym->posn); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %d(%%rbp)\n", breglist[r], sym->posn); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %d(%%rbp)\n", dreglist[r], sym->posn); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size, type; int initvalue; int i; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the variable (or its elements if an array) // and the type of the variable if (node->stype == S_ARRAY) { size= typesize(value_at(node->type), node->ctype); type= value_at(node->type); } else { size = node->size; type= node->type; } // Generate the global identity and the label cgdataseg(); fprintf(Outfile, "\t.globl\t%s\n", node->name); fprintf(Outfile, 
"%s:\n", node->name); // Output space for one or more elements for (i=0; i < node->nelems; i++) { // Get any initial value initvalue= 0; if (node->initlist != NULL) initvalue= node->initlist[i]; // Generate the space for this type switch (size) { case 1: fprintf(Outfile, "\t.byte\t%d\n", initvalue); break; case 4: fprintf(Outfile, "\t.long\t%d\n", initvalue); break; case 8: // Generate the pointer to a string literal if (node->initlist != NULL && type== pointer_to(P_CHAR)) fprintf(Outfile, "\t.quad\tL%d\n", initvalue); else fprintf(Outfile, "\t.quad\t%d\n", initvalue); break; default: for (int i = 0; i < size; i++) fprintf(Outfile, "\t.byte\t0\n"); } } } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\t.byte\t%d\n", *cptr); } fprintf(Outfile, "\t.byte\t0\n"); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzbl\t%s, %%eax\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %%eax\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } cgjump(sym->endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL) fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", sym->name, reglist[r]); else fprintf(Outfile, "\tleaq\t%d(%%rbp), %s\n", sym->posn, reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzbq\t(%s), %s\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovslq\t(%s), %s\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { // Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmovb\t%s, (%s)\n", breglist[r1], reglist[r2]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } // Generate a switch jump table and the code to // load the registers and call the switch() code void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; // Get a label for the switch table label = genlabel(); cglabel(label); // Heuristic. If we have no cases, create one case // which points to the default case if (casecount == 0) { caseval[0] = 0; caselabel[0] = defaultlabel; casecount = 1; } // Generate the switch jump table. 
fprintf(Outfile, "\t.quad\t%d\n", casecount); for (i = 0; i < casecount; i++) fprintf(Outfile, "\t.quad\t%d, L%d\n", caseval[i], caselabel[i]); fprintf(Outfile, "\t.quad\tL%d\n", defaultlabel); // Load the specific registers cglabel(toplabel); fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); fprintf(Outfile, "\tleaq\tL%d(%%rip), %%rdx\n", label); fprintf(Outfile, "\tjmp\tswitch\n"); } ================================================ FILE: 40_Var_Initialisation_pt2/cg_arm.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for ARMv6 on Raspberry Pi // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. static int freereg[4]; static char *reglist[4] = { "r4", "r5", "r6", "r7" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // We have to store large integer literal values in memory. // Keep a list of them which will be output in the postamble #define MAXINTS 1024 int Intlist[MAXINTS]; static int Intslot = 0; // Determine the offset of a large integer // literal from the .L3 label. If the integer // isn't in the list, add it. 
static void set_int_offset(int val) { int offset = -1; // See if it is already there for (int i = 0; i < Intslot; i++) { if (Intlist[i] == val) { offset = 4 * i; break; } } // Not in the list, so add it if (offset == -1) { offset = 4 * Intslot; if (Intslot == MAXINTS) fatal("Out of int slots in set_int_offset()"); Intlist[Intslot++] = val; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L3+%d\n", offset); } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Print out the assembly postamble void cgpostamble() { // Print out the global variables fprintf(Outfile, ".L2:\n"); for (int i = 0; i < Globs; i++) { if (Symtable[i].stype == S_VARIABLE) fprintf(Outfile, "\t.word %s\n", Symtable[i].name); } // Print out the integer literals fprintf(Outfile, ".L3:\n"); for (int i = 0; i < Intslot; i++) { fprintf(Outfile, "\t.word %d\n", Intlist[i]); } } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, \%%function\n" "%s:\n" "\tpush\t{fp, lr}\n" "\tadd\tfp, sp, #4\n" "\tsub\tsp, sp, #8\n" "\tstr\tr0, [fp, #-8]\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Symtable[id].endlabel); fputs("\tsub\tsp, fp, #4\n" "\tpop\t{fp, pc}\n" "\t.align\t2\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); // If the literal value is small, do it with one instruction if (value <= 1000) fprintf(Outfile, "\tmov\t%s, #%d\n", reglist[r], value); else { set_int_offset(value); fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); } return (r); } // Determine the offset of a variable from the .L2 // label. Yes, this is inefficient code. static void set_var_offset(int id) { int offset = 0; // Walk the symbol table up to id. 
// Find S_VARIABLEs and add on 4 until // we get to our variable for (int i = 0; i < id; i++) { if (Symtable[i].stype == S_VARIABLE) offset += 4; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L2+%d\n", offset); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tldrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s, %s\n", reglist[r1], reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\tmul\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { // To do a divide: r0 holds the dividend, r1 holds the divisor. // The quotient is in r0. 
fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r1]); fprintf(Outfile, "\tmov\tr1, %s\n", reglist[r2]); fprintf(Outfile, "\tbl\t__aeabi_idiv\n"); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r1]); free_register(r2); return (r1); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]); fprintf(Outfile, "\tbl\t%s\n", Symtable[id].name); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r]); return (r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tlsl\t%s, %s, #%d\n", reglist[r], reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tstr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgstorglob:", Symtable[id].type); } return (r); } // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { if (ptrtype(type)) return (4); switch (type) { case P_CHAR: return (1); case P_INT: case P_LONG: return (4); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Symtable[id].type); fprintf(Outfile, "\t.data\n" "\t.globl\t%s\n", Symtable[id].name); switch (typesize) { case 1: fprintf(Outfile, "%s:\t.byte\t0\n", Symtable[id].name); break; case 4: fprintf(Outfile, "%s:\t.long\t0\n", Symtable[id].name); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "moveq", "movne", "movlt", "movgt", "movle", "movge" }; // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "movne", "moveq", "movge", "movle", "movgt", "movlt" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #1\n", cmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #0\n", invcmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\tuxtb\t%s, %s\n", reglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tb\tL%d\n", l); } // List of inverted branch instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *brlist[] = { "bne", "beq", "bge", "ble", "bgt", "blt" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", brlist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[reg]); cgjump(Symtable[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. Return a new register int cgaddress(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); fprintf(Outfile, "\tmov\t%s, r3\n", reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tldrb\t%s, [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [%s]\n", reglist[r1], reglist[r2]); break; case P_INT: case P_LONG: fprintf(Outfile, "\tstr\t%s, [%s]\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 40_Var_Initialisation_pt2/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to 
enum { no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\tsection .text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\tsection .data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch (type) { case P_CHAR: return (offset); case P_INT: case P_LONG: break; default: fatald("Bad type in calc_aligned_offset:", type); } // Here we have an int or a long. Align it on a 4-byte offset // I put the generic code here so it can be reused elsewhere. alignment = 4; offset = (offset + direction * (alignment - 1)) & ~(alignment - 1); return (offset); } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. 
// We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "r10", "r11", "r12", "r13", "r9", "r8", "rcx", "rdx", "rsi", "rdi" }; static char *breglist[] = { "r10b", "r11b", "r12b", "r13b", "r9b", "r8b", "cl", "dl", "sil", "dil" }; static char *dreglist[] = { "r10d", "r11d", "r12d", "r13d", "r9d", "r8d", "ecx", "edx", "esi", "edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\textern\tprintint\n", Outfile); fputs("\textern\tprintchar\n", Outfile); fputs("\textern\topen\n", Outfile); fputs("\textern\tclose\n", Outfile); fputs("\textern\tread\n", Outfile); fputs("\textern\twrite\n", Outfile); fputs("\textern\tprintf\n", Outfile); cgtextseg(); fprintf(Outfile, "; internal switch(expr) routine\n" "; rsi = switch table, rax = expr\n" "; from SubC: http://www.t3x.org/subc/\n" "\n" "switch:\n" " push rsi\n" " mov rsi, rdx\n" " mov rbx, rax\n" " cld\n" " lodsq\n" " mov rcx, rax\n" "next:\n" " lodsq\n" " mov rdx, rax\n" " lodsq\n" " cmp rbx, rdx\n" " jnz no\n" " pop rsi\n" " jmp rax\n" "no:\n" " loop next\n" " lodsq\n" " pop rsi\n" " jmp rax\n" "\n"); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(struct symtable *sym) { char *name = sym->name; struct symtable *parm, *locvar; int cnt; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the rsp and rbp fprintf(Outfile, "\tglobal\t%s\n" "%s:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n", name, name); // Copy any in-register parameters to the stack, up to six of them // The remaining parameters are already on the stack for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) { if (cnt > 6) { parm->posn = paramOffset; paramOffset += 8; } else { parm->posn = newlocaloffset(parm->type); cgstorlocal(paramReg--, parm); } } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. 
for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) { locvar->posn = newlocaloffset(locvar->type); } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\tadd\trsp, %d\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->endlabel); fprintf(Outfile, "\tadd\trsp, %d\n", stackOffset); fputs("\tpop rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadglob(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); } else // Print out the code to initialise it switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, 
"\tdec\tdword [%s]\n", sym->name); fprintf(Outfile, "\tmovsx\t%s, word [%s]\n", dreglist[r], sym->name); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword [%s]\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadlocal(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->posn); fprintf(Outfile, "\tmov\t%s, [rbp+%d]\n", reglist[r], sym->posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->posn); } else switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->posn); fprintf(Outfile, "\tmovzx\t%s, byte [rbp+%d]\n", reglist[r], sym->posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", sym->posn); fprintf(Outfile, "\tmovsx\t%s, dword [rbp+%d]\n", reglist[r], sym->posn); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, 
"\tdec\tdword\t[rbp+%d]\n", sym->posn); break; default: fatald("Bad type in cgloadlocal:", sym->type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, L%d\n", reglist[r], label); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tand\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\tor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshl\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshr\t%s, 
cl\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tneg\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnot\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r], breglist[r]); return (r); } // Convert an integer value to a boolean value. Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, byte %s\n", reglist[r], breglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\tadd\trsp, %d\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmov\t%s, rax\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpush\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmov\t%s, %s\n", reglist[FIRSTPARAMREG - argposn + 1], reglist[r]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsal\t%s, %d\n", reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, dreglist[r]); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\tqword\t[rbp+%d], %s\n", sym->posn, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\tbyte\t[rbp+%d], %s\n", sym->posn, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\tdword\t[rbp+%d], %s\n", sym->posn, dreglist[r]); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size, type; int initvalue; int i; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the variable (or its elements if an array) // and the type of the variable if (node->stype == S_ARRAY) { size= typesize(value_at(node->type), node->ctype); type= value_at(node->type); } else { size = node->size; type= node->type; } // Generate the global identity and the label cgdataseg(); fprintf(Outfile, "\tsection\t.data\n" "\tglobal\t%s\n", node->name); 
fprintf(Outfile, "%s:\n", node->name); // Output space for one or more elements for (i = 0; i < node->nelems; i++) { // Get any initial value initvalue = 0; if (node->initlist != NULL) initvalue = node->initlist[i]; // Generate the space for this type // original version switch(size) { case 1: fprintf(Outfile, "\tdb\t%d\n", initvalue); break; case 4: fprintf(Outfile, "\tdd\t%d\n", initvalue); break; case 8: // Generate the pointer to a string literal if (node->initlist != NULL && type == pointer_to(P_CHAR)) fprintf(Outfile, "\tdq\tL%d\n", initvalue); else fprintf(Outfile, "\tdq\t%d\n", initvalue); break; default: for (int i = 0; i < size; i++) fprintf(Outfile, "\tdb\t0\n"); /* compact version using times instead of loop fprintf(Outfile, "\ttimes\t%d\tdb\t0\n", size); */ } } } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\tdb\t%d\n", *cptr); } fprintf(Outfile, "\tdb\t0\n"); /* put string in readable format on a single line // probably went overboard with error checking int comma = 0, quote = 0, start = 1; fprintf(Outfile, "\tdb\t"); for (cptr=strvalue; *cptr; cptr++) { if ( ! isprint(*cptr) ) if (comma || start) { fprintf(Outfile, "%d, ", *cptr); start = 0; comma = 1; } else if (quote) { fprintf(Outfile, "\', %d, ", *cptr); comma = 1; quote = 0; } else { fprintf(Outfile, "%d, ", *cptr); comma = 1; quote = 0; } else if (start || comma) { fprintf(Outfile, "\'%c", *cptr); start = comma = 0; quote = 1; } else { fprintf(Outfile, "%c", *cptr); comma = 0; quote = 1; } } if (comma || start) fprintf(Outfile, "0\n"); else fprintf(Outfile, "\', 0\n"); */ } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzx\teax, %s\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmov\teax, %s\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } cgjump(sym->endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL) fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r], sym->name); else fprintf(Outfile, "\tlea\t%s, [rbp+%d]\n", reglist[r], sym->posn); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovsx\t%s, dword [%s]\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { //Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmov\t[%s], byte %s\n", reglist[r2], breglist[r1]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } // Generate a switch jump table and the code to // load the registers and call the switch() code void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; // Get a label for the switch table label = genlabel(); cglabel(label); // Heuristic. If we have no cases, create one case // which points to the default case if (casecount == 0) { caseval[0] = 0; caselabel[0] = defaultlabel; casecount = 1; } // Generate the switch jump table. 
fprintf(Outfile, "\tdq\t%d\n", casecount); for (i = 0; i < casecount; i++) fprintf(Outfile, "\tdq\t%d, L%d\n", caseval[i], caselabel[i]); fprintf(Outfile, "\tdq\tL%d\n", defaultlabel); // Load the specific registers cglabel(toplabel); fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); fprintf(Outfile, "\tmov\trdx, L%d\n", label); fprintf(Outfile, "\tjmp\tswitch\n"); } ================================================ FILE: 40_Var_Initialisation_pt2/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Putback; // Character put back by scanner extern_ struct symtable *Functionid; // Symbol ptr of the current function extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ char *Infilename; // Name of file we are parsing extern_ char *Outfilename; // Name of file we opened as Outfile extern_ struct token Token; // Last token scanned extern_ char Text[TEXTLEN + 1]; // Last identifier scanned extern_ int Looplevel; // Depth of nested loops extern_ int Switchlevel; // Depth of nested switches // Symbol table lists extern_ struct symtable *Globhead, *Globtail; // Global variables and functions extern_ struct symtable *Loclhead, *Locltail; // Local variables extern_ struct symtable *Parmhead, *Parmtail; // Local parameters extern_ struct symtable *Membhead, *Membtail; // Temp list of struct/union members extern_ struct symtable *Structhead, *Structtail; // List of struct types extern_ struct symtable *Unionhead, *Uniontail; // List of union types extern_ struct symtable *Enumhead, *Enumtail; // List of enum types and values extern_ struct symtable *Typehead, *Typetail; // List of typedefs // Command-line flags extern_ int O_dumpAST; // If true, dump the AST trees extern_ int O_keepasm; // If true, keep any assembly files extern_ int O_assemble; // If true, assemble the assembly files 
extern_ int O_dolink; // If true, link the object files extern_ int O_verbose; // If true, print info on compilation stages ================================================ FILE: 40_Var_Initialisation_pt2/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 static struct symtable *composite_declaration(int type); static int typedef_declaration(struct symtable **ctype); static int type_of_typedef(char *name, struct symtable **ctype); static void enum_declaration(void); // Parse the current token and return a primitive type enum value, // a pointer to any composite type and possibly modify // the class of the type. static int parse_type(struct symtable **ctype, int *class) { int type, exstatic = 1; // See if the class has been changed to extern (later, static) while (exstatic) { switch (Token.token) { case T_EXTERN: *class = C_EXTERN; scan(&Token); break; default: exstatic = 0; } } // Now work on the actual type keyword switch (Token.token) { case T_VOID: type = P_VOID; scan(&Token); break; case T_CHAR: type = P_CHAR; scan(&Token); break; case T_INT: type = P_INT; scan(&Token); break; case T_LONG: type = P_LONG; scan(&Token); break; // For the following, if we have a ';' after the // parsing then there is no type, so return -1. 
// Example: struct x {int y; int z}; case T_STRUCT: type = P_STRUCT; *ctype = composite_declaration(P_STRUCT); if (Token.token == T_SEMI) type = -1; break; case T_UNION: type = P_UNION; *ctype = composite_declaration(P_UNION); if (Token.token == T_SEMI) type = -1; break; case T_ENUM: type = P_INT; // Enums are really ints enum_declaration(); if (Token.token == T_SEMI) type = -1; break; case T_TYPEDEF: type = typedef_declaration(ctype); if (Token.token == T_SEMI) type = -1; break; case T_IDENT: type = type_of_typedef(Text, ctype); break; default: fatals("Illegal type, token", Token.tokstr); } return (type); } // Given a type parsed by parse_type(), scan in any following // '*' tokens and return the new type static int parse_stars(int type) { while (1) { if (Token.token != T_STAR) break; type = pointer_to(type); scan(&Token); } return (type); } // Given a type, check that the latest token is a literal // of that type. If an integer literal, return this value. // If a string literal, return the label number of the string. // Do not scan the next token. int parse_literal(int type) { // We have a string literal. Store in memory and return the label if ((type == pointer_to(P_CHAR)) && (Token.token == T_STRLIT)) return(genglobstr(Text)); if (Token.token == T_INTLIT) { switch(type) { case P_CHAR: if (Token.intvalue < 0 || Token.intvalue > 255) fatal("Integer literal value too big for char type"); case P_INT: case P_LONG: break; default: fatal("Type mismatch: integer literal vs. variable"); } } else fatal("Expecting an integer literal value"); return(Token.intvalue); } // Given the type, name and class of a scalar variable, // parse any initialisation value and allocate storage for it. // Return the variable's symbol table entry. 
static struct symtable *scalar_declaration(char *varname, int type, struct symtable *ctype, int class) { struct symtable *sym=NULL; // Add this as a known scalar switch (class) { case C_EXTERN: case C_GLOBAL: sym= addglob(varname, type, ctype, S_VARIABLE, class, 1, 0); break; case C_LOCAL: sym= addlocl(varname, type, ctype, S_VARIABLE, 1); break; case C_PARAM: sym= addparm(varname, type, ctype, S_VARIABLE); break; case C_MEMBER: sym= addmemb(varname, type, ctype, S_VARIABLE, 1); break; } // The variable is being initialised if (Token.token == T_ASSIGN) { // Only possible for a global or local if (class != C_GLOBAL && class != C_LOCAL) fatals("Variable can not be initialised", varname); scan(&Token); // Globals must be assigned a literal value if (class == C_GLOBAL) { // Create one initial value for the variable and // parse this value sym->initlist= (int *)malloc(sizeof(int)); sym->initlist[0]= parse_literal(type); scan(&Token); } } // Generate any global space if (class == C_GLOBAL) genglobsym(sym); return (sym); } // Given the type, name and class of an variable, parse // the size of the array, if any. Then parse any initialisation // value and allocate storage for it. // Return the variable's symbol table entry. static struct symtable *array_declaration(char *varname, int type, struct symtable *ctype, int class) { struct symtable *sym; // New symbol table entry int nelems= -1; // Assume the number of elements won't be given int maxelems; // The maximum number of elements in the init list int *initlist; // The list of initial elements int i=0, j; // Skip past the '[' scan(&Token); // See we have an array size if (Token.token == T_INTLIT) { if (Token.intvalue <= 0) fatald("Array size is illegal", Token.intvalue); nelems= Token.intvalue; scan(&Token); } // Ensure we have a following ']' match(T_RBRACKET, "]"); // Add this as a known array. 
We treat the // array as a pointer to its elements' type switch (class) { case C_EXTERN: case C_GLOBAL: sym = addglob(varname, pointer_to(type), ctype, S_ARRAY, class, 0, 0); break; default: fatal("For now, declaration of non-global arrays is not implemented"); } // Array initialisation if (Token.token == T_ASSIGN) { if (class != C_GLOBAL) fatals("Variable can not be initialised", varname); scan(&Token); // Get the following left curly bracket match(T_LBRACE, "{"); #define TABLE_INCREMENT 10 // If the array already has nelems, allocate that many elements // in the list. Otherwise, start with TABLE_INCREMENT. if (nelems != -1) maxelems= nelems; else maxelems= TABLE_INCREMENT; initlist= (int *)malloc(maxelems *sizeof(int)); // Loop getting a new literal value from the list while (1) { // Check we can add the next value, then parse and add it if (nelems != -1 && i == maxelems) fatal("Too many values in initialisation list"); initlist[i++]= parse_literal(type); scan(&Token); // Increase the list size if the original size was // not set and we have hit the end of the current list if (nelems == -1 && i == maxelems) { maxelems += TABLE_INCREMENT; initlist= (int *)realloc(initlist, maxelems *sizeof(int)); } // Leave when we hit the right curly bracket if (Token.token == T_RBRACE) { scan(&Token); break; } // Next token must be a comma, then comma(); } // Zero any unused elements in the initlist. // Attach the list to the symbol table entry for (j=i; j < sym->nelems; j++) initlist[j]=0; if (i > nelems) nelems = i; sym->initlist= initlist; } // Set the size of the array and the number of elements sym->nelems= nelems; sym->size= sym->nelems * typesize(type, ctype); // Generate any global space if (class == C_GLOBAL) genglobsym(sym); return (sym); } // Given a pointer to the new function being declared and // a possibly NULL pointer to the function's previous declaration, // parse a list of parameters and cross-check them against the // previous declaration. 
Return the count of parameters static int param_declaration_list(struct symtable *oldfuncsym, struct symtable *newfuncsym) { int type, paramcnt = 0; struct symtable *ctype; struct symtable *protoptr = NULL; // Get the pointer to the first prototype parameter if (oldfuncsym != NULL) protoptr = oldfuncsym->member; // Loop getting any parameters while (Token.token != T_RPAREN) { // Get the type of the next parameter type = declaration_list(&ctype, C_PARAM, T_COMMA, T_RPAREN); if (type == -1) fatal("Bad type in parameter list"); // Ensure the type of this parameter matches the prototype if (protoptr != NULL) { if (type != protoptr->type) fatald("Type doesn't match prototype for parameter", paramcnt + 1); protoptr = protoptr->next; } paramcnt++; // Stop when we hit the right parenthesis if (Token.token == T_RPAREN) break; // We need a comma as separator comma(); } if (oldfuncsym != NULL && paramcnt != oldfuncsym->nelems) fatals("Parameter count mismatch for function", oldfuncsym->name); // Return the count of parameters return (paramcnt); } // // function_declaration: type identifier '(' parameter_list ')' ; // | type identifier '(' parameter_list ')' compound_statement ; // // Parse the declaration of function. static struct symtable *function_declaration(char *funcname, int type, struct symtable *ctype, int class) { struct ASTnode *tree, *finalstmt; struct symtable *oldfuncsym, *newfuncsym = NULL; int endlabel, paramcnt; // Text has the identifier's name. If this exists and is a // function, get the id. Otherwise, set oldfuncsym to NULL. 
if ((oldfuncsym = findsymbol(funcname)) != NULL) if (oldfuncsym->stype != S_FUNCTION) oldfuncsym = NULL; // If this is a new function declaration, get a // label-id for the end label, and add the function // to the symbol table, if (oldfuncsym == NULL) { endlabel = genlabel(); // Assumtion: functions only return scalar types, so NULL below newfuncsym = addglob(funcname, type, NULL, S_FUNCTION, C_GLOBAL, 0, endlabel); } // Scan in the '(', any parameters and the ')'. // Pass in any existing function prototype pointer lparen(); paramcnt = param_declaration_list(oldfuncsym, newfuncsym); rparen(); // If this is a new function declaration, update the // function symbol entry with the number of parameters. // Also copy the parameter list into the function's node. if (newfuncsym) { newfuncsym->nelems = paramcnt; newfuncsym->member = Parmhead; oldfuncsym = newfuncsym; } // Clear out the parameter list Parmhead = Parmtail = NULL; // Declaration ends in a semicolon, only a prototype. if (Token.token == T_SEMI) return (oldfuncsym); // This is not just a prototype. // Set the Functionid global to the function's symbol pointer Functionid = oldfuncsym; // Get the AST tree for the compound statement and mark // that we have parsed no loops or switches yet Looplevel = 0; Switchlevel = 0; lbrace(); tree = compound_statement(0); rbrace(); // If the function type isn't P_VOID .. if (type != P_VOID) { // Error if no statements in the function if (tree == NULL) fatal("No statements in function with non-void type"); // Check that the last AST operation in the // compound statement was a return statement finalstmt = (tree->op == A_GLUE) ? 
tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); } // Build the A_FUNCTION node which has the function's symbol pointer // and the compound statement sub-tree tree = mkastunary(A_FUNCTION, type, tree, oldfuncsym, endlabel); // Generate the assembly code for it if (O_dumpAST) { dumpAST(tree, NOLABEL, 0); fprintf(stdout, "\n\n"); } genAST(tree, NOLABEL, NOLABEL, NOLABEL, 0); // Now free the symbols associated // with this function freeloclsyms(); return (oldfuncsym); } // Parse composite type declarations: structs or unions. // Either find an existing struct/union declaration, or build // a struct/union symbol table entry and return its pointer. static struct symtable *composite_declaration(int type) { struct symtable *ctype = NULL; struct symtable *m; int offset; int t; // Skip the struct/union keyword scan(&Token); // See if there is a following struct/union name if (Token.token == T_IDENT) { // Find any matching composite type if (type == P_STRUCT) ctype = findstruct(Text); else ctype = findunion(Text); scan(&Token); } // If the next token isn't an LBRACE , this is // the usage of an existing struct/union type. // Return the pointer to the type. if (Token.token != T_LBRACE) { if (ctype == NULL) fatals("unknown struct/union type", Text); return (ctype); } // Ensure this struct/union type hasn't been // previously defined if (ctype) fatals("previously defined struct/union", Text); // Build the composite type and skip the left brace if (type == P_STRUCT) ctype = addstruct(Text); else ctype = addunion(Text); scan(&Token); // Scan in the list of members while (1) { // Get the next member. 
m is used as a dummy t= declaration_list(&m, C_MEMBER, T_SEMI, T_RBRACE); if (t== -1) fatal("Bad type in member list"); if (Token.token == T_SEMI) scan(&Token); if (Token.token == T_RBRACE) break; } // Attach to the struct type's node rbrace(); if (Membhead==NULL) fatals("No members in struct", ctype->name); ctype->member = Membhead; Membhead = Membtail = NULL; // Set the offset of the initial member // and find the first free byte after it m = ctype->member; m->posn = 0; offset = typesize(m->type, m->ctype); // Set the position of each successive member in the composite type // Unions are easy. For structs, align the member and find the next free byte for (m = m->next; m != NULL; m = m->next) { // Set the offset for this member if (type == P_STRUCT) m->posn = genalign(m->type, offset, 1); else m->posn = 0; // Get the offset of the next free byte after this member offset += typesize(m->type, m->ctype); } // Set the overall size of the composite type ctype->size = offset; return (ctype); } // Parse an enum declaration static void enum_declaration(void) { struct symtable *etype = NULL; char *name; int intval = 0; // Skip the enum keyword. scan(&Token); // If there's a following enum type name, get a // pointer to any existing enum type node. if (Token.token == T_IDENT) { etype = findenumtype(Text); name = strdup(Text); // As it gets tromped soon scan(&Token); } // If the next token isn't a LBRACE, check // that we have an enum type name, then return if (Token.token != T_LBRACE) { if (etype == NULL) fatals("undeclared enum type:", name); return; } // We do have an LBRACE. Skip it scan(&Token); // If we have an enum type name, ensure that it // hasn't been declared before. 
if (etype != NULL) fatals("enum type redeclared:", etype->name); else // Build an enum type node for this identifier etype = addenum(name, C_ENUMTYPE, 0); // Loop to get all the enum values while (1) { // Ensure we have an identifier // Copy it in case there's an int literal coming up ident(); name = strdup(Text); // Ensure this enum value hasn't been declared before etype = findenumval(name); if (etype != NULL) fatals("enum value redeclared:", Text); // If the next token is an '=', skip it and // get the following int literal if (Token.token == T_ASSIGN) { scan(&Token); if (Token.token != T_INTLIT) fatal("Expected int literal after '='"); intval = Token.intvalue; scan(&Token); } // Build an enum value node for this identifier. // Increment the value for the next enum identifier. etype = addenum(name, C_ENUMVAL, intval++); // Bail out on a right curly bracket, else get a comma if (Token.token == T_RBRACE) break; comma(); } scan(&Token); // Skip over the right curly bracket } // Parse a typedef declaration and return the type // and ctype that it represents static int typedef_declaration(struct symtable **ctype) { int type, class = 0; // Skip the typedef keyword. scan(&Token); // Get the actual type following the keyword type = parse_type(ctype, &class); if (class != 0) fatal("Can't have extern in a typedef declaration"); // See if the typedef identifier already exists if (findtypedef(Text) != NULL) fatals("redefinition of typedef", Text); // Get any following '*' tokens type = parse_stars(type); // It doesn't exist so add it to the typedef list addtypedef(Text, type, *ctype); scan(&Token); return (type); } // Given a typedef name, return the type it represents static int type_of_typedef(char *name, struct symtable **ctype) { struct symtable *t; // Look up the typedef in the list t = findtypedef(name); if (t == NULL) fatals("unknown type", name); scan(&Token); *ctype = t->ctype; return (t->type); } // Parse the declaration of a variable or function. 
// The type and any following '*'s have been scanned, and we // have the identifier in the Token variable. // The class argument is the variable's class. // Return a pointer to the symbol's entry in the symbol table static struct symtable *symbol_declaration(int type, struct symtable *ctype, int class) { struct symtable *sym = NULL; char *varname = strdup(Text); // Ensure that we have an identifier. // We copied it above so we can scan more tokens in, e.g. // an assignment expression for a local variable. ident(); // Deal with function declarations if (Token.token == T_LPAREN) { return (function_declaration(varname, type, ctype, class)); } // See if this array or scalar variable has already been declared switch (class) { case C_EXTERN: case C_GLOBAL: if (findglob(varname) != NULL) fatals("Duplicate global variable declaration", varname); case C_LOCAL: case C_PARAM: if (findlocl(varname) != NULL) fatals("Duplicate local variable declaration", varname); case C_MEMBER: if (findmember(varname) != NULL) fatals("Duplicate struct/union member declaration", varname); } // Add the array or scalar variable to the symbol table if (Token.token == T_LBRACKET) sym = array_declaration(varname, type, ctype, class); else sym = scalar_declaration(varname, type, ctype, class); return (sym); } // Parse a list of symbols where there is an initial type. // Return the type of the symbols. et1 and et2 are end tokens. int declaration_list(struct symtable **ctype, int class, int et1, int et2) { int inittype, type; struct symtable *sym; // Get the initial type. 
If -1, it was // a composite type definition, return this if ((inittype = parse_type(ctype, &class)) == -1) return (inittype); // Now parse the list of symbols while (1) { // See if this symbol is a pointer type = parse_stars(inittype); // Parse this symbol sym = symbol_declaration(type, *ctype, class); // We parsed a function, there is no list so leave if (sym->stype == S_FUNCTION) { if (class != C_GLOBAL) fatal("Function definition not at global level"); return (type); } // We are at the end of the list, leave if (Token.token == et1 || Token.token == et2) return (type); // Otherwise, we need a comma as separator comma(); } } // Parse one or more global declarations, either // variables, functions or structs void global_declarations(void) { struct symtable *ctype; while (Token.token != T_EOF) { declaration_list(&ctype, C_GLOBAL, T_SEMI, T_EOF); // Skip any semicolons and right curly brackets if (Token.token == T_SEMI) scan(&Token); } } ================================================ FILE: 40_Var_Initialisation_pt2/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c void reject_token(struct token *t); int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue); struct ASTnode *mkastleaf(int op, int type, struct symtable *sym, int intvalue); struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, struct symtable *sym, int intvalue); void dumpAST(struct ASTnode *n, int label, int parentASTop); // gen.c int genlabel(void); int genAST(struct ASTnode *n, int iflabel, int looptoplabel, int loopendlabel, int parentASTop); void genpreamble(); void genpostamble(); void genfreeregs(); void genglobsym(struct symtable *node); int genglobstr(char *strvalue); int genprimsize(int type); int genalign(int type, int offset, int direction); void 
genreturn(int reg, int id); // cg.c int cgprimsize(int type); int cgalign(int type, int offset, int direction); void cgtextseg(); void cgdataseg(); void freeall_registers(void); void cgpreamble(); void cgpostamble(); void cgfuncpreamble(struct symtable *sym); void cgfuncpostamble(struct symtable *sym); int cgloadint(int value, int type); int cgloadglob(struct symtable *sym, int op); int cgloadlocal(struct symtable *sym, int op); int cgloadglobstr(int label); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdiv(int r1, int r2); int cgshlconst(int r, int val); int cgcall(struct symtable *sym, int numargs); void cgcopyarg(int r, int argposn); int cgstorglob(int r, struct symtable *sym); int cgstorlocal(int r, struct symtable *sym); void cgglobsym(struct symtable *node); void cgglobstr(int l, char *strvalue); int cgcompare_and_set(int ASTop, int r1, int r2); int cgcompare_and_jump(int ASTop, int r1, int r2, int label); void cglabel(int l); void cgjump(int l); int cgwiden(int r, int oldtype, int newtype); void cgreturn(int reg, struct symtable *sym); int cgaddress(struct symtable *sym); int cgderef(int r, int type); int cgstorderef(int r1, int r2, int type); int cgnegate(int r); int cginvert(int r); int cglognot(int r); int cgboolean(int r, int op, int label); int cgand(int r1, int r2); int cgor(int r1, int r2); int cgxor(int r1, int r2); int cgshl(int r1, int r2); int cgshr(int r1, int r2); void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel); // expr.c struct ASTnode *expression_list(int endtoken); struct ASTnode *binexpr(int ptp); // stmt.c struct ASTnode *compound_statement(int inswitch); // misc.c void match(int t, char *what); void semi(void); void lbrace(void); void rbrace(void); void lparen(void); void rparen(void); void ident(void); void comma(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c void 
appendsym(struct symtable **head, struct symtable **tail, struct symtable *node); struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn); struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn); struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int nelems); struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype); struct symtable *addstruct(char *name); struct symtable *addunion(char *name); struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int nelems); struct symtable *addenum(char *name, int class, int value); struct symtable *addtypedef(char *name, int type, struct symtable *ctype); struct symtable *findglob(char *s); struct symtable *findlocl(char *s); struct symtable *findsymbol(char *s); struct symtable *findmember(char *s); struct symtable *findstruct(char *s); struct symtable *findunion(char *s); struct symtable *findenumtype(char *s); struct symtable *findenumval(char *s); struct symtable *findtypedef(char *s); void clear_symtable(void); void freeloclsyms(void); // decl.c int declaration_list(struct symtable **ctype, int class, int et1, int et2); void global_declarations(void); // types.c int inttype(int type); int ptrtype(int type); int pointer_to(int type); int value_at(int type); int typesize(int type, struct symtable *ctype); struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op); ================================================ FILE: 40_Var_Initialisation_pt2/defs.h ================================================ #include #include #include #include // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 enum { TEXTLEN = 512 // Length of identifiers in input }; // Commands and default filenames #define AOUT "a.out" #ifdef __NASM__ #define ASCMD "nasm -f elf64 -o " #define LDCMD "cc -no-pie -fno-plt -Wall -o " #else 
#define ASCMD "as -o " #define LDCMD "cc -o " #endif #define CPPCMD "cpp -nostdinc -isystem " // Token types enum { T_EOF, // Binary operators T_ASSIGN, T_LOGOR, T_LOGAND, T_OR, T_XOR, T_AMPER, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, T_LSHIFT, T_RSHIFT, T_PLUS, T_MINUS, T_STAR, T_SLASH, // Other operators T_INC, T_DEC, T_INVERT, T_LOGNOT, // Type keywords T_VOID, T_CHAR, T_INT, T_LONG, // Other keywords T_IF, T_ELSE, T_WHILE, T_FOR, T_RETURN, T_STRUCT, T_UNION, T_ENUM, T_TYPEDEF, T_EXTERN, T_BREAK, T_CONTINUE, T_SWITCH, T_CASE, T_DEFAULT, // Structural tokens T_INTLIT, T_STRLIT, T_SEMI, T_IDENT, T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, T_LBRACKET, T_RBRACKET, T_COMMA, T_DOT, T_ARROW, T_COLON }; // Token structure struct token { int token; // Token type, from the enum list above char *tokstr; // String version of the token int intvalue; // For T_INTLIT, the integer value }; // AST node types. The first few line up // with the related tokens enum { A_ASSIGN = 1, A_LOGOR, A_LOGAND, A_OR, A_XOR, A_AND, A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_LSHIFT, A_RSHIFT, A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_INTLIT, A_STRLIT, A_IDENT, A_GLUE, A_IF, A_WHILE, A_FUNCTION, A_WIDEN, A_RETURN, A_FUNCCALL, A_DEREF, A_ADDR, A_SCALE, A_PREINC, A_PREDEC, A_POSTINC, A_POSTDEC, A_NEGATE, A_INVERT, A_LOGNOT, A_TOBOOL, A_BREAK, A_CONTINUE, A_SWITCH, A_CASE, A_DEFAULT }; // Primitive types. The bottom 4 bits is an integer // value that represents the level of indirection, // e.g. 0= no pointer, 1= pointer, 2= pointer pointer etc. 
enum { P_NONE, P_VOID = 16, P_CHAR = 32, P_INT = 48, P_LONG = 64, P_STRUCT=80, P_UNION=96 }; // Structural types enum { S_VARIABLE, S_FUNCTION, S_ARRAY }; // Storage classes enum { C_GLOBAL = 1, // Globally visible symbol C_LOCAL, // Locally visible symbol C_PARAM, // Locally visible function parameter C_EXTERN, // External globally visible symbol C_STRUCT, // A struct C_UNION, // A union C_MEMBER, // Member of a struct or union C_ENUMTYPE, // A named enumeration type C_ENUMVAL, // A named enumeration value C_TYPEDEF // A named typedef }; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol struct symtable *ctype; // If struct/union, ptr to that type int stype; // Structural type for the symbol int class; // Storage class for the symbol int size; // Total size in bytes of this symbol int nelems; // Functions: # params. Arrays: # elements union { int endlabel; // For functions, the end label int posn; // For locals, the negative offset // from the stack base pointer }; int *initlist; // List of initial values struct symtable *next; // Next symbol in one list struct symtable *member; // First member of a function, struct, }; // union or enum // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates int rvalue; // True if the node is an rvalue struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; struct symtable *sym; // For many AST nodes, the pointer to union { // the symbol in the symbol table int intvalue; // For A_INTLIT, the integer value int size; // For A_SCALE, the size to scale by }; }; enum { NOREG = -1, // Use NOREG when the AST generation // functions have no register to return NOLABEL = 0 // Use NOLABEL when we have no label to // pass to genAST() }; ================================================ FILE: 40_Var_Initialisation_pt2/expr.c 
================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of expressions // Copyright (c) 2019 Warren Toomey, GPL3 // expression_list: // | expression // | expression ',' expression_list // ; // Parse a list of zero or more comma-separated expressions and // return an AST composed of A_GLUE nodes with the left-hand child // being the sub-tree of previous expressions (or NULL) and the right-hand // child being the next expression. Each A_GLUE node will have size field // set to the number of expressions in the tree at this point. If no // expressions are parsed, NULL is returned struct ASTnode *expression_list(int endtoken) { struct ASTnode *tree = NULL; struct ASTnode *child = NULL; int exprcount = 0; // Loop until the end token while (Token.token != endtoken) { // Parse the next expression and increment the expression count child = binexpr(0); exprcount++; // Build an A_GLUE AST node with the previous tree as the left child // and the new expression as the right child. Store the expression count. tree = mkastnode(A_GLUE, P_NONE, tree, NULL, child, NULL, exprcount); // Stop when we reach the end token if (Token.token == endtoken) break; // Must have a ',' at this point match(T_COMMA, ","); } // Return the tree of expressions return (tree); } // Parse a function call and return its AST static struct ASTnode *funccall(void) { struct ASTnode *tree; struct symtable *funcptr; // Check that the identifier has been defined as a function, // then make a leaf node for it. if ((funcptr = findsymbol(Text)) == NULL || funcptr->stype != S_FUNCTION) { fatals("Undeclared function", Text); } // Get the '(' lparen(); // Parse the argument expression list tree = expression_list(T_RPAREN); // XXX Check type of each argument against the function's prototype // Build the function call AST node. Store the // function's return type as this node's type. 
// Also record the function's symbol-id tree = mkastunary(A_FUNCCALL, funcptr->type, tree, funcptr, 0); // Get the ')' rparen(); return (tree); } // Parse the index into an array and return an AST tree for it static struct ASTnode *array_access(void) { struct ASTnode *left, *right; struct symtable *aryptr; // Check that the identifier has been defined as an array // then make a leaf node for it that points at the base if ((aryptr = findsymbol(Text)) == NULL || aryptr->stype != S_ARRAY) { fatals("Undeclared array", Text); } left = mkastleaf(A_ADDR, aryptr->type, aryptr, 0); // Get the '[' scan(&Token); // Parse the following expression right = binexpr(0); // Get the ']' match(T_RBRACKET, "]"); // Ensure that this is of int type if (!inttype(right->type)) fatal("Array index is not of integer type"); // Scale the index by the size of the element's type right = modify_type(right, left->type, A_ADD); // Return an AST tree where the array's base has the offset // added to it, and dereference the element. Still an lvalue // at this point. left = mkastnode(A_ADD, aryptr->type, left, NULL, right, NULL, 0); left = mkastunary(A_DEREF, value_at(left->type), left, NULL, 0); return (left); } // Parse the member reference of a struct or union // and return an AST tree for it. If withpointer is true, // the access is through a pointer to the member. static struct ASTnode *member_access(int withpointer) { struct ASTnode *left, *right; struct symtable *compvar; struct symtable *typeptr; struct symtable *m; // Check that the identifier has been declared as a // struct/union or a struct/union pointer if ((compvar = findsymbol(Text)) == NULL) fatals("Undeclared variable", Text); if (withpointer && compvar->type != pointer_to(P_STRUCT) && compvar->type != pointer_to(P_UNION)) fatals("Undeclared variable", Text); if (!withpointer && compvar->type != P_STRUCT && compvar->type != P_UNION) fatals("Undeclared variable", Text); // If a pointer to a struct/union, get the pointer's value. 
// Otherwise, make a leaf node that points at the base // Either way, it's an rvalue if (withpointer) { left = mkastleaf(A_IDENT, pointer_to(compvar->type), compvar, 0); } else left = mkastleaf(A_ADDR, compvar->type, compvar, 0); left->rvalue = 1; // Get the details of the composite type typeptr = compvar->ctype; // Skip the '.' or '->' token and get the member's name scan(&Token); ident(); // Find the matching member's name in the type // Die if we can't find it for (m = typeptr->member; m != NULL; m = m->next) if (!strcmp(m->name, Text)) break; if (m == NULL) fatals("No member found in struct/union: ", Text); // Build an A_INTLIT node with the offset right = mkastleaf(A_INTLIT, P_INT, NULL, m->posn); // Add the member's offset to the base of the struct/union // and dereference it. Still an lvalue at this point left = mkastnode(A_ADD, pointer_to(m->type), left, NULL, right, NULL, 0); left = mkastunary(A_DEREF, m->type, left, NULL, 0); return (left); } // Parse a postfix expression and return // an AST node representing it. The // identifier is already in Text. static struct ASTnode *postfix(void) { struct ASTnode *n; struct symtable *varptr; struct symtable *enumptr; // If the identifier matches an enum value, // return an A_INTLIT node if ((enumptr = findenumval(Text)) != NULL) { scan(&Token); return (mkastleaf(A_INTLIT, P_INT, NULL, enumptr->posn)); } // Scan in the next token to see if we have a postfix expression scan(&Token); // Function call if (Token.token == T_LPAREN) return (funccall()); // An array reference if (Token.token == T_LBRACKET) return (array_access()); // Access into a struct or union if (Token.token == T_DOT) return (member_access(0)); if (Token.token == T_ARROW) return (member_access(1)); // A variable. Check that the variable exists. 
if ((varptr = findsymbol(Text)) == NULL || varptr->stype != S_VARIABLE) fatals("Unknown variable", Text); switch (Token.token) { // Post-increment: skip over the token case T_INC: scan(&Token); n = mkastleaf(A_POSTINC, varptr->type, varptr, 0); break; // Post-decrement: skip over the token case T_DEC: scan(&Token); n = mkastleaf(A_POSTDEC, varptr->type, varptr, 0); break; // Just a variable reference default: n = mkastleaf(A_IDENT, varptr->type, varptr, 0); } return (n); } // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; int id; switch (Token.token) { case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. // Make it a P_CHAR if it's within the P_CHAR range if (Token.intvalue >= 0 && Token.intvalue < 256) n = mkastleaf(A_INTLIT, P_CHAR, NULL, Token.intvalue); else n = mkastleaf(A_INTLIT, P_INT, NULL, Token.intvalue); break; case T_STRLIT: // For a STRLIT token, generate the assembly for it. // Then make a leaf AST node for it. id is the string's label. id = genglobstr(Text); n = mkastleaf(A_STRLIT, pointer_to(P_CHAR), NULL, id); break; case T_IDENT: return (postfix()); case T_LPAREN: // Beginning of a parenthesised expression, skip the '('. // Scan in the expression and the right parenthesis scan(&Token); n = binexpr(0); rparen(); return (n); default: fatals("Expecting a primary expression, got token", Token.tokstr); } // Scan in the next token and return the leaf node scan(&Token); return (n); } // Convert a binary operator token into a binary AST operation. // We rely on a 1:1 mapping from token to AST operation static int binastop(int tokentype) { if (tokentype > T_EOF && tokentype <= T_SLASH) return (tokentype); fatald("Syntax error, token", tokentype); return (0); // Keep -Wall happy } // Return true if a token is right-associative, // false otherwise. 
static int rightassoc(int tokentype) { if (tokentype == T_ASSIGN) return (1); return (0); } // Operator precedence for each token. Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 20, 30, // T_EOF, T_ASSIGN, T_LOGOR, T_LOGAND 40, 50, 60, // T_OR, T_XOR, T_AMPER 70, 70, // T_EQ, T_NE 80, 80, 80, 80, // T_LT, T_GT, T_LE, T_GE 90, 90, // T_LSHIFT, T_RSHIFT 100, 100, // T_PLUS, T_MINUS 110, 110 // T_STAR, T_SLASH }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec; if (tokentype > T_SLASH) fatald("Token with no precedence in op_precedence:", tokentype); prec = OpPrec[tokentype]; if (prec == 0) fatald("Syntax error, token", tokentype); return (prec); } // prefix_expression: primary // | '*' prefix_expression // | '&' prefix_expression // | '-' prefix_expression // | '++' prefix_expression // | '--' prefix_expression // ; // Parse a prefix expression and return // a sub-tree representing it. 
struct ASTnode *prefix(void) { struct ASTnode *tree; switch (Token.token) { case T_AMPER: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Ensure that it's an identifier if (tree->op != A_IDENT) fatal("& operator must be followed by an identifier"); // Now change the operator to A_ADDR and the type to // a pointer to the original type tree->op = A_ADDR; tree->type = pointer_to(tree->type); break; case T_STAR: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's either another deref or an // identifier if (tree->op != A_IDENT && tree->op != A_DEREF) fatal("* operator must be followed by an identifier or *"); // Prepend an A_DEREF operation to the tree tree = mkastunary(A_DEREF, value_at(tree->type), tree, NULL, 0); break; case T_MINUS: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_NEGATE operation to the tree and // make the child an rvalue. Because chars are unsigned, // also widen this to int so that it's signed tree->rvalue = 1; tree = modify_type(tree, P_INT, 0); tree = mkastunary(A_NEGATE, tree->type, tree, NULL, 0); break; case T_INVERT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_INVERT operation to the tree and // make the child an rvalue. tree->rvalue = 1; tree = mkastunary(A_INVERT, tree->type, tree, NULL, 0); break; case T_LOGNOT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_LOGNOT operation to the tree and // make the child an rvalue. 
tree->rvalue = 1; tree = mkastunary(A_LOGNOT, tree->type, tree, NULL, 0); break; case T_INC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("++ operator must be followed by an identifier"); // Prepend an A_PREINC operation to the tree tree = mkastunary(A_PREINC, tree->type, tree, NULL, 0); break; case T_DEC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("-- operator must be followed by an identifier"); // Prepend an A_PREDEC operation to the tree tree = mkastunary(A_PREDEC, tree->type, tree, NULL, 0); break; default: tree = primary(); } return (tree); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; struct ASTnode *ltemp, *rtemp; int ASTop; int tokentype; // Get the tree on the left. // Fetch the next token at the same time. 
left = prefix(); // If we hit one of several terminating tokens, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON) { left->rvalue = 1; return (left); } // While the precedence of this token is more than that of the // previous token precedence, or it's right associative and // equal to the previous token's precedence while ((op_precedence(tokentype) > ptp) || (rightassoc(tokentype) && op_precedence(tokentype) == ptp)) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Determine the operation to be performed on the sub-trees ASTop = binastop(tokentype); if (ASTop == A_ASSIGN) { // Assignment // Make the right tree into an rvalue right->rvalue = 1; // Ensure the right's type matches the left right = modify_type(right, left->type, 0); if (right == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree. However, switch // left and right around, so that the right expression's // code will be generated before the left expression ltemp = left; left = right; right = ltemp; } else { // We are not doing an assignment, so both trees should be rvalues // Convert both trees into rvalue if they are lvalue trees left->rvalue = 1; right->rvalue = 1; // Ensure the two types are compatible by trying // to modify each tree to match the other's type. ltemp = modify_type(left, right->type, ASTop); rtemp = modify_type(right, left->type, ASTop); if (ltemp == NULL && rtemp == NULL) fatal("Incompatible types in binary expression"); if (ltemp != NULL) left = ltemp; if (rtemp != NULL) right = rtemp; } // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. left = mkastnode(binastop(tokentype), left->type, left, NULL, right, NULL, 0); // Update the details of the current token. 
// If we hit a terminating token, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON) { left->rvalue = 1; return (left); } } // Return the tree we have when the precedence // is the same or lower left->rvalue = 1; return (left); } ================================================ FILE: 40_Var_Initialisation_pt2/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number int genlabel(void) { static int id = 1; return (id++); } // Generate the code for an IF statement // and an optional ELSE clause static int genIF(struct ASTnode *n, int looptoplabel, int loopendlabel) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. // When there is no ELSE clause, Lfalse _is_ // the ending label! Lfalse = genlabel(); if (n->right) Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. 
genAST(n->left, Lfalse, NOLABEL, NOLABEL, n->op); genfreeregs(); // Generate the true compound statement genAST(n->mid, NOLABEL, looptoplabel, loopendlabel, n->op); genfreeregs(); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); genfreeregs(); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = genlabel(); Lend = genlabel(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. genAST(n->left, Lend, Lstart, Lend, n->op); genfreeregs(); // Generate the compound statement for the body genAST(n->right, NOLABEL, Lstart, Lend, n->op); genfreeregs(); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Generate the code for a SWITCH statement static int genSWITCH(struct ASTnode *n) { int *caseval, *caselabel; int Ljumptop, Lend; int i, reg, defaultlabel = 0, casecount = 0; struct ASTnode *c; // Create arrays for the case values and associated labels. // Ensure that we have at least one position in each array. caseval = (int *) malloc((n->intvalue + 1) * sizeof(int)); caselabel = (int *) malloc((n->intvalue + 1) * sizeof(int)); // Generate labels for the top of the jump table, and the // end of the switch statement. Set a default label for // the end of the switch, in case we don't have a default. 
Ljumptop = genlabel(); Lend = genlabel(); defaultlabel = Lend; // Output the code to calculate the switch condition reg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, 0); cgjump(Ljumptop); genfreeregs(); // Walk the right-child linked list to // generate the code for each case for (i = 0, c = n->right; c != NULL; i++, c = c->right) { // Get a label for this case. Store it // and the case value in the arrays. // Record if it is the default case. caselabel[i] = genlabel(); caseval[i] = c->intvalue; cglabel(caselabel[i]); if (c->op == A_DEFAULT) defaultlabel = caselabel[i]; else casecount++; // Generate the case code. Pass in the end label for the breaks genAST(c->left, NOLABEL, NOLABEL, Lend, 0); genfreeregs(); } // Ensure the last case jumps past the switch table cgjump(Lend); // Now output the switch table and the end label. cgswitch(reg, casecount, Ljumptop, caselabel, caseval, defaultlabel); cglabel(Lend); return (NOREG); } // Generate the code to copy the arguments of a // function call to its parameters, then call the // function itself. Return the register that holds // the function's return value. static int gen_funccall(struct ASTnode *n) { struct ASTnode *gluetree = n->left; int reg; int numargs = 0; // If there is a list of arguments, walk this list // from the last argument (right-hand child) to the // first while (gluetree) { // Calculate the expression's value reg = genAST(gluetree->right, NOLABEL, NOLABEL, NOLABEL, gluetree->op); // Copy this into the n'th function parameter: size is 1, 2, 3, ... cgcopyarg(reg, gluetree->size); // Keep the first (highest) number of arguments if (numargs == 0) numargs = gluetree->size; genfreeregs(); gluetree = gluetree->left; } // Call the function, clean up the stack (based on numargs), // and return its result return (cgcall(n->sym, numargs)); } // Given an AST, an optional label, and the AST op // of the parent, generate assembly code recursively. // Return the register id with the tree's final value. 
int genAST(struct ASTnode *n, int iflabel, int looptoplabel, int loopendlabel, int parentASTop) { int leftreg, rightreg; // We have some specific AST node handling at the top // so that we don't evaluate the child sub-trees immediately switch (n->op) { case A_IF: return (genIF(n, looptoplabel, loopendlabel)); case A_WHILE: return (genWHILE(n)); case A_SWITCH: return (genSWITCH(n)); case A_FUNCCALL: return (gen_funccall(n)); case A_GLUE: // Do each child statement, and free the // registers after each child if (n->left != NULL) genAST(n->left, iflabel, looptoplabel, loopendlabel, n->op); genfreeregs(); if (n->right != NULL) genAST(n->right, iflabel, looptoplabel, loopendlabel, n->op); genfreeregs(); return (NOREG); case A_FUNCTION: // Generate the function's preamble before the code // in the child sub-tree cgfuncpreamble(n->sym); genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op); cgfuncpostamble(n->sym); return (NOREG); } // General AST node handling below // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op); if (n->right) rightreg = genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdiv(leftreg, rightreg)); case A_AND: return (cgand(leftreg, rightreg)); case A_OR: return (cgor(leftreg, rightreg)); case A_XOR: return (cgxor(leftreg, rightreg)); case A_LSHIFT: return (cgshl(leftreg, rightreg)); case A_RSHIFT: return (cgshr(leftreg, rightreg)); case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, compare registers // and set one to 1 or 0 based on the comparison. 
if (parentASTop == A_IF || parentASTop == A_WHILE) return (cgcompare_and_jump(n->op, leftreg, rightreg, iflabel)); else return (cgcompare_and_set(n->op, leftreg, rightreg)); case A_INTLIT: return (cgloadint(n->intvalue, n->type)); case A_STRLIT: return (cgloadglobstr(n->intvalue)); case A_IDENT: // Load our value if we are an rvalue // or we are being dereferenced if (n->rvalue || parentASTop == A_DEREF) { if (n->sym->class == C_GLOBAL) { return (cgloadglob(n->sym, n->op)); } else { return (cgloadlocal(n->sym, n->op)); } } else return (NOREG); case A_ASSIGN: // Are we assigning to an identifier or through a pointer? switch (n->right->op) { case A_IDENT: if (n->right->sym->class == C_GLOBAL) return (cgstorglob(leftreg, n->right->sym)); else return (cgstorlocal(leftreg, n->right->sym)); case A_DEREF: return (cgstorderef(leftreg, rightreg, n->right->type)); default: fatald("Can't A_ASSIGN in genAST(), op", n->op); } case A_WIDEN: // Widen the child's type to the parent's type return (cgwiden(leftreg, n->left->type, n->type)); case A_RETURN: cgreturn(leftreg, Functionid); return (NOREG); case A_ADDR: return (cgaddress(n->sym)); case A_DEREF: // If we are an rvalue, dereference to get the value we point at, // otherwise leave it for A_ASSIGN to store through the pointer if (n->rvalue) return (cgderef(leftreg, n->left->type)); else return (leftreg); case A_SCALE: // Small optimisation: use shift if the // scale value is a known power of two switch (n->size) { case 2: return (cgshlconst(leftreg, 1)); case 4: return (cgshlconst(leftreg, 2)); case 8: return (cgshlconst(leftreg, 3)); default: // Load a register with the size and // multiply the leftreg by this size rightreg = cgloadint(n->size, P_INT); return (cgmul(leftreg, rightreg)); } case A_POSTINC: case A_POSTDEC: // Load and decrement the variable's value into a register // and post increment/decrement it if (n->sym->class == C_GLOBAL) return (cgloadglob(n->sym, n->op)); else return (cgloadlocal(n->sym, n->op)); case 
A_PREINC: case A_PREDEC: // Load and decrement the variable's value into a register // and pre increment/decrement it if (n->left->sym->class == C_GLOBAL) return (cgloadglob(n->left->sym, n->op)); else return (cgloadlocal(n->left->sym, n->op)); case A_NEGATE: return (cgnegate(leftreg)); case A_INVERT: return (cginvert(leftreg)); case A_LOGNOT: return (cglognot(leftreg)); case A_TOBOOL: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, set the register // to 0 or 1 based on it's zeroeness or non-zeroeness return (cgboolean(leftreg, parentASTop, iflabel)); case A_BREAK: cgjump(loopendlabel); return (NOREG); case A_CONTINUE: cgjump(looptoplabel); return (NOREG); default: fatald("Unknown AST operator", n->op); } return (NOREG); // Keep -Wall happy } void genpreamble() { cgpreamble(); } void genpostamble() { cgpostamble(); } void genfreeregs() { freeall_registers(); } void genglobsym(struct symtable *node) { cgglobsym(node); } int genglobstr(char *strvalue) { int l = genlabel(); cgglobstr(l, strvalue); return (l); } int genprimsize(int type) { return (cgprimsize(type)); } int genalign(int type, int offset, int direction) { return (cgalign(type, offset, direction)); } ================================================ FILE: 40_Var_Initialisation_pt2/include/fcntl.h ================================================ #ifndef _FCNTL_H_ # define _FCNTL_H_ #define O_RDONLY 00 #define O_WRONLY 01 #define O_RDWR 02 int open(char *pathname, int flags); #endif // _FCNTL_H_ ================================================ FILE: 40_Var_Initialisation_pt2/include/stddef.h ================================================ #ifndef _STDDEF_H_ # define _STDDEF_H_ typedef long size_t; #endif //_STDDEF_H_ ================================================ FILE: 40_Var_Initialisation_pt2/include/stdio.h ================================================ #ifndef _STDIO_H_ # define _STDIO_H_ #include // This FILE definition will do for now typedef 
char * FILE; FILE *fopen(char *pathname, char *mode); size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream); size_t fwrite(void *ptr, size_t size, size_t nmemb, FILE *stream); int fclose(FILE *stream); int printf(char *format); int fprintf(FILE *stream, char *format); #endif // _STDIO_H_ ================================================ FILE: 40_Var_Initialisation_pt2/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Given a string with a '.' and at least a 1-character suffix // after the '.', change the suffix to be the given character. // Return the new string or NULL if the original string could // not be modified char *alter_suffix(char *str, char suffix) { char *posn; char *newstr; // Clone the string if ((newstr = strdup(str)) == NULL) return (NULL); // Find the '.' if ((posn = strrchr(newstr, '.')) == NULL) return (NULL); // Ensure there is a suffix posn++; if (*posn == '\0') return (NULL); // Change the suffix and NUL-terminate the string *posn++ = suffix; *posn = '\0'; return (newstr); } // Given an input filename, compile that file // down to assembly code. 
Return the new file's name static char *do_compile(char *filename) { char cmd[TEXTLEN]; // Change the input file's suffix to .s Outfilename = alter_suffix(filename, 's'); if (Outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .c on the end\n", filename); exit(1); } // Generate the pre-processor command snprintf(cmd, TEXTLEN, "%s %s %s", CPPCMD, INCDIR, filename); // Open up the pre-processor pipe if ((Infile = popen(cmd, "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", filename, strerror(errno)); exit(1); } Infilename = filename; // Create the output file if ((Outfile = fopen(Outfilename, "w")) == NULL) { fprintf(stderr, "Unable to create %s: %s\n", Outfilename, strerror(errno)); exit(1); } Line = 1; // Reset the scanner Putback = '\n'; clear_symtable(); // Clear the symbol table if (O_verbose) printf("compiling %s\n", filename); scan(&Token); // Get the first token from the input genpreamble(); // Output the preamble global_declarations(); // Parse the global declarations genpostamble(); // Output the postamble fclose(Outfile); // Close the output file return (Outfilename); } // Given an input filename, assemble that file // down to object code. Return the object filename char *do_assemble(char *filename) { char cmd[TEXTLEN]; int err; char *outfilename = alter_suffix(filename, 'o'); if (outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .s on the end\n", filename); exit(1); } // Build the assembly command and run it snprintf(cmd, TEXTLEN, "%s %s %s", ASCMD, outfilename, filename); if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Assembly of %s failed\n", filename); exit(1); } return (outfilename); } // Given a list of object files and an output filename, // link all of the object filenames together. 
void do_link(char *outfilename, char *objlist[]) { int cnt, size = TEXTLEN; char cmd[TEXTLEN], *cptr; int err; // Start with the linker command and the output file cptr = cmd; cnt = snprintf(cptr, size, "%s %s ", LDCMD, outfilename); cptr += cnt; size -= cnt; // Now append each object file while (*objlist != NULL) { cnt = snprintf(cptr, size, "%s ", *objlist); cptr += cnt; size -= cnt; objlist++; } if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Linking failed\n"); exit(1); } } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s [-vcST] [-o outfile] file [file ...]\n", prog); fprintf(stderr, " -v give verbose output of the compilation stages\n"); fprintf(stderr, " -c generate object files but don't link them\n"); fprintf(stderr, " -S generate assembly files but don't link them\n"); fprintf(stderr, " -T dump the AST trees for each input file\n"); fprintf(stderr, " -o outfile, produce the outfile executable file\n"); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. 
enum { MAXOBJ = 100 }; int main(int argc, char *argv[]) { char *outfilename = AOUT; char *asmfile, *objfile; char *objlist[MAXOBJ]; int i, objcnt = 0; // Initialise our variables O_dumpAST = 0; O_keepasm = 0; O_assemble = 0; O_verbose = 0; O_dolink = 1; // Scan for command-line options for (i = 1; i < argc; i++) { // No leading '-', stop scanning for options if (*argv[i] != '-') break; // For each option in this argument for (int j = 1; (*argv[i] == '-') && argv[i][j]; j++) { switch (argv[i][j]) { case 'o': outfilename = argv[++i]; // Save & skip to next argument break; case 'T': O_dumpAST = 1; break; case 'c': O_assemble = 1; O_keepasm = 0; O_dolink = 0; break; case 'S': O_keepasm = 1; O_assemble = 0; O_dolink = 0; break; case 'v': O_verbose = 1; break; default: usage(argv[0]); } } } // Ensure we have at lease one input file argument if (i >= argc) usage(argv[0]); // Work on each input file in turn while (i < argc) { asmfile = do_compile(argv[i]); // Compile the source file if (O_dolink || O_assemble) { objfile = do_assemble(asmfile); // Assemble it to object forma if (objcnt == (MAXOBJ - 2)) { fprintf(stderr, "Too many object files for the compiler to handle\n"); exit(1); } objlist[objcnt++] = objfile; // Add the object file's name objlist[objcnt] = NULL; // to the list of object files } if (!O_keepasm) // Remove the assembly file if unlink(asmfile); // we don't need to keep it i++; } // Now link all the object files together if (O_dolink) { do_link(outfilename, objlist); // If we don't need to keep the object // files, then remove them if (!O_assemble) { for (i = 0; objlist[i] != NULL; i++) unlink(objlist[i]); } } return (0); } ================================================ FILE: 40_Var_Initialisation_pt2/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" #include // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current token is t, // and fetch the next token. 
Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Match a comma and fetch the next token void comma(void) { match(T_COMMA, "comma"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d of %s\n", s, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d of %s\n", s1, s2, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d of %s\n", s, d, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d of %s\n", s, c, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } ================================================ FILE: 40_Var_Initialisation_pt2/scan.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { char *p; p = strchr(s, c); return (p ? p - s : -1); } // Get the next character from the input file. 
static int next(void) { int c, l; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return (c); } c = fgetc(Infile); // Read from input file while (c == '#') { // We've hit a pre-processor statement scan(&Token); // Get the line number into l if (Token.token != T_INTLIT) fatals("Expecting pre-processor line number, got:", Text); l = Token.intvalue; scan(&Token); // Get the filename in Text if (Token.token != T_STRLIT) fatals("Expecting pre-processor file name, got:", Text); if (Text[0] != '<') { // If this is a real filename if (strcmp(Text, Infilename)) // and not the one we have now Infilename = strdup(Text); // save it. Then update the line num Line = l; } while ((c = fgetc(Infile)) != '\n'); // Skip to the end of the line c = fgetc(Infile); // and get the next character } if ('\n' == c) Line++; // Increment line count return (c); } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. static int skip(void) { int c; c = next(); while (' ' == c || '\t' == c || '\n' == c || '\r' == c || '\f' == c) { c = next(); } return (c); } // Return the next character from a character // or string literal static int scanch(void) { int c; // Get the next input character and interpret // metacharacters that start with a backslash c = next(); if (c == '\\') { switch (c = next()) { case 'a': return '\a'; case 'b': return '\b'; case 'f': return '\f'; case 'n': return '\n'; case 'r': return '\r'; case 't': return '\t'; case 'v': return '\v'; case '\\': return '\\'; case '"': return '"'; case '\'': return '\''; default: fatalc("unknown escape sequence", c); } } return (c); // Just an ordinary old character! } // Scan and return an integer literal // value from the input file. 
static int scanint(int c) { int k, val = 0; // Convert each character into an int value while ((k = chrpos("0123456789", c)) >= 0) { val = val * 10 + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan in a string literal from the input file, // and store it in buf[]. Return the length of // the string. static int scanstr(char *buf) { int i, c; // Loop while we have enough buffer space for (i = 0; i < TEXTLEN - 1; i++) { // Get the next char and append to buf // Return when we hit the ending double quote if ((c = scanch()) == '"') { buf[i] = 0; return (i); } buf[i] = c; } // Ran out of buf[] space fatal("String literal too long"); return (0); } // Scan an identifier from the input file and // store it in buf[]. Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { fatal("Identifier too long"); } else if (i < lim - 1) { buf[i++] = c; } c = next(); } // We hit a non-valid character, put it back. // NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. 
static int keyword(char *s) { switch (*s) { case 'b': if (!strcmp(s, "break")) return (T_BREAK); break; case 'c': if (!strcmp(s, "case")) return (T_CASE); if (!strcmp(s, "char")) return (T_CHAR); if (!strcmp(s, "continue")) return (T_CONTINUE); break; case 'd': if (!strcmp(s, "default")) return (T_DEFAULT); break; case 'e': if (!strcmp(s, "else")) return (T_ELSE); if (!strcmp(s, "enum")) return (T_ENUM); if (!strcmp(s, "extern")) return (T_EXTERN); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'l': if (!strcmp(s, "long")) return (T_LONG); break; case 'r': if (!strcmp(s, "return")) return (T_RETURN); break; case 's': if (!strcmp(s, "struct")) return (T_STRUCT); if (!strcmp(s, "switch")) return (T_SWITCH); break; case 't': if (!strcmp(s, "typedef")) return (T_TYPEDEF); break; case 'u': if (!strcmp(s, "union")) return (T_UNION); break; case 'v': if (!strcmp(s, "void")) return (T_VOID); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; } return (0); } // A pointer to a rejected token static struct token *Rejtoken = NULL; // Reject the token that we just scanned void reject_token(struct token *t) { if (Rejtoken != NULL) fatal("Can't reject token twice"); Rejtoken = t; } // List of token strings, for debugging purposes char *Tstring[] = { "EOF", "=", "||", "&&", "|", "^", "&", "==", "!=", ",", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", "default", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":" }; // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. 
int scan(struct token *t) { int c, tokentype; // If we have any rejected token, return it if (Rejtoken != NULL) { t = Rejtoken; Rejtoken = NULL; return (1); } // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': if ((c = next()) == '+') { t->token = T_INC; } else { putback(c); t->token = T_PLUS; } break; case '-': if ((c = next()) == '-') { t->token = T_DEC; } else if (c == '>') { t->token = T_ARROW; } else { putback(c); t->token = T_MINUS; } break; case '*': t->token = T_STAR; break; case '/': t->token = T_SLASH; break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case '[': t->token = T_LBRACKET; break; case ']': t->token = T_RBRACKET; break; case '~': t->token = T_INVERT; break; case '^': t->token = T_XOR; break; case ',': t->token = T_COMMA; break; case '.': t->token = T_DOT; break; case ':': t->token = T_COLON; break; case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { putback(c); t->token = T_LOGNOT; } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else if (c == '<') { t->token = T_LSHIFT; } else { putback(c); t->token = T_LT; } break; case '>': if ((c = next()) == '=') { t->token = T_GE; } else if (c == '>') { t->token = T_RSHIFT; } else { putback(c); t->token = T_GT; } break; case '&': if ((c = next()) == '&') { t->token = T_LOGAND; } else { putback(c); t->token = T_AMPER; } break; case '|': if ((c = next()) == '|') { t->token = T_LOGOR; } else { putback(c); t->token = T_OR; } break; case '\'': // If it's a quote, scan in the // literal character value and // the trailing quote t->intvalue = scanch(); t->token = T_INTLIT; if (next() != '\'') fatal("Expected '\\'' at end of char literal"); 
break; case '"': // Scan in a literal string scanstr(Text); t->token = T_STRLIT; break; default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if ((tokentype = keyword(Text)) != 0) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token t->tokstr = Tstring[t->token]; return (1); } ================================================ FILE: 40_Var_Initialisation_pt2/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // Prototypes static struct ASTnode *single_statement(void); // compound_statement: // empty, i.e. no statement // | statement // | statement statements // ; // // statement: declaration // | expression_statement // | function_call // | if_statement // | while_statement // | for_statement // | return_statement // ; // if_statement: if_head // | if_head 'else' statement // ; // // if_head: 'if' '(' true_false_expression ')' statement ; // // Parse an IF statement including any // optional ELSE clause and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. 
condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); rparen(); // Get the AST for the statement trueAST = single_statement(); // If we have an 'else', skip it // and get the AST for the statement if (Token.token == T_ELSE) { scan(&Token); falseAST = single_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, P_NONE, condAST, trueAST, falseAST, NULL, 0)); } // while_statement: 'while' '(' true_false_expression ')' statement ; // // Parse a WHILE statement and return its AST static struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); rparen(); // Get the AST for the statement. // Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Build and return the AST for this statement return (mkastnode(A_WHILE, P_NONE, condAST, NULL, bodyAST, NULL, 0)); } // for_statement: 'for' '(' expression_list ';' // true_false_expression ';' // expression_list ')' statement ; // // Parse a FOR statement and return its AST static struct ASTnode *for_statement(void) { struct ASTnode *condAST, *bodyAST; struct ASTnode *preopAST, *postopAST; struct ASTnode *tree; // Ensure we have 'for' '(' match(T_FOR, "for"); lparen(); // Get the pre_op expression and the ';' preopAST = expression_list(T_SEMI); semi(); // Get the condition and the ';'. // Force a non-comparison to be boolean // the tree's operation is a comparison. 
condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); semi(); // Get the post_op expression and the ')' postopAST = expression_list(T_RPAREN); rparen(); // Get the statement which is the body // Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Glue the statement and the postop tree tree = mkastnode(A_GLUE, P_NONE, bodyAST, NULL, postopAST, NULL, 0); // Make a WHILE loop with the condition and this new body tree = mkastnode(A_WHILE, P_NONE, condAST, NULL, tree, NULL, 0); // And glue the preop tree to the A_WHILE tree return (mkastnode(A_GLUE, P_NONE, preopAST, NULL, tree, NULL, 0)); } // return_statement: 'return' '(' expression ')' ; // // Parse a return statement and return its AST static struct ASTnode *return_statement(void) { struct ASTnode *tree; // Can't return a value if function returns P_VOID if (Functionid->type == P_VOID) fatal("Can't return from a void function"); // Ensure we have 'return' '(' match(T_RETURN, "return"); lparen(); // Parse the following expression tree = binexpr(0); // Ensure this is compatible with the function's type tree = modify_type(tree, Functionid->type, 0); if (tree == NULL) fatal("Incompatible type to return"); // Add on the A_RETURN node tree = mkastunary(A_RETURN, P_NONE, tree, NULL, 0); // Get the ')' and ';' rparen(); semi(); return (tree); } // break_statement: 'break' ; // // Parse a break statement and return its AST static struct ASTnode *break_statement(void) { if (Looplevel == 0 && Switchlevel == 0) fatal("no loop or switch to break out from"); scan(&Token); semi(); return (mkastleaf(A_BREAK, 0, NULL, 0)); } // continue_statement: 'continue' ; // // Parse a continue statement and return its AST static struct ASTnode *continue_statement(void) { if (Looplevel == 0) fatal("no loop to continue to"); scan(&Token); semi(); return (mkastleaf(A_CONTINUE, 0, NULL, 0)); } // Parse a switch statement and 
return its AST static struct ASTnode *switch_statement(void) { struct ASTnode *left, *n, *c, *casetree= NULL, *casetail; int inloop=1, casecount=0; int seendefault=0; int ASTop, casevalue; // Skip the 'switch' and '(' scan(&Token); lparen(); // Get the switch expression, the ')' and the '{' left= binexpr(0); rparen(); lbrace(); // Ensure that this is of int type if (!inttype(left->type)) fatal("Switch expression is not of integer type"); // Build an A_SWITCH subtree with the expression as // the child n= mkastunary(A_SWITCH, 0, left, NULL, 0); // Now parse the cases Switchlevel++; while (inloop) { switch(Token.token) { // Leave the loop when we hit a '}' case T_RBRACE: if (casecount==0) fatal("No cases in switch"); inloop=0; break; case T_CASE: case T_DEFAULT: // Ensure this isn't after a previous 'default' if (seendefault) fatal("case or default after existing default"); // Set the AST operation. Scan the case value if required if (Token.token==T_DEFAULT) { ASTop= A_DEFAULT; seendefault= 1; scan(&Token); } else { ASTop= A_CASE; scan(&Token); left= binexpr(0); // Ensure the case value is an integer literal if (left->op != A_INTLIT) fatal("Expecting integer literal for case value"); casevalue= left->intvalue; // Walk the list of existing case values to ensure // that there isn't a duplicate case value for (c= casetree; c != NULL; c= c -> right) if (casevalue == c->intvalue) fatal("Duplicate case value"); } // Scan the ':' and get the compound expression match(T_COLON, ":"); left= compound_statement(1); casecount++; // Build a sub-tree with the compound statement as the left child // and link it in to the growing A_CASE tree if (casetree==NULL) { casetree= casetail= mkastunary(ASTop, 0, left, NULL, casevalue); } else { casetail->right= mkastunary(ASTop, 0, left, NULL, casevalue); casetail= casetail->right; } break; default: fatals("Unexpected token in switch", Token.tokstr); } } Switchlevel--; // We have a sub-tree with the cases and any default. 
Put the // case count into the A_SWITCH node and attach the case tree. n->intvalue= casecount; n->right= casetree; rbrace(); return(n); } // Parse a single statement and return its AST. static struct ASTnode *single_statement(void) { struct ASTnode *stmt; struct symtable *ctype; switch (Token.token) { case T_LBRACE: // We have a '{', so this is a compound statement lbrace(); stmt = compound_statement(0); rbrace(); return(stmt); case T_IDENT: // We have to see if the identifier matches a typedef. // If not, treat it as an expression. // Otherwise, fall down to the parse_type() call. if (findtypedef(Text) == NULL) { stmt= binexpr(0); semi(); return(stmt); } case T_CHAR: case T_INT: case T_LONG: case T_STRUCT: case T_UNION: case T_ENUM: case T_TYPEDEF: // The beginning of a variable declaration list. declaration_list(&ctype, C_LOCAL, T_SEMI, T_EOF); semi(); return (NULL); // No AST generated here case T_IF: return (if_statement()); case T_WHILE: return (while_statement()); case T_FOR: return (for_statement()); case T_RETURN: return (return_statement()); case T_BREAK: return (break_statement()); case T_CONTINUE: return (continue_statement()); case T_SWITCH: return (switch_statement()); default: // For now, see if this is an expression. // This catches assignment statements. stmt= binexpr(0); semi(); return(stmt); } return (NULL); // Keep -Wall happy } // Parse a compound statement // and return its AST. If inswitch is true, // we look for a '}', 'case' or 'default' token // to end the parsing. Otherwise, look for // just a '}' to end the parsing. 
struct ASTnode *compound_statement(int inswitch) { struct ASTnode *left = NULL; struct ASTnode *tree; while (1) { // Parse a single statement tree = single_statement(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, P_NONE, left, NULL, tree, NULL, 0); } // Leave if we've hit the end token if (Token.token == T_RBRACE) return(left); if (inswitch && (Token.token == T_CASE || Token.token == T_DEFAULT)) return(left); } } ================================================ FILE: 40_Var_Initialisation_pt2/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 // Append a node to the singly-linked list pointed to by head or tail void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node) { // Check for valid pointers if (head == NULL || tail == NULL || node == NULL) fatal("Either head, tail or node is NULL in appendsym"); // Append to the list if (*tail) { (*tail)->next = node; *tail = node; } else *head = *tail = node; node->next = NULL; } // Create a symbol node to be added to a symbol table list. // Set up the node's: // + type: char, int etc. // + ctype: composite type pointer for struct/union // + structural type: var, function, array etc. 
// + size: number of elements, or endlabel: end label for a function // + posn: Position information for local symbols // Return a pointer to the new node struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn) { // Get a new node struct symtable *node = (struct symtable *) malloc(sizeof(struct symtable)); if (node == NULL) fatal("Unable to malloc a symbol table node in newsym"); // Fill in the values node->name = strdup(name); node->type = type; node->ctype = ctype; node->stype = stype; node->class = class; node->nelems = nelems; // For pointers and integer types, set the size // of the symbol. structs and union declarations // manually set this up themselves. if (ptrtype(type) || inttype(type)) node->size = nelems * typesize(type, ctype); node->posn = posn; node->next = NULL; node->member = NULL; node->initlist = NULL; return (node); } // Add a symbol to the global symbol list struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn) { struct symtable *sym = newsym(name, type, ctype, stype, class, nelems, posn); // For structs and unions, copy the size from the type node if (type== P_STRUCT || type== P_UNION) sym->size = ctype->size; appendsym(&Globhead, &Globtail, sym); return (sym); } // Add a symbol to the local symbol list struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int nelems) { struct symtable *sym = newsym(name, type, ctype, stype, C_LOCAL, nelems, 0); // For structs and unions, copy the size from the type node if (type== P_STRUCT || type== P_UNION) sym->size = ctype->size; appendsym(&Loclhead, &Locltail, sym); return (sym); } // Add a symbol to the parameter list struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype) { struct symtable *sym = newsym(name, type, ctype, stype, C_PARAM, 1, 0); appendsym(&Parmhead, &Parmtail, sym); return (sym); } // Add a symbol to the temporary 
member list struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int nelems) { struct symtable *sym = newsym(name, type, ctype, stype, C_MEMBER, nelems, 0); // For structs and unions, copy the size from the type node if (type== P_STRUCT || type== P_UNION) sym->size = ctype->size; appendsym(&Membhead, &Membtail, sym); return (sym); } // Add a struct to the struct list struct symtable *addstruct(char *name) { struct symtable *sym = newsym(name, P_STRUCT, NULL, 0, C_STRUCT, 0, 0); appendsym(&Structhead, &Structtail, sym); return (sym); } // Add a struct to the union list struct symtable *addunion(char *name) { struct symtable *sym = newsym(name, P_UNION, NULL, 0, C_UNION, 0, 0); appendsym(&Unionhead, &Uniontail, sym); return (sym); } // Add an enum type or value to the enum list. // Class is C_ENUMTYPE or C_ENUMVAL. // Use posn to store the int value. struct symtable *addenum(char *name, int class, int value) { struct symtable *sym = newsym(name, P_INT, NULL, 0, class, 0, value); appendsym(&Enumhead, &Enumtail, sym); return (sym); } // Add a typedef to the typedef list struct symtable *addtypedef(char *name, int type, struct symtable *ctype) { struct symtable *sym = newsym(name, type, ctype, 0, C_TYPEDEF, 0, 0); appendsym(&Typehead, &Typetail, sym); return (sym); } // Search for a symbol in a specific list. // Return a pointer to the found node or NULL if not found. // If class is not zero, also match on the given class static struct symtable *findsyminlist(char *s, struct symtable *list, int class) { for (; list != NULL; list = list->next) if ((list->name != NULL) && !strcmp(s, list->name)) if (class==0 || class== list->class) return (list); return (NULL); } // Determine if the symbol s is in the global symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findglob(char *s) { return (findsyminlist(s, Globhead, 0)); } // Determine if the symbol s is in the local symbol table. 
// Return a pointer to the found node or NULL if not found. struct symtable *findlocl(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } return (findsyminlist(s, Loclhead, 0)); } // Determine if the symbol s is in the symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findsymbol(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } // Otherwise, try the local and global symbol lists node = findsyminlist(s, Loclhead, 0); if (node) return (node); return (findsyminlist(s, Globhead, 0)); } // Find a member in the member list // Return a pointer to the found node or NULL if not found. struct symtable *findmember(char *s) { return (findsyminlist(s, Membhead, 0)); } // Find a struct in the struct list // Return a pointer to the found node or NULL if not found. struct symtable *findstruct(char *s) { return (findsyminlist(s, Structhead, 0)); } // Find a struct in the union list // Return a pointer to the found node or NULL if not found. struct symtable *findunion(char *s) { return (findsyminlist(s, Unionhead, 0)); } // Find an enum type in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumtype(char *s) { return (findsyminlist(s, Enumhead, C_ENUMTYPE)); } // Find an enum value in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumval(char *s) { return (findsyminlist(s, Enumhead, C_ENUMVAL)); } // Find a type in the tyedef list // Return a pointer to the found node or NULL if not found. 
struct symtable *findtypedef(char *s) { return (findsyminlist(s, Typehead, 0)); } // Reset the contents of the symbol table void clear_symtable(void) { Globhead = Globtail = NULL; Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Membhead = Membtail = NULL; Structhead = Structtail = NULL; Unionhead = Uniontail = NULL; Enumhead = Enumtail = NULL; Typehead = Typetail = NULL; } // Clear all the entries in the local symbol table void freeloclsyms(void) { Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Functionid = NULL; } ================================================ FILE: 40_Var_Initialisation_pt2/tests/err.input31.c ================================================ Expecting a primary expression, got token:+ on line 5 of input31.c ================================================ FILE: 40_Var_Initialisation_pt2/tests/err.input32.c ================================================ Unknown variable:cow on line 4 of input32.c ================================================ FILE: 40_Var_Initialisation_pt2/tests/err.input33.c ================================================ Incompatible type to return on line 4 of input33.c ================================================ FILE: 40_Var_Initialisation_pt2/tests/err.input34.c ================================================ For now, declaration of non-global arrays is not implemented on line 4 of input34.c ================================================ FILE: 40_Var_Initialisation_pt2/tests/err.input35.c ================================================ Duplicate local variable declaration:a on line 4 of input35.c ================================================ FILE: 40_Var_Initialisation_pt2/tests/err.input36.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input36.c ================================================ FILE: 40_Var_Initialisation_pt2/tests/err.input37.c ================================================ Expected:comma on line 3 of 
input37.c ================================================ FILE: 40_Var_Initialisation_pt2/tests/err.input38.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input38.c ================================================ FILE: 40_Var_Initialisation_pt2/tests/err.input39.c ================================================ No statements in function with non-void type on line 4 of input39.c ================================================ FILE: 40_Var_Initialisation_pt2/tests/err.input40.c ================================================ No return for function with non-void type on line 4 of input40.c ================================================ FILE: 40_Var_Initialisation_pt2/tests/err.input41.c ================================================ Can't return from a void function on line 3 of input41.c ================================================ FILE: 40_Var_Initialisation_pt2/tests/err.input42.c ================================================ Undeclared function:fred on line 3 of input42.c ================================================ FILE: 40_Var_Initialisation_pt2/tests/err.input43.c ================================================ Undeclared array:b on line 3 of input43.c ================================================ FILE: 40_Var_Initialisation_pt2/tests/err.input44.c ================================================ Unknown variable:z on line 3 of input44.c ================================================ FILE: 40_Var_Initialisation_pt2/tests/err.input45.c ================================================ & operator must be followed by an identifier on line 3 of input45.c ================================================ FILE: 40_Var_Initialisation_pt2/tests/err.input46.c ================================================ * operator must be followed by an identifier or * on line 3 of input46.c ================================================ FILE: 40_Var_Initialisation_pt2/tests/err.input47.c 
================================================ ++ operator must be followed by an identifier on line 3 of input47.c ================================================ FILE: 40_Var_Initialisation_pt2/tests/err.input48.c ================================================ -- operator must be followed by an identifier on line 3 of input48.c ================================================ FILE: 40_Var_Initialisation_pt2/tests/err.input49.c ================================================ Incompatible expression in assignment on line 6 of input49.c ================================================ FILE: 40_Var_Initialisation_pt2/tests/err.input50.c ================================================ Incompatible types in binary expression on line 6 of input50.c ================================================ FILE: 40_Var_Initialisation_pt2/tests/err.input51.c ================================================ Expected '\'' at end of char literal on line 4 of input51.c ================================================ FILE: 40_Var_Initialisation_pt2/tests/err.input52.c ================================================ Unrecognised character:$ on line 5 of input52.c ================================================ FILE: 40_Var_Initialisation_pt2/tests/err.input56.c ================================================ unknown struct/union type:var1 on line 2 of input56.c ================================================ FILE: 40_Var_Initialisation_pt2/tests/err.input57.c ================================================ previously defined struct/union:fred on line 2 of input57.c ================================================ FILE: 40_Var_Initialisation_pt2/tests/err.input59.c ================================================ Undeclared variable:y on line 3 of input59.c ================================================ FILE: 40_Var_Initialisation_pt2/tests/err.input60.c ================================================ Undeclared variable:x on line 3 of input60.c 
================================================ FILE: 40_Var_Initialisation_pt2/tests/err.input61.c ================================================ Undeclared variable:x on line 3 of input61.c ================================================ FILE: 40_Var_Initialisation_pt2/tests/err.input64.c ================================================ undeclared enum type::fred on line 1 of input64.c ================================================ FILE: 40_Var_Initialisation_pt2/tests/err.input65.c ================================================ enum type redeclared::fred on line 2 of input65.c ================================================ FILE: 40_Var_Initialisation_pt2/tests/err.input66.c ================================================ enum value redeclared::z on line 2 of input66.c ================================================ FILE: 40_Var_Initialisation_pt2/tests/err.input68.c ================================================ redefinition of typedef:FOO on line 2 of input68.c ================================================ FILE: 40_Var_Initialisation_pt2/tests/err.input69.c ================================================ unknown type:FLOO on line 2 of input69.c ================================================ FILE: 40_Var_Initialisation_pt2/tests/err.input72.c ================================================ no loop or switch to break out from on line 1 of input72.c ================================================ FILE: 40_Var_Initialisation_pt2/tests/err.input73.c ================================================ no loop to continue to on line 1 of input73.c ================================================ FILE: 40_Var_Initialisation_pt2/tests/err.input75.c ================================================ Unexpected token in switch:if on line 4 of input75.c ================================================ FILE: 40_Var_Initialisation_pt2/tests/err.input76.c ================================================ No cases in switch on line 3 of input76.c 
================================================ FILE: 40_Var_Initialisation_pt2/tests/err.input77.c ================================================ case or default after existing default on line 6 of input77.c ================================================ FILE: 40_Var_Initialisation_pt2/tests/err.input78.c ================================================ case or default after existing default on line 6 of input78.c ================================================ FILE: 40_Var_Initialisation_pt2/tests/err.input79.c ================================================ Duplicate case value on line 6 of input79.c ================================================ FILE: 40_Var_Initialisation_pt2/tests/err.input85.c ================================================ Bad type in parameter list on line 1 of input85.c ================================================ FILE: 40_Var_Initialisation_pt2/tests/err.input86.c ================================================ Function definition not at global level on line 3 of input86.c ================================================ FILE: 40_Var_Initialisation_pt2/tests/err.input87.c ================================================ Bad type in member list on line 4 of input87.c ================================================ FILE: 40_Var_Initialisation_pt2/tests/err.input92.c ================================================ Integer literal value too big for char type on line 1 of input92.c ================================================ FILE: 40_Var_Initialisation_pt2/tests/err.input93.c ================================================ Expecting an integer literal value on line 1 of input93.c ================================================ FILE: 40_Var_Initialisation_pt2/tests/err.input94.c ================================================ Type mismatch: integer literal vs. 
variable on line 1 of input94.c ================================================ FILE: 40_Var_Initialisation_pt2/tests/err.input95.c ================================================ Variable can not be initialised:x on line 1 of input95.c ================================================ FILE: 40_Var_Initialisation_pt2/tests/err.input96.c ================================================ Array size is illegal:0 on line 1 of input96.c ================================================ FILE: 40_Var_Initialisation_pt2/tests/err.input97.c ================================================ For now, declaration of non-global arrays is not implemented on line 2 of input97.c ================================================ FILE: 40_Var_Initialisation_pt2/tests/err.input98.c ================================================ Too many values in initialisation list on line 1 of input98.c ================================================ FILE: 40_Var_Initialisation_pt2/tests/input01.c ================================================ int printf(char *fmt); void main() { printf("%d\n", 12 * 3); printf("%d\n", 18 - 2 * 4); printf("%d\n", 1 + 2 + 9 - 5/2 + 3*5); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input02.c ================================================ int printf(char *fmt); void main() { int fred; int jim; fred= 5; jim= 12; printf("%d\n", fred + jim); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input03.c ================================================ int printf(char *fmt); void main() { int x; x= 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input04.c ================================================ int printf(char *fmt); void main() { int x; x= 7 < 9; printf("%d\n", x); x= 7 <= 9; printf("%d\n", x); x= 7 != 
9; printf("%d\n", x); x= 7 == 7; printf("%d\n", x); x= 7 >= 7; printf("%d\n", x); x= 7 <= 7; printf("%d\n", x); x= 9 > 7; printf("%d\n", x); x= 9 >= 7; printf("%d\n", x); x= 9 != 7; printf("%d\n", x); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input05.c ================================================ int printf(char *fmt); void main() { int i; int j; i=6; j=12; if (i < j) { printf("%d\n", i); } else { printf("%d\n", j); } } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input06.c ================================================ int printf(char *fmt); void main() { int i; i=1; while (i <= 10) { printf("%d\n", i); i= i + 1; } } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input07.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input08.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input09.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { printf("%d\n", 2 * b - a); } } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input10.c ================================================ int printf(char *fmt); void main() { int i; char j; j= 20; printf("%d\n", j); i= 10; printf("%d\n", i); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 2; j= j + 1) { printf("%d\n", j); } } ================================================ FILE: 
40_Var_Initialisation_pt2/tests/input11.c ================================================ int printf(char *fmt); int main() { int i; char j; long k; i= 10; printf("%d\n", i); j= 20; printf("%d\n", j); k= 30; printf("%d\n", k); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 4; j= j + 1) { printf("%d\n", j); } for (k= 1; k <= 5; k= k + 1) { printf("%d\n", k); } return(i); printf("%d\n", 12345); return(3); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input12.c ================================================ int printf(char *fmt); int fred() { return(5); } void main() { int x; x= fred(2); printf("%d\n", x); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input13.c ================================================ int printf(char *fmt); int fred() { return(56); } void main() { int dummy; int result; dummy= printf("%d\n", 23); result= fred(10); dummy= printf("%d\n", result); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input14.c ================================================ int printf(char *fmt); int fred() { return(20); } int main() { int result; printf("%d\n", 10); result= fred(15); printf("%d\n", result); printf("%d\n", fred(15)+10); return(0); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input15.c ================================================ int printf(char *fmt); int main() { char a; char *b; char c; int d; int *e; int f; a= 18; printf("%d\n", a); b= &a; c= *b; printf("%d\n", c); d= 12; printf("%d\n", d); e= &d; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input16.c ================================================ int printf(char *fmt); int c; int d; int *e; int f; int main() { c= 12; d=18; printf("%d\n", c); e= &c + 1; f= *e; printf("%d\n", f); return(0); } 
================================================ FILE: 40_Var_Initialisation_pt2/tests/input17.c ================================================ int printf(char *fmt); int main() { char a; char *b; int d; int *e; b= &a; *b= 19; printf("%d\n", a); e= &d; *e= 12; printf("%d\n", d); return(0); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input18.c ================================================ int printf(char *fmt); int main() { int a; int b; a= b= 34; printf("%d\n", a); printf("%d\n", b); return(0); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input18a.c ================================================ int printf(char *fmt); int a; int *b; char c; char *d; int main() { b= &a; *b= 15; printf("%d\n", a); d= &c; *d= 16; printf("%d\n", c); return(0); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input19.c ================================================ int printf(char *fmt); int a; int b; int c; int d; int e; int main() { a= 2; b= 4; c= 3; d= 2; e= (a+b) * (c+d); printf("%d\n", e); return(0); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input20.c ================================================ int printf(char *fmt); int a; int b[25]; int main() { b[3]= 12; a= b[3]; printf("%d\n", a); return(0); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input21.c ================================================ int printf(char *fmt); char c; char *str; int main() { c= '\n'; printf("%d\n", c); for (str= "Hello world\n"; *str != 0; str= str + 1) { printf("%c", *str); } return(0); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input22.c ================================================ int printf(char *fmt); char a; char b; char c; int d; int e; int f; long g; long h; long i; int main() { b= 5; c= 7; a= b + c++; printf("%d\n", 
a); e= 5; f= 7; d= e + f++; printf("%d\n", d); h= 5; i= 7; g= h + i++; printf("%d\n", g); a= b-- + c; printf("%d\n", a); d= e-- + f; printf("%d\n", d); g= h-- + i; printf("%d\n", g); a= ++b + c; printf("%d\n", a); d= ++e + f; printf("%d\n", d); g= ++h + i; printf("%d\n", g); a= b * --c; printf("%d\n", a); d= e * --f; printf("%d\n", d); g= h * --i; printf("%d\n", g); return(0); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input23.c ================================================ int printf(char *fmt); char *str; int x; int main() { x= -23; printf("%d\n", x); printf("%d\n", -10 * -10); x= 1; x= ~x; printf("%d\n", x); x= 2 > 5; printf("%d\n", x); x= !x; printf("%d\n", x); x= !x; printf("%d\n", x); x= 13; if (x) { printf("%d\n", 13); } x= 0; if (!x) { printf("%d\n", 14); } for (str= "Hello world\n"; *str; str++) { printf("%c", *str); } return(0); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input24.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { a= 42; b= 19; printf("%d\n", a & b); printf("%d\n", a | b); printf("%d\n", a ^ b); printf("%d\n", 1 << 3); printf("%d\n", 63 >> 3); return(0); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input25.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { char z; int y; int x; x= 10; y= 20; z= 30; printf("%d\n", x); printf("%d\n", y); printf("%d\n", z); a= 5; b= 15; c= 25; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); return(0); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input26.c ================================================ int printf(char *fmt); int main(int a, char b, long c, int d, int e, int f, int g, int h) { int i; int j; int k; a= 13; printf("%d\n", a); b= 23; printf("%d\n", b); c= 34; printf("%d\n", c); d= 44; 
printf("%d\n", d); e= 54; printf("%d\n", e); f= 64; printf("%d\n", f); g= 74; printf("%d\n", g); h= 84; printf("%d\n", h); i= 94; printf("%d\n", i); j= 95; printf("%d\n", j); k= 96; printf("%d\n", k); return(0); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input27.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int param5(int a, int b, int c, int d, int e) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param2(int a, int b) { int c; int d; int e; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param0() { int a; int b; int c; int d; int e; a= 1; b= 2; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int main() { param8(1,2,3,4,5,6,7,8); param5(1,2,3,4,5); param2(1,2); param0(); return(0); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input28.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input29.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, 
int f, int g, int h); int fred(int a, int b, int c); int main(); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input30.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input30.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input31.c ================================================ int printf(char *fmt); int main() { int x; x= 2 + + 3 - * / ; } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input32.c ================================================ int printf(char *fmt); int main() { pizza cow llama sausage; } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input33.c ================================================ int printf(char *fmt); int main() { char *z; return(z); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input34.c ================================================ int printf(char *fmt); int main() { int a[12]; return(0); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input35.c ================================================ int printf(char *fmt); int fred(int a, int b) { int a; return(a); 
} ================================================ FILE: 40_Var_Initialisation_pt2/tests/input36.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c); ================================================ FILE: 40_Var_Initialisation_pt2/tests/input37.c ================================================ int printf(char *fmt); int fred(int a, char b +, int z); ================================================ FILE: 40_Var_Initialisation_pt2/tests/input38.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c, int g); ================================================ FILE: 40_Var_Initialisation_pt2/tests/input39.c ================================================ int printf(char *fmt); int main() { int a; } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input40.c ================================================ int printf(char *fmt); int main() { int a; a= 5; } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input41.c ================================================ int printf(char *fmt); void fred() { return(5); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input42.c ================================================ int printf(char *fmt); int main() { fred(5); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input43.c ================================================ int printf(char *fmt); int main() { int a; a= b[4]; } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input44.c ================================================ int printf(char *fmt); int main() { int a; a= z; } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input45.c ================================================ 
int printf(char *fmt); int main() { int a; a= &5; } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input46.c ================================================ int printf(char *fmt); int main() { int a; a= *5; } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input47.c ================================================ int printf(char *fmt); int main() { int a; a= ++5; } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input48.c ================================================ int printf(char *fmt); int main() { int a; a= --5; } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input49.c ================================================ int printf(char *fmt); int main() { int x; char y; y= x; } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input50.c ================================================ int printf(char *fmt); int main() { char *a; char *b; a= a + b; } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input51.c ================================================ int printf(char *fmt); int main() { char a; a= 'fred'; } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input52.c ================================================ int printf(char *fmt); int main() { int a; a= $5.00; } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input53.c ================================================ int printf(char *fmt); int main() { printf("Hello world, %d\n", 23); return(0); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input54.c ================================================ int printf(char *fmt); int main() { int i; for (i=0; i < 20; i++) { printf("Hello world, %d\n", i); } return(0); } 
================================================ FILE: 40_Var_Initialisation_pt2/tests/input55.c ================================================ int printf(char *fmt); int main(int argc, char **argv) { int i; char *argument; printf("Hello world\n"); for (i=0; i < argc; i++) { argument= *argv; argv= argv + 1; printf("Argument %d is %s\n", i, argument); } return(0); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input56.c ================================================ struct foo { int x; }; struct mary var1; ================================================ FILE: 40_Var_Initialisation_pt2/tests/input57.c ================================================ struct fred { int x; } ; struct fred { char y; } ; ================================================ FILE: 40_Var_Initialisation_pt2/tests/input58.c ================================================ int printf(char *fmt); struct fred { int x; char y; long z; }; struct fred var2; struct fred *varptr; int main() { long result; var2.x= 12; printf("%d\n", var2.x); var2.y= 'c'; printf("%d\n", var2.y); var2.z= 4005; printf("%d\n", var2.z); result= var2.x + var2.y + var2.z; printf("%d\n", result); varptr= &var2; result= varptr->x + varptr->y + varptr->z; printf("%d\n", result); return(0); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input59.c ================================================ int main() { int x; x= y.foo; } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input60.c ================================================ int main() { int x; x= x.foo; } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input61.c ================================================ int main() { int x; x= x->foo; } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input62.c ================================================ int printf(char *fmt); 
union fred { char w; int x; int y; long z; }; union fred var1; union fred *varptr; int main() { var1.x= 65; printf("%d\n", var1.x); var1.x= 66; printf("%d\n", var1.x); printf("%d\n", var1.y); printf("The next two depend on the endian of the platform\n"); printf("%d\n", var1.w); printf("%d\n", var1.z); varptr= &var1; varptr->x= 67; printf("%d\n", varptr->x); printf("%d\n", varptr->y); return(0); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input63.c ================================================ int printf(char *fmt); enum fred { apple=1, banana, carrot, pear=10, peach, mango, papaya }; enum jane { aple=1, bnana, crrot, par=10, pech, mago, paaya }; enum fred var1; enum jane var2; enum fred var3; int main() { var1= carrot + pear + mango; printf("%d\n", var1); return(0); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input64.c ================================================ enum fred var3; ================================================ FILE: 40_Var_Initialisation_pt2/tests/input65.c ================================================ enum fred { x, y, z }; enum fred { a, b }; ================================================ FILE: 40_Var_Initialisation_pt2/tests/input66.c ================================================ enum fred { x, y, z }; enum mary { a, b, z }; ================================================ FILE: 40_Var_Initialisation_pt2/tests/input67.c ================================================ int printf(char *fmt); typedef int FOO; FOO var1; struct bar { int x; int y} ; typedef struct bar BAR; BAR var2; int main() { var1= 5; printf("%d\n", var1); var2.x= 7; var2.y= 10; printf("%d\n", var2.x + var2.y); return(0); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input68.c ================================================ typedef int FOO; typedef char FOO; ================================================ FILE: 
40_Var_Initialisation_pt2/tests/input69.c ================================================ typedef int FOO; FLOO y; ================================================ FILE: 40_Var_Initialisation_pt2/tests/input70.c ================================================ #include typedef int FOO; int main() { FOO x; x= 56; printf("%d\n", x); return(0); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input71.c ================================================ #include int main() { int x; x = 0; while (x < 100) { if (x == 5) { x = x + 2; continue; } printf("%d\n", x); if (x == 14) { break; } x = x + 1; } printf("Done\n"); return (0); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input72.c ================================================ int main() { break; } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input73.c ================================================ int main() { continue; } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input74.c ================================================ #include int main() { int x; int y; y= 0; for (x=0; x < 5; x++) { switch(x) { case 1: { y= 5; break; } case 2: { y= 7; break; } case 3: { y= 9; } default: { y= 100; } } printf("%d\n", y); } return(0); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input75.c ================================================ int main() { int x; switch(x) { if (x<5); } } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input76.c ================================================ int main() { int x; switch(x) { } } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input77.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } case 4: { x= 2; } } } 
================================================ FILE: 40_Var_Initialisation_pt2/tests/input78.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } default: { x= 2; } } } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input79.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } case 2: { x= 2; } case 1: { x= 2; } default: { x= 2; } } } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input80.c ================================================ #include int x; int y; int main() { for (x=0, y=1; x < 6; x++, y=y+2) { printf("%d %d\n", x, y); } return(0); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input81.c ================================================ #include int x; int y; int main() { x= 0; y=1; for (;x<5;) { printf("%d %d\n", x, y); x=x+1; y=y+2; } return(0); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input82.c ================================================ #include int main() { int x; x= 10; // Dangling else test if (x > 5) if (x > 15) printf("x > 15\n"); else printf("15 >= x > 5\n"); else printf("5 >= x\n"); return(0); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input83.c ================================================ #include int main() { int x; // Dangling else test. 
// We should not print anything for x<= 5 for (x=0; x < 12; x++) if (x > 5) if (x > 10) printf("10 < %2d\n", x); else printf(" 5 < %2d <= 10\n", x); return(0); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input84.c ================================================ #include int main() { int x, y; x=2; y=3; printf("%d %d\n", x, y); char a, *b; a= 'f'; b= &a; printf("%c %c\n", a, *b); return(0); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input85.c ================================================ int main(struct foo { int x; }; , int a) { return(0); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input86.c ================================================ int main() { int fred() { return(5); } int x; x=2; return(x); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input87.c ================================================ struct foo { int x; int y; struct blah { int g; }; }; ================================================ FILE: 40_Var_Initialisation_pt2/tests/input88.c ================================================ #include struct foo { int x; int y; } fred, mary; struct foo james; int main() { fred.x= 5; mary.y= 6; printf("%d %d\n", fred.x, mary.y); return(0); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input89.c ================================================ #include int x= 23; char y= 'H'; char *z= "Hello world"; int main() { printf("%d %c %s\n", x, y, z); return(0); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input90.c ================================================ #include int a = 23, b = 100; char y = 'H', *z = "Hello world"; int main() { printf("%d %d %c %s\n", a, b, y, z); return (0); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input91.c 
================================================ #include int fred[] = { 1, 2, 3, 4, 5 }; int jim[10] = { 1, 2, 3, 4, 5 }; int main() { int i; for (i=0; i < 5; i++) printf("%d\n", fred[i]); for (i=0; i < 10; i++) printf("%d\n", jim[i]); return(0); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input92.c ================================================ char x= 3000; ================================================ FILE: 40_Var_Initialisation_pt2/tests/input93.c ================================================ char x= fred; ================================================ FILE: 40_Var_Initialisation_pt2/tests/input94.c ================================================ char *s= 54; ================================================ FILE: 40_Var_Initialisation_pt2/tests/input95.c ================================================ int fred(int x=2) { return(x); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input96.c ================================================ int fred[0]; ================================================ FILE: 40_Var_Initialisation_pt2/tests/input97.c ================================================ int main() { int x[45]; return(0); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/input98.c ================================================ int fred[3]= { 1, 2, 3, 4, 5 }; ================================================ FILE: 40_Var_Initialisation_pt2/tests/input99.c ================================================ #include // List of token strings, for debugging purposes. 
// As yet, we can't store a NULL into the list char *Tstring[] = { "EOF", "=", "||", "&&", "|", "^", "&", "==", "!=", ",", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", "default", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":", "" }; int main() { int i; char *str; i=0; while (1) { str= Tstring[i]; if (*str == 0) break; printf("%s\n", str); i++; } return(0); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/mktests ================================================ #!/bin/sh # Make the output files for each test # Build our compiler if needed if [ ! -f ../cwj ] then (cd ..; make install) fi for i in input*c do if [ ! -f "out.$i" -a ! -f "err.$i" ] then ../cwj -o out $i 2> "err.$i" # If the err file is empty if [ ! -s "err.$i" ] then rm -f "err.$i" cc -o out $i ./out > "out.$i" fi fi rm -f out out.s done ================================================ FILE: 40_Var_Initialisation_pt2/tests/out.input01.c ================================================ 36 10 25 ================================================ FILE: 40_Var_Initialisation_pt2/tests/out.input02.c ================================================ 17 ================================================ FILE: 40_Var_Initialisation_pt2/tests/out.input03.c ================================================ 1 2 3 4 5 ================================================ FILE: 40_Var_Initialisation_pt2/tests/out.input04.c ================================================ 1 1 1 1 1 1 1 1 1 ================================================ FILE: 40_Var_Initialisation_pt2/tests/out.input05.c ================================================ 6 ================================================ FILE: 40_Var_Initialisation_pt2/tests/out.input06.c 
================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 40_Var_Initialisation_pt2/tests/out.input07.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 40_Var_Initialisation_pt2/tests/out.input08.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 40_Var_Initialisation_pt2/tests/out.input09.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 40_Var_Initialisation_pt2/tests/out.input10.c ================================================ 20 10 1 2 3 4 5 253 254 255 0 1 ================================================ FILE: 40_Var_Initialisation_pt2/tests/out.input11.c ================================================ 10 20 30 1 2 3 4 5 253 254 255 0 1 2 3 1 2 3 4 5 ================================================ FILE: 40_Var_Initialisation_pt2/tests/out.input12.c ================================================ 5 ================================================ FILE: 40_Var_Initialisation_pt2/tests/out.input13.c ================================================ 23 56 ================================================ FILE: 40_Var_Initialisation_pt2/tests/out.input14.c ================================================ 10 20 30 ================================================ FILE: 40_Var_Initialisation_pt2/tests/out.input15.c ================================================ 18 18 12 12 ================================================ FILE: 40_Var_Initialisation_pt2/tests/out.input16.c ================================================ 12 18 ================================================ FILE: 40_Var_Initialisation_pt2/tests/out.input17.c ================================================ 19 12 ================================================ FILE: 
40_Var_Initialisation_pt2/tests/out.input18.c ================================================ 34 34 ================================================ FILE: 40_Var_Initialisation_pt2/tests/out.input18a.c ================================================ 15 16 ================================================ FILE: 40_Var_Initialisation_pt2/tests/out.input19.c ================================================ 30 ================================================ FILE: 40_Var_Initialisation_pt2/tests/out.input20.c ================================================ 12 ================================================ FILE: 40_Var_Initialisation_pt2/tests/out.input21.c ================================================ 10 Hello world ================================================ FILE: 40_Var_Initialisation_pt2/tests/out.input22.c ================================================ 12 12 12 13 13 13 13 13 13 35 35 35 ================================================ FILE: 40_Var_Initialisation_pt2/tests/out.input23.c ================================================ -23 100 -2 0 1 0 13 14 Hello world ================================================ FILE: 40_Var_Initialisation_pt2/tests/out.input24.c ================================================ 2 59 57 8 7 ================================================ FILE: 40_Var_Initialisation_pt2/tests/out.input25.c ================================================ 10 20 30 5 15 25 ================================================ FILE: 40_Var_Initialisation_pt2/tests/out.input26.c ================================================ 13 23 34 44 54 64 74 84 94 95 96 ================================================ FILE: 40_Var_Initialisation_pt2/tests/out.input27.c ================================================ 1 2 3 4 5 6 7 8 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 ================================================ FILE: 40_Var_Initialisation_pt2/tests/out.input28.c ================================================ 1 2 3 5 8 13 21 34 9 
================================================ FILE: 40_Var_Initialisation_pt2/tests/out.input29.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 40_Var_Initialisation_pt2/tests/out.input30.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input30.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 40_Var_Initialisation_pt2/tests/out.input53.c ================================================ Hello world, 23 ================================================ FILE: 40_Var_Initialisation_pt2/tests/out.input54.c ================================================ Hello world, 0 Hello world, 1 Hello world, 2 Hello world, 3 Hello world, 4 Hello world, 5 Hello world, 6 Hello world, 7 Hello world, 8 Hello world, 9 Hello world, 10 Hello world, 11 Hello world, 12 Hello world, 13 Hello world, 14 Hello world, 15 Hello world, 16 Hello world, 17 Hello world, 18 Hello world, 19 ================================================ FILE: 40_Var_Initialisation_pt2/tests/out.input55.c ================================================ Hello world Argument 0 is ./out ================================================ FILE: 40_Var_Initialisation_pt2/tests/out.input58.c ================================================ 12 99 4005 4116 4116 ================================================ FILE: 40_Var_Initialisation_pt2/tests/out.input62.c ================================================ 65 66 66 The next two depend on the endian of the platform 66 66 67 67 ================================================ FILE: 40_Var_Initialisation_pt2/tests/out.input63.c 
================================================ 25 ================================================ FILE: 40_Var_Initialisation_pt2/tests/out.input67.c ================================================ 5 17 ================================================ FILE: 40_Var_Initialisation_pt2/tests/out.input70.c ================================================ 56 ================================================ FILE: 40_Var_Initialisation_pt2/tests/out.input71.c ================================================ 0 1 2 3 4 7 8 9 10 11 12 13 14 Done ================================================ FILE: 40_Var_Initialisation_pt2/tests/out.input74.c ================================================ 100 5 7 100 100 ================================================ FILE: 40_Var_Initialisation_pt2/tests/out.input80.c ================================================ 0 1 1 3 2 5 3 7 4 9 5 11 ================================================ FILE: 40_Var_Initialisation_pt2/tests/out.input81.c ================================================ 0 1 1 3 2 5 3 7 4 9 ================================================ FILE: 40_Var_Initialisation_pt2/tests/out.input82.c ================================================ 15 >= x > 5 ================================================ FILE: 40_Var_Initialisation_pt2/tests/out.input83.c ================================================ 5 < 6 <= 10 5 < 7 <= 10 5 < 8 <= 10 5 < 9 <= 10 5 < 10 <= 10 10 < 11 ================================================ FILE: 40_Var_Initialisation_pt2/tests/out.input84.c ================================================ 2 3 f f ================================================ FILE: 40_Var_Initialisation_pt2/tests/out.input88.c ================================================ 5 6 ================================================ FILE: 40_Var_Initialisation_pt2/tests/out.input89.c ================================================ 23 H Hello world ================================================ FILE: 
40_Var_Initialisation_pt2/tests/out.input90.c ================================================ 23 100 H Hello world ================================================ FILE: 40_Var_Initialisation_pt2/tests/out.input91.c ================================================ 1 2 3 4 5 1 2 3 4 5 0 0 0 0 0 ================================================ FILE: 40_Var_Initialisation_pt2/tests/out.input99.c ================================================ EOF = || && | ^ & == != , > <= >= << >> + - * / ++ -- ~ ! void char int long if else while for return struct union enum typedef extern break continue switch case default intlit strlit ; identifier { } ( ) [ ] , . -> : ================================================ FILE: 40_Var_Initialisation_pt2/tests/runtests ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! -f ../cwj ] then (cd ..; make install) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" # Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../cwj -o out $i ./out > trial.$i # Compare this against the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../cwj $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" 
-eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.s "trial.$i" done ================================================ FILE: 40_Var_Initialisation_pt2/tests/runtestsn ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our NASM compiler if needed: the installn # target is the one that builds and installs compn if [ ! -f ../compn ] then (cd ..; make installn) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" # Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../compn -o out $i ./out > trial.$i # Compare this against the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../compn $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" 
-eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.o out.s "trial.$i" done ================================================ FILE: 40_Var_Initialisation_pt2/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->type = type; n->left = left; n->mid = mid; n->right = right; n->sym = sym; n->intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, struct symtable *sym, int intvalue) { return (mkastnode(op, type, NULL, NULL, NULL, sym, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, struct symtable *sym, int intvalue) { return (mkastnode(op, type, left, NULL, NULL, sym, intvalue)); } // Generate and return a new label number // just for AST dumping purposes static int gendumplabel(void) { static int id = 1; return (id++); } // Given an AST tree, print it out and follow the // traversal of the tree that genAST() follows void dumpAST(struct ASTnode *n, int label, int level) { int Lfalse, Lstart, Lend; switch (n->op) { case A_IF: Lfalse = gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_IF"); if (n->right) { Lend = gendumplabel(); fprintf(stdout, ", end L%d", Lend); } fprintf(stdout, "\n"); dumpAST(n->left, Lfalse, level + 2); dumpAST(n->mid, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); return; case A_WHILE: Lstart = 
gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_WHILE, start L%d\n", Lstart); Lend = gendumplabel(); dumpAST(n->left, Lend, level + 2); dumpAST(n->right, NOLABEL, level + 2); return; } // Reset level to -2 for A_GLUE if (n->op == A_GLUE) level = -2; // General AST node handling if (n->left) dumpAST(n->left, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); for (int i = 0; i < level; i++) fprintf(stdout, " "); switch (n->op) { case A_GLUE: fprintf(stdout, "\n\n"); return; case A_FUNCTION: fprintf(stdout, "A_FUNCTION %s\n", n->sym->name); return; case A_ADD: fprintf(stdout, "A_ADD\n"); return; case A_SUBTRACT: fprintf(stdout, "A_SUBTRACT\n"); return; case A_MULTIPLY: fprintf(stdout, "A_MULTIPLY\n"); return; case A_DIVIDE: fprintf(stdout, "A_DIVIDE\n"); return; case A_EQ: fprintf(stdout, "A_EQ\n"); return; case A_NE: fprintf(stdout, "A_NE\n"); return; case A_LT: fprintf(stdout, "A_LT\n"); return; case A_GT: fprintf(stdout, "A_GT\n"); return; case A_LE: fprintf(stdout, "A_LE\n"); return; case A_GE: fprintf(stdout, "A_GE\n"); return; case A_INTLIT: fprintf(stdout, "A_INTLIT %d\n", n->intvalue); return; case A_STRLIT: fprintf(stdout, "A_STRLIT rval label L%d\n", n->intvalue); return; case A_IDENT: if (n->rvalue) fprintf(stdout, "A_IDENT rval %s\n", n->sym->name); else fprintf(stdout, "A_IDENT %s\n", n->sym->name); return; case A_ASSIGN: fprintf(stdout, "A_ASSIGN\n"); return; case A_WIDEN: fprintf(stdout, "A_WIDEN\n"); return; case A_RETURN: fprintf(stdout, "A_RETURN\n"); return; case A_FUNCCALL: fprintf(stdout, "A_FUNCCALL %s\n", n->sym->name); return; case A_ADDR: fprintf(stdout, "A_ADDR %s\n", n->sym->name); return; case A_DEREF: if (n->rvalue) fprintf(stdout, "A_DEREF rval\n"); else fprintf(stdout, "A_DEREF\n"); return; case A_SCALE: fprintf(stdout, "A_SCALE %d\n", n->size); return; case A_PREINC: fprintf(stdout, "A_PREINC %s\n", n->sym->name); return; case A_PREDEC: fprintf(stdout, "A_PREDEC %s\n", 
n->sym->name); return; case A_POSTINC: fprintf(stdout, "A_POSTINC\n"); return; case A_POSTDEC: fprintf(stdout, "A_POSTDEC\n"); return; case A_NEGATE: fprintf(stdout, "A_NEGATE\n"); return; case A_BREAK: fprintf(stdout, "A_BREAK\n"); return; case A_CONTINUE: fprintf(stdout, "A_CONTINUE\n"); return; case A_CASE: fprintf(stdout, "A_CASE %d\n", n->intvalue); return; case A_DEFAULT: fprintf(stdout, "A_DEFAULT\n"); return; case A_SWITCH: fprintf(stdout, "A_SWITCH\n"); return; default: fatald("Unknown dumpAST operator", n->op); } } ================================================ FILE: 40_Var_Initialisation_pt2/types.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Types and type handling // Copyright (c) 2019 Warren Toomey, GPL3 // Return true if a type is an int type // of any size, false otherwise int inttype(int type) { return (((type & 0xf) == 0) && (type >= P_CHAR && type <= P_LONG)); } // Return true if a type is of pointer type int ptrtype(int type) { return ((type & 0xf) != 0); } // Given a primitive type, return // the type which is a pointer to it int pointer_to(int type) { if ((type & 0xf) == 0xf) fatald("Unrecognised in pointer_to: type", type); return (type + 1); } // Given a primitive pointer type, return // the type which it points to int value_at(int type) { if ((type & 0xf) == 0x0) fatald("Unrecognised in value_at: type", type); return (type - 1); } // Given a type and a composite type pointer, return // the size of this type in bytes int typesize(int type, struct symtable *ctype) { if (type == P_STRUCT || type == P_UNION) return (ctype->size); return (genprimsize(type)); } // Given an AST tree and a type which we want it to become, // possibly modify the tree by widening or scaling so that // it is compatible with this type. Return the original tree // if no changes occurred, a modified tree, or NULL if the // tree is not compatible with the given type. 
// If this will be part of a binary operation, the AST op is not zero. struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op) { int ltype; int lsize, rsize; ltype = tree->type; // XXX No idea on these yet if (ltype == P_STRUCT || ltype == P_UNION) fatal("Don't know how to do this yet"); if (rtype == P_STRUCT || rtype == P_UNION) fatal("Don't know how to do this yet"); // Compare scalar int types if (inttype(ltype) && inttype(rtype)) { // Both types same, nothing to do if (ltype == rtype) return (tree); // Get the sizes for each type lsize = typesize(ltype, NULL); // XXX Fix soon rsize = typesize(rtype, NULL); // XXX Fix soon // Tree's size is too big if (lsize > rsize) return (NULL); // Widen to the right if (rsize > lsize) return (mkastunary(A_WIDEN, rtype, tree, NULL, 0)); } // For pointers on the left if (ptrtype(ltype)) { // OK is same type on right and not doing a binary op if (op == 0 && ltype == rtype) return (tree); } // We can scale only on A_ADD or A_SUBTRACT operation if (op == A_ADD || op == A_SUBTRACT) { // Left is int type, right is pointer type and the size // of the original type is >1: scale the left if (inttype(ltype) && ptrtype(rtype)) { rsize = genprimsize(value_at(rtype)); if (rsize > 1) return (mkastunary(A_SCALE, rtype, tree, NULL, rsize)); else return (tree); // Size 1, no need to scale } } // If we get here, the types are not compatible return (NULL); } ================================================ FILE: 41_Local_Var_Init/Makefile ================================================ # Define the location of the include directory # and the location to install the compiler binary INCDIR=/tmp/include BINDIR=/tmp HSRCS= data.h decl.h defs.h SRCS= cg.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c ARMSRCS= cg_arm.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c cwj: $(SRCS) $(HSRCS) cc -o cwj -g -Wall 
-DINCDIR=\"$(INCDIR)\" $(SRCS) compn: $(SRCN) $(HSRCS) cc -D__NASM__ -o compn -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCN) cwjarm: $(ARMSRCS) $(HSRCS) cc -o cwjarm -g -Wall $(ARMSRCS) cp cwjarm cwj install: cwj mkdir -p $(INCDIR) rsync -a include/. $(INCDIR) cp cwj $(BINDIR) chmod +x $(BINDIR)/cwj installn: compn mkdir -p $(INCDIR) rsync -a include/. $(INCDIR) cp compn $(BINDIR) chmod +x $(BINDIR)/compn clean: rm -f cwj cwjarm compn *.o *.s out a.out test: install tests/runtests (cd tests; chmod +x runtests; ./runtests) armtest: cwjarm tests/runtests (cd tests; chmod +x runtests; ./runtests) testn: installn tests/runtestsn (cd tests; chmod +x runtestsn; ./runtestsn) ================================================ FILE: 41_Local_Var_Init/Readme.md ================================================ # Part 41: Local Variable Initialisation Well, after the significant list of changes in the last part, doing local variable initialisation was easy. We want to be able to do this sort of thing inside functions: ```c int x= 2, y= x+3, z= 5 * x - y; char *foo= "Hello world"; ``` As we are inside a function, we can build an AST tree for the expression, build an A_IDENT node for the variable and join them together with an A_ASSIGN parent node. And, because there can be several declarations with assignments, we may need to build an A_GLUE tree which holds all of the assignment trees. The only wrinkle is that the code which parses local declarations is quite a call distance away from the code that deals with statement parsing. 
In fact: + `single_statement()` in `stmt.c` sees a type identifier and calls + `declaration_list()` in `decl.c` to parse several declarations, which calls + `symbol_declaration()` to parse one declaration, which calls + `scalar_declaration()` to parse a scalar variable declaration and assignment The main problem is that all of these functions already return a value, so we can't build an AST tree in `scalar_declaration()` and return it back to `single_statement()`. Also, `declaration_list()` parses multiple declarations, so it will have the job of building the A_GLUE tree to hold them all together. The solution is to pass down a "pointer pointer" from `single_statement()` to `declaration_list()`, so that we can pass back the pointer to the A_GLUE tree. Similarly, we will pass a "pointer pointer" from `declaration_list()` down to `scalar_declaration()`, which will pass back the pointer to any assignment tree that it has built. ## Changes to `scalar_declaration()` If we are in local context and we hit an '=' in a scalar variable's declaration, here is what we do: ```c struct ASTnode *varnode, *exprnode; struct ASTnode **tree; // is the ptr ptr argument that we get passed // The variable is being initialised if (Token.token == T_ASSIGN) { ... if (class == C_LOCAL) { // Make an A_IDENT AST node with the variable varnode = mkastleaf(A_IDENT, sym->type, sym, 0); // Get the expression for the assignment, make into a rvalue exprnode = binexpr(0); exprnode->rvalue = 1; // Ensure the expression's type matches the variable exprnode = modify_type(exprnode, varnode->type, 0); if (exprnode == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree *tree = mkastnode(A_ASSIGN, exprnode->type, exprnode, NULL, varnode, NULL, 0); } } ``` That's it. We simulate the AST tree building that would normally occur in `expr.c` for an assignment expression. Once done, we pass back the assignment tree. This bubbles back up to `declaration_list()`. 
It now does: ```c struct ASTnode **gluetree; // is the ptr ptr argument that we get passed struct ASTnode *tree; *gluetree= NULL; ... // Now parse the list of symbols while (1) { ... // Parse this symbol sym = symbol_declaration(type, *ctype, class, &tree); ... // Glue any AST tree from a local declaration // to build a sequence of assignments to perform if (*gluetree== NULL) *gluetree= tree; else *gluetree = mkastnode(A_GLUE, P_NONE, *gluetree, NULL, tree, NULL, 0); ... } ``` So `gluetree` is set to the AST tree with a bunch of A_GLUE nodes, each of which has an A_ASSIGN child with an A_IDENT child and an expression child. And, way up in `single_statement()` in `stmt.c`: ```c ... case T_IDENT: // We have to see if the identifier matches a typedef. // If not, treat it as an expression. // Otherwise, fall down to the parse_type() call. if (findtypedef(Text) == NULL) { stmt= binexpr(0); semi(); return(stmt); } case T_CHAR: case T_INT: case T_LONG: case T_STRUCT: case T_UNION: case T_ENUM: case T_TYPEDEF: // The beginning of a variable declaration list. declaration_list(&ctype, C_LOCAL, T_SEMI, T_EOF, &stmt); semi(); return (stmt); // Any assignments from the declarations ... ``` ## Testing the New Code The above changes were so short and simple that they compiled and worked first time. This is not a regular occurrence! Our test program, `tests/input100.c`, is this: ```c #include <stdio.h> int main() { int x= 3, y=14; int z= 2 * x + y; char *str= "Hello world"; printf("%s %d %d\n", str, x+y, z); return(0); } ``` and produces the following correct output: `Hello world 17 20`. ## Conclusion and What's Next It's nice to have a simple part on this journey now and then. I'm now starting to take wagers with myself as to: + how many parts to the journey, in total, there will be, and + will I get it all done by the end of the year Right now I'm guessing about 60 parts and a 75% chance of completing by year's end. 
But we still have a bunch of small, but possibly difficult, features to add to the compiler. In the next part of our compiler writing journey, I will add cast parsing to the compiler. [Next step](../42_Casting/Readme.md) ================================================ FILE: 41_Local_Var_Init/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section we are outputting in to enum { no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\t.text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\t.data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch (type) { case P_CHAR: return (offset); case P_INT: case P_LONG: break; default: // Pointers are aligned like ints and longs; // any other type is not a scalar we can align if (!ptrtype(type)) fatald("Bad type in calc_aligned_offset:", type); } // Here we have an int, a long or a pointer. Align it on a 4-byte offset // I put the generic code here so it can be reused elsewhere. 
alignment = 4; offset = (offset + direction * (alignment - 1)) & ~(alignment - 1); return (offset); } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. static int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. // We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "%r10", "%r11", "%r12", "%r13", "%r9", "%r8", "%rcx", "%rdx", "%rsi", "%rdi" }; static char *breglist[] = { "%r10b", "%r11b", "%r12b", "%r13b", "%r9b", "%r8b", "%cl", "%dl", "%sil", "%dil" }; static char *dreglist[] = { "%r10d", "%r11d", "%r12d", "%r13d", "%r9d", "%r8d", "%ecx", "%edx", "%esi", "%edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); cgtextseg(); fprintf(Outfile, "# internal switch(expr) routine\n" "# %%rsi = switch table, %%rax = expr\n" "# from SubC: http://www.t3x.org/subc/\n" "\n" "switch:\n" " pushq %%rsi\n" " movq %%rdx, %%rsi\n" " movq %%rax, %%rbx\n" " cld\n" " lodsq\n" " movq %%rax, %%rcx\n" "next:\n" " lodsq\n" " movq %%rax, %%rdx\n" " lodsq\n" " cmpq %%rdx, %%rbx\n" " jnz no\n" " popq %%rsi\n" " jmp *%%rax\n" "no:\n" " loop next\n" " lodsq\n" " popq %%rsi\n" " jmp *%%rax\n" "\n"); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(struct symtable *sym) { char *name = sym->name; struct symtable *parm, *locvar; int cnt; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the old %rbp and copy %rsp into %rbp fprintf(Outfile, "\t.globl\t%s\n" "\t.type\t%s, @function\n" "%s:\n" "\tpushq\t%%rbp\n" "\tmovq\t%%rsp, %%rbp\n", name, name, name); // Copy any in-register parameters to the stack, up to six of them // The remaining parameters are already on the stack for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) { if (cnt > 6) { parm->posn = paramOffset; paramOffset += 8; } else { parm->posn = newlocaloffset(parm->type); cgstorlocal(paramReg--, parm); } } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. 
for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) { locvar->posn = newlocaloffset(locvar->type); } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\taddq\t$%d,%%rsp\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->endlabel); fprintf(Outfile, "\taddq\t$%d,%%rsp\n", stackOffset); fputs("\tpopq %rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadglob(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name); } else // Print out the code to initialise it switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovzbq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name); if (op == 
A_PREDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovslq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadlocal(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->posn); fprintf(Outfile, "\tmovq\t%d(%%rbp), %s\n", sym->posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->posn); } else switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->posn); fprintf(Outfile, "\tmovzbq\t%d(%%rbp), %s\n", sym->posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->posn); fprintf(Outfile, "\tmovslq\t%d(%%rbp), %s\n", sym->posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->posn); break; default: fatald("Bad type in cgloadlocal:", sym->type); } return (r); } // Given the label number of a global string, // load its address into a new 
register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tleaq\tL%d(%%rip), %s\n", label, reglist[r]); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tandq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\torq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txorq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshlq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshrq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tnegq\t%s\n", reglist[r]); return (r); } 
// Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnotq\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); return (r); } // Convert an integer value to a boolean value. Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s@PLT\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\taddq\t$%d, %%rsp\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpushq\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmovq\t%s, %s\n", reglist[r], reglist[FIRSTPARAMREG - argposn + 1]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsalq\t$%d, %s\n", val, reglist[r]); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %s(%%rip)\n", reglist[r], sym->name); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %s(%%rip)\n", breglist[r], sym->name); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %s(%%rip)\n", dreglist[r], sym->name); break; default: fatald("Bad type in cgstorglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %d(%%rbp)\n", reglist[r], sym->posn); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %d(%%rbp)\n", breglist[r], sym->posn); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %d(%%rbp)\n", dreglist[r], sym->posn); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size, type; int initvalue; int i; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the variable (or its elements if an array) // and the type of the variable if (node->stype == S_ARRAY) { size= typesize(value_at(node->type), node->ctype); type= value_at(node->type); } else { size = node->size; type= node->type; } // Generate the global identity and the label cgdataseg(); fprintf(Outfile, "\t.globl\t%s\n", node->name); fprintf(Outfile, 
"%s:\n", node->name); // Output space for one or more elements for (i=0; i < node->nelems; i++) { // Get any initial value initvalue= 0; if (node->initlist != NULL) initvalue= node->initlist[i]; // Generate the space for this type switch (size) { case 1: fprintf(Outfile, "\t.byte\t%d\n", initvalue); break; case 4: fprintf(Outfile, "\t.long\t%d\n", initvalue); break; case 8: // Generate the pointer to a string literal if (node->initlist != NULL && type== pointer_to(P_CHAR)) fprintf(Outfile, "\t.quad\tL%d\n", initvalue); else fprintf(Outfile, "\t.quad\t%d\n", initvalue); break; default: for (int i = 0; i < size; i++) fprintf(Outfile, "\t.byte\t0\n"); } } } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\t.byte\t%d\n", *cptr); } fprintf(Outfile, "\t.byte\t0\n"); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzbl\t%s, %%eax\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %%eax\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } cgjump(sym->endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL) fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", sym->name, reglist[r]); else fprintf(Outfile, "\tleaq\t%d(%%rbp), %s\n", sym->posn, reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzbq\t(%s), %s\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovslq\t(%s), %s\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { // Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmovb\t%s, (%s)\n", breglist[r1], reglist[r2]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } // Generate a switch jump table and the code to // load the registers and call the switch() code void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; // Get a label for the switch table label = genlabel(); cglabel(label); // Heuristic. If we have no cases, create one case // which points to the default case if (casecount == 0) { caseval[0] = 0; caselabel[0] = defaultlabel; casecount = 1; } // Generate the switch jump table. 
fprintf(Outfile, "\t.quad\t%d\n", casecount); for (i = 0; i < casecount; i++) fprintf(Outfile, "\t.quad\t%d, L%d\n", caseval[i], caselabel[i]); fprintf(Outfile, "\t.quad\tL%d\n", defaultlabel); // Load the specific registers cglabel(toplabel); fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); fprintf(Outfile, "\tleaq\tL%d(%%rip), %%rdx\n", label); fprintf(Outfile, "\tjmp\tswitch\n"); } ================================================ FILE: 41_Local_Var_Init/cg_arm.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for ARMv6 on Raspberry Pi // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. static int freereg[4]; static char *reglist[4] = { "r4", "r5", "r6", "r7" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // We have to store large integer literal values in memory. // Keep a list of them which will be output in the postamble #define MAXINTS 1024 int Intlist[MAXINTS]; static int Intslot = 0; // Determine the offset of a large integer // literal from the .L3 label. If the integer // isn't in the list, add it. 
static void set_int_offset(int val) { int offset = -1; // See if it is already there for (int i = 0; i < Intslot; i++) { if (Intlist[i] == val) { offset = 4 * i; break; } } // Not in the list, so add it if (offset == -1) { offset = 4 * Intslot; if (Intslot == MAXINTS) fatal("Out of int slots in set_int_offset()"); Intlist[Intslot++] = val; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L3+%d\n", offset); } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Print out the assembly postamble void cgpostamble() { // Print out the global variables fprintf(Outfile, ".L2:\n"); for (int i = 0; i < Globs; i++) { if (Symtable[i].stype == S_VARIABLE) fprintf(Outfile, "\t.word %s\n", Symtable[i].name); } // Print out the integer literals fprintf(Outfile, ".L3:\n"); for (int i = 0; i < Intslot; i++) { fprintf(Outfile, "\t.word %d\n", Intlist[i]); } } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, \%%function\n" "%s:\n" "\tpush\t{fp, lr}\n" "\tadd\tfp, sp, #4\n" "\tsub\tsp, sp, #8\n" "\tstr\tr0, [fp, #-8]\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Symtable[id].endlabel); fputs("\tsub\tsp, fp, #4\n" "\tpop\t{fp, pc}\n" "\t.align\t2\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); // If the literal value is small, do it with one instruction if (value <= 1000) fprintf(Outfile, "\tmov\t%s, #%d\n", reglist[r], value); else { set_int_offset(value); fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); } return (r); } // Determine the offset of a variable from the .L2 // label. Yes, this is inefficient code. static void set_var_offset(int id) { int offset = 0; // Walk the symbol table up to id. 
// Find S_VARIABLEs and add on 4 until // we get to our variable for (int i = 0; i < id; i++) { if (Symtable[i].stype == S_VARIABLE) offset += 4; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L2+%d\n", offset); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tldrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s, %s\n", reglist[r1], reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\tmul\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { // To do a divide: r0 holds the dividend, r1 holds the divisor. // The quotient is in r0. 
fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r1]); fprintf(Outfile, "\tmov\tr1, %s\n", reglist[r2]); fprintf(Outfile, "\tbl\t__aeabi_idiv\n"); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r1]); free_register(r2); return (r1); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]); fprintf(Outfile, "\tbl\t%s\n", Symtable[id].name); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r]); return (r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tlsl\t%s, %s, #%d\n", reglist[r], reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tstr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgstorglob:", Symtable[id].type); } return (r); } // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { if (ptrtype(type)) return (4); switch (type) { case P_CHAR: return (1); case P_INT: case P_LONG: return (4); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Symtable[id].type); fprintf(Outfile, "\t.data\n" "\t.globl\t%s\n", Symtable[id].name); switch (typesize) { case 1: fprintf(Outfile, "%s:\t.byte\t0\n", Symtable[id].name); break; case 4: fprintf(Outfile, "%s:\t.long\t0\n", Symtable[id].name); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "moveq", "movne", "movlt", "movgt", "movle", "movge" }; // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "movne", "moveq", "movge", "movle", "movgt", "movlt" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #1\n", cmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #0\n", invcmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\tuxtb\t%s, %s\n", reglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tb\tL%d\n", l); } // List of inverted branch instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *brlist[] = { "bne", "beq", "bge", "ble", "bgt", "blt" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", brlist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[reg]); cgjump(Symtable[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. Return a new register int cgaddress(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); fprintf(Outfile, "\tmov\t%s, r3\n", reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tldrb\t%s, [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [%s]\n", reglist[r1], reglist[r2]); break; case P_INT: case P_LONG: fprintf(Outfile, "\tstr\t%s, [%s]\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 41_Local_Var_Init/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { 
no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\tsection .text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\tsection .data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch (type) { case P_CHAR: return (offset); case P_INT: case P_LONG: break; default: fatald("Bad type in calc_aligned_offset:", type); } // Here we have an int or a long. Align it on a 4-byte offset // I put the generic code here so it can be reused elsewhere. alignment = 4; offset = (offset + direction * (alignment - 1)) & ~(alignment - 1); return (offset); } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. 
// We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "r10", "r11", "r12", "r13", "r9", "r8", "rcx", "rdx", "rsi", "rdi" }; static char *breglist[] = { "r10b", "r11b", "r12b", "r13b", "r9b", "r8b", "cl", "dl", "sil", "dil" }; static char *dreglist[] = { "r10d", "r11d", "r12d", "r13d", "r9d", "r8d", "ecx", "edx", "esi", "edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\textern\tprintint\n", Outfile); fputs("\textern\tprintchar\n", Outfile); fputs("\textern\topen\n", Outfile); fputs("\textern\tclose\n", Outfile); fputs("\textern\tread\n", Outfile); fputs("\textern\twrite\n", Outfile); fputs("\textern\tprintf\n", Outfile); cgtextseg(); fprintf(Outfile, "; internal switch(expr) routine\n" "; rsi = switch table, rax = expr\n" "; from SubC: http://www.t3x.org/subc/\n" "\n" "switch:\n" " push rsi\n" " mov rsi, rdx\n" " mov rbx, rax\n" " cld\n" " lodsq\n" " mov rcx, rax\n" "next:\n" " lodsq\n" " mov rdx, rax\n" " lodsq\n" " cmp rbx, rdx\n" " jnz no\n" " pop rsi\n" " jmp rax\n" "no:\n" " loop next\n" " lodsq\n" " pop rsi\n" " jmp rax\n" "\n"); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(struct symtable *sym) { char *name = sym->name; struct symtable *parm, *locvar; int cnt; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the rsp and rbp fprintf(Outfile, "\tglobal\t%s\n" "%s:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n", name, name); // Copy any in-register parameters to the stack, up to six of them // The remaining parameters are already on the stack for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) { if (cnt > 6) { parm->posn = paramOffset; paramOffset += 8; } else { parm->posn = newlocaloffset(parm->type); cgstorlocal(paramReg--, parm); } } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. 
for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) { locvar->posn = newlocaloffset(locvar->type); } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\tadd\trsp, %d\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->endlabel); fprintf(Outfile, "\tadd\trsp, %d\n", stackOffset); fputs("\tpop rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadglob(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); } else // Print out the code to initialise it switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, 
"\tdec\tdword [%s]\n", sym->name); fprintf(Outfile, "\tmovsx\t%s, word [%s]\n", dreglist[r], sym->name); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword [%s]\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadlocal(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->posn); fprintf(Outfile, "\tmov\t%s, [rbp+%d]\n", reglist[r], sym->posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->posn); } else switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->posn); fprintf(Outfile, "\tmovzx\t%s, byte [rbp+%d]\n", reglist[r], sym->posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", sym->posn); fprintf(Outfile, "\tmovsx\t%s, dword [rbp+%d]\n", reglist[r], sym->posn); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, 
"\tdec\tdword\t[rbp+%d]\n", sym->posn); break; default: fatald("Bad type in cgloadlocal:", sym->type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, L%d\n", reglist[r], label); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tand\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\tor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshl\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshr\t%s, 
cl\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tneg\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnot\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r], breglist[r]); return (r); } // Convert an integer value to a boolean value. Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, byte %s\n", reglist[r], breglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\tadd\trsp, %d\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmov\t%s, rax\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpush\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmov\t%s, %s\n", reglist[FIRSTPARAMREG - argposn + 1], reglist[r]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsal\t%s, %d\n", reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, dreglist[r]); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\tqword\t[rbp+%d], %s\n", sym->posn, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\tbyte\t[rbp+%d], %s\n", sym->posn, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\tdword\t[rbp+%d], %s\n", sym->posn, dreglist[r]); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size, type; int initvalue; int i; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the variable (or its elements if an array) // and the type of the variable if (node->stype == S_ARRAY) { size= typesize(value_at(node->type), node->ctype); type= value_at(node->type); } else { size = node->size; type= node->type; } // Generate the global identity and the label cgdataseg(); fprintf(Outfile, "\tsection\t.data\n" "\tglobal\t%s\n", node->name); 
fprintf(Outfile, "%s:\n", node->name); // Output space for one or more elements for (i = 0; i < node->nelems; i++) { // Get any initial value initvalue = 0; if (node->initlist != NULL) initvalue = node->initlist[i]; // Generate the space for this type // original version switch(size) { case 1: fprintf(Outfile, "\tdb\t%d\n", initvalue); break; case 4: fprintf(Outfile, "\tdd\t%d\n", initvalue); break; case 8: // Generate the pointer to a string literal if (node->initlist != NULL && type == pointer_to(P_CHAR)) fprintf(Outfile, "\tdq\tL%d\n", initvalue); else fprintf(Outfile, "\tdq\t%d\n", initvalue); break; default: for (int i = 0; i < size; i++) fprintf(Outfile, "\tdb\t0\n"); /* compact version using times instead of loop fprintf(Outfile, "\ttimes\t%d\tdb\t0\n", size); */ } } } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\tdb\t%d\n", *cptr); } fprintf(Outfile, "\tdb\t0\n"); /* put string in readable format on a single line // probably went overboard with error checking int comma = 0, quote = 0, start = 1; fprintf(Outfile, "\tdb\t"); for (cptr=strvalue; *cptr; cptr++) { if ( ! isprint(*cptr) ) if (comma || start) { fprintf(Outfile, "%d, ", *cptr); start = 0; comma = 1; } else if (quote) { fprintf(Outfile, "\', %d, ", *cptr); comma = 1; quote = 0; } else { fprintf(Outfile, "%d, ", *cptr); comma = 1; quote = 0; } else if (start || comma) { fprintf(Outfile, "\'%c", *cptr); start = comma = 0; quote = 1; } else { fprintf(Outfile, "%c", *cptr); comma = 0; quote = 1; } } if (comma || start) fprintf(Outfile, "0\n"); else fprintf(Outfile, "\', 0\n"); */ } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzx\teax, %s\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmov\teax, %s\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } cgjump(sym->endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL) fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r], sym->name); else fprintf(Outfile, "\tlea\t%s, [rbp+%d]\n", reglist[r], sym->posn); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovsx\t%s, dword [%s]\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { //Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmov\t[%s], byte %s\n", reglist[r2], breglist[r1]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } // Generate a switch jump table and the code to // load the registers and call the switch() code void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; // Get a label for the switch table label = genlabel(); cglabel(label); // Heuristic. If we have no cases, create one case // which points to the default case if (casecount == 0) { caseval[0] = 0; caselabel[0] = defaultlabel; casecount = 1; } // Generate the switch jump table. 
fprintf(Outfile, "\tdq\t%d\n", casecount); for (i = 0; i < casecount; i++) fprintf(Outfile, "\tdq\t%d, L%d\n", caseval[i], caselabel[i]); fprintf(Outfile, "\tdq\tL%d\n", defaultlabel); // Load the specific registers cglabel(toplabel); fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); fprintf(Outfile, "\tmov\trdx, L%d\n", label); fprintf(Outfile, "\tjmp\tswitch\n"); } ================================================ FILE: 41_Local_Var_Init/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Putback; // Character put back by scanner extern_ struct symtable *Functionid; // Symbol ptr of the current function extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ char *Infilename; // Name of file we are parsing extern_ char *Outfilename; // Name of file we opened as Outfile extern_ struct token Token; // Last token scanned extern_ char Text[TEXTLEN + 1]; // Last identifier scanned extern_ int Looplevel; // Depth of nested loops extern_ int Switchlevel; // Depth of nested switches // Symbol table lists extern_ struct symtable *Globhead, *Globtail; // Global variables and functions extern_ struct symtable *Loclhead, *Locltail; // Local variables extern_ struct symtable *Parmhead, *Parmtail; // Local parameters extern_ struct symtable *Membhead, *Membtail; // Temp list of struct/union members extern_ struct symtable *Structhead, *Structtail; // List of struct types extern_ struct symtable *Unionhead, *Uniontail; // List of union types extern_ struct symtable *Enumhead, *Enumtail; // List of enum types and values extern_ struct symtable *Typehead, *Typetail; // List of typedefs // Command-line flags extern_ int O_dumpAST; // If true, dump the AST trees extern_ int O_keepasm; // If true, keep any assembly files extern_ int O_assemble; // If true, assemble the assembly files extern_ int 
O_dolink; // If true, link the object files extern_ int O_verbose; // If true, print info on compilation stages ================================================ FILE: 41_Local_Var_Init/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 static struct symtable *composite_declaration(int type); static int typedef_declaration(struct symtable **ctype); static int type_of_typedef(char *name, struct symtable **ctype); static void enum_declaration(void); // Parse the current token and return a primitive type enum value, // a pointer to any composite type and possibly modify // the class of the type. static int parse_type(struct symtable **ctype, int *class) { int type, exstatic = 1; // See if the class has been changed to extern (later, static) while (exstatic) { switch (Token.token) { case T_EXTERN: *class = C_EXTERN; scan(&Token); break; default: exstatic = 0; } } // Now work on the actual type keyword switch (Token.token) { case T_VOID: type = P_VOID; scan(&Token); break; case T_CHAR: type = P_CHAR; scan(&Token); break; case T_INT: type = P_INT; scan(&Token); break; case T_LONG: type = P_LONG; scan(&Token); break; // For the following, if we have a ';' after the // parsing then there is no type, so return -1. 
// Example: struct x {int y; int z}; case T_STRUCT: type = P_STRUCT; *ctype = composite_declaration(P_STRUCT); if (Token.token == T_SEMI) type = -1; break; case T_UNION: type = P_UNION; *ctype = composite_declaration(P_UNION); if (Token.token == T_SEMI) type = -1; break; case T_ENUM: type = P_INT; // Enums are really ints enum_declaration(); if (Token.token == T_SEMI) type = -1; break; case T_TYPEDEF: type = typedef_declaration(ctype); if (Token.token == T_SEMI) type = -1; break; case T_IDENT: type = type_of_typedef(Text, ctype); break; default: fatals("Illegal type, token", Token.tokstr); } return (type); } // Given a type parsed by parse_type(), scan in any following // '*' tokens and return the new type static int parse_stars(int type) { while (1) { if (Token.token != T_STAR) break; type = pointer_to(type); scan(&Token); } return (type); } // Given a type, check that the latest token is a literal // of that type. If an integer literal, return this value. // If a string literal, return the label number of the string. // Do not scan the next token. int parse_literal(int type) { // We have a string literal. Store in memory and return the label if ((type == pointer_to(P_CHAR)) && (Token.token == T_STRLIT)) return(genglobstr(Text)); if (Token.token == T_INTLIT) { switch(type) { case P_CHAR: if (Token.intvalue < 0 || Token.intvalue > 255) fatal("Integer literal value too big for char type"); case P_INT: case P_LONG: break; default: fatal("Type mismatch: integer literal vs. variable"); } } else fatal("Expecting an integer literal value"); return(Token.intvalue); } // Given the type, name and class of a scalar variable, // parse any initialisation value and allocate storage for it. // Return the variable's symbol table entry. 
static struct symtable *scalar_declaration(char *varname, int type, struct symtable *ctype, int class, struct ASTnode **tree) { struct symtable *sym=NULL; struct ASTnode *varnode, *exprnode; *tree= NULL; // Add this as a known scalar switch (class) { case C_EXTERN: case C_GLOBAL: sym= addglob(varname, type, ctype, S_VARIABLE, class, 1, 0); break; case C_LOCAL: sym= addlocl(varname, type, ctype, S_VARIABLE, 1); break; case C_PARAM: sym= addparm(varname, type, ctype, S_VARIABLE); break; case C_MEMBER: sym= addmemb(varname, type, ctype, S_VARIABLE, 1); break; } // The variable is being initialised if (Token.token == T_ASSIGN) { // Only possible for a global or local if (class != C_GLOBAL && class != C_LOCAL) fatals("Variable can not be initialised", varname); scan(&Token); // Globals must be assigned a literal value if (class == C_GLOBAL) { // Create one initial value for the variable and // parse this value sym->initlist= (int *)malloc(sizeof(int)); sym->initlist[0]= parse_literal(type); scan(&Token); } if (class == C_LOCAL) { // Make an A_IDENT AST node with the variable varnode = mkastleaf(A_IDENT, sym->type, sym, 0); // Get the expression for the assignment, make into a rvalue exprnode = binexpr(0); exprnode->rvalue = 1; // Ensure the expression's type matches the variable exprnode = modify_type(exprnode, varnode->type, 0); if (exprnode == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree *tree = mkastnode(A_ASSIGN, exprnode->type, exprnode, NULL, varnode, NULL, 0); } } // Generate any global space if (class == C_GLOBAL) genglobsym(sym); return (sym); } // Given the type, name and class of an variable, parse // the size of the array, if any. Then parse any initialisation // value and allocate storage for it. // Return the variable's symbol table entry. 
static struct symtable *array_declaration(char *varname, int type, struct symtable *ctype, int class) { struct symtable *sym; // New symbol table entry int nelems= -1; // Assume the number of elements won't be given int maxelems; // The maximum number of elements in the init list int *initlist; // The list of initial elements int i=0, j; // Skip past the '[' scan(&Token); // See we have an array size if (Token.token == T_INTLIT) { if (Token.intvalue <= 0) fatald("Array size is illegal", Token.intvalue); nelems= Token.intvalue; scan(&Token); } // Ensure we have a following ']' match(T_RBRACKET, "]"); // Add this as a known array. We treat the // array as a pointer to its elements' type switch (class) { case C_EXTERN: case C_GLOBAL: sym = addglob(varname, pointer_to(type), ctype, S_ARRAY, class, 0, 0); break; default: fatal("For now, declaration of non-global arrays is not implemented"); } // Array initialisation if (Token.token == T_ASSIGN) { if (class != C_GLOBAL) fatals("Variable can not be initialised", varname); scan(&Token); // Get the following left curly bracket match(T_LBRACE, "{"); #define TABLE_INCREMENT 10 // If the array already has nelems, allocate that many elements // in the list. Otherwise, start with TABLE_INCREMENT. 
if (nelems != -1) maxelems= nelems; else maxelems= TABLE_INCREMENT; initlist= (int *)malloc(maxelems *sizeof(int)); // Loop getting a new literal value from the list while (1) { // Check we can add the next value, then parse and add it if (nelems != -1 && i == maxelems) fatal("Too many values in initialisation list"); initlist[i++]= parse_literal(type); scan(&Token); // Increase the list size if the original size was // not set and we have hit the end of the current list if (nelems == -1 && i == maxelems) { maxelems += TABLE_INCREMENT; initlist= (int *)realloc(initlist, maxelems *sizeof(int)); } // Leave when we hit the right curly bracket if (Token.token == T_RBRACE) { scan(&Token); break; } // Next token must be a comma, then comma(); } // Zero any unused elements in the initlist. // Attach the list to the symbol table entry for (j=i; j < sym->nelems; j++) initlist[j]=0; if (i > nelems) nelems = i; sym->initlist= initlist; } // Set the size of the array and the number of elements sym->nelems= nelems; sym->size= sym->nelems * typesize(type, ctype); // Generate any global space if (class == C_GLOBAL) genglobsym(sym); return (sym); } // Given a pointer to the new function being declared and // a possibly NULL pointer to the function's previous declaration, // parse a list of parameters and cross-check them against the // previous declaration. 
Return the count of parameters static int param_declaration_list(struct symtable *oldfuncsym, struct symtable *newfuncsym) { int type, paramcnt = 0; struct symtable *ctype; struct symtable *protoptr = NULL; struct ASTnode *unused; // Get the pointer to the first prototype parameter if (oldfuncsym != NULL) protoptr = oldfuncsym->member; // Loop getting any parameters while (Token.token != T_RPAREN) { // Get the type of the next parameter type = declaration_list(&ctype, C_PARAM, T_COMMA, T_RPAREN, &unused); if (type == -1) fatal("Bad type in parameter list"); // Ensure the type of this parameter matches the prototype if (protoptr != NULL) { if (type != protoptr->type) fatald("Type doesn't match prototype for parameter", paramcnt + 1); protoptr = protoptr->next; } paramcnt++; // Stop when we hit the right parenthesis if (Token.token == T_RPAREN) break; // We need a comma as separator comma(); } if (oldfuncsym != NULL && paramcnt != oldfuncsym->nelems) fatals("Parameter count mismatch for function", oldfuncsym->name); // Return the count of parameters return (paramcnt); } // // function_declaration: type identifier '(' parameter_list ')' ; // | type identifier '(' parameter_list ')' compound_statement ; // // Parse the declaration of function. static struct symtable *function_declaration(char *funcname, int type, struct symtable *ctype, int class) { struct ASTnode *tree, *finalstmt; struct symtable *oldfuncsym, *newfuncsym = NULL; int endlabel, paramcnt; // Text has the identifier's name. If this exists and is a // function, get the id. Otherwise, set oldfuncsym to NULL. 
if ((oldfuncsym = findsymbol(funcname)) != NULL) if (oldfuncsym->stype != S_FUNCTION) oldfuncsym = NULL; // If this is a new function declaration, get a // label-id for the end label, and add the function // to the symbol table, if (oldfuncsym == NULL) { endlabel = genlabel(); // Assumtion: functions only return scalar types, so NULL below newfuncsym = addglob(funcname, type, NULL, S_FUNCTION, C_GLOBAL, 0, endlabel); } // Scan in the '(', any parameters and the ')'. // Pass in any existing function prototype pointer lparen(); paramcnt = param_declaration_list(oldfuncsym, newfuncsym); rparen(); // If this is a new function declaration, update the // function symbol entry with the number of parameters. // Also copy the parameter list into the function's node. if (newfuncsym) { newfuncsym->nelems = paramcnt; newfuncsym->member = Parmhead; oldfuncsym = newfuncsym; } // Clear out the parameter list Parmhead = Parmtail = NULL; // Declaration ends in a semicolon, only a prototype. if (Token.token == T_SEMI) return (oldfuncsym); // This is not just a prototype. // Set the Functionid global to the function's symbol pointer Functionid = oldfuncsym; // Get the AST tree for the compound statement and mark // that we have parsed no loops or switches yet Looplevel = 0; Switchlevel = 0; lbrace(); tree = compound_statement(0); rbrace(); // If the function type isn't P_VOID .. if (type != P_VOID) { // Error if no statements in the function if (tree == NULL) fatal("No statements in function with non-void type"); // Check that the last AST operation in the // compound statement was a return statement finalstmt = (tree->op == A_GLUE) ? 
tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); } // Build the A_FUNCTION node which has the function's symbol pointer // and the compound statement sub-tree tree = mkastunary(A_FUNCTION, type, tree, oldfuncsym, endlabel); // Generate the assembly code for it if (O_dumpAST) { dumpAST(tree, NOLABEL, 0); fprintf(stdout, "\n\n"); } genAST(tree, NOLABEL, NOLABEL, NOLABEL, 0); // Now free the symbols associated // with this function freeloclsyms(); return (oldfuncsym); } // Parse composite type declarations: structs or unions. // Either find an existing struct/union declaration, or build // a struct/union symbol table entry and return its pointer. static struct symtable *composite_declaration(int type) { struct symtable *ctype = NULL; struct symtable *m; struct ASTnode *unused; int offset; int t; // Skip the struct/union keyword scan(&Token); // See if there is a following struct/union name if (Token.token == T_IDENT) { // Find any matching composite type if (type == P_STRUCT) ctype = findstruct(Text); else ctype = findunion(Text); scan(&Token); } // If the next token isn't an LBRACE , this is // the usage of an existing struct/union type. // Return the pointer to the type. if (Token.token != T_LBRACE) { if (ctype == NULL) fatals("unknown struct/union type", Text); return (ctype); } // Ensure this struct/union type hasn't been // previously defined if (ctype) fatals("previously defined struct/union", Text); // Build the composite type and skip the left brace if (type == P_STRUCT) ctype = addstruct(Text); else ctype = addunion(Text); scan(&Token); // Scan in the list of members while (1) { // Get the next member. 
m is used as a dummy t= declaration_list(&m, C_MEMBER, T_SEMI, T_RBRACE, &unused); if (t== -1) fatal("Bad type in member list"); if (Token.token == T_SEMI) scan(&Token); if (Token.token == T_RBRACE) break; } // Attach to the struct type's node rbrace(); if (Membhead==NULL) fatals("No members in struct", ctype->name); ctype->member = Membhead; Membhead = Membtail = NULL; // Set the offset of the initial member // and find the first free byte after it m = ctype->member; m->posn = 0; offset = typesize(m->type, m->ctype); // Set the position of each successive member in the composite type // Unions are easy. For structs, align the member and find the next free byte for (m = m->next; m != NULL; m = m->next) { // Set the offset for this member if (type == P_STRUCT) m->posn = genalign(m->type, offset, 1); else m->posn = 0; // Get the offset of the next free byte after this member offset += typesize(m->type, m->ctype); } // Set the overall size of the composite type ctype->size = offset; return (ctype); } // Parse an enum declaration static void enum_declaration(void) { struct symtable *etype = NULL; char *name; int intval = 0; // Skip the enum keyword. scan(&Token); // If there's a following enum type name, get a // pointer to any existing enum type node. if (Token.token == T_IDENT) { etype = findenumtype(Text); name = strdup(Text); // As it gets tromped soon scan(&Token); } // If the next token isn't a LBRACE, check // that we have an enum type name, then return if (Token.token != T_LBRACE) { if (etype == NULL) fatals("undeclared enum type:", name); return; } // We do have an LBRACE. Skip it scan(&Token); // If we have an enum type name, ensure that it // hasn't been declared before. 
if (etype != NULL) fatals("enum type redeclared:", etype->name); else // Build an enum type node for this identifier etype = addenum(name, C_ENUMTYPE, 0); // Loop to get all the enum values while (1) { // Ensure we have an identifier // Copy it in case there's an int literal coming up ident(); name = strdup(Text); // Ensure this enum value hasn't been declared before etype = findenumval(name); if (etype != NULL) fatals("enum value redeclared:", Text); // If the next token is an '=', skip it and // get the following int literal if (Token.token == T_ASSIGN) { scan(&Token); if (Token.token != T_INTLIT) fatal("Expected int literal after '='"); intval = Token.intvalue; scan(&Token); } // Build an enum value node for this identifier. // Increment the value for the next enum identifier. etype = addenum(name, C_ENUMVAL, intval++); // Bail out on a right curly bracket, else get a comma if (Token.token == T_RBRACE) break; comma(); } scan(&Token); // Skip over the right curly bracket } // Parse a typedef declaration and return the type // and ctype that it represents static int typedef_declaration(struct symtable **ctype) { int type, class = 0; // Skip the typedef keyword. scan(&Token); // Get the actual type following the keyword type = parse_type(ctype, &class); if (class != 0) fatal("Can't have extern in a typedef declaration"); // See if the typedef identifier already exists if (findtypedef(Text) != NULL) fatals("redefinition of typedef", Text); // Get any following '*' tokens type = parse_stars(type); // It doesn't exist so add it to the typedef list addtypedef(Text, type, *ctype); scan(&Token); return (type); } // Given a typedef name, return the type it represents static int type_of_typedef(char *name, struct symtable **ctype) { struct symtable *t; // Look up the typedef in the list t = findtypedef(name); if (t == NULL) fatals("unknown type", name); scan(&Token); *ctype = t->ctype; return (t->type); } // Parse the declaration of a variable or function. 
// The type and any following '*'s have been scanned, and we // have the identifier in the Token variable. // The class argument is the variable's class. // Return a pointer to the symbol's entry in the symbol table static struct symtable *symbol_declaration(int type, struct symtable *ctype, int class, struct ASTnode **tree) { struct symtable *sym = NULL; char *varname = strdup(Text); // Ensure that we have an identifier. // We copied it above so we can scan more tokens in, e.g. // an assignment expression for a local variable. ident(); // Deal with function declarations if (Token.token == T_LPAREN) { return (function_declaration(varname, type, ctype, class)); } // See if this array or scalar variable has already been declared switch (class) { case C_EXTERN: case C_GLOBAL: if (findglob(varname) != NULL) fatals("Duplicate global variable declaration", varname); case C_LOCAL: case C_PARAM: if (findlocl(varname) != NULL) fatals("Duplicate local variable declaration", varname); case C_MEMBER: if (findmember(varname) != NULL) fatals("Duplicate struct/union member declaration", varname); } // Add the array or scalar variable to the symbol table if (Token.token == T_LBRACKET) sym = array_declaration(varname, type, ctype, class); else sym = scalar_declaration(varname, type, ctype, class, tree); return (sym); } // Parse a list of symbols where there is an initial type. // Return the type of the symbols. et1 and et2 are end tokens. int declaration_list(struct symtable **ctype, int class, int et1, int et2, struct ASTnode **gluetree) { int inittype, type; struct symtable *sym; struct ASTnode *tree; *gluetree= NULL; // Get the initial type. 
If -1, it was // a composite type definition, return this if ((inittype = parse_type(ctype, &class)) == -1) return (inittype); // Now parse the list of symbols while (1) { // See if this symbol is a pointer type = parse_stars(inittype); // Parse this symbol sym = symbol_declaration(type, *ctype, class, &tree); // We parsed a function, there is no list so leave if (sym->stype == S_FUNCTION) { if (class != C_GLOBAL) fatal("Function definition not at global level"); return (type); } // Glue any AST tree from a local declaration // to build a sequence of assignments to perform if (*gluetree== NULL) *gluetree= tree; else *gluetree = mkastnode(A_GLUE, P_NONE, *gluetree, NULL, tree, NULL, 0); // We are at the end of the list, leave if (Token.token == et1 || Token.token == et2) return (type); // Otherwise, we need a comma as separator comma(); } } // Parse one or more global declarations, either // variables, functions or structs void global_declarations(void) { struct symtable *ctype; struct ASTnode *unused; while (Token.token != T_EOF) { declaration_list(&ctype, C_GLOBAL, T_SEMI, T_EOF, &unused); // Skip any semicolons and right curly brackets if (Token.token == T_SEMI) scan(&Token); } } ================================================ FILE: 41_Local_Var_Init/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c void reject_token(struct token *t); int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue); struct ASTnode *mkastleaf(int op, int type, struct symtable *sym, int intvalue); struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, struct symtable *sym, int intvalue); void dumpAST(struct ASTnode *n, int label, int parentASTop); // gen.c int genlabel(void); int genAST(struct ASTnode *n, int iflabel, int looptoplabel, int 
loopendlabel, int parentASTop); void genpreamble(); void genpostamble(); void genfreeregs(); void genglobsym(struct symtable *node); int genglobstr(char *strvalue); int genprimsize(int type); int genalign(int type, int offset, int direction); void genreturn(int reg, int id); // cg.c int cgprimsize(int type); int cgalign(int type, int offset, int direction); void cgtextseg(); void cgdataseg(); void freeall_registers(void); void cgpreamble(); void cgpostamble(); void cgfuncpreamble(struct symtable *sym); void cgfuncpostamble(struct symtable *sym); int cgloadint(int value, int type); int cgloadglob(struct symtable *sym, int op); int cgloadlocal(struct symtable *sym, int op); int cgloadglobstr(int label); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdiv(int r1, int r2); int cgshlconst(int r, int val); int cgcall(struct symtable *sym, int numargs); void cgcopyarg(int r, int argposn); int cgstorglob(int r, struct symtable *sym); int cgstorlocal(int r, struct symtable *sym); void cgglobsym(struct symtable *node); void cgglobstr(int l, char *strvalue); int cgcompare_and_set(int ASTop, int r1, int r2); int cgcompare_and_jump(int ASTop, int r1, int r2, int label); void cglabel(int l); void cgjump(int l); int cgwiden(int r, int oldtype, int newtype); void cgreturn(int reg, struct symtable *sym); int cgaddress(struct symtable *sym); int cgderef(int r, int type); int cgstorderef(int r1, int r2, int type); int cgnegate(int r); int cginvert(int r); int cglognot(int r); int cgboolean(int r, int op, int label); int cgand(int r1, int r2); int cgor(int r1, int r2); int cgxor(int r1, int r2); int cgshl(int r1, int r2); int cgshr(int r1, int r2); void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel); // expr.c struct ASTnode *expression_list(int endtoken); struct ASTnode *binexpr(int ptp); // stmt.c struct ASTnode *compound_statement(int inswitch); // misc.c void match(int t, char *what); void 
semi(void); void lbrace(void); void rbrace(void); void lparen(void); void rparen(void); void ident(void); void comma(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node); struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn); struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn); struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int nelems); struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype); struct symtable *addstruct(char *name); struct symtable *addunion(char *name); struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int nelems); struct symtable *addenum(char *name, int class, int value); struct symtable *addtypedef(char *name, int type, struct symtable *ctype); struct symtable *findglob(char *s); struct symtable *findlocl(char *s); struct symtable *findsymbol(char *s); struct symtable *findmember(char *s); struct symtable *findstruct(char *s); struct symtable *findunion(char *s); struct symtable *findenumtype(char *s); struct symtable *findenumval(char *s); struct symtable *findtypedef(char *s); void clear_symtable(void); void freeloclsyms(void); // decl.c int declaration_list(struct symtable **ctype, int class, int et1, int et2, struct ASTnode **gluetree); void global_declarations(void); // types.c int inttype(int type); int ptrtype(int type); int pointer_to(int type); int value_at(int type); int typesize(int type, struct symtable *ctype); struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op); ================================================ FILE: 41_Local_Var_Init/defs.h ================================================ #include #include #include #include // Structure and enum 
definitions // Copyright (c) 2019 Warren Toomey, GPL3 enum { TEXTLEN = 512 // Length of identifiers in input }; // Commands and default filenames #define AOUT "a.out" #ifdef __NASM__ #define ASCMD "nasm -f elf64 -o " #define LDCMD "cc -no-pie -fno-plt -Wall -o " #else #define ASCMD "as -o " #define LDCMD "cc -o " #endif #define CPPCMD "cpp -nostdinc -isystem " // Token types enum { T_EOF, // Binary operators T_ASSIGN, T_LOGOR, T_LOGAND, T_OR, T_XOR, T_AMPER, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, T_LSHIFT, T_RSHIFT, T_PLUS, T_MINUS, T_STAR, T_SLASH, // Other operators T_INC, T_DEC, T_INVERT, T_LOGNOT, // Type keywords T_VOID, T_CHAR, T_INT, T_LONG, // Other keywords T_IF, T_ELSE, T_WHILE, T_FOR, T_RETURN, T_STRUCT, T_UNION, T_ENUM, T_TYPEDEF, T_EXTERN, T_BREAK, T_CONTINUE, T_SWITCH, T_CASE, T_DEFAULT, // Structural tokens T_INTLIT, T_STRLIT, T_SEMI, T_IDENT, T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, T_LBRACKET, T_RBRACKET, T_COMMA, T_DOT, T_ARROW, T_COLON }; // Token structure struct token { int token; // Token type, from the enum list above char *tokstr; // String version of the token int intvalue; // For T_INTLIT, the integer value }; // AST node types. The first few line up // with the related tokens enum { A_ASSIGN = 1, A_LOGOR, A_LOGAND, A_OR, A_XOR, A_AND, A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_LSHIFT, A_RSHIFT, A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_INTLIT, A_STRLIT, A_IDENT, A_GLUE, A_IF, A_WHILE, A_FUNCTION, A_WIDEN, A_RETURN, A_FUNCCALL, A_DEREF, A_ADDR, A_SCALE, A_PREINC, A_PREDEC, A_POSTINC, A_POSTDEC, A_NEGATE, A_INVERT, A_LOGNOT, A_TOBOOL, A_BREAK, A_CONTINUE, A_SWITCH, A_CASE, A_DEFAULT }; // Primitive types. The bottom 4 bits is an integer // value that represents the level of indirection, // e.g. 0= no pointer, 1= pointer, 2= pointer pointer etc. 
enum { P_NONE, P_VOID = 16, P_CHAR = 32, P_INT = 48, P_LONG = 64, P_STRUCT=80, P_UNION=96 }; // Structural types enum { S_VARIABLE, S_FUNCTION, S_ARRAY }; // Storage classes enum { C_GLOBAL = 1, // Globally visible symbol C_LOCAL, // Locally visible symbol C_PARAM, // Locally visible function parameter C_EXTERN, // External globally visible symbol C_STRUCT, // A struct C_UNION, // A union C_MEMBER, // Member of a struct or union C_ENUMTYPE, // A named enumeration type C_ENUMVAL, // A named enumeration value C_TYPEDEF // A named typedef }; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol struct symtable *ctype; // If struct/union, ptr to that type int stype; // Structural type for the symbol int class; // Storage class for the symbol int size; // Total size in bytes of this symbol int nelems; // Functions: # params. Arrays: # elements union { int endlabel; // For functions, the end label int posn; // For locals, the negative offset // from the stack base pointer }; int *initlist; // List of initial values struct symtable *next; // Next symbol in one list struct symtable *member; // First member of a function, struct, }; // union or enum // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates int rvalue; // True if the node is an rvalue struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; struct symtable *sym; // For many AST nodes, the pointer to union { // the symbol in the symbol table int intvalue; // For A_INTLIT, the integer value int size; // For A_SCALE, the size to scale by }; }; enum { NOREG = -1, // Use NOREG when the AST generation // functions have no register to return NOLABEL = 0 // Use NOLABEL when we have no label to // pass to genAST() }; ================================================ FILE: 41_Local_Var_Init/expr.c 
================================================
#include "defs.h"
#include "data.h"
#include "decl.h"

// Parsing of expressions
// Copyright (c) 2019 Warren Toomey, GPL3

// expression_list:
//        | expression
//        | expression ',' expression_list
//        ;

// Parse a list of zero or more comma-separated expressions and
// return an AST composed of A_GLUE nodes with the left-hand child
// being the sub-tree of previous expressions (or NULL) and the right-hand
// child being the next expression. Each A_GLUE node will have size field
// set to the number of expressions in the tree at this point. If no
// expressions are parsed, NULL is returned.
// endtoken is the token that terminates the list; it is not consumed here.
struct ASTnode *expression_list(int endtoken) {
  struct ASTnode *tree = NULL;
  struct ASTnode *child = NULL;
  int exprcount = 0;

  // Loop until the end token
  while (Token.token != endtoken) {

    // Parse the next expression and increment the expression count
    child = binexpr(0);
    exprcount++;

    // Build an A_GLUE AST node with the previous tree as the left child
    // and the new expression as the right child. Store the expression count.
    tree = mkastnode(A_GLUE, P_NONE, tree, NULL, child, NULL, exprcount);

    // Stop when we reach the end token
    if (Token.token == endtoken)
      break;

    // Must have a ',' at this point
    match(T_COMMA, ",");
  }

  // Return the tree of expressions
  return (tree);
}

// Parse a function call and return its AST.
// The function's name is in Text and the current token is the '('.
static struct ASTnode *funccall(void) {
  struct ASTnode *tree;
  struct symtable *funcptr;

  // Check that the identifier has been defined as a function,
  // then make a leaf node for it.
  if ((funcptr = findsymbol(Text)) == NULL || funcptr->stype != S_FUNCTION) {
    fatals("Undeclared function", Text);
  }
  // Get the '('
  lparen();

  // Parse the argument expression list
  tree = expression_list(T_RPAREN);

  // XXX Check type of each argument against the function's prototype

  // Build the function call AST node. Store the
  // function's return type as this node's type.
  // Also record the function's symbol-id
  tree = mkastunary(A_FUNCCALL, funcptr->type, tree, funcptr, 0);

  // Get the ')'
  rparen();
  return (tree);
}

// Parse the index into an array and return an AST tree for it.
// The array's name is in Text and the current token is the '['.
static struct ASTnode *array_access(void) {
  struct ASTnode *left, *right;
  struct symtable *aryptr;

  // Check that the identifier has been defined as an array
  // then make a leaf node for it that points at the base
  if ((aryptr = findsymbol(Text)) == NULL || aryptr->stype != S_ARRAY) {
    fatals("Undeclared array", Text);
  }
  left = mkastleaf(A_ADDR, aryptr->type, aryptr, 0);

  // Get the '['
  scan(&Token);

  // Parse the following expression
  right = binexpr(0);

  // Get the ']'
  match(T_RBRACKET, "]");

  // Ensure that this is of int type
  if (!inttype(right->type))
    fatal("Array index is not of integer type");

  // Scale the index by the size of the element's type
  right = modify_type(right, left->type, A_ADD);

  // Return an AST tree where the array's base has the offset
  // added to it, and dereference the element. Still an lvalue
  // at this point.
  left = mkastnode(A_ADD, aryptr->type, left, NULL, right, NULL, 0);
  left = mkastunary(A_DEREF, value_at(left->type), left, NULL, 0);
  return (left);
}

// Parse the member reference of a struct or union
// and return an AST tree for it. If withpointer is true,
// the access is through a pointer to the member.
// The variable's name is in Text and the current token is '.' or '->'.
static struct ASTnode *member_access(int withpointer) {
  struct ASTnode *left, *right;
  struct symtable *compvar;
  struct symtable *typeptr;
  struct symtable *m;

  // Check that the identifier has been declared as a
  // struct/union or a struct/union pointer
  if ((compvar = findsymbol(Text)) == NULL)
    fatals("Undeclared variable", Text);
  if (withpointer && compvar->type != pointer_to(P_STRUCT)
      && compvar->type != pointer_to(P_UNION))
    fatals("Undeclared variable", Text);
  if (!withpointer && compvar->type != P_STRUCT && compvar->type != P_UNION)
    fatals("Undeclared variable", Text);

  // If a pointer to a struct/union, get the pointer's value.
  // Otherwise, make a leaf node that points at the base
  // Either way, it's an rvalue
  if (withpointer) {
    left = mkastleaf(A_IDENT, pointer_to(compvar->type), compvar, 0);
  } else
    left = mkastleaf(A_ADDR, compvar->type, compvar, 0);
  left->rvalue = 1;

  // Get the details of the composite type
  typeptr = compvar->ctype;

  // Skip the '.' or '->' token and get the member's name
  scan(&Token);
  ident();

  // Find the matching member's name in the type
  // Die if we can't find it
  for (m = typeptr->member; m != NULL; m = m->next)
    if (!strcmp(m->name, Text))
      break;

  if (m == NULL)
    fatals("No member found in struct/union: ", Text);

  // Build an A_INTLIT node with the offset
  right = mkastleaf(A_INTLIT, P_INT, NULL, m->posn);

  // Add the member's offset to the base of the struct/union
  // and dereference it. Still an lvalue at this point
  left = mkastnode(A_ADD, pointer_to(m->type), left, NULL, right, NULL, 0);
  left = mkastunary(A_DEREF, m->type, left, NULL, 0);
  return (left);
}

// Parse a postfix expression and return
// an AST node representing it. The
// identifier is already in Text.
static struct ASTnode *postfix(void) {
  struct ASTnode *n;
  struct symtable *varptr;
  struct symtable *enumptr;

  // If the identifier matches an enum value,
  // return an A_INTLIT node
  if ((enumptr = findenumval(Text)) != NULL) {
    scan(&Token);
    return (mkastleaf(A_INTLIT, P_INT, NULL, enumptr->posn));
  }
  // Scan in the next token to see if we have a postfix expression
  scan(&Token);

  // Function call
  if (Token.token == T_LPAREN)
    return (funccall());

  // An array reference
  if (Token.token == T_LBRACKET)
    return (array_access());

  // Access into a struct or union
  if (Token.token == T_DOT)
    return (member_access(0));
  if (Token.token == T_ARROW)
    return (member_access(1));

  // A variable. Check that the variable exists.
  if ((varptr = findsymbol(Text)) == NULL || varptr->stype != S_VARIABLE)
    fatals("Unknown variable", Text);

  switch (Token.token) {
      // Post-increment: skip over the token
    case T_INC:
      scan(&Token);
      n = mkastleaf(A_POSTINC, varptr->type, varptr, 0);
      break;

      // Post-decrement: skip over the token
    case T_DEC:
      scan(&Token);
      n = mkastleaf(A_POSTDEC, varptr->type, varptr, 0);
      break;

      // Just a variable reference
    default:
      n = mkastleaf(A_IDENT, varptr->type, varptr, 0);
  }
  return (n);
}

// Parse a primary factor and return an
// AST node representing it.
static struct ASTnode *primary(void) {
  struct ASTnode *n;
  int id;

  switch (Token.token) {
    case T_INTLIT:
      // For an INTLIT token, make a leaf AST node for it.
      // Make it a P_CHAR if it's within the P_CHAR range
      if (Token.intvalue >= 0 && Token.intvalue < 256)
        n = mkastleaf(A_INTLIT, P_CHAR, NULL, Token.intvalue);
      else
        n = mkastleaf(A_INTLIT, P_INT, NULL, Token.intvalue);
      break;

    case T_STRLIT:
      // For a STRLIT token, generate the assembly for it.
      // Then make a leaf AST node for it. id is the string's label.
      id = genglobstr(Text);
      n = mkastleaf(A_STRLIT, pointer_to(P_CHAR), NULL, id);
      break;

    case T_IDENT:
      // postfix() scans past the identifier itself, so return directly
      return (postfix());

    case T_LPAREN:
      // Beginning of a parenthesised expression, skip the '('.
      // Scan in the expression and the right parenthesis
      scan(&Token);
      n = binexpr(0);
      rparen();
      return (n);

    default:
      fatals("Expecting a primary expression, got token", Token.tokstr);
  }

  // Scan in the next token and return the leaf node
  scan(&Token);
  return (n);
}

// Convert a binary operator token into a binary AST operation.
// We rely on a 1:1 mapping from token to AST operation
static int binastop(int tokentype) {
  if (tokentype > T_EOF && tokentype <= T_SLASH)
    return (tokentype);
  fatald("Syntax error, token", tokentype);
  return (0);			// Keep -Wall happy
}

// Return true if a token is right-associative,
// false otherwise.
static int rightassoc(int tokentype) { if (tokentype == T_ASSIGN) return (1); return (0); } // Operator precedence for each token. Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 20, 30, // T_EOF, T_ASSIGN, T_LOGOR, T_LOGAND 40, 50, 60, // T_OR, T_XOR, T_AMPER 70, 70, // T_EQ, T_NE 80, 80, 80, 80, // T_LT, T_GT, T_LE, T_GE 90, 90, // T_LSHIFT, T_RSHIFT 100, 100, // T_PLUS, T_MINUS 110, 110 // T_STAR, T_SLASH }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec; if (tokentype > T_SLASH) fatald("Token with no precedence in op_precedence:", tokentype); prec = OpPrec[tokentype]; if (prec == 0) fatald("Syntax error, token", tokentype); return (prec); } // prefix_expression: primary // | '*' prefix_expression // | '&' prefix_expression // | '-' prefix_expression // | '++' prefix_expression // | '--' prefix_expression // ; // Parse a prefix expression and return // a sub-tree representing it. 
struct ASTnode *prefix(void) { struct ASTnode *tree; switch (Token.token) { case T_AMPER: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Ensure that it's an identifier if (tree->op != A_IDENT) fatal("& operator must be followed by an identifier"); // Now change the operator to A_ADDR and the type to // a pointer to the original type tree->op = A_ADDR; tree->type = pointer_to(tree->type); break; case T_STAR: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's either another deref or an // identifier if (tree->op != A_IDENT && tree->op != A_DEREF) fatal("* operator must be followed by an identifier or *"); // Prepend an A_DEREF operation to the tree tree = mkastunary(A_DEREF, value_at(tree->type), tree, NULL, 0); break; case T_MINUS: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_NEGATE operation to the tree and // make the child an rvalue. Because chars are unsigned, // also widen this to int so that it's signed tree->rvalue = 1; tree = modify_type(tree, P_INT, 0); tree = mkastunary(A_NEGATE, tree->type, tree, NULL, 0); break; case T_INVERT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_INVERT operation to the tree and // make the child an rvalue. tree->rvalue = 1; tree = mkastunary(A_INVERT, tree->type, tree, NULL, 0); break; case T_LOGNOT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_LOGNOT operation to the tree and // make the child an rvalue. 
tree->rvalue = 1; tree = mkastunary(A_LOGNOT, tree->type, tree, NULL, 0); break; case T_INC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("++ operator must be followed by an identifier"); // Prepend an A_PREINC operation to the tree tree = mkastunary(A_PREINC, tree->type, tree, NULL, 0); break; case T_DEC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("-- operator must be followed by an identifier"); // Prepend an A_PREDEC operation to the tree tree = mkastunary(A_PREDEC, tree->type, tree, NULL, 0); break; default: tree = primary(); } return (tree); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; struct ASTnode *ltemp, *rtemp; int ASTop; int tokentype; // Get the tree on the left. // Fetch the next token at the same time. 
left = prefix(); // If we hit one of several terminating tokens, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON) { left->rvalue = 1; return (left); } // While the precedence of this token is more than that of the // previous token precedence, or it's right associative and // equal to the previous token's precedence while ((op_precedence(tokentype) > ptp) || (rightassoc(tokentype) && op_precedence(tokentype) == ptp)) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Determine the operation to be performed on the sub-trees ASTop = binastop(tokentype); if (ASTop == A_ASSIGN) { // Assignment // Make the right tree into an rvalue right->rvalue = 1; // Ensure the right's type matches the left right = modify_type(right, left->type, 0); if (right == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree. However, switch // left and right around, so that the right expression's // code will be generated before the left expression ltemp = left; left = right; right = ltemp; } else { // We are not doing an assignment, so both trees should be rvalues // Convert both trees into rvalue if they are lvalue trees left->rvalue = 1; right->rvalue = 1; // Ensure the two types are compatible by trying // to modify each tree to match the other's type. ltemp = modify_type(left, right->type, ASTop); rtemp = modify_type(right, left->type, ASTop); if (ltemp == NULL && rtemp == NULL) fatal("Incompatible types in binary expression"); if (ltemp != NULL) left = ltemp; if (rtemp != NULL) right = rtemp; } // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. left = mkastnode(binastop(tokentype), left->type, left, NULL, right, NULL, 0); // Update the details of the current token. 
// If we hit a terminating token, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON) { left->rvalue = 1; return (left); } } // Return the tree we have when the precedence // is the same or lower left->rvalue = 1; return (left); } ================================================ FILE: 41_Local_Var_Init/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number int genlabel(void) { static int id = 1; return (id++); } // Generate the code for an IF statement // and an optional ELSE clause static int genIF(struct ASTnode *n, int looptoplabel, int loopendlabel) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. // When there is no ELSE clause, Lfalse _is_ // the ending label! Lfalse = genlabel(); if (n->right) Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. 
genAST(n->left, Lfalse, NOLABEL, NOLABEL, n->op); genfreeregs(); // Generate the true compound statement genAST(n->mid, NOLABEL, looptoplabel, loopendlabel, n->op); genfreeregs(); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); genfreeregs(); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = genlabel(); Lend = genlabel(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. genAST(n->left, Lend, Lstart, Lend, n->op); genfreeregs(); // Generate the compound statement for the body genAST(n->right, NOLABEL, Lstart, Lend, n->op); genfreeregs(); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Generate the code for a SWITCH statement static int genSWITCH(struct ASTnode *n) { int *caseval, *caselabel; int Ljumptop, Lend; int i, reg, defaultlabel = 0, casecount = 0; struct ASTnode *c; // Create arrays for the case values and associated labels. // Ensure that we have at least one position in each array. caseval = (int *) malloc((n->intvalue + 1) * sizeof(int)); caselabel = (int *) malloc((n->intvalue + 1) * sizeof(int)); // Generate labels for the top of the jump table, and the // end of the switch statement. Set a default label for // the end of the switch, in case we don't have a default. 
Ljumptop = genlabel(); Lend = genlabel(); defaultlabel = Lend; // Output the code to calculate the switch condition reg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, 0); cgjump(Ljumptop); genfreeregs(); // Walk the right-child linked list to // generate the code for each case for (i = 0, c = n->right; c != NULL; i++, c = c->right) { // Get a label for this case. Store it // and the case value in the arrays. // Record if it is the default case. caselabel[i] = genlabel(); caseval[i] = c->intvalue; cglabel(caselabel[i]); if (c->op == A_DEFAULT) defaultlabel = caselabel[i]; else casecount++; // Generate the case code. Pass in the end label for the breaks genAST(c->left, NOLABEL, NOLABEL, Lend, 0); genfreeregs(); } // Ensure the last case jumps past the switch table cgjump(Lend); // Now output the switch table and the end label. cgswitch(reg, casecount, Ljumptop, caselabel, caseval, defaultlabel); cglabel(Lend); return (NOREG); } // Generate the code to copy the arguments of a // function call to its parameters, then call the // function itself. Return the register that holds // the function's return value. static int gen_funccall(struct ASTnode *n) { struct ASTnode *gluetree = n->left; int reg; int numargs = 0; // If there is a list of arguments, walk this list // from the last argument (right-hand child) to the // first while (gluetree) { // Calculate the expression's value reg = genAST(gluetree->right, NOLABEL, NOLABEL, NOLABEL, gluetree->op); // Copy this into the n'th function parameter: size is 1, 2, 3, ... cgcopyarg(reg, gluetree->size); // Keep the first (highest) number of arguments if (numargs == 0) numargs = gluetree->size; genfreeregs(); gluetree = gluetree->left; } // Call the function, clean up the stack (based on numargs), // and return its result return (cgcall(n->sym, numargs)); } // Given an AST, an optional label, and the AST op // of the parent, generate assembly code recursively. // Return the register id with the tree's final value. 
int genAST(struct ASTnode *n, int iflabel, int looptoplabel, int loopendlabel, int parentASTop) { int leftreg, rightreg; // We have some specific AST node handling at the top // so that we don't evaluate the child sub-trees immediately switch (n->op) { case A_IF: return (genIF(n, looptoplabel, loopendlabel)); case A_WHILE: return (genWHILE(n)); case A_SWITCH: return (genSWITCH(n)); case A_FUNCCALL: return (gen_funccall(n)); case A_GLUE: // Do each child statement, and free the // registers after each child if (n->left != NULL) genAST(n->left, iflabel, looptoplabel, loopendlabel, n->op); genfreeregs(); if (n->right != NULL) genAST(n->right, iflabel, looptoplabel, loopendlabel, n->op); genfreeregs(); return (NOREG); case A_FUNCTION: // Generate the function's preamble before the code // in the child sub-tree cgfuncpreamble(n->sym); genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op); cgfuncpostamble(n->sym); return (NOREG); } // General AST node handling below // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op); if (n->right) rightreg = genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdiv(leftreg, rightreg)); case A_AND: return (cgand(leftreg, rightreg)); case A_OR: return (cgor(leftreg, rightreg)); case A_XOR: return (cgxor(leftreg, rightreg)); case A_LSHIFT: return (cgshl(leftreg, rightreg)); case A_RSHIFT: return (cgshr(leftreg, rightreg)); case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, compare registers // and set one to 1 or 0 based on the comparison. 
if (parentASTop == A_IF || parentASTop == A_WHILE) return (cgcompare_and_jump(n->op, leftreg, rightreg, iflabel)); else return (cgcompare_and_set(n->op, leftreg, rightreg)); case A_INTLIT: return (cgloadint(n->intvalue, n->type)); case A_STRLIT: return (cgloadglobstr(n->intvalue)); case A_IDENT: // Load our value if we are an rvalue // or we are being dereferenced if (n->rvalue || parentASTop == A_DEREF) { if (n->sym->class == C_GLOBAL) { return (cgloadglob(n->sym, n->op)); } else { return (cgloadlocal(n->sym, n->op)); } } else return (NOREG); case A_ASSIGN: // Are we assigning to an identifier or through a pointer? switch (n->right->op) { case A_IDENT: if (n->right->sym->class == C_GLOBAL) return (cgstorglob(leftreg, n->right->sym)); else return (cgstorlocal(leftreg, n->right->sym)); case A_DEREF: return (cgstorderef(leftreg, rightreg, n->right->type)); default: fatald("Can't A_ASSIGN in genAST(), op", n->op); } case A_WIDEN: // Widen the child's type to the parent's type return (cgwiden(leftreg, n->left->type, n->type)); case A_RETURN: cgreturn(leftreg, Functionid); return (NOREG); case A_ADDR: return (cgaddress(n->sym)); case A_DEREF: // If we are an rvalue, dereference to get the value we point at, // otherwise leave it for A_ASSIGN to store through the pointer if (n->rvalue) return (cgderef(leftreg, n->left->type)); else return (leftreg); case A_SCALE: // Small optimisation: use shift if the // scale value is a known power of two switch (n->size) { case 2: return (cgshlconst(leftreg, 1)); case 4: return (cgshlconst(leftreg, 2)); case 8: return (cgshlconst(leftreg, 3)); default: // Load a register with the size and // multiply the leftreg by this size rightreg = cgloadint(n->size, P_INT); return (cgmul(leftreg, rightreg)); } case A_POSTINC: case A_POSTDEC: // Load and decrement the variable's value into a register // and post increment/decrement it if (n->sym->class == C_GLOBAL) return (cgloadglob(n->sym, n->op)); else return (cgloadlocal(n->sym, n->op)); case 
A_PREINC: case A_PREDEC: // Load and decrement the variable's value into a register // and pre increment/decrement it if (n->left->sym->class == C_GLOBAL) return (cgloadglob(n->left->sym, n->op)); else return (cgloadlocal(n->left->sym, n->op)); case A_NEGATE: return (cgnegate(leftreg)); case A_INVERT: return (cginvert(leftreg)); case A_LOGNOT: return (cglognot(leftreg)); case A_TOBOOL: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, set the register // to 0 or 1 based on it's zeroeness or non-zeroeness return (cgboolean(leftreg, parentASTop, iflabel)); case A_BREAK: cgjump(loopendlabel); return (NOREG); case A_CONTINUE: cgjump(looptoplabel); return (NOREG); default: fatald("Unknown AST operator", n->op); } return (NOREG); // Keep -Wall happy } void genpreamble() { cgpreamble(); } void genpostamble() { cgpostamble(); } void genfreeregs() { freeall_registers(); } void genglobsym(struct symtable *node) { cgglobsym(node); } int genglobstr(char *strvalue) { int l = genlabel(); cgglobstr(l, strvalue); return (l); } int genprimsize(int type) { return (cgprimsize(type)); } int genalign(int type, int offset, int direction) { return (cgalign(type, offset, direction)); } ================================================ FILE: 41_Local_Var_Init/include/fcntl.h ================================================ #ifndef _FCNTL_H_ # define _FCNTL_H_ #define O_RDONLY 00 #define O_WRONLY 01 #define O_RDWR 02 int open(char *pathname, int flags); #endif // _FCNTL_H_ ================================================ FILE: 41_Local_Var_Init/include/stddef.h ================================================ #ifndef _STDDEF_H_ # define _STDDEF_H_ typedef long size_t; #endif //_STDDEF_H_ ================================================ FILE: 41_Local_Var_Init/include/stdio.h ================================================ #ifndef _STDIO_H_ # define _STDIO_H_ #include // This FILE definition will do for now typedef char * FILE; FILE 
*fopen(char *pathname, char *mode); size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream); size_t fwrite(void *ptr, size_t size, size_t nmemb, FILE *stream); int fclose(FILE *stream); int printf(char *format); int fprintf(FILE *stream, char *format); #endif // _STDIO_H_ ================================================ FILE: 41_Local_Var_Init/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Given a string with a '.' and at least a 1-character suffix // after the '.', change the suffix to be the given character. // Return the new string or NULL if the original string could // not be modified char *alter_suffix(char *str, char suffix) { char *posn; char *newstr; // Clone the string if ((newstr = strdup(str)) == NULL) return (NULL); // Find the '.' if ((posn = strrchr(newstr, '.')) == NULL) return (NULL); // Ensure there is a suffix posn++; if (*posn == '\0') return (NULL); // Change the suffix and NUL-terminate the string *posn++ = suffix; *posn = '\0'; return (newstr); } // Given an input filename, compile that file // down to assembly code. 
Return the new file's name static char *do_compile(char *filename) { char cmd[TEXTLEN]; // Change the input file's suffix to .s Outfilename = alter_suffix(filename, 's'); if (Outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .c on the end\n", filename); exit(1); } // Generate the pre-processor command snprintf(cmd, TEXTLEN, "%s %s %s", CPPCMD, INCDIR, filename); // Open up the pre-processor pipe if ((Infile = popen(cmd, "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", filename, strerror(errno)); exit(1); } Infilename = filename; // Create the output file if ((Outfile = fopen(Outfilename, "w")) == NULL) { fprintf(stderr, "Unable to create %s: %s\n", Outfilename, strerror(errno)); exit(1); } Line = 1; // Reset the scanner Putback = '\n'; clear_symtable(); // Clear the symbol table if (O_verbose) printf("compiling %s\n", filename); scan(&Token); // Get the first token from the input genpreamble(); // Output the preamble global_declarations(); // Parse the global declarations genpostamble(); // Output the postamble fclose(Outfile); // Close the output file return (Outfilename); } // Given an input filename, assemble that file // down to object code. Return the object filename char *do_assemble(char *filename) { char cmd[TEXTLEN]; int err; char *outfilename = alter_suffix(filename, 'o'); if (outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .s on the end\n", filename); exit(1); } // Build the assembly command and run it snprintf(cmd, TEXTLEN, "%s %s %s", ASCMD, outfilename, filename); if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Assembly of %s failed\n", filename); exit(1); } return (outfilename); } // Given a list of object files and an output filename, // link all of the object filenames together. 
void do_link(char *outfilename, char *objlist[]) { int cnt, size = TEXTLEN; char cmd[TEXTLEN], *cptr; int err; // Start with the linker command and the output file cptr = cmd; cnt = snprintf(cptr, size, "%s %s ", LDCMD, outfilename); cptr += cnt; size -= cnt; // Now append each object file while (*objlist != NULL) { cnt = snprintf(cptr, size, "%s ", *objlist); cptr += cnt; size -= cnt; objlist++; } if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Linking failed\n"); exit(1); } } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s [-vcST] [-o outfile] file [file ...]\n", prog); fprintf(stderr, " -v give verbose output of the compilation stages\n"); fprintf(stderr, " -c generate object files but don't link them\n"); fprintf(stderr, " -S generate assembly files but don't link them\n"); fprintf(stderr, " -T dump the AST trees for each input file\n"); fprintf(stderr, " -o outfile, produce the outfile executable file\n"); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. 
enum { MAXOBJ = 100 }; int main(int argc, char *argv[]) { char *outfilename = AOUT; char *asmfile, *objfile; char *objlist[MAXOBJ]; int i, objcnt = 0; // Initialise our variables O_dumpAST = 0; O_keepasm = 0; O_assemble = 0; O_verbose = 0; O_dolink = 1; // Scan for command-line options for (i = 1; i < argc; i++) { // No leading '-', stop scanning for options if (*argv[i] != '-') break; // For each option in this argument for (int j = 1; (*argv[i] == '-') && argv[i][j]; j++) { switch (argv[i][j]) { case 'o': outfilename = argv[++i]; // Save & skip to next argument break; case 'T': O_dumpAST = 1; break; case 'c': O_assemble = 1; O_keepasm = 0; O_dolink = 0; break; case 'S': O_keepasm = 1; O_assemble = 0; O_dolink = 0; break; case 'v': O_verbose = 1; break; default: usage(argv[0]); } } } // Ensure we have at lease one input file argument if (i >= argc) usage(argv[0]); // Work on each input file in turn while (i < argc) { asmfile = do_compile(argv[i]); // Compile the source file if (O_dolink || O_assemble) { objfile = do_assemble(asmfile); // Assemble it to object forma if (objcnt == (MAXOBJ - 2)) { fprintf(stderr, "Too many object files for the compiler to handle\n"); exit(1); } objlist[objcnt++] = objfile; // Add the object file's name objlist[objcnt] = NULL; // to the list of object files } if (!O_keepasm) // Remove the assembly file if unlink(asmfile); // we don't need to keep it i++; } // Now link all the object files together if (O_dolink) { do_link(outfilename, objlist); // If we don't need to keep the object // files, then remove them if (!O_assemble) { for (i = 0; objlist[i] != NULL; i++) unlink(objlist[i]); } } return (0); } ================================================ FILE: 41_Local_Var_Init/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" #include // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current token is t, // and fetch the next token. 
Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Match a comma and fetch the next token void comma(void) { match(T_COMMA, "comma"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d of %s\n", s, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d of %s\n", s1, s2, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d of %s\n", s, d, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d of %s\n", s, c, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } ================================================ FILE: 41_Local_Var_Init/scan.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { char *p; p = strchr(s, c); return (p ? p - s : -1); } // Get the next character from the input file. 
static int next(void) { int c, l; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return (c); } c = fgetc(Infile); // Read from input file while (c == '#') { // We've hit a pre-processor statement scan(&Token); // Get the line number into l if (Token.token != T_INTLIT) fatals("Expecting pre-processor line number, got:", Text); l = Token.intvalue; scan(&Token); // Get the filename in Text if (Token.token != T_STRLIT) fatals("Expecting pre-processor file name, got:", Text); if (Text[0] != '<') { // If this is a real filename if (strcmp(Text, Infilename)) // and not the one we have now Infilename = strdup(Text); // save it. Then update the line num Line = l; } while ((c = fgetc(Infile)) != '\n'); // Skip to the end of the line c = fgetc(Infile); // and get the next character } if ('\n' == c) Line++; // Increment line count return (c); } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. static int skip(void) { int c; c = next(); while (' ' == c || '\t' == c || '\n' == c || '\r' == c || '\f' == c) { c = next(); } return (c); } // Return the next character from a character // or string literal static int scanch(void) { int c; // Get the next input character and interpret // metacharacters that start with a backslash c = next(); if (c == '\\') { switch (c = next()) { case 'a': return '\a'; case 'b': return '\b'; case 'f': return '\f'; case 'n': return '\n'; case 'r': return '\r'; case 't': return '\t'; case 'v': return '\v'; case '\\': return '\\'; case '"': return '"'; case '\'': return '\''; default: fatalc("unknown escape sequence", c); } } return (c); // Just an ordinary old character! } // Scan and return an integer literal // value from the input file. 
static int scanint(int c) { int k, val = 0; // Convert each character into an int value while ((k = chrpos("0123456789", c)) >= 0) { val = val * 10 + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan in a string literal from the input file, // and store it in buf[]. Return the length of // the string. static int scanstr(char *buf) { int i, c; // Loop while we have enough buffer space for (i = 0; i < TEXTLEN - 1; i++) { // Get the next char and append to buf // Return when we hit the ending double quote if ((c = scanch()) == '"') { buf[i] = 0; return (i); } buf[i] = c; } // Ran out of buf[] space fatal("String literal too long"); return (0); } // Scan an identifier from the input file and // store it in buf[]. Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { fatal("Identifier too long"); } else if (i < lim - 1) { buf[i++] = c; } c = next(); } // We hit a non-valid character, put it back. // NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. 
static int keyword(char *s) { switch (*s) { case 'b': if (!strcmp(s, "break")) return (T_BREAK); break; case 'c': if (!strcmp(s, "case")) return (T_CASE); if (!strcmp(s, "char")) return (T_CHAR); if (!strcmp(s, "continue")) return (T_CONTINUE); break; case 'd': if (!strcmp(s, "default")) return (T_DEFAULT); break; case 'e': if (!strcmp(s, "else")) return (T_ELSE); if (!strcmp(s, "enum")) return (T_ENUM); if (!strcmp(s, "extern")) return (T_EXTERN); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'l': if (!strcmp(s, "long")) return (T_LONG); break; case 'r': if (!strcmp(s, "return")) return (T_RETURN); break; case 's': if (!strcmp(s, "struct")) return (T_STRUCT); if (!strcmp(s, "switch")) return (T_SWITCH); break; case 't': if (!strcmp(s, "typedef")) return (T_TYPEDEF); break; case 'u': if (!strcmp(s, "union")) return (T_UNION); break; case 'v': if (!strcmp(s, "void")) return (T_VOID); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; } return (0); } // A pointer to a rejected token static struct token *Rejtoken = NULL; // Reject the token that we just scanned void reject_token(struct token *t) { if (Rejtoken != NULL) fatal("Can't reject token twice"); Rejtoken = t; } // List of token strings, for debugging purposes char *Tstring[] = { "EOF", "=", "||", "&&", "|", "^", "&", "==", "!=", ",", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", "default", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":" }; // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. 
int scan(struct token *t) { int c, tokentype; // If we have any rejected token, return it if (Rejtoken != NULL) { t = Rejtoken; Rejtoken = NULL; return (1); } // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': if ((c = next()) == '+') { t->token = T_INC; } else { putback(c); t->token = T_PLUS; } break; case '-': if ((c = next()) == '-') { t->token = T_DEC; } else if (c == '>') { t->token = T_ARROW; } else { putback(c); t->token = T_MINUS; } break; case '*': t->token = T_STAR; break; case '/': t->token = T_SLASH; break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case '[': t->token = T_LBRACKET; break; case ']': t->token = T_RBRACKET; break; case '~': t->token = T_INVERT; break; case '^': t->token = T_XOR; break; case ',': t->token = T_COMMA; break; case '.': t->token = T_DOT; break; case ':': t->token = T_COLON; break; case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { putback(c); t->token = T_LOGNOT; } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else if (c == '<') { t->token = T_LSHIFT; } else { putback(c); t->token = T_LT; } break; case '>': if ((c = next()) == '=') { t->token = T_GE; } else if (c == '>') { t->token = T_RSHIFT; } else { putback(c); t->token = T_GT; } break; case '&': if ((c = next()) == '&') { t->token = T_LOGAND; } else { putback(c); t->token = T_AMPER; } break; case '|': if ((c = next()) == '|') { t->token = T_LOGOR; } else { putback(c); t->token = T_OR; } break; case '\'': // If it's a quote, scan in the // literal character value and // the trailing quote t->intvalue = scanch(); t->token = T_INTLIT; if (next() != '\'') fatal("Expected '\\'' at end of char literal"); 
break; case '"': // Scan in a literal string scanstr(Text); t->token = T_STRLIT; break; default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if ((tokentype = keyword(Text)) != 0) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token t->tokstr = Tstring[t->token]; return (1); } ================================================ FILE: 41_Local_Var_Init/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // Prototypes static struct ASTnode *single_statement(void); // compound_statement: // empty, i.e. no statement // | statement // | statement statements // ; // // statement: declaration // | expression_statement // | function_call // | if_statement // | while_statement // | for_statement // | return_statement // ; // if_statement: if_head // | if_head 'else' statement // ; // // if_head: 'if' '(' true_false_expression ')' statement ; // // Parse an IF statement including any // optional ELSE clause and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. 
condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); rparen(); // Get the AST for the statement trueAST = single_statement(); // If we have an 'else', skip it // and get the AST for the statement if (Token.token == T_ELSE) { scan(&Token); falseAST = single_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, P_NONE, condAST, trueAST, falseAST, NULL, 0)); } // while_statement: 'while' '(' true_false_expression ')' statement ; // // Parse a WHILE statement and return its AST static struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); rparen(); // Get the AST for the statement. // Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Build and return the AST for this statement return (mkastnode(A_WHILE, P_NONE, condAST, NULL, bodyAST, NULL, 0)); } // for_statement: 'for' '(' expression_list ';' // true_false_expression ';' // expression_list ')' statement ; // // Parse a FOR statement and return its AST static struct ASTnode *for_statement(void) { struct ASTnode *condAST, *bodyAST; struct ASTnode *preopAST, *postopAST; struct ASTnode *tree; // Ensure we have 'for' '(' match(T_FOR, "for"); lparen(); // Get the pre_op expression and the ';' preopAST = expression_list(T_SEMI); semi(); // Get the condition and the ';'. // Force a non-comparison to be boolean // the tree's operation is a comparison. 
condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); semi(); // Get the post_op expression and the ')' postopAST = expression_list(T_RPAREN); rparen(); // Get the statement which is the body // Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Glue the statement and the postop tree tree = mkastnode(A_GLUE, P_NONE, bodyAST, NULL, postopAST, NULL, 0); // Make a WHILE loop with the condition and this new body tree = mkastnode(A_WHILE, P_NONE, condAST, NULL, tree, NULL, 0); // And glue the preop tree to the A_WHILE tree return (mkastnode(A_GLUE, P_NONE, preopAST, NULL, tree, NULL, 0)); } // return_statement: 'return' '(' expression ')' ; // // Parse a return statement and return its AST static struct ASTnode *return_statement(void) { struct ASTnode *tree; // Can't return a value if function returns P_VOID if (Functionid->type == P_VOID) fatal("Can't return from a void function"); // Ensure we have 'return' '(' match(T_RETURN, "return"); lparen(); // Parse the following expression tree = binexpr(0); // Ensure this is compatible with the function's type tree = modify_type(tree, Functionid->type, 0); if (tree == NULL) fatal("Incompatible type to return"); // Add on the A_RETURN node tree = mkastunary(A_RETURN, P_NONE, tree, NULL, 0); // Get the ')' and ';' rparen(); semi(); return (tree); } // break_statement: 'break' ; // // Parse a break statement and return its AST static struct ASTnode *break_statement(void) { if (Looplevel == 0 && Switchlevel == 0) fatal("no loop or switch to break out from"); scan(&Token); semi(); return (mkastleaf(A_BREAK, 0, NULL, 0)); } // continue_statement: 'continue' ; // // Parse a continue statement and return its AST static struct ASTnode *continue_statement(void) { if (Looplevel == 0) fatal("no loop to continue to"); scan(&Token); semi(); return (mkastleaf(A_CONTINUE, 0, NULL, 0)); } // Parse a switch statement and 
return its AST static struct ASTnode *switch_statement(void) { struct ASTnode *left, *n, *c, *casetree= NULL, *casetail; int inloop=1, casecount=0; int seendefault=0; int ASTop, casevalue; // Skip the 'switch' and '(' scan(&Token); lparen(); // Get the switch expression, the ')' and the '{' left= binexpr(0); rparen(); lbrace(); // Ensure that this is of int type if (!inttype(left->type)) fatal("Switch expression is not of integer type"); // Build an A_SWITCH subtree with the expression as // the child n= mkastunary(A_SWITCH, 0, left, NULL, 0); // Now parse the cases Switchlevel++; while (inloop) { switch(Token.token) { // Leave the loop when we hit a '}' case T_RBRACE: if (casecount==0) fatal("No cases in switch"); inloop=0; break; case T_CASE: case T_DEFAULT: // Ensure this isn't after a previous 'default' if (seendefault) fatal("case or default after existing default"); // Set the AST operation. Scan the case value if required if (Token.token==T_DEFAULT) { ASTop= A_DEFAULT; seendefault= 1; scan(&Token); } else { ASTop= A_CASE; scan(&Token); left= binexpr(0); // Ensure the case value is an integer literal if (left->op != A_INTLIT) fatal("Expecting integer literal for case value"); casevalue= left->intvalue; // Walk the list of existing case values to ensure // that there isn't a duplicate case value for (c= casetree; c != NULL; c= c -> right) if (casevalue == c->intvalue) fatal("Duplicate case value"); } // Scan the ':' and get the compound expression match(T_COLON, ":"); left= compound_statement(1); casecount++; // Build a sub-tree with the compound statement as the left child // and link it in to the growing A_CASE tree if (casetree==NULL) { casetree= casetail= mkastunary(ASTop, 0, left, NULL, casevalue); } else { casetail->right= mkastunary(ASTop, 0, left, NULL, casevalue); casetail= casetail->right; } break; default: fatals("Unexpected token in switch", Token.tokstr); } } Switchlevel--; // We have a sub-tree with the cases and any default. 
Put the // case count into the A_SWITCH node and attach the case tree. n->intvalue= casecount; n->right= casetree; rbrace(); return(n); } // Parse a single statement and return its AST. static struct ASTnode *single_statement(void) { struct ASTnode *stmt; struct symtable *ctype; switch (Token.token) { case T_LBRACE: // We have a '{', so this is a compound statement lbrace(); stmt = compound_statement(0); rbrace(); return(stmt); case T_IDENT: // We have to see if the identifier matches a typedef. // If not, treat it as an expression. // Otherwise, fall down to the parse_type() call. if (findtypedef(Text) == NULL) { stmt= binexpr(0); semi(); return(stmt); } case T_CHAR: case T_INT: case T_LONG: case T_STRUCT: case T_UNION: case T_ENUM: case T_TYPEDEF: // The beginning of a variable declaration list. declaration_list(&ctype, C_LOCAL, T_SEMI, T_EOF, &stmt); semi(); return (stmt); // Any assignments from the declarations case T_IF: return (if_statement()); case T_WHILE: return (while_statement()); case T_FOR: return (for_statement()); case T_RETURN: return (return_statement()); case T_BREAK: return (break_statement()); case T_CONTINUE: return (continue_statement()); case T_SWITCH: return (switch_statement()); default: // For now, see if this is an expression. // This catches assignment statements. stmt= binexpr(0); semi(); return(stmt); } return (NULL); // Keep -Wall happy } // Parse a compound statement // and return its AST. If inswitch is true, // we look for a '}', 'case' or 'default' token // to end the parsing. Otherwise, look for // just a '}' to end the parsing. 
struct ASTnode *compound_statement(int inswitch) { struct ASTnode *left = NULL; struct ASTnode *tree; while (1) { // Parse a single statement tree = single_statement(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, P_NONE, left, NULL, tree, NULL, 0); } // Leave if we've hit the end token if (Token.token == T_RBRACE) return(left); if (inswitch && (Token.token == T_CASE || Token.token == T_DEFAULT)) return(left); } } ================================================ FILE: 41_Local_Var_Init/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 // Append a node to the singly-linked list pointed to by head or tail void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node) { // Check for valid pointers if (head == NULL || tail == NULL || node == NULL) fatal("Either head, tail or node is NULL in appendsym"); // Append to the list if (*tail) { (*tail)->next = node; *tail = node; } else *head = *tail = node; node->next = NULL; } // Create a symbol node to be added to a symbol table list. // Set up the node's: // + type: char, int etc. // + ctype: composite type pointer for struct/union // + structural type: var, function, array etc. 
// + size: number of elements, or endlabel: end label for a function // + posn: Position information for local symbols // Return a pointer to the new node struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn) { // Get a new node struct symtable *node = (struct symtable *) malloc(sizeof(struct symtable)); if (node == NULL) fatal("Unable to malloc a symbol table node in newsym"); // Fill in the values node->name = strdup(name); node->type = type; node->ctype = ctype; node->stype = stype; node->class = class; node->nelems = nelems; // For pointers and integer types, set the size // of the symbol. structs and union declarations // manually set this up themselves. if (ptrtype(type) || inttype(type)) node->size = nelems * typesize(type, ctype); node->posn = posn; node->next = NULL; node->member = NULL; node->initlist = NULL; return (node); } // Add a symbol to the global symbol list struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn) { struct symtable *sym = newsym(name, type, ctype, stype, class, nelems, posn); // For structs and unions, copy the size from the type node if (type== P_STRUCT || type== P_UNION) sym->size = ctype->size; appendsym(&Globhead, &Globtail, sym); return (sym); } // Add a symbol to the local symbol list struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int nelems) { struct symtable *sym = newsym(name, type, ctype, stype, C_LOCAL, nelems, 0); // For structs and unions, copy the size from the type node if (type== P_STRUCT || type== P_UNION) sym->size = ctype->size; appendsym(&Loclhead, &Locltail, sym); return (sym); } // Add a symbol to the parameter list struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype) { struct symtable *sym = newsym(name, type, ctype, stype, C_PARAM, 1, 0); appendsym(&Parmhead, &Parmtail, sym); return (sym); } // Add a symbol to the temporary 
member list struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int nelems) { struct symtable *sym = newsym(name, type, ctype, stype, C_MEMBER, nelems, 0); // For structs and unions, copy the size from the type node if (type== P_STRUCT || type== P_UNION) sym->size = ctype->size; appendsym(&Membhead, &Membtail, sym); return (sym); } // Add a struct to the struct list struct symtable *addstruct(char *name) { struct symtable *sym = newsym(name, P_STRUCT, NULL, 0, C_STRUCT, 0, 0); appendsym(&Structhead, &Structtail, sym); return (sym); } // Add a struct to the union list struct symtable *addunion(char *name) { struct symtable *sym = newsym(name, P_UNION, NULL, 0, C_UNION, 0, 0); appendsym(&Unionhead, &Uniontail, sym); return (sym); } // Add an enum type or value to the enum list. // Class is C_ENUMTYPE or C_ENUMVAL. // Use posn to store the int value. struct symtable *addenum(char *name, int class, int value) { struct symtable *sym = newsym(name, P_INT, NULL, 0, class, 0, value); appendsym(&Enumhead, &Enumtail, sym); return (sym); } // Add a typedef to the typedef list struct symtable *addtypedef(char *name, int type, struct symtable *ctype) { struct symtable *sym = newsym(name, type, ctype, 0, C_TYPEDEF, 0, 0); appendsym(&Typehead, &Typetail, sym); return (sym); } // Search for a symbol in a specific list. // Return a pointer to the found node or NULL if not found. // If class is not zero, also match on the given class static struct symtable *findsyminlist(char *s, struct symtable *list, int class) { for (; list != NULL; list = list->next) if ((list->name != NULL) && !strcmp(s, list->name)) if (class==0 || class== list->class) return (list); return (NULL); } // Determine if the symbol s is in the global symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findglob(char *s) { return (findsyminlist(s, Globhead, 0)); } // Determine if the symbol s is in the local symbol table. 
// Return a pointer to the found node or NULL if not found. struct symtable *findlocl(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } return (findsyminlist(s, Loclhead, 0)); } // Determine if the symbol s is in the symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findsymbol(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } // Otherwise, try the local and global symbol lists node = findsyminlist(s, Loclhead, 0); if (node) return (node); return (findsyminlist(s, Globhead, 0)); } // Find a member in the member list // Return a pointer to the found node or NULL if not found. struct symtable *findmember(char *s) { return (findsyminlist(s, Membhead, 0)); } // Find a struct in the struct list // Return a pointer to the found node or NULL if not found. struct symtable *findstruct(char *s) { return (findsyminlist(s, Structhead, 0)); } // Find a struct in the union list // Return a pointer to the found node or NULL if not found. struct symtable *findunion(char *s) { return (findsyminlist(s, Unionhead, 0)); } // Find an enum type in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumtype(char *s) { return (findsyminlist(s, Enumhead, C_ENUMTYPE)); } // Find an enum value in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumval(char *s) { return (findsyminlist(s, Enumhead, C_ENUMVAL)); } // Find a type in the tyedef list // Return a pointer to the found node or NULL if not found. 
struct symtable *findtypedef(char *s) { return (findsyminlist(s, Typehead, 0)); } // Reset the contents of the symbol table void clear_symtable(void) { Globhead = Globtail = NULL; Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Membhead = Membtail = NULL; Structhead = Structtail = NULL; Unionhead = Uniontail = NULL; Enumhead = Enumtail = NULL; Typehead = Typetail = NULL; } // Clear all the entries in the local symbol table void freeloclsyms(void) { Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Functionid = NULL; } ================================================ FILE: 41_Local_Var_Init/tests/err.input031.c ================================================ Expecting a primary expression, got token:+ on line 5 of input031.c ================================================ FILE: 41_Local_Var_Init/tests/err.input032.c ================================================ Unknown variable:cow on line 4 of input032.c ================================================ FILE: 41_Local_Var_Init/tests/err.input033.c ================================================ Incompatible type to return on line 4 of input033.c ================================================ FILE: 41_Local_Var_Init/tests/err.input034.c ================================================ For now, declaration of non-global arrays is not implemented on line 4 of input034.c ================================================ FILE: 41_Local_Var_Init/tests/err.input035.c ================================================ Duplicate local variable declaration:a on line 4 of input035.c ================================================ FILE: 41_Local_Var_Init/tests/err.input036.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input036.c ================================================ FILE: 41_Local_Var_Init/tests/err.input037.c ================================================ Expected:comma on line 3 of input037.c 
================================================ FILE: 41_Local_Var_Init/tests/err.input038.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input038.c ================================================ FILE: 41_Local_Var_Init/tests/err.input039.c ================================================ No statements in function with non-void type on line 4 of input039.c ================================================ FILE: 41_Local_Var_Init/tests/err.input040.c ================================================ No return for function with non-void type on line 4 of input040.c ================================================ FILE: 41_Local_Var_Init/tests/err.input041.c ================================================ Can't return from a void function on line 3 of input041.c ================================================ FILE: 41_Local_Var_Init/tests/err.input042.c ================================================ Undeclared function:fred on line 3 of input042.c ================================================ FILE: 41_Local_Var_Init/tests/err.input043.c ================================================ Undeclared array:b on line 3 of input043.c ================================================ FILE: 41_Local_Var_Init/tests/err.input044.c ================================================ Unknown variable:z on line 3 of input044.c ================================================ FILE: 41_Local_Var_Init/tests/err.input045.c ================================================ & operator must be followed by an identifier on line 3 of input045.c ================================================ FILE: 41_Local_Var_Init/tests/err.input046.c ================================================ * operator must be followed by an identifier or * on line 3 of input046.c ================================================ FILE: 41_Local_Var_Init/tests/err.input047.c ================================================ ++ operator must be followed 
by an identifier on line 3 of input047.c ================================================ FILE: 41_Local_Var_Init/tests/err.input048.c ================================================ -- operator must be followed by an identifier on line 3 of input048.c ================================================ FILE: 41_Local_Var_Init/tests/err.input049.c ================================================ Incompatible expression in assignment on line 6 of input049.c ================================================ FILE: 41_Local_Var_Init/tests/err.input050.c ================================================ Incompatible types in binary expression on line 6 of input050.c ================================================ FILE: 41_Local_Var_Init/tests/err.input051.c ================================================ Expected '\'' at end of char literal on line 4 of input051.c ================================================ FILE: 41_Local_Var_Init/tests/err.input052.c ================================================ Unrecognised character:$ on line 5 of input052.c ================================================ FILE: 41_Local_Var_Init/tests/err.input056.c ================================================ unknown struct/union type:var1 on line 2 of input056.c ================================================ FILE: 41_Local_Var_Init/tests/err.input057.c ================================================ previously defined struct/union:fred on line 2 of input057.c ================================================ FILE: 41_Local_Var_Init/tests/err.input059.c ================================================ Undeclared variable:y on line 3 of input059.c ================================================ FILE: 41_Local_Var_Init/tests/err.input060.c ================================================ Undeclared variable:x on line 3 of input060.c ================================================ FILE: 41_Local_Var_Init/tests/err.input061.c ================================================ Undeclared 
variable:x on line 3 of input061.c ================================================ FILE: 41_Local_Var_Init/tests/err.input064.c ================================================ undeclared enum type::fred on line 1 of input064.c ================================================ FILE: 41_Local_Var_Init/tests/err.input065.c ================================================ enum type redeclared::fred on line 2 of input065.c ================================================ FILE: 41_Local_Var_Init/tests/err.input066.c ================================================ enum value redeclared::z on line 2 of input066.c ================================================ FILE: 41_Local_Var_Init/tests/err.input068.c ================================================ redefinition of typedef:FOO on line 2 of input068.c ================================================ FILE: 41_Local_Var_Init/tests/err.input069.c ================================================ unknown type:FLOO on line 2 of input069.c ================================================ FILE: 41_Local_Var_Init/tests/err.input072.c ================================================ no loop or switch to break out from on line 1 of input072.c ================================================ FILE: 41_Local_Var_Init/tests/err.input073.c ================================================ no loop to continue to on line 1 of input073.c ================================================ FILE: 41_Local_Var_Init/tests/err.input075.c ================================================ Unexpected token in switch:if on line 4 of input075.c ================================================ FILE: 41_Local_Var_Init/tests/err.input076.c ================================================ No cases in switch on line 3 of input076.c ================================================ FILE: 41_Local_Var_Init/tests/err.input077.c ================================================ case or default after existing default on line 6 of input077.c 
================================================ FILE: 41_Local_Var_Init/tests/err.input078.c ================================================ case or default after existing default on line 6 of input078.c ================================================ FILE: 41_Local_Var_Init/tests/err.input079.c ================================================ Duplicate case value on line 6 of input079.c ================================================ FILE: 41_Local_Var_Init/tests/err.input085.c ================================================ Bad type in parameter list on line 1 of input085.c ================================================ FILE: 41_Local_Var_Init/tests/err.input086.c ================================================ Function definition not at global level on line 3 of input086.c ================================================ FILE: 41_Local_Var_Init/tests/err.input087.c ================================================ Bad type in member list on line 4 of input087.c ================================================ FILE: 41_Local_Var_Init/tests/err.input092.c ================================================ Integer literal value too big for char type on line 1 of input092.c ================================================ FILE: 41_Local_Var_Init/tests/err.input093.c ================================================ Expecting an integer literal value on line 1 of input093.c ================================================ FILE: 41_Local_Var_Init/tests/err.input094.c ================================================ Type mismatch: integer literal vs. 
variable on line 1 of input094.c ================================================ FILE: 41_Local_Var_Init/tests/err.input095.c ================================================ Variable can not be initialised:x on line 1 of input095.c ================================================ FILE: 41_Local_Var_Init/tests/err.input096.c ================================================ Array size is illegal:0 on line 1 of input096.c ================================================ FILE: 41_Local_Var_Init/tests/err.input097.c ================================================ For now, declaration of non-global arrays is not implemented on line 2 of input097.c ================================================ FILE: 41_Local_Var_Init/tests/err.input098.c ================================================ Too many values in initialisation list on line 1 of input098.c ================================================ FILE: 41_Local_Var_Init/tests/input001.c ================================================ int printf(char *fmt); void main() { printf("%d\n", 12 * 3); printf("%d\n", 18 - 2 * 4); printf("%d\n", 1 + 2 + 9 - 5/2 + 3*5); } ================================================ FILE: 41_Local_Var_Init/tests/input002.c ================================================ int printf(char *fmt); void main() { int fred; int jim; fred= 5; jim= 12; printf("%d\n", fred + jim); } ================================================ FILE: 41_Local_Var_Init/tests/input003.c ================================================ int printf(char *fmt); void main() { int x; x= 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); } ================================================ FILE: 41_Local_Var_Init/tests/input004.c ================================================ int printf(char *fmt); void main() { int x; x= 7 < 9; printf("%d\n", x); x= 7 <= 9; printf("%d\n", x); x= 7 != 9; printf("%d\n", x); x= 7 == 7; printf("%d\n", x); 
x= 7 >= 7; printf("%d\n", x); x= 7 <= 7; printf("%d\n", x); x= 9 > 7; printf("%d\n", x); x= 9 >= 7; printf("%d\n", x); x= 9 != 7; printf("%d\n", x); } ================================================ FILE: 41_Local_Var_Init/tests/input005.c ================================================ int printf(char *fmt); void main() { int i; int j; i=6; j=12; if (i < j) { printf("%d\n", i); } else { printf("%d\n", j); } } ================================================ FILE: 41_Local_Var_Init/tests/input006.c ================================================ int printf(char *fmt); void main() { int i; i=1; while (i <= 10) { printf("%d\n", i); i= i + 1; } } ================================================ FILE: 41_Local_Var_Init/tests/input007.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 41_Local_Var_Init/tests/input008.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 41_Local_Var_Init/tests/input009.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { printf("%d\n", 2 * b - a); } } ================================================ FILE: 41_Local_Var_Init/tests/input010.c ================================================ int printf(char *fmt); void main() { int i; char j; j= 20; printf("%d\n", j); i= 10; printf("%d\n", i); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 2; j= j + 1) { printf("%d\n", j); } } ================================================ FILE: 41_Local_Var_Init/tests/input011.c ================================================ int printf(char *fmt); int main() { int i; char j; 
long k; i= 10; printf("%d\n", i); j= 20; printf("%d\n", j); k= 30; printf("%d\n", k); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 4; j= j + 1) { printf("%d\n", j); } for (k= 1; k <= 5; k= k + 1) { printf("%d\n", k); } return(i); printf("%d\n", 12345); return(3); } ================================================ FILE: 41_Local_Var_Init/tests/input012.c ================================================ int printf(char *fmt); int fred() { return(5); } void main() { int x; x= fred(2); printf("%d\n", x); } ================================================ FILE: 41_Local_Var_Init/tests/input013.c ================================================ int printf(char *fmt); int fred() { return(56); } void main() { int dummy; int result; dummy= printf("%d\n", 23); result= fred(10); dummy= printf("%d\n", result); } ================================================ FILE: 41_Local_Var_Init/tests/input014.c ================================================ int printf(char *fmt); int fred() { return(20); } int main() { int result; printf("%d\n", 10); result= fred(15); printf("%d\n", result); printf("%d\n", fred(15)+10); return(0); } ================================================ FILE: 41_Local_Var_Init/tests/input015.c ================================================ int printf(char *fmt); int main() { char a; char *b; char c; int d; int *e; int f; a= 18; printf("%d\n", a); b= &a; c= *b; printf("%d\n", c); d= 12; printf("%d\n", d); e= &d; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 41_Local_Var_Init/tests/input016.c ================================================ int printf(char *fmt); int c; int d; int *e; int f; int main() { c= 12; d=18; printf("%d\n", c); e= &c + 1; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 41_Local_Var_Init/tests/input017.c ================================================ int printf(char *fmt); int main() { char a; char *b; int d; int 
*e; b= &a; *b= 19; printf("%d\n", a); e= &d; *e= 12; printf("%d\n", d); return(0); } ================================================ FILE: 41_Local_Var_Init/tests/input018.c ================================================ int printf(char *fmt); int main() { int a; int b; a= b= 34; printf("%d\n", a); printf("%d\n", b); return(0); } ================================================ FILE: 41_Local_Var_Init/tests/input018a.c ================================================ int printf(char *fmt); int a; int *b; char c; char *d; int main() { b= &a; *b= 15; printf("%d\n", a); d= &c; *d= 16; printf("%d\n", c); return(0); } ================================================ FILE: 41_Local_Var_Init/tests/input019.c ================================================ int printf(char *fmt); int a; int b; int c; int d; int e; int main() { a= 2; b= 4; c= 3; d= 2; e= (a+b) * (c+d); printf("%d\n", e); return(0); } ================================================ FILE: 41_Local_Var_Init/tests/input020.c ================================================ int printf(char *fmt); int a; int b[25]; int main() { b[3]= 12; a= b[3]; printf("%d\n", a); return(0); } ================================================ FILE: 41_Local_Var_Init/tests/input021.c ================================================ int printf(char *fmt); char c; char *str; int main() { c= '\n'; printf("%d\n", c); for (str= "Hello world\n"; *str != 0; str= str + 1) { printf("%c", *str); } return(0); } ================================================ FILE: 41_Local_Var_Init/tests/input022.c ================================================ int printf(char *fmt); char a; char b; char c; int d; int e; int f; long g; long h; long i; int main() { b= 5; c= 7; a= b + c++; printf("%d\n", a); e= 5; f= 7; d= e + f++; printf("%d\n", d); h= 5; i= 7; g= h + i++; printf("%d\n", g); a= b-- + c; printf("%d\n", a); d= e-- + f; printf("%d\n", d); g= h-- + i; printf("%d\n", g); a= ++b + c; printf("%d\n", a); d= ++e + f; printf("%d\n", d); g= ++h + 
i; printf("%d\n", g); a= b * --c; printf("%d\n", a); d= e * --f; printf("%d\n", d); g= h * --i; printf("%d\n", g); return(0); } ================================================ FILE: 41_Local_Var_Init/tests/input023.c ================================================ int printf(char *fmt); char *str; int x; int main() { x= -23; printf("%d\n", x); printf("%d\n", -10 * -10); x= 1; x= ~x; printf("%d\n", x); x= 2 > 5; printf("%d\n", x); x= !x; printf("%d\n", x); x= !x; printf("%d\n", x); x= 13; if (x) { printf("%d\n", 13); } x= 0; if (!x) { printf("%d\n", 14); } for (str= "Hello world\n"; *str; str++) { printf("%c", *str); } return(0); } ================================================ FILE: 41_Local_Var_Init/tests/input024.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { a= 42; b= 19; printf("%d\n", a & b); printf("%d\n", a | b); printf("%d\n", a ^ b); printf("%d\n", 1 << 3); printf("%d\n", 63 >> 3); return(0); } ================================================ FILE: 41_Local_Var_Init/tests/input025.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { char z; int y; int x; x= 10; y= 20; z= 30; printf("%d\n", x); printf("%d\n", y); printf("%d\n", z); a= 5; b= 15; c= 25; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); return(0); } ================================================ FILE: 41_Local_Var_Init/tests/input026.c ================================================ int printf(char *fmt); int main(int a, char b, long c, int d, int e, int f, int g, int h) { int i; int j; int k; a= 13; printf("%d\n", a); b= 23; printf("%d\n", b); c= 34; printf("%d\n", c); d= 44; printf("%d\n", d); e= 54; printf("%d\n", e); f= 64; printf("%d\n", f); g= 74; printf("%d\n", g); h= 84; printf("%d\n", h); i= 94; printf("%d\n", i); j= 95; printf("%d\n", j); k= 96; printf("%d\n", k); return(0); } ================================================ FILE: 
41_Local_Var_Init/tests/input027.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int param5(int a, int b, int c, int d, int e) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param2(int a, int b) { int c; int d; int e; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param0() { int a; int b; int c; int d; int e; a= 1; b= 2; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int main() { param8(1,2,3,4,5,6,7,8); param5(1,2,3,4,5); param2(1,2); param0(); return(0); } ================================================ FILE: 41_Local_Var_Init/tests/input028.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 41_Local_Var_Init/tests/input029.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h); int fred(int a, int b, int c); int main(); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); 
return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 41_Local_Var_Init/tests/input030.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input030.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 41_Local_Var_Init/tests/input031.c ================================================ int printf(char *fmt); int main() { int x; x= 2 + + 3 - * / ; } ================================================ FILE: 41_Local_Var_Init/tests/input032.c ================================================ int printf(char *fmt); int main() { pizza cow llama sausage; } ================================================ FILE: 41_Local_Var_Init/tests/input033.c ================================================ int printf(char *fmt); int main() { char *z; return(z); } ================================================ FILE: 41_Local_Var_Init/tests/input034.c ================================================ int printf(char *fmt); int main() { int a[12]; return(0); } ================================================ FILE: 41_Local_Var_Init/tests/input035.c ================================================ int printf(char *fmt); int fred(int a, int b) { int a; return(a); } ================================================ FILE: 41_Local_Var_Init/tests/input036.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c); ================================================ FILE: 41_Local_Var_Init/tests/input037.c 
================================================ int printf(char *fmt); int fred(int a, char b +, int z); ================================================ FILE: 41_Local_Var_Init/tests/input038.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c, int g); ================================================ FILE: 41_Local_Var_Init/tests/input039.c ================================================ int printf(char *fmt); int main() { int a; } ================================================ FILE: 41_Local_Var_Init/tests/input040.c ================================================ int printf(char *fmt); int main() { int a; a= 5; } ================================================ FILE: 41_Local_Var_Init/tests/input041.c ================================================ int printf(char *fmt); void fred() { return(5); } ================================================ FILE: 41_Local_Var_Init/tests/input042.c ================================================ int printf(char *fmt); int main() { fred(5); } ================================================ FILE: 41_Local_Var_Init/tests/input043.c ================================================ int printf(char *fmt); int main() { int a; a= b[4]; } ================================================ FILE: 41_Local_Var_Init/tests/input044.c ================================================ int printf(char *fmt); int main() { int a; a= z; } ================================================ FILE: 41_Local_Var_Init/tests/input045.c ================================================ int printf(char *fmt); int main() { int a; a= &5; } ================================================ FILE: 41_Local_Var_Init/tests/input046.c ================================================ int printf(char *fmt); int main() { int a; a= *5; } ================================================ FILE: 41_Local_Var_Init/tests/input047.c ================================================ int 
printf(char *fmt); int main() { int a; a= ++5; } ================================================ FILE: 41_Local_Var_Init/tests/input048.c ================================================ int printf(char *fmt); int main() { int a; a= --5; } ================================================ FILE: 41_Local_Var_Init/tests/input049.c ================================================ int printf(char *fmt); int main() { int x; char y; y= x; } ================================================ FILE: 41_Local_Var_Init/tests/input050.c ================================================ int printf(char *fmt); int main() { char *a; char *b; a= a + b; } ================================================ FILE: 41_Local_Var_Init/tests/input051.c ================================================ int printf(char *fmt); int main() { char a; a= 'fred'; } ================================================ FILE: 41_Local_Var_Init/tests/input052.c ================================================ int printf(char *fmt); int main() { int a; a= $5.00; } ================================================ FILE: 41_Local_Var_Init/tests/input053.c ================================================ int printf(char *fmt); int main() { printf("Hello world, %d\n", 23); return(0); } ================================================ FILE: 41_Local_Var_Init/tests/input054.c ================================================ int printf(char *fmt); int main() { int i; for (i=0; i < 20; i++) { printf("Hello world, %d\n", i); } return(0); } ================================================ FILE: 41_Local_Var_Init/tests/input055.c ================================================ int printf(char *fmt); int main(int argc, char **argv) { int i; char *argument; printf("Hello world\n"); for (i=0; i < argc; i++) { argument= *argv; argv= argv + 1; printf("Argument %d is %s\n", i, argument); } return(0); } ================================================ FILE: 41_Local_Var_Init/tests/input056.c 
================================================ struct foo { int x; }; struct mary var1; ================================================ FILE: 41_Local_Var_Init/tests/input057.c ================================================ struct fred { int x; } ; struct fred { char y; } ; ================================================ FILE: 41_Local_Var_Init/tests/input058.c ================================================ int printf(char *fmt); struct fred { int x; char y; long z; }; struct fred var2; struct fred *varptr; int main() { long result; var2.x= 12; printf("%d\n", var2.x); var2.y= 'c'; printf("%d\n", var2.y); var2.z= 4005; printf("%d\n", var2.z); result= var2.x + var2.y + var2.z; printf("%d\n", result); varptr= &var2; result= varptr->x + varptr->y + varptr->z; printf("%d\n", result); return(0); } ================================================ FILE: 41_Local_Var_Init/tests/input059.c ================================================ int main() { int x; x= y.foo; } ================================================ FILE: 41_Local_Var_Init/tests/input060.c ================================================ int main() { int x; x= x.foo; } ================================================ FILE: 41_Local_Var_Init/tests/input061.c ================================================ int main() { int x; x= x->foo; } ================================================ FILE: 41_Local_Var_Init/tests/input062.c ================================================ int printf(char *fmt); union fred { char w; int x; int y; long z; }; union fred var1; union fred *varptr; int main() { var1.x= 65; printf("%d\n", var1.x); var1.x= 66; printf("%d\n", var1.x); printf("%d\n", var1.y); printf("The next two depend on the endian of the platform\n"); printf("%d\n", var1.w); printf("%d\n", var1.z); varptr= &var1; varptr->x= 67; printf("%d\n", varptr->x); printf("%d\n", varptr->y); return(0); } ================================================ FILE: 41_Local_Var_Init/tests/input063.c 
================================================ int printf(char *fmt); enum fred { apple=1, banana, carrot, pear=10, peach, mango, papaya }; enum jane { aple=1, bnana, crrot, par=10, pech, mago, paaya }; enum fred var1; enum jane var2; enum fred var3; int main() { var1= carrot + pear + mango; printf("%d\n", var1); return(0); } ================================================ FILE: 41_Local_Var_Init/tests/input064.c ================================================ enum fred var3; ================================================ FILE: 41_Local_Var_Init/tests/input065.c ================================================ enum fred { x, y, z }; enum fred { a, b }; ================================================ FILE: 41_Local_Var_Init/tests/input066.c ================================================ enum fred { x, y, z }; enum mary { a, b, z }; ================================================ FILE: 41_Local_Var_Init/tests/input067.c ================================================ int printf(char *fmt); typedef int FOO; FOO var1; struct bar { int x; int y} ; typedef struct bar BAR; BAR var2; int main() { var1= 5; printf("%d\n", var1); var2.x= 7; var2.y= 10; printf("%d\n", var2.x + var2.y); return(0); } ================================================ FILE: 41_Local_Var_Init/tests/input068.c ================================================ typedef int FOO; typedef char FOO; ================================================ FILE: 41_Local_Var_Init/tests/input069.c ================================================ typedef int FOO; FLOO y; ================================================ FILE: 41_Local_Var_Init/tests/input070.c ================================================ #include typedef int FOO; int main() { FOO x; x= 56; printf("%d\n", x); return(0); } ================================================ FILE: 41_Local_Var_Init/tests/input071.c ================================================ #include int main() { int x; x = 0; while (x < 100) { if (x == 5) { x = x + 2; 
continue; } printf("%d\n", x); if (x == 14) { break; } x = x + 1; } printf("Done\n"); return (0); } ================================================ FILE: 41_Local_Var_Init/tests/input072.c ================================================ int main() { break; } ================================================ FILE: 41_Local_Var_Init/tests/input073.c ================================================ int main() { continue; } ================================================ FILE: 41_Local_Var_Init/tests/input074.c ================================================ #include int main() { int x; int y; y= 0; for (x=0; x < 5; x++) { switch(x) { case 1: { y= 5; break; } case 2: { y= 7; break; } case 3: { y= 9; } default: { y= 100; } } printf("%d\n", y); } return(0); } ================================================ FILE: 41_Local_Var_Init/tests/input075.c ================================================ int main() { int x; switch(x) { if (x<5); } } ================================================ FILE: 41_Local_Var_Init/tests/input076.c ================================================ int main() { int x; switch(x) { } } ================================================ FILE: 41_Local_Var_Init/tests/input077.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } case 4: { x= 2; } } } ================================================ FILE: 41_Local_Var_Init/tests/input078.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } default: { x= 2; } } } ================================================ FILE: 41_Local_Var_Init/tests/input079.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } case 2: { x= 2; } case 1: { x= 2; } default: { x= 2; } } } ================================================ FILE: 41_Local_Var_Init/tests/input080.c ================================================ #include int x; int y; 
int main() { for (x=0, y=1; x < 6; x++, y=y+2) { printf("%d %d\n", x, y); } return(0); } ================================================ FILE: 41_Local_Var_Init/tests/input081.c ================================================ #include int x; int y; int main() { x= 0; y=1; for (;x<5;) { printf("%d %d\n", x, y); x=x+1; y=y+2; } return(0); } ================================================ FILE: 41_Local_Var_Init/tests/input082.c ================================================ #include int main() { int x; x= 10; // Dangling else test if (x > 5) if (x > 15) printf("x > 15\n"); else printf("15 >= x > 5\n"); else printf("5 >= x\n"); return(0); } ================================================ FILE: 41_Local_Var_Init/tests/input083.c ================================================ #include int main() { int x; // Dangling else test. // We should not print anything for x<= 5 for (x=0; x < 12; x++) if (x > 5) if (x > 10) printf("10 < %2d\n", x); else printf(" 5 < %2d <= 10\n", x); return(0); } ================================================ FILE: 41_Local_Var_Init/tests/input084.c ================================================ #include int main() { int x, y; x=2; y=3; printf("%d %d\n", x, y); char a, *b; a= 'f'; b= &a; printf("%c %c\n", a, *b); return(0); } ================================================ FILE: 41_Local_Var_Init/tests/input085.c ================================================ int main(struct foo { int x; }; , int a) { return(0); } ================================================ FILE: 41_Local_Var_Init/tests/input086.c ================================================ int main() { int fred() { return(5); } int x; x=2; return(x); } ================================================ FILE: 41_Local_Var_Init/tests/input087.c ================================================ struct foo { int x; int y; struct blah { int g; }; }; ================================================ FILE: 41_Local_Var_Init/tests/input088.c 
================================================ #include struct foo { int x; int y; } fred, mary; struct foo james; int main() { fred.x= 5; mary.y= 6; printf("%d %d\n", fred.x, mary.y); return(0); } ================================================ FILE: 41_Local_Var_Init/tests/input089.c ================================================ #include int x= 23; char y= 'H'; char *z= "Hello world"; int main() { printf("%d %c %s\n", x, y, z); return(0); } ================================================ FILE: 41_Local_Var_Init/tests/input090.c ================================================ #include int a = 23, b = 100; char y = 'H', *z = "Hello world"; int main() { printf("%d %d %c %s\n", a, b, y, z); return (0); } ================================================ FILE: 41_Local_Var_Init/tests/input091.c ================================================ #include int fred[] = { 1, 2, 3, 4, 5 }; int jim[10] = { 1, 2, 3, 4, 5 }; int main() { int i; for (i=0; i < 5; i++) printf("%d\n", fred[i]); for (i=0; i < 10; i++) printf("%d\n", jim[i]); return(0); } ================================================ FILE: 41_Local_Var_Init/tests/input092.c ================================================ char x= 3000; ================================================ FILE: 41_Local_Var_Init/tests/input093.c ================================================ char x= fred; ================================================ FILE: 41_Local_Var_Init/tests/input094.c ================================================ char *s= 54; ================================================ FILE: 41_Local_Var_Init/tests/input095.c ================================================ int fred(int x=2) { return(x); } ================================================ FILE: 41_Local_Var_Init/tests/input096.c ================================================ int fred[0]; ================================================ FILE: 41_Local_Var_Init/tests/input097.c ================================================ int main() { int 
x[45]; return(0); } ================================================ FILE: 41_Local_Var_Init/tests/input098.c ================================================ int fred[3]= { 1, 2, 3, 4, 5 }; ================================================ FILE: 41_Local_Var_Init/tests/input099.c ================================================ #include // List of token strings, for debugging purposes. // As yet, we can't store a NULL into the list char *Tstring[] = { "EOF", "=", "||", "&&", "|", "^", "&", "==", "!=", ",", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", "default", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":", "" }; int main() { int i; char *str; i=0; while (1) { str= Tstring[i]; if (*str == 0) break; printf("%s\n", str); i++; } return(0); } ================================================ FILE: 41_Local_Var_Init/tests/input100.c ================================================ #include int main() { int x= 3, y=14; int z= 2 * x + y; char *str= "Hello world"; printf("%s %d %d\n", str, x+y, z); return(0); } ================================================ FILE: 41_Local_Var_Init/tests/mktests ================================================ #!/bin/sh # Make the output files for each test # Build our compiler if needed if [ ! -f ../cwj ] then (cd ..; make install) fi for i in input*c do if [ ! -f "out.$i" -a ! -f "err.$i" ] then ../cwj -o out $i 2> "err.$i" # If the err file is empty if [ ! 
-s "err.$i" ] then rm -f "err.$i" cc -o out $i ./out > "out.$i" fi fi rm -f out out.s done ================================================ FILE: 41_Local_Var_Init/tests/out.input001.c ================================================ 36 10 25 ================================================ FILE: 41_Local_Var_Init/tests/out.input002.c ================================================ 17 ================================================ FILE: 41_Local_Var_Init/tests/out.input003.c ================================================ 1 2 3 4 5 ================================================ FILE: 41_Local_Var_Init/tests/out.input004.c ================================================ 1 1 1 1 1 1 1 1 1 ================================================ FILE: 41_Local_Var_Init/tests/out.input005.c ================================================ 6 ================================================ FILE: 41_Local_Var_Init/tests/out.input006.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 41_Local_Var_Init/tests/out.input007.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 41_Local_Var_Init/tests/out.input008.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 41_Local_Var_Init/tests/out.input009.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 41_Local_Var_Init/tests/out.input010.c ================================================ 20 10 1 2 3 4 5 253 254 255 0 1 ================================================ FILE: 41_Local_Var_Init/tests/out.input011.c ================================================ 10 20 30 1 2 3 4 5 253 254 255 0 1 2 3 1 2 3 4 5 ================================================ FILE: 41_Local_Var_Init/tests/out.input012.c 
================================================ 5 ================================================ FILE: 41_Local_Var_Init/tests/out.input013.c ================================================ 23 56 ================================================ FILE: 41_Local_Var_Init/tests/out.input014.c ================================================ 10 20 30 ================================================ FILE: 41_Local_Var_Init/tests/out.input015.c ================================================ 18 18 12 12 ================================================ FILE: 41_Local_Var_Init/tests/out.input016.c ================================================ 12 18 ================================================ FILE: 41_Local_Var_Init/tests/out.input017.c ================================================ 19 12 ================================================ FILE: 41_Local_Var_Init/tests/out.input018.c ================================================ 34 34 ================================================ FILE: 41_Local_Var_Init/tests/out.input018a.c ================================================ 15 16 ================================================ FILE: 41_Local_Var_Init/tests/out.input019.c ================================================ 30 ================================================ FILE: 41_Local_Var_Init/tests/out.input020.c ================================================ 12 ================================================ FILE: 41_Local_Var_Init/tests/out.input021.c ================================================ 10 Hello world ================================================ FILE: 41_Local_Var_Init/tests/out.input022.c ================================================ 12 12 12 13 13 13 13 13 13 35 35 35 ================================================ FILE: 41_Local_Var_Init/tests/out.input023.c ================================================ -23 100 -2 0 1 0 13 14 Hello world ================================================ FILE: 
41_Local_Var_Init/tests/out.input024.c ================================================ 2 59 57 8 7 ================================================ FILE: 41_Local_Var_Init/tests/out.input025.c ================================================ 10 20 30 5 15 25 ================================================ FILE: 41_Local_Var_Init/tests/out.input026.c ================================================ 13 23 34 44 54 64 74 84 94 95 96 ================================================ FILE: 41_Local_Var_Init/tests/out.input027.c ================================================ 1 2 3 4 5 6 7 8 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 ================================================ FILE: 41_Local_Var_Init/tests/out.input028.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 41_Local_Var_Init/tests/out.input029.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 41_Local_Var_Init/tests/out.input030.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input030.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 41_Local_Var_Init/tests/out.input053.c ================================================ Hello world, 23 ================================================ FILE: 41_Local_Var_Init/tests/out.input054.c ================================================ Hello world, 0 Hello world, 1 Hello world, 2 Hello world, 3 Hello world, 4 Hello world, 5 Hello world, 6 Hello world, 7 Hello world, 8 Hello world, 9 Hello world, 10 Hello world, 11 Hello world, 12 Hello world, 13 Hello world, 14 Hello 
world, 15 Hello world, 16 Hello world, 17 Hello world, 18 Hello world, 19 ================================================ FILE: 41_Local_Var_Init/tests/out.input055.c ================================================ Hello world Argument 0 is ./out ================================================ FILE: 41_Local_Var_Init/tests/out.input058.c ================================================ 12 99 4005 4116 4116 ================================================ FILE: 41_Local_Var_Init/tests/out.input062.c ================================================ 65 66 66 The next two depend on the endian of the platform 66 66 67 67 ================================================ FILE: 41_Local_Var_Init/tests/out.input063.c ================================================ 25 ================================================ FILE: 41_Local_Var_Init/tests/out.input067.c ================================================ 5 17 ================================================ FILE: 41_Local_Var_Init/tests/out.input070.c ================================================ 56 ================================================ FILE: 41_Local_Var_Init/tests/out.input071.c ================================================ 0 1 2 3 4 7 8 9 10 11 12 13 14 Done ================================================ FILE: 41_Local_Var_Init/tests/out.input074.c ================================================ 100 5 7 100 100 ================================================ FILE: 41_Local_Var_Init/tests/out.input080.c ================================================ 0 1 1 3 2 5 3 7 4 9 5 11 ================================================ FILE: 41_Local_Var_Init/tests/out.input081.c ================================================ 0 1 1 3 2 5 3 7 4 9 ================================================ FILE: 41_Local_Var_Init/tests/out.input082.c ================================================ 15 >= x > 5 ================================================ FILE: 41_Local_Var_Init/tests/out.input083.c 
================================================ 5 < 6 <= 10 5 < 7 <= 10 5 < 8 <= 10 5 < 9 <= 10 5 < 10 <= 10 10 < 11 ================================================ FILE: 41_Local_Var_Init/tests/out.input084.c ================================================ 2 3 f f ================================================ FILE: 41_Local_Var_Init/tests/out.input088.c ================================================ 5 6 ================================================ FILE: 41_Local_Var_Init/tests/out.input089.c ================================================ 23 H Hello world ================================================ FILE: 41_Local_Var_Init/tests/out.input090.c ================================================ 23 100 H Hello world ================================================ FILE: 41_Local_Var_Init/tests/out.input091.c ================================================ 1 2 3 4 5 1 2 3 4 5 0 0 0 0 0 ================================================ FILE: 41_Local_Var_Init/tests/out.input099.c ================================================ EOF = || && | ^ & == != , > <= >= << >> + - * / ++ -- ~ ! void char int long if else while for return struct union enum typedef extern break continue switch case default intlit strlit ; identifier { } ( ) [ ] , . -> : ================================================ FILE: 41_Local_Var_Init/tests/out.input100.c ================================================ Hello world 17 20 ================================================ FILE: 41_Local_Var_Init/tests/runtests ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! -f ../cwj ] then (cd ..; make install) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" 
# Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../cwj -o out $i ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../cwj $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.s "trial.$i" done ================================================ FILE: 41_Local_Var_Init/tests/runtestsn ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! -f ../compn ] then (cd ..; make install) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" # Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../compn -o out $i ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. 
Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../compn $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.o out.s "trial.$i" done ================================================ FILE: 41_Local_Var_Init/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->type = type; n->left = left; n->mid = mid; n->right = right; n->sym = sym; n->intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, struct symtable *sym, int intvalue) { return (mkastnode(op, type, NULL, NULL, NULL, sym, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, struct symtable *sym, int intvalue) { return (mkastnode(op, type, left, NULL, NULL, sym, intvalue)); } // Generate and return a new label number // just for AST dumping purposes static int gendumplabel(void) { static int id = 1; return (id++); }

// Given an AST tree, print it out and follow the
// traversal of the tree that genAST() follows.
//   n:     node to dump. It is dereferenced immediately, and the
//          left child of A_IF/A_WHILE and the mid child of A_IF are
//          dumped without a NULL check, so they must be present.
//   label: handed down on the recursive calls but never read here.
//   level: number of spaces printed before this node's name;
//          children are dumped at level + 2.
// A_IF and A_WHILE print their own line first and then their
// children; every other node prints its children first and then
// itself. An operator with no case below stops the compiler
// through fatald().
void dumpAST(struct ASTnode *n, int label, int level) {
  int Lfalse, Lstart, Lend;

  switch (n->op) {
    case A_IF:
      Lfalse = gendumplabel();
      for (int i = 0; i < level; i++)
        fprintf(stdout, " ");
      fprintf(stdout, "A_IF");
      // Only an IF with an else clause gets an end label
      if (n->right) {
        Lend = gendumplabel();
        fprintf(stdout, ", end L%d", Lend);
      }
      fprintf(stdout, "\n");
      dumpAST(n->left, Lfalse, level + 2);
      dumpAST(n->mid, NOLABEL, level + 2);
      if (n->right)
        dumpAST(n->right, NOLABEL, level + 2);
      return;
    case A_WHILE:
      Lstart = gendumplabel();
      for (int i = 0; i < level; i++)
        fprintf(stdout, " ");
      fprintf(stdout, "A_WHILE, start L%d\n", Lstart);
      Lend = gendumplabel();
      dumpAST(n->left, Lend, level + 2);
      dumpAST(n->right, NOLABEL, level + 2);
      return;
  }

  // Reset level to -2 for A_GLUE, so that its children
  // are dumped at level 0 and the glue node itself gets no indent
  if (n->op == A_GLUE)
    level = -2;

  // General AST node handling
  if (n->left)
    dumpAST(n->left, NOLABEL, level + 2);
  if (n->right)
    dumpAST(n->right, NOLABEL, level + 2);

  for (int i = 0; i < level; i++)
    fprintf(stdout, " ");

  switch (n->op) {
    case A_GLUE:
      fprintf(stdout, "\n\n");
      return;
    case A_FUNCTION:
      fprintf(stdout, "A_FUNCTION %s\n", n->sym->name);
      return;
    case A_ADD:
      fprintf(stdout, "A_ADD\n");
      return;
    case A_SUBTRACT:
      fprintf(stdout, "A_SUBTRACT\n");
      return;
    case A_MULTIPLY:
      fprintf(stdout, "A_MULTIPLY\n");
      return;
    case A_DIVIDE:
      fprintf(stdout, "A_DIVIDE\n");
      return;
    case A_EQ:
      fprintf(stdout, "A_EQ\n");
      return;
    case A_NE:
      fprintf(stdout, "A_NE\n");
      return;
    case A_LT:
      // Was printed as "A_LE", which made '<' and '<=' look the same
      fprintf(stdout, "A_LT\n");
      return;
    case A_GT:
      fprintf(stdout, "A_GT\n");
      return;
    case A_LE:
      fprintf(stdout, "A_LE\n");
      return;
    case A_GE:
      fprintf(stdout, "A_GE\n");
      return;
    case A_INTLIT:
      fprintf(stdout, "A_INTLIT %d\n", n->intvalue);
      return;
    case A_STRLIT:
      fprintf(stdout, "A_STRLIT rval label L%d\n", n->intvalue);
      return;
    case A_IDENT:
      if (n->rvalue)
        fprintf(stdout, "A_IDENT rval %s\n", n->sym->name);
      else
        fprintf(stdout, "A_IDENT %s\n", n->sym->name);
      return;
    case A_ASSIGN:
      fprintf(stdout, "A_ASSIGN\n");
      return;
    case A_WIDEN:
      fprintf(stdout, "A_WIDEN\n");
      return;
    case A_RETURN:
      fprintf(stdout, "A_RETURN\n");
      return;
    case A_FUNCCALL:
      fprintf(stdout, "A_FUNCCALL %s\n", n->sym->name);
      return;
    case A_ADDR:
      fprintf(stdout, "A_ADDR %s\n", n->sym->name);
      return;
    case A_DEREF:
      if (n->rvalue)
        fprintf(stdout, "A_DEREF rval\n");
      else
        fprintf(stdout, "A_DEREF\n");
      return;
    case A_SCALE:
      fprintf(stdout, "A_SCALE %d\n", n->size);
      return;
    case A_PREINC:
      fprintf(stdout, "A_PREINC %s\n", n->sym->name);
      return;
    case A_PREDEC:
      fprintf(stdout, "A_PREDEC %s\n", n->sym->name);
      return;
    case A_POSTINC:
      fprintf(stdout, "A_POSTINC\n");
      return;
    case A_POSTDEC:
      fprintf(stdout, "A_POSTDEC\n");
      return;
    case A_NEGATE:
      fprintf(stdout, "A_NEGATE\n");
      return;
    case A_BREAK:
      fprintf(stdout, "A_BREAK\n");
      return;
    case A_CONTINUE:
      fprintf(stdout, "A_CONTINUE\n");
      return;
    case A_CASE:
      fprintf(stdout, "A_CASE %d\n", n->intvalue);
      return;
    case A_DEFAULT:
      fprintf(stdout, "A_DEFAULT\n");
      return;
    case A_SWITCH:
      fprintf(stdout, "A_SWITCH\n");
      return;
    default:
      fatald("Unknown dumpAST operator", n->op);
  }
}
================================================ FILE: 41_Local_Var_Init/types.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Types and type handling // Copyright (c) 2019 Warren Toomey, GPL3 // Return true if a type is an int type // of any size, false otherwise int inttype(int type) { return (((type & 0xf) == 0) && (type >= P_CHAR && type <= P_LONG)); } // Return true if a type is of pointer type int ptrtype(int type) { return ((type & 0xf) != 0); } // Given a primitive type, return // the type which is a pointer to it int pointer_to(int type) { if ((type & 0xf) == 0xf) fatald("Unrecognised in pointer_to: type", type); return (type + 1); } // Given a primitive pointer type, return // the type which it points to int value_at(int type) { if ((type & 0xf) == 0x0) fatald("Unrecognised in value_at: type", type); return (type - 1); } // Given a type and a composite type pointer, return // the size of this type in bytes int typesize(int type, struct symtable *ctype) { if (type == P_STRUCT || type == P_UNION) return (ctype->size); return (genprimsize(type)); } // Given an AST tree and a type which we want it to become, // possibly modify the tree by widening or scaling so that // it is compatible with this type. 
Return the original tree // if no changes occurred, a modified tree, or NULL if the // tree is not compatible with the given type. // If this will be part of a binary operation, the AST op is not zero. struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op) { int ltype; int lsize, rsize; ltype = tree->type; // XXX No idea on these yet if (ltype == P_STRUCT || ltype == P_UNION) fatal("Don't know how to do this yet"); if (rtype == P_STRUCT || rtype == P_UNION) fatal("Don't know how to do this yet"); // Compare scalar int types if (inttype(ltype) && inttype(rtype)) { // Both types same, nothing to do if (ltype == rtype) return (tree); // Get the sizes for each type lsize = typesize(ltype, NULL); // XXX Fix soon rsize = typesize(rtype, NULL); // XXX Fix soon // Tree's size is too big if (lsize > rsize) return (NULL); // Widen to the right if (rsize > lsize) return (mkastunary(A_WIDEN, rtype, tree, NULL, 0)); } // For pointers on the left if (ptrtype(ltype)) { // OK is same type on right and not doing a binary op if (op == 0 && ltype == rtype) return (tree); } // We can scale only on A_ADD or A_SUBTRACT operation if (op == A_ADD || op == A_SUBTRACT) { // Left is int type, right is pointer type and the size // of the original type is >1: scale the left if (inttype(ltype) && ptrtype(rtype)) { rsize = genprimsize(value_at(rtype)); if (rsize > 1) return (mkastunary(A_SCALE, rtype, tree, NULL, rsize)); else return (tree); // Size 1, no need to scale } } // If we get here, the types are not compatible return (NULL); } ================================================ FILE: 42_Casting/Makefile ================================================
# Define the location of the include directory
# and the location to install the compiler binary
INCDIR=/tmp/include
BINDIR=/tmp

# Headers shared by every build of the compiler
HSRCS= data.h decl.h defs.h

# Source lists: they differ only in the code generator
# (cg.c, cgn.c or cg_arm.c)
SRCS= cg.c decl.c expr.c gen.c main.c misc.c \
	scan.c stmt.c sym.c tree.c types.c

SRCN= cgn.c decl.c expr.c gen.c main.c misc.c \
	scan.c stmt.c sym.c tree.c types.c

ARMSRCS= cg_arm.c decl.c expr.c gen.c main.c misc.c \
	scan.c stmt.c sym.c tree.c types.c

# These targets name actions, not files
.PHONY: install installn clean test armtest testn

cwj: $(SRCS) $(HSRCS)
	cc -o cwj -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCS)

compn: $(SRCN) $(HSRCS)
	cc -D__NASM__ -o compn -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCN)

# INCDIR is passed here as well, matching the cwj and compn rules.
# NOTE(review): presumably the shared sources reference INCDIR -- confirm.
cwjarm: $(ARMSRCS) $(HSRCS)
	cc -o cwjarm -g -Wall -DINCDIR=\"$(INCDIR)\" $(ARMSRCS)
	cp cwjarm cwj

install: cwj
	mkdir -p $(INCDIR)
	rsync -a include/. $(INCDIR)
	cp cwj $(BINDIR)
	chmod +x $(BINDIR)/cwj

installn: compn
	mkdir -p $(INCDIR)
	rsync -a include/. $(INCDIR)
	cp compn $(BINDIR)
	chmod +x $(BINDIR)/compn

clean:
	rm -f cwj cwjarm compn *.o *.s out a.out

test: install tests/runtests
	(cd tests; chmod +x runtests; ./runtests)

armtest: cwjarm tests/runtests
	(cd tests; chmod +x runtests; ./runtests)

testn: installn tests/runtestsn
	(cd tests; chmod +x runtestsn; ./runtestsn)
================================================ FILE: 42_Casting/Readme.md ================================================ # Part 42: Type Casting and NULL In this part of our compiler writing journey, I've implemented type casting. I thought this would allow me to do: ```c #define NULL (void *)0 ``` but I hadn't done enough to get `void *` to work properly. So I've added type casting and also got `void *` to work. ## What is Type Casting? Type casting is where you forcibly change the type of an expression to be something else. Common reasons are to narrow an integer value down to a smaller range type, or to assign a pointer from one type into a pointer storage of another type, e.g. ```c int x= 65535; char y= (char)x; // y is now 255, the lower 8 bits int *a= &x; char *b= (char *)a; // b point at the address of x long *z= (void *)0; // z is a NULL pointer, not pointing at anything ``` Notice above that I've used the casts in assignment statements. For expressions within functions, we will need to add an A_CAST node to our AST tree to say "cast the original expression type to this new type". 
For global variable assignments, we will need to modify the assignment parser to allow a cast to come before the literal value. ## A New Function, `parse_cast()` I've added this new function in `decl.c`: ```c // Parse a type which appears inside a cast int parse_cast(void) { int type, class; struct symtable *ctype; // Get the type inside the parentheses type= parse_stars(parse_type(&ctype, &class)); // Do some error checking. I'm sure more can be done if (type == P_STRUCT || type == P_UNION || type == P_VOID) fatal("Cannot cast to a struct, union or void type"); return(type); } ``` The parsing of the surrounding '(' ... ')' is done elsewhere. We get the type identifier and the following '*' tokens to get the type of the cast. Then we prevent casts to structs, unions and to `void`. We need a function to do this as we have to do it in expressions and also in global variable assignments. I wanted to keep the code [DRY](https://en.wikipedia.org/wiki/Don%27t_repeat_yourself) and not duplicate this logic. ## Cast Parsing in Expressions We already parse parentheses in our expression code, so we will need to modify this. In `primary()` in `expr.c`, we now do this: ```c static struct ASTnode *primary(void) { int type=0; ... switch (Token.token) { ... case T_LPAREN: // Beginning of a parenthesised expression, skip the '('. scan(&Token); // If the token after is a type identifier, this is a cast expression switch (Token.token) { case T_IDENT: // We have to see if the identifier matches a typedef. // If not, treat it as an expression. if (findtypedef(Text) == NULL) { n = binexpr(0); break; } case T_VOID: case T_CHAR: case T_INT: case T_LONG: case T_STRUCT: case T_UNION: case T_ENUM: // Get the type inside the parentheses type= parse_cast(); // Skip the closing ')' and then parse the following expression rparen(); default: n = binexpr(0); // Scan in the expression } // We now have at least an expression in n, and possibly a non-zero type in type // if there was a cast. 
Skip the closing ')' if there was no cast. if (type == 0) rparen(); else // Otherwise, make a unary AST node for the cast n= mkastunary(A_CAST, type, n, NULL, 0); return (n); } } ``` That's a lot to digest, so let's go through it in stages. All of the cases ensure that we have a type identifier after the '(' token. We call `parse_cast()` to get the cast type and parse the ')' token. We don't have an AST tree to return yet because we don't know which expression we are casting. So we fall through to the default case where the next expression is parsed. At this point either `type` is still zero (no cast) or non-zero (there was a cast). If no cast, the right parenthesis has to be skipped and we can simply return the expression in parentheses. If there was a cast, we build an A_CAST node with the new `type` and with the following expression as the child. ## Generating the Assembly Code for a Cast Well, we are lucky because the expression's value will be stored in a register. So if we do: ```c int x= 65535; char y= (char)x; // y is now 255, the lower 8 bits ``` then we can simply put the 65535 into a register. But when we save it to y, then the lvalue's type will be invoked to generate the correct code to save the right size: ``` movq $65535, %r10 # Store 65535 in x movl %r10d, -4(%rbp) movslq -4(%rbp), %r10 # Get x into %r10 movb %r10b, -8(%rbp) # Store one byte into y ``` So, in `genAST()` in `gen.c`, we have this code to deal with casting: ```c ... leftreg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op); ... switch (n->op) { ... case A_CAST: return (leftreg); // Not much to do ... } ``` ## Casts in Global Assignments The above is fine when the variables are local variables, as the compiler does the above assignments as expressions. For global variables, we have to hand-parse the cast and apply it to a literal value that follows it. 
So, for example, in `scalar_declaration` in `decl.c` we need this code: ```c // Globals must be assigned a literal value if (class == C_GLOBAL) { // If there is a cast if (Token.token == T_LPAREN) { // Get the type in the cast scan(&Token); casttype= parse_cast(); rparen(); // Check that the two types are compatible. Change // the new type so that the literal parse below works. // A 'void *' casttype can be assigned to any pointer type. if (casttype == type || (casttype== pointer_to(P_VOID) && ptrtype(type))) type= P_NONE; else fatal("Type mismatch"); } // Create one initial value for the variable and // parse this value sym->initlist= (int *)malloc(sizeof(int)); sym->initlist[0]= parse_literal(type); scan(&Token); } ``` First of all, note that we set `type= P_NONE` when there is a cast, and we call `parse_literal()` with P_NONE when there is a cast. Why? Because this function used to require that the literal being parsed was exactly the type which was the argument, i.e. a string literal had to be of type `char *`, a `char` had to be matched by a literal in the range 0 ... 255 etc. Now that we have a cast, we should be able to accept: ```c char a= (char)65536; ``` So the code in `parse_literal()` in `decl.c` now does this: ```c int parse_literal(int type) { // We have a string literal. Store in memory and return the label if (Token.token == T_STRLIT) { if (type == pointer_to(P_CHAR) || type == P_NONE) return(genglobstr(Text)); } // We have an integer literal. Do some range checking. if (Token.token == T_INTLIT) { switch(type) { case P_CHAR: if (Token.intvalue < 0 || Token.intvalue > 255) fatal("Integer literal value too big for char type"); case P_NONE: case P_INT: case P_LONG: break; default: fatal("Type mismatch: integer literal vs. variable"); } } else fatal("Expecting an integer literal value"); return(Token.intvalue); } ``` and the P_NONE is used to relax the type restrictions. 
## Dealing with `void *`

A `void *` pointer is one that can be used in place of any other pointer
type. So we have to implement this.

We already did this for global variable assignments above:

```c
   if (casttype == type || (casttype== pointer_to(P_VOID) && ptrtype(type)))
```

i.e. if the types are equal, or if a `void *` pointer is being assigned to
a pointer. This allows the following global assignment:

```c
char *str= (void *)0;
```

even though `str` is of type `char *` and not `void *`.

Now we need to deal with `void *` (and other pointer/pointer operations)
in expressions. To do this, I had to change `modify_type()` in `types.c`.
As a refresher, here is what this function does:

```c
// Given an AST tree and a type which we want it to become,
// possibly modify the tree by widening or scaling so that
// it is compatible with this type. Return the original tree
// if no changes occurred, a modified tree, or NULL if the
// tree is not compatible with the given type.
// If this will be part of a binary operation, the AST op is not zero.
struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op);
```

This is the code that widens values, e.g. `int x= 'Q';` to make `x` into
a 32-bit value. We also use it for scaling: when we do:

```c
  int x[4];
  int y= x[2];
```

The "2" index is scaled by the size of `int` to be eight bytes offset from
the base of the `x[]` array.

So, inside a function, when we write:

```c
  char *str= (void *)0;
```

we get the AST tree:

```
          A_ASSIGN
           /    \
       A_CAST  A_IDENT
         /      str
     A_INTLIT
         0
```

the type of the left-hand `tree` will be `void *` and the `rtype` will be
`char *`. We had better ensure that the operation can be performed.

I've changed `modify_type()` to do this for pointers:

```c
  // For pointers
  if (ptrtype(ltype) && ptrtype(rtype)) {
    // We can compare them
    if (op >= A_EQ && op <= A_GE)
      return(tree);

    // A comparison of the same type for a non-binary operation is OK,
    // or when the left tree is of `void *` type.
    if (op == 0 && (ltype == rtype || ltype == pointer_to(P_VOID)))
      return (tree);
  }
```

Now, pointer comparison is OK but other binary operations (e.g. addition)
are bad. A "non-binary operation" means something like an assignment. We
can definitely assign between two things of the same type. Now, we can also
assign from a `void *` pointer to any pointer.

## Adding NULL

Now that we can deal with `void *` pointers, we can add NULL to our
include files. I've added this to both `stdio.h` and `stddef.h`:

```c
#ifndef NULL
# define NULL (void *)0
#endif
```

But there was one final wrinkle. When I tried this global declaration:

```c
#include <stdio.h>
char *str= NULL;
```

I got this:

```
str:
        .quad   L0
```

because every initialisation value for a `char *` pointer is treated as a
label number. So the "0" in the NULL was being turned into an "L0" label.
We need to fix this. Now, in `cgglobsym()` in `cg.c`:

```c
      case 8:
        // Generate the pointer to a string literal. Treat a zero value
        // as actually zero, not the label L0
        if (node->initlist != NULL && type== pointer_to(P_CHAR) && initvalue != 0)
          fprintf(Outfile, "\t.quad\tL%d\n", initvalue);
        else
          fprintf(Outfile, "\t.quad\t%d\n", initvalue);
```

Yes it's ugly but it works!

## Testing the Changes

I won't go through all the tests themselves, but files
`tests/input101.c` to `tests/input108.c` test the above functionality
and also the error checking of the compiler.

## Conclusion and What's Next

I thought casting was going to be easy, and it was. What I didn't reckon
with was the issues surrounding `void *`. I feel that I've covered most
bases here but not all of them, so expect to see some `void *` edge cases
that I haven't spotted yet.

In the next part of our compiler writing journey, we'll add some missing
operators.
[Next step](../43_More_Operators/Readme.md) ================================================ FILE: 42_Casting/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\t.text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\t.data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch (type) { case P_CHAR: return (offset); case P_INT: case P_LONG: break; default: if (!ptrtype(type)) fatald("Bad type in cg_align:", type); } // Here we have an int or a long. Align it on a 4-byte offset // I put the generic code here so it can be reused elsewhere. alignment = 4; offset = (offset + direction * (alignment - 1)) & ~(alignment - 1); return (offset); } // Position of next local variable relative to stack base pointer. 
// We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. static int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. // We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "%r10", "%r11", "%r12", "%r13", "%r9", "%r8", "%rcx", "%rdx", "%rsi", "%rdi" }; static char *breglist[] = { "%r10b", "%r11b", "%r12b", "%r13b", "%r9b", "%r8b", "%cl", "%dl", "%sil", "%dil" }; static char *dreglist[] = { "%r10d", "%r11d", "%r12d", "%r13d", "%r9d", "%r8d", "%ecx", "%edx", "%esi", "%edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
// Mark register 'reg' as available again. Freeing a register
// that is already marked free is reported through fatald().
static void free_register(int reg) {
  if (freereg[reg] != 0)
    fatald("Error trying to free register", reg);
  freereg[reg] = 1;
}

// Print out the assembly preamble: free all registers, select
// the text section and emit the shared 'switch' helper routine.
// The routine copies %rdx into %rsi and walks the table from
// there with lodsq: a count, then value/label pairs, then one
// final label which is jumped to when no value matches %rax.
void cgpreamble() {
  freeall_registers();
  cgtextseg();
  fprintf(Outfile,
	  "# internal switch(expr) routine\n"
	  "# %%rsi = switch table, %%rax = expr\n"
	  "# from SubC: http://www.t3x.org/subc/\n"
	  "\n"
	  "switch:\n"
	  " pushq %%rsi\n"
	  " movq %%rdx, %%rsi\n"
	  " movq %%rax, %%rbx\n"
	  " cld\n"
	  " lodsq\n"
	  " movq %%rax, %%rcx\n"
	  "next:\n"
	  " lodsq\n"
	  " movq %%rax, %%rdx\n"
	  " lodsq\n"
	  " cmpq %%rdx, %%rbx\n"
	  " jnz no\n"
	  " popq %%rsi\n"
	  " jmp *%%rax\n"
	  "no:\n"
	  " loop next\n"
	  " lodsq\n"
	  " popq %%rsi\n"
	  " jmp *%%rax\n"
	  "\n");
}

// Nothing to do
void cgpostamble() {
}

// Print out a function preamble: the function's label, the
// frame set-up, the code to spill register parameters to the
// stack, and the stack pointer adjustment for all locals.
// As a side effect, the posn field of every parameter and of
// every local variable on the Loclhead list is filled in.
void cgfuncpreamble(struct symtable *sym) {
  char *name = sym->name;
  struct symtable *parm, *locvar;
  int cnt;
  int paramOffset = 16;		// Any pushed params start at this stack offset
  int paramReg = FIRSTPARAMREG;	// Index to the first param register in above reg lists

  // Output in the text segment, reset local offset
  cgtextseg();
  localOffset = 0;

  // Output the function start, save the %rbp and set it to %rsp
  fprintf(Outfile,
	  "\t.globl\t%s\n"
	  "\t.type\t%s, @function\n"
	  "%s:\n"
	  "\tpushq\t%%rbp\n"
	  "\tmovq\t%%rsp, %%rbp\n", name, name, name);

  // Copy any in-register parameters to the stack, up to six of them
  // The remaining parameters are already on the stack.
  // paramReg walks down the register lists from FIRSTPARAMREG,
  // one entry per register parameter.
  for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) {
    if (cnt > 6) {
      parm->posn = paramOffset;
      paramOffset += 8;
    } else {
      parm->posn = newlocaloffset(parm->type);
      cgstorlocal(paramReg--, parm);
    }
  }

  // For the remainder, if they are a parameter then they are
  // already on the stack. If only a local, make a stack position.
  for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) {
    locvar->posn = newlocaloffset(locvar->type);
  }

  // Align the stack pointer to be a multiple of 16
  // less than its previous value. stackOffset is kept
  // so that cgfuncpostamble() can undo the adjustment.
  stackOffset = (localOffset + 15) & ~15;
  fprintf(Outfile, "\taddq\t$%d,%%rsp\n", -stackOffset);
}

// Print out a function postamble: the function's end label,
// the stack pointer restore, and the return to the caller
void cgfuncpostamble(struct symtable *sym) {
  cglabel(sym->endlabel);
  fprintf(Outfile, "\taddq\t$%d,%%rsp\n", stackOffset);
  fputs("\tpopq %rbp\n" "\tret\n", Outfile);
}

// Load an integer literal value into a register.
// Return the number of the register.
// For x86-64, we don't need to worry about the type.
int cgloadint(int value, int type) {
  // Get a new register
  int r = alloc_register();

  fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]);
  return (r);
}

// Load a value from a variable into a register.
// Return the number of the register. If the
// operation is pre- or post-increment/decrement,
// also perform this action.
int cgloadglob(struct symtable *sym, int op) {
  // Get a new register
  int r = alloc_register();

  // 8-byte values (cgprimsize() also reports 8 for every pointer type).
  // A pre-operation is applied to memory before the load and a
  // post-operation after it, so the register sees the right value.
  // NOTE(review): pointers take this path too, so ++/-- changes a
  // pointer by 1 rather than by the size of what it points at;
  // confirm that this is intended.
  if (cgprimsize(sym->type) == 8) {
    if (op == A_PREINC)
      fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name);
    if (op == A_PREDEC)
      fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name);
    fprintf(Outfile, "\tmovq\t%s(%%rip), %s\n", sym->name, reglist[r]);
    if (op == A_POSTINC)
      fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name);
    if (op == A_POSTDEC)
      fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name);
  } else
    // Print out the code to load the smaller types
    switch (sym->type) {
      case P_CHAR:
	// Chars are zero-extended to 64 bits
	if (op == A_PREINC)
	  fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name);
	if (op == A_PREDEC)
	  fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name);
	fprintf(Outfile, "\tmovzbq\t%s(%%rip), %s\n", sym->name, reglist[r]);
	if (op == A_POSTINC)
	  fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name);
	if (op == A_POSTDEC)
	  fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name);
	break;
      case P_INT:
	// Ints are sign-extended to 64 bits
	if (op == A_PREINC)
	  fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name);
	if (op == A_PREDEC)
	  fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name);
	fprintf(Outfile, "\tmovslq\t%s(%%rip), %s\n", sym->name, reglist[r]);
	if (op == A_POSTINC)
	  fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name);
	if (op == A_POSTDEC)
	  fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name);
	break;
      default:
	fatald("Bad type in cgloadglob:", sym->type);
    }
  return (r);
}

// Load a value from a local variable into a register.
// Return the number of the register. If the
// operation is pre- or post-increment/decrement,
// also perform this action.
int cgloadlocal(struct symtable *sym, int op) {
  // Get a new register
  int r = alloc_register();

  // Print out the code to load it. This mirrors cgloadglob(),
  // but addresses the variable as sym->posn(%rbp).
  if (cgprimsize(sym->type) == 8) {
    if (op == A_PREINC)
      fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->posn);
    if (op == A_PREDEC)
      fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->posn);
    fprintf(Outfile, "\tmovq\t%d(%%rbp), %s\n", sym->posn, reglist[r]);
    if (op == A_POSTINC)
      fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->posn);
    if (op == A_POSTDEC)
      fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->posn);
  } else
    switch (sym->type) {
      case P_CHAR:
	if (op == A_PREINC)
	  fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->posn);
	if (op == A_PREDEC)
	  fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->posn);
	fprintf(Outfile, "\tmovzbq\t%d(%%rbp), %s\n", sym->posn, reglist[r]);
	if (op == A_POSTINC)
	  fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->posn);
	if (op == A_POSTDEC)
	  fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->posn);
	break;
      case P_INT:
	if (op == A_PREINC)
	  fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->posn);
	if (op == A_PREDEC)
	  fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->posn);
	fprintf(Outfile, "\tmovslq\t%d(%%rbp), %s\n", sym->posn, reglist[r]);
	if (op == A_POSTINC)
	  fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->posn);
	if (op == A_POSTDEC)
	  fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->posn);
	break;
      default:
	fatald("Bad type in cgloadlocal:", sym->type);
    }
  return (r);
}

// Given the label number of a global string,
// load its address into a new
register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tleaq\tL%d(%%rip), %s\n", label, reglist[r]); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tandq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\torq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txorq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshlq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshrq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tnegq\t%s\n", reglist[r]); return (r); } 
// Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnotq\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); return (r); } // Convert an integer value to a boolean value. Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s@PLT\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\taddq\t$%d, %%rsp\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpushq\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmovq\t%s, %s\n", reglist[r], reglist[FIRSTPARAMREG - argposn + 1]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsalq\t$%d, %s\n", val, reglist[r]); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %s(%%rip)\n", reglist[r], sym->name); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %s(%%rip)\n", breglist[r], sym->name); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %s(%%rip)\n", dreglist[r], sym->name); break; default: fatald("Bad type in cgstorglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %d(%%rbp)\n", reglist[r], sym->posn); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %d(%%rbp)\n", breglist[r], sym->posn); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %d(%%rbp)\n", dreglist[r], sym->posn); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size, type; int initvalue; int i; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the variable (or its elements if an array) // and the type of the variable if (node->stype == S_ARRAY) { size= typesize(value_at(node->type), node->ctype); type= value_at(node->type); } else { size = node->size; type= node->type; } // Generate the global identity and the label cgdataseg(); fprintf(Outfile, "\t.globl\t%s\n", node->name); fprintf(Outfile, 
"%s:\n", node->name); // Output space for one or more elements for (i=0; i < node->nelems; i++) { // Get any initial value initvalue= 0; if (node->initlist != NULL) initvalue= node->initlist[i]; // Generate the space for this type switch (size) { case 1: fprintf(Outfile, "\t.byte\t%d\n", initvalue); break; case 4: fprintf(Outfile, "\t.long\t%d\n", initvalue); break; case 8: // Generate the pointer to a string literal. Treat a zero value // as actually zero, not the label L0 if (node->initlist != NULL && type== pointer_to(P_CHAR) && initvalue != 0) fprintf(Outfile, "\t.quad\tL%d\n", initvalue); else fprintf(Outfile, "\t.quad\t%d\n", initvalue); break; default: for (int i = 0; i < size; i++) fprintf(Outfile, "\t.byte\t0\n"); } } } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\t.byte\t%d\n", *cptr); } fprintf(Outfile, "\t.byte\t0\n"); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzbl\t%s, %%eax\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %%eax\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } cgjump(sym->endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL) fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", sym->name, reglist[r]); else fprintf(Outfile, "\tleaq\t%d(%%rbp), %s\n", sym->posn, reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzbq\t(%s), %s\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovslq\t(%s), %s\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { // Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmovb\t%s, (%s)\n", breglist[r1], reglist[r2]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } // Generate a switch jump table and the code to // load the registers and call the switch() code void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; // Get a label for the switch table label = genlabel(); cglabel(label); // Heuristic. If we have no cases, create one case // which points to the default case if (casecount == 0) { caseval[0] = 0; caselabel[0] = defaultlabel; casecount = 1; } // Generate the switch jump table. 
fprintf(Outfile, "\t.quad\t%d\n", casecount); for (i = 0; i < casecount; i++) fprintf(Outfile, "\t.quad\t%d, L%d\n", caseval[i], caselabel[i]); fprintf(Outfile, "\t.quad\tL%d\n", defaultlabel); // Load the specific registers cglabel(toplabel); fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); fprintf(Outfile, "\tleaq\tL%d(%%rip), %%rdx\n", label); fprintf(Outfile, "\tjmp\tswitch\n"); } ================================================ FILE: 42_Casting/cg_arm.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for ARMv6 on Raspberry Pi // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. static int freereg[4]; static char *reglist[4] = { "r4", "r5", "r6", "r7" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // We have to store large integer literal values in memory. // Keep a list of them which will be output in the postamble #define MAXINTS 1024 int Intlist[MAXINTS]; static int Intslot = 0; // Determine the offset of a large integer // literal from the .L3 label. If the integer // isn't in the list, add it. 
static void set_int_offset(int val) { int offset = -1; // See if it is already there for (int i = 0; i < Intslot; i++) { if (Intlist[i] == val) { offset = 4 * i; break; } } // Not in the list, so add it if (offset == -1) { offset = 4 * Intslot; if (Intslot == MAXINTS) fatal("Out of int slots in set_int_offset()"); Intlist[Intslot++] = val; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L3+%d\n", offset); } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Print out the assembly postamble void cgpostamble() { // Print out the global variables fprintf(Outfile, ".L2:\n"); for (int i = 0; i < Globs; i++) { if (Symtable[i].stype == S_VARIABLE) fprintf(Outfile, "\t.word %s\n", Symtable[i].name); } // Print out the integer literals fprintf(Outfile, ".L3:\n"); for (int i = 0; i < Intslot; i++) { fprintf(Outfile, "\t.word %d\n", Intlist[i]); } } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, \%%function\n" "%s:\n" "\tpush\t{fp, lr}\n" "\tadd\tfp, sp, #4\n" "\tsub\tsp, sp, #8\n" "\tstr\tr0, [fp, #-8]\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Symtable[id].endlabel); fputs("\tsub\tsp, fp, #4\n" "\tpop\t{fp, pc}\n" "\t.align\t2\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); // If the literal value is small, do it with one instruction if (value <= 1000) fprintf(Outfile, "\tmov\t%s, #%d\n", reglist[r], value); else { set_int_offset(value); fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); } return (r); } // Determine the offset of a variable from the .L2 // label. Yes, this is inefficient code. static void set_var_offset(int id) { int offset = 0; // Walk the symbol table up to id. 
// Find S_VARIABLEs and add on 4 until // we get to our variable for (int i = 0; i < id; i++) { if (Symtable[i].stype == S_VARIABLE) offset += 4; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L2+%d\n", offset); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tldrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s, %s\n", reglist[r1], reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\tmul\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { // To do a divide: r0 holds the dividend, r1 holds the divisor. // The quotient is in r0. 
fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r1]); fprintf(Outfile, "\tmov\tr1, %s\n", reglist[r2]); fprintf(Outfile, "\tbl\t__aeabi_idiv\n"); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r1]); free_register(r2); return (r1); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]); fprintf(Outfile, "\tbl\t%s\n", Symtable[id].name); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r]); return (r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tlsl\t%s, %s, #%d\n", reglist[r], reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tstr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgstorglob:", Symtable[id].type); } return (r); } // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { if (ptrtype(type)) return (4); switch (type) { case P_CHAR: return (1); case P_INT: case P_LONG: return (4); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Symtable[id].type); fprintf(Outfile, "\t.data\n" "\t.globl\t%s\n", Symtable[id].name); switch (typesize) { case 1: fprintf(Outfile, "%s:\t.byte\t0\n", Symtable[id].name); break; case 4: fprintf(Outfile, "%s:\t.long\t0\n", Symtable[id].name); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "moveq", "movne", "movlt", "movgt", "movle", "movge" }; // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "movne", "moveq", "movge", "movle", "movgt", "movlt" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #1\n", cmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #0\n", invcmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\tuxtb\t%s, %s\n", reglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tb\tL%d\n", l); } // List of inverted branch instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *brlist[] = { "bne", "beq", "bge", "ble", "bgt", "blt" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", brlist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[reg]); cgjump(Symtable[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. Return a new register int cgaddress(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); fprintf(Outfile, "\tmov\t%s, r3\n", reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tldrb\t%s, [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [%s]\n", reglist[r1], reglist[r2]); break; case P_INT: case P_LONG: fprintf(Outfile, "\tstr\t%s, [%s]\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 42_Casting/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { no_seg, 
text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\tsection .text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\tsection .data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch (type) { case P_CHAR: return (offset); case P_INT: case P_LONG: break; default: if (!ptrtype(type)) fatald("Bad type in cg_align:", type); } // Here we have an int or a long. Align it on a 4-byte offset // I put the generic code here so it can be reused elsewhere. alignment = 4; offset = (offset + direction * (alignment - 1)) & ~(alignment - 1); return (offset); } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. 
// We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "r10", "r11", "r12", "r13", "r9", "r8", "rcx", "rdx", "rsi", "rdi" }; static char *breglist[] = { "r10b", "r11b", "r12b", "r13b", "r9b", "r8b", "cl", "dl", "sil", "dil" }; static char *dreglist[] = { "r10d", "r11d", "r12d", "r13d", "r9d", "r8d", "ecx", "edx", "esi", "edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\textern\tprintint\n", Outfile); fputs("\textern\tprintchar\n", Outfile); fputs("\textern\topen\n", Outfile); fputs("\textern\tclose\n", Outfile); fputs("\textern\tread\n", Outfile); fputs("\textern\twrite\n", Outfile); fputs("\textern\tprintf\n", Outfile); cgtextseg(); fprintf(Outfile, "; internal switch(expr) routine\n" "; rsi = switch table, rax = expr\n" "; from SubC: http://www.t3x.org/subc/\n" "\n" "switch:\n" " push rsi\n" " mov rsi, rdx\n" " mov rbx, rax\n" " cld\n" " lodsq\n" " mov rcx, rax\n" "next:\n" " lodsq\n" " mov rdx, rax\n" " lodsq\n" " cmp rbx, rdx\n" " jnz no\n" " pop rsi\n" " jmp rax\n" "no:\n" " loop next\n" " lodsq\n" " pop rsi\n" " jmp rax\n" "\n"); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(struct symtable *sym) { char *name = sym->name; struct symtable *parm, *locvar; int cnt; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the rsp and rbp fprintf(Outfile, "\tglobal\t%s\n" "%s:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n", name, name); // Copy any in-register parameters to the stack, up to six of them // The remaining parameters are already on the stack for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) { if (cnt > 6) { parm->posn = paramOffset; paramOffset += 8; } else { parm->posn = newlocaloffset(parm->type); cgstorlocal(paramReg--, parm); } } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. 
for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) { locvar->posn = newlocaloffset(locvar->type); } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\tadd\trsp, %d\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->endlabel); fprintf(Outfile, "\tadd\trsp, %d\n", stackOffset); fputs("\tpop rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadglob(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); } else // Print out the code to initialise it switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, 
"\tdec\tdword [%s]\n", sym->name); fprintf(Outfile, "\tmovsx\t%s, word [%s]\n", dreglist[r], sym->name); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword [%s]\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadlocal(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->posn); fprintf(Outfile, "\tmov\t%s, [rbp+%d]\n", reglist[r], sym->posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->posn); } else switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->posn); fprintf(Outfile, "\tmovzx\t%s, byte [rbp+%d]\n", reglist[r], sym->posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", sym->posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", sym->posn); fprintf(Outfile, "\tmovsx\t%s, dword [rbp+%d]\n", reglist[r], sym->posn); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", sym->posn); if (op == A_POSTDEC) fprintf(Outfile, 
"\tdec\tdword\t[rbp+%d]\n", sym->posn); break; default: fatald("Bad type in cgloadlocal:", sym->type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, L%d\n", reglist[r], label); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tand\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\tor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshl\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshr\t%s, 
cl\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tneg\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnot\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r], breglist[r]); return (r); } // Convert an integer value to a boolean value. Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, byte %s\n", reglist[r], breglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\tadd\trsp, %d\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmov\t%s, rax\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpush\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmov\t%s, %s\n", reglist[FIRSTPARAMREG - argposn + 1], reglist[r]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsal\t%s, %d\n", reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, dreglist[r]); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\tqword\t[rbp+%d], %s\n", sym->posn, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\tbyte\t[rbp+%d], %s\n", sym->posn, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\tdword\t[rbp+%d], %s\n", sym->posn, dreglist[r]); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size, type; int initvalue; int i; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the variable (or its elements if an array) // and the type of the variable if (node->stype == S_ARRAY) { size= typesize(value_at(node->type), node->ctype); type= value_at(node->type); } else { size = node->size; type= node->type; } // Generate the global identity and the label cgdataseg(); fprintf(Outfile, "\tsection\t.data\n" "\tglobal\t%s\n", node->name); 
fprintf(Outfile, "%s:\n", node->name); // Output space for one or more elements for (i = 0; i < node->nelems; i++) { // Get any initial value initvalue = 0; if (node->initlist != NULL) initvalue = node->initlist[i]; // Generate the space for this type // original version switch(size) { case 1: fprintf(Outfile, "\tdb\t%d\n", initvalue); break; case 4: fprintf(Outfile, "\tdd\t%d\n", initvalue); break; case 8: // Generate the pointer to a string literal. Treat a zero value // as actually zero, not the label L0 if (node->initlist != NULL && type == pointer_to(P_CHAR) && initvalue != 0) fprintf(Outfile, "\tdq\tL%d\n", initvalue); else fprintf(Outfile, "\tdq\t%d\n", initvalue); break; default: for (int i = 0; i < size; i++) fprintf(Outfile, "\tdb\t0\n"); /* compact version using times instead of loop fprintf(Outfile, "\ttimes\t%d\tdb\t0\n", size); */ } } } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\tdb\t%d\n", *cptr); } fprintf(Outfile, "\tdb\t0\n"); /* put string in readable format on a single line // probably went overboard with error checking int comma = 0, quote = 0, start = 1; fprintf(Outfile, "\tdb\t"); for (cptr=strvalue; *cptr; cptr++) { if ( ! isprint(*cptr) ) if (comma || start) { fprintf(Outfile, "%d, ", *cptr); start = 0; comma = 1; } else if (quote) { fprintf(Outfile, "\', %d, ", *cptr); comma = 1; quote = 0; } else { fprintf(Outfile, "%d, ", *cptr); comma = 1; quote = 0; } else if (start || comma) { fprintf(Outfile, "\'%c", *cptr); start = comma = 0; quote = 1; } else { fprintf(Outfile, "%c", *cptr); comma = 0; quote = 1; } } if (comma || start) fprintf(Outfile, "0\n"); else fprintf(Outfile, "\', 0\n"); */ } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzx\teax, %s\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmov\teax, %s\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } cgjump(sym->endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL) fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r], sym->name); else fprintf(Outfile, "\tlea\t%s, [rbp+%d]\n", reglist[r], sym->posn); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovsx\t%s, dword [%s]\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { //Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmov\t[%s], byte %s\n", reglist[r2], breglist[r1]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } // Generate a switch jump table and the code to // load the registers and call the switch() code void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; // Get a label for the switch table label = genlabel(); cglabel(label); // Heuristic. If we have no cases, create one case // which points to the default case if (casecount == 0) { caseval[0] = 0; caselabel[0] = defaultlabel; casecount = 1; } // Generate the switch jump table. 
fprintf(Outfile, "\tdq\t%d\n", casecount); for (i = 0; i < casecount; i++) fprintf(Outfile, "\tdq\t%d, L%d\n", caseval[i], caselabel[i]); fprintf(Outfile, "\tdq\tL%d\n", defaultlabel); // Load the specific registers cglabel(toplabel); fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); fprintf(Outfile, "\tmov\trdx, L%d\n", label); fprintf(Outfile, "\tjmp\tswitch\n"); } ================================================ FILE: 42_Casting/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Putback; // Character put back by scanner extern_ struct symtable *Functionid; // Symbol ptr of the current function extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ char *Infilename; // Name of file we are parsing extern_ char *Outfilename; // Name of file we opened as Outfile extern_ struct token Token; // Last token scanned extern_ char Text[TEXTLEN + 1]; // Last identifier scanned extern_ int Looplevel; // Depth of nested loops extern_ int Switchlevel; // Depth of nested switches // Symbol table lists extern_ struct symtable *Globhead, *Globtail; // Global variables and functions extern_ struct symtable *Loclhead, *Locltail; // Local variables extern_ struct symtable *Parmhead, *Parmtail; // Local parameters extern_ struct symtable *Membhead, *Membtail; // Temp list of struct/union members extern_ struct symtable *Structhead, *Structtail; // List of struct types extern_ struct symtable *Unionhead, *Uniontail; // List of union types extern_ struct symtable *Enumhead, *Enumtail; // List of enum types and values extern_ struct symtable *Typehead, *Typetail; // List of typedefs // Command-line flags extern_ int O_dumpAST; // If true, dump the AST trees extern_ int O_keepasm; // If true, keep any assembly files extern_ int O_assemble; // If true, assemble the assembly files extern_ int 
O_dolink; // If true, link the object files extern_ int O_verbose; // If true, print info on compilation stages ================================================ FILE: 42_Casting/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 static struct symtable *composite_declaration(int type); static int typedef_declaration(struct symtable **ctype); static int type_of_typedef(char *name, struct symtable **ctype); static void enum_declaration(void); // Parse the current token and return a primitive type enum value, // a pointer to any composite type and possibly modify // the class of the type. int parse_type(struct symtable **ctype, int *class) { int type, exstatic = 1; // See if the class has been changed to extern (later, static) while (exstatic) { switch (Token.token) { case T_EXTERN: *class = C_EXTERN; scan(&Token); break; default: exstatic = 0; } } // Now work on the actual type keyword switch (Token.token) { case T_VOID: type = P_VOID; scan(&Token); break; case T_CHAR: type = P_CHAR; scan(&Token); break; case T_INT: type = P_INT; scan(&Token); break; case T_LONG: type = P_LONG; scan(&Token); break; // For the following, if we have a ';' after the // parsing then there is no type, so return -1. 
// Example: struct x {int y; int z}; case T_STRUCT: type = P_STRUCT; *ctype = composite_declaration(P_STRUCT); if (Token.token == T_SEMI) type = -1; break; case T_UNION: type = P_UNION; *ctype = composite_declaration(P_UNION); if (Token.token == T_SEMI) type = -1; break; case T_ENUM: type = P_INT; // Enums are really ints enum_declaration(); if (Token.token == T_SEMI) type = -1; break; case T_TYPEDEF: type = typedef_declaration(ctype); if (Token.token == T_SEMI) type = -1; break; case T_IDENT: type = type_of_typedef(Text, ctype); break; default: fatals("Illegal type, token", Token.tokstr); } return (type); } // Given a type parsed by parse_type(), scan in any following // '*' tokens and return the new type int parse_stars(int type) { while (1) { if (Token.token != T_STAR) break; type = pointer_to(type); scan(&Token); } return (type); } // Parse a type which appears inside a cast int parse_cast(void) { int type, class; struct symtable *ctype; // Get the type inside the parentheses type= parse_stars(parse_type(&ctype, &class)); // Do some error checking. I'm sure more can be done if (type == P_STRUCT || type == P_UNION || type == P_VOID) fatal("Cannot cast to a struct, union or void type"); return(type); } // Given a type, check that the latest token is a literal // of that type. If an integer literal, return this value. // If a string literal, return the label number of the string. // Do not scan the next token. If type is P_NONE, relax all // parse restrictions int parse_literal(int type) { // We have a string literal. Store in memory and return the label if (Token.token == T_STRLIT) { if (type == pointer_to(P_CHAR) || type == P_NONE) return(genglobstr(Text)); } // We have an integer literal. Do some range checking. 
if (Token.token == T_INTLIT) { switch(type) { case P_CHAR: if (Token.intvalue < 0 || Token.intvalue > 255) fatal("Integer literal value too big for char type"); case P_NONE: case P_INT: case P_LONG: break; default: fatal("Type mismatch: integer literal vs. variable"); } } else fatal("Expecting an integer literal value"); return(Token.intvalue); } // Given the type, name and class of a scalar variable, // parse any initialisation value and allocate storage for it. // Return the variable's symbol table entry. static struct symtable *scalar_declaration(char *varname, int type, struct symtable *ctype, int class, struct ASTnode **tree) { struct symtable *sym=NULL; struct ASTnode *varnode, *exprnode; int casttype; *tree= NULL; // Add this as a known scalar switch (class) { case C_EXTERN: case C_GLOBAL: sym= addglob(varname, type, ctype, S_VARIABLE, class, 1, 0); break; case C_LOCAL: sym= addlocl(varname, type, ctype, S_VARIABLE, 1); break; case C_PARAM: sym= addparm(varname, type, ctype, S_VARIABLE); break; case C_MEMBER: sym= addmemb(varname, type, ctype, S_VARIABLE, 1); break; } // The variable is being initialised if (Token.token == T_ASSIGN) { // Only possible for a global or local if (class != C_GLOBAL && class != C_LOCAL) fatals("Variable can not be initialised", varname); scan(&Token); // Globals must be assigned a literal value if (class == C_GLOBAL) { // If there is a cast if (Token.token == T_LPAREN) { // Get the type in the cast scan(&Token); casttype= parse_cast(); rparen(); // Check that the two types are compatible. Change // the new type so that the literal parse below works. // A 'void *' casstype can be assigned to any pointer type. 
if (casttype == type || (casttype== pointer_to(P_VOID) && ptrtype(type))) type= P_NONE; else fatal("Type mismatch"); } // Create one initial value for the variable and // parse this value sym->initlist= (int *)malloc(sizeof(int)); sym->initlist[0]= parse_literal(type); scan(&Token); } if (class == C_LOCAL) { // Make an A_IDENT AST node with the variable varnode = mkastleaf(A_IDENT, sym->type, sym, 0); // Get the expression for the assignment, make into a rvalue exprnode = binexpr(0); exprnode->rvalue = 1; // Ensure the expression's type matches the variable exprnode = modify_type(exprnode, varnode->type, 0); if (exprnode == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree *tree = mkastnode(A_ASSIGN, exprnode->type, exprnode, NULL, varnode, NULL, 0); } } // Generate any global space if (class == C_GLOBAL) genglobsym(sym); return (sym); } // Given the type, name and class of an variable, parse // the size of the array, if any. Then parse any initialisation // value and allocate storage for it. // Return the variable's symbol table entry. static struct symtable *array_declaration(char *varname, int type, struct symtable *ctype, int class) { struct symtable *sym; // New symbol table entry int nelems= -1; // Assume the number of elements won't be given int maxelems; // The maximum number of elements in the init list int *initlist; // The list of initial elements int i=0, j; int casttype, newtype; // Skip past the '[' scan(&Token); // See we have an array size if (Token.token == T_INTLIT) { if (Token.intvalue <= 0) fatald("Array size is illegal", Token.intvalue); nelems= Token.intvalue; scan(&Token); } // Ensure we have a following ']' match(T_RBRACKET, "]"); // Add this as a known array. 
We treat the // array as a pointer to its elements' type switch (class) { case C_EXTERN: case C_GLOBAL: sym = addglob(varname, pointer_to(type), ctype, S_ARRAY, class, 0, 0); break; default: fatal("For now, declaration of non-global arrays is not implemented"); } // Array initialisation if (Token.token == T_ASSIGN) { if (class != C_GLOBAL) fatals("Variable can not be initialised", varname); scan(&Token); // Get the following left curly bracket match(T_LBRACE, "{"); #define TABLE_INCREMENT 10 // If the array already has nelems, allocate that many elements // in the list. Otherwise, start with TABLE_INCREMENT. if (nelems != -1) maxelems= nelems; else maxelems= TABLE_INCREMENT; initlist= (int *)malloc(maxelems *sizeof(int)); // Loop getting a new literal value from the list while (1) { // Get the original type newtype= type; // Check we can add the next value, then parse and add it if (nelems != -1 && i == maxelems) fatal("Too many values in initialisation list"); if (Token.token == T_LPAREN) { // Get the type in the cast scan(&Token); casttype= parse_cast(); rparen(); // Check that the two types are compatible. Change // the new type so that the literal parse below works. // A 'void *' casstype can be assigned to any pointer type. if (casttype == type || (casttype== pointer_to(P_VOID) && ptrtype(type))) newtype= P_NONE; else fatal("Type mismatch"); newtype= P_NONE; } initlist[i++]= parse_literal(newtype); scan(&Token); // Increase the list size if the original size was // not set and we have hit the end of the current list if (nelems == -1 && i == maxelems) { maxelems += TABLE_INCREMENT; initlist= (int *)realloc(initlist, maxelems *sizeof(int)); } // Leave when we hit the right curly bracket if (Token.token == T_RBRACE) { scan(&Token); break; } // Next token must be a comma, then comma(); } // Zero any unused elements in the initlist. 
// Attach the list to the symbol table entry for (j=i; j < sym->nelems; j++) initlist[j]=0; if (i > nelems) nelems = i; sym->initlist= initlist; } // Set the size of the array and the number of elements sym->nelems= nelems; sym->size= sym->nelems * typesize(type, ctype); // Generate any global space if (class == C_GLOBAL) genglobsym(sym); return (sym); } // Given a pointer to the new function being declared and // a possibly NULL pointer to the function's previous declaration, // parse a list of parameters and cross-check them against the // previous declaration. Return the count of parameters static int param_declaration_list(struct symtable *oldfuncsym, struct symtable *newfuncsym) { int type, paramcnt = 0; struct symtable *ctype; struct symtable *protoptr = NULL; struct ASTnode *unused; // Get the pointer to the first prototype parameter if (oldfuncsym != NULL) protoptr = oldfuncsym->member; // Loop getting any parameters while (Token.token != T_RPAREN) { // Get the type of the next parameter type = declaration_list(&ctype, C_PARAM, T_COMMA, T_RPAREN, &unused); if (type == -1) fatal("Bad type in parameter list"); // Ensure the type of this parameter matches the prototype if (protoptr != NULL) { if (type != protoptr->type) fatald("Type doesn't match prototype for parameter", paramcnt + 1); protoptr = protoptr->next; } paramcnt++; // Stop when we hit the right parenthesis if (Token.token == T_RPAREN) break; // We need a comma as separator comma(); } if (oldfuncsym != NULL && paramcnt != oldfuncsym->nelems) fatals("Parameter count mismatch for function", oldfuncsym->name); // Return the count of parameters return (paramcnt); } // // function_declaration: type identifier '(' parameter_list ')' ; // | type identifier '(' parameter_list ')' compound_statement ; // // Parse the declaration of function. 
static struct symtable *function_declaration(char *funcname, int type, struct symtable *ctype, int class) { struct ASTnode *tree, *finalstmt; struct symtable *oldfuncsym, *newfuncsym = NULL; int endlabel, paramcnt; // Text has the identifier's name. If this exists and is a // function, get the id. Otherwise, set oldfuncsym to NULL. if ((oldfuncsym = findsymbol(funcname)) != NULL) if (oldfuncsym->stype != S_FUNCTION) oldfuncsym = NULL; // If this is a new function declaration, get a // label-id for the end label, and add the function // to the symbol table, if (oldfuncsym == NULL) { endlabel = genlabel(); // Assumtion: functions only return scalar types, so NULL below newfuncsym = addglob(funcname, type, NULL, S_FUNCTION, C_GLOBAL, 0, endlabel); } // Scan in the '(', any parameters and the ')'. // Pass in any existing function prototype pointer lparen(); paramcnt = param_declaration_list(oldfuncsym, newfuncsym); rparen(); // If this is a new function declaration, update the // function symbol entry with the number of parameters. // Also copy the parameter list into the function's node. if (newfuncsym) { newfuncsym->nelems = paramcnt; newfuncsym->member = Parmhead; oldfuncsym = newfuncsym; } // Clear out the parameter list Parmhead = Parmtail = NULL; // Declaration ends in a semicolon, only a prototype. if (Token.token == T_SEMI) return (oldfuncsym); // This is not just a prototype. // Set the Functionid global to the function's symbol pointer Functionid = oldfuncsym; // Get the AST tree for the compound statement and mark // that we have parsed no loops or switches yet Looplevel = 0; Switchlevel = 0; lbrace(); tree = compound_statement(0); rbrace(); // If the function type isn't P_VOID .. if (type != P_VOID) { // Error if no statements in the function if (tree == NULL) fatal("No statements in function with non-void type"); // Check that the last AST operation in the // compound statement was a return statement finalstmt = (tree->op == A_GLUE) ? 
tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); } // Build the A_FUNCTION node which has the function's symbol pointer // and the compound statement sub-tree tree = mkastunary(A_FUNCTION, type, tree, oldfuncsym, endlabel); // Generate the assembly code for it if (O_dumpAST) { dumpAST(tree, NOLABEL, 0); fprintf(stdout, "\n\n"); } genAST(tree, NOLABEL, NOLABEL, NOLABEL, 0); // Now free the symbols associated // with this function freeloclsyms(); return (oldfuncsym); } // Parse composite type declarations: structs or unions. // Either find an existing struct/union declaration, or build // a struct/union symbol table entry and return its pointer. static struct symtable *composite_declaration(int type) { struct symtable *ctype = NULL; struct symtable *m; struct ASTnode *unused; int offset; int t; // Skip the struct/union keyword scan(&Token); // See if there is a following struct/union name if (Token.token == T_IDENT) { // Find any matching composite type if (type == P_STRUCT) ctype = findstruct(Text); else ctype = findunion(Text); scan(&Token); } // If the next token isn't an LBRACE , this is // the usage of an existing struct/union type. // Return the pointer to the type. if (Token.token != T_LBRACE) { if (ctype == NULL) fatals("unknown struct/union type", Text); return (ctype); } // Ensure this struct/union type hasn't been // previously defined if (ctype) fatals("previously defined struct/union", Text); // Build the composite type and skip the left brace if (type == P_STRUCT) ctype = addstruct(Text); else ctype = addunion(Text); scan(&Token); // Scan in the list of members while (1) { // Get the next member. 
m is used as a dummy t= declaration_list(&m, C_MEMBER, T_SEMI, T_RBRACE, &unused); if (t== -1) fatal("Bad type in member list"); if (Token.token == T_SEMI) scan(&Token); if (Token.token == T_RBRACE) break; } // Attach to the struct type's node rbrace(); if (Membhead==NULL) fatals("No members in struct", ctype->name); ctype->member = Membhead; Membhead = Membtail = NULL; // Set the offset of the initial member // and find the first free byte after it m = ctype->member; m->posn = 0; offset = typesize(m->type, m->ctype); // Set the position of each successive member in the composite type // Unions are easy. For structs, align the member and find the next free byte for (m = m->next; m != NULL; m = m->next) { // Set the offset for this member if (type == P_STRUCT) m->posn = genalign(m->type, offset, 1); else m->posn = 0; // Get the offset of the next free byte after this member offset += typesize(m->type, m->ctype); } // Set the overall size of the composite type ctype->size = offset; return (ctype); } // Parse an enum declaration static void enum_declaration(void) { struct symtable *etype = NULL; char *name= NULL; int intval = 0; // Skip the enum keyword. scan(&Token); // If there's a following enum type name, get a // pointer to any existing enum type node. if (Token.token == T_IDENT) { etype = findenumtype(Text); name = strdup(Text); // As it gets tromped soon scan(&Token); } // If the next token isn't a LBRACE, check // that we have an enum type name, then return if (Token.token != T_LBRACE) { if (etype == NULL) fatals("undeclared enum type:", name); return; } // We do have an LBRACE. Skip it scan(&Token); // If we have an enum type name, ensure that it // hasn't been declared before. 
if (etype != NULL) fatals("enum type redeclared:", etype->name); else // Build an enum type node for this identifier etype = addenum(name, C_ENUMTYPE, 0); // Loop to get all the enum values while (1) { // Ensure we have an identifier // Copy it in case there's an int literal coming up ident(); name = strdup(Text); // Ensure this enum value hasn't been declared before etype = findenumval(name); if (etype != NULL) fatals("enum value redeclared:", Text); // If the next token is an '=', skip it and // get the following int literal if (Token.token == T_ASSIGN) { scan(&Token); if (Token.token != T_INTLIT) fatal("Expected int literal after '='"); intval = Token.intvalue; scan(&Token); } // Build an enum value node for this identifier. // Increment the value for the next enum identifier. etype = addenum(name, C_ENUMVAL, intval++); // Bail out on a right curly bracket, else get a comma if (Token.token == T_RBRACE) break; comma(); } scan(&Token); // Skip over the right curly bracket } // Parse a typedef declaration and return the type // and ctype that it represents static int typedef_declaration(struct symtable **ctype) { int type, class = 0; // Skip the typedef keyword. scan(&Token); // Get the actual type following the keyword type = parse_type(ctype, &class); if (class != 0) fatal("Can't have extern in a typedef declaration"); // See if the typedef identifier already exists if (findtypedef(Text) != NULL) fatals("redefinition of typedef", Text); // Get any following '*' tokens type = parse_stars(type); // It doesn't exist so add it to the typedef list addtypedef(Text, type, *ctype); scan(&Token); return (type); } // Given a typedef name, return the type it represents static int type_of_typedef(char *name, struct symtable **ctype) { struct symtable *t; // Look up the typedef in the list t = findtypedef(name); if (t == NULL) fatals("unknown type", name); scan(&Token); *ctype = t->ctype; return (t->type); } // Parse the declaration of a variable or function. 
// The type and any following '*'s have been scanned, and we // have the identifier in the Token variable. // The class argument is the variable's class. // Return a pointer to the symbol's entry in the symbol table static struct symtable *symbol_declaration(int type, struct symtable *ctype, int class, struct ASTnode **tree) { struct symtable *sym = NULL; char *varname = strdup(Text); // Ensure that we have an identifier. // We copied it above so we can scan more tokens in, e.g. // an assignment expression for a local variable. ident(); // Deal with function declarations if (Token.token == T_LPAREN) { return (function_declaration(varname, type, ctype, class)); } // See if this array or scalar variable has already been declared switch (class) { case C_EXTERN: case C_GLOBAL: if (findglob(varname) != NULL) fatals("Duplicate global variable declaration", varname); case C_LOCAL: case C_PARAM: if (findlocl(varname) != NULL) fatals("Duplicate local variable declaration", varname); case C_MEMBER: if (findmember(varname) != NULL) fatals("Duplicate struct/union member declaration", varname); } // Add the array or scalar variable to the symbol table if (Token.token == T_LBRACKET) sym = array_declaration(varname, type, ctype, class); else sym = scalar_declaration(varname, type, ctype, class, tree); return (sym); } // Parse a list of symbols where there is an initial type. // Return the type of the symbols. et1 and et2 are end tokens. int declaration_list(struct symtable **ctype, int class, int et1, int et2, struct ASTnode **gluetree) { int inittype, type; struct symtable *sym; struct ASTnode *tree; *gluetree= NULL; // Get the initial type. 
If -1, it was // a composite type definition, return this if ((inittype = parse_type(ctype, &class)) == -1) return (inittype); // Now parse the list of symbols while (1) { // See if this symbol is a pointer type = parse_stars(inittype); // Parse this symbol sym = symbol_declaration(type, *ctype, class, &tree); // We parsed a function, there is no list so leave if (sym->stype == S_FUNCTION) { if (class != C_GLOBAL) fatal("Function definition not at global level"); return (type); } // Glue any AST tree from a local declaration // to build a sequence of assignments to perform if (*gluetree== NULL) *gluetree= tree; else *gluetree = mkastnode(A_GLUE, P_NONE, *gluetree, NULL, tree, NULL, 0); // We are at the end of the list, leave if (Token.token == et1 || Token.token == et2) return (type); // Otherwise, we need a comma as separator comma(); } } // Parse one or more global declarations, either // variables, functions or structs void global_declarations(void) { struct symtable *ctype; struct ASTnode *unused; while (Token.token != T_EOF) { declaration_list(&ctype, C_GLOBAL, T_SEMI, T_EOF, &unused); // Skip any semicolons and right curly brackets if (Token.token == T_SEMI) scan(&Token); } } ================================================ FILE: 42_Casting/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c void reject_token(struct token *t); int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue); struct ASTnode *mkastleaf(int op, int type, struct symtable *sym, int intvalue); struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, struct symtable *sym, int intvalue); void dumpAST(struct ASTnode *n, int label, int parentASTop); // gen.c int genlabel(void); int genAST(struct ASTnode *n, int iflabel, int looptoplabel, int loopendlabel, int 
parentASTop); void genpreamble(); void genpostamble(); void genfreeregs(); void genglobsym(struct symtable *node); int genglobstr(char *strvalue); int genprimsize(int type); int genalign(int type, int offset, int direction); void genreturn(int reg, int id); // cg.c int cgprimsize(int type); int cgalign(int type, int offset, int direction); void cgtextseg(); void cgdataseg(); void freeall_registers(void); void cgpreamble(); void cgpostamble(); void cgfuncpreamble(struct symtable *sym); void cgfuncpostamble(struct symtable *sym); int cgloadint(int value, int type); int cgloadglob(struct symtable *sym, int op); int cgloadlocal(struct symtable *sym, int op); int cgloadglobstr(int label); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdiv(int r1, int r2); int cgshlconst(int r, int val); int cgcall(struct symtable *sym, int numargs); void cgcopyarg(int r, int argposn); int cgstorglob(int r, struct symtable *sym); int cgstorlocal(int r, struct symtable *sym); void cgglobsym(struct symtable *node); void cgglobstr(int l, char *strvalue); int cgcompare_and_set(int ASTop, int r1, int r2); int cgcompare_and_jump(int ASTop, int r1, int r2, int label); void cglabel(int l); void cgjump(int l); int cgwiden(int r, int oldtype, int newtype); void cgreturn(int reg, struct symtable *sym); int cgaddress(struct symtable *sym); int cgderef(int r, int type); int cgstorderef(int r1, int r2, int type); int cgnegate(int r); int cginvert(int r); int cglognot(int r); int cgboolean(int r, int op, int label); int cgand(int r1, int r2); int cgor(int r1, int r2); int cgxor(int r1, int r2); int cgshl(int r1, int r2); int cgshr(int r1, int r2); void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel); // expr.c struct ASTnode *expression_list(int endtoken); struct ASTnode *binexpr(int ptp); // stmt.c struct ASTnode *compound_statement(int inswitch); // misc.c void match(int t, char *what); void semi(void); void 
lbrace(void); void rbrace(void); void lparen(void); void rparen(void); void ident(void); void comma(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node); struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn); struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn); struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int nelems); struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype); struct symtable *addstruct(char *name); struct symtable *addunion(char *name); struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int nelems); struct symtable *addenum(char *name, int class, int value); struct symtable *addtypedef(char *name, int type, struct symtable *ctype); struct symtable *findglob(char *s); struct symtable *findlocl(char *s); struct symtable *findsymbol(char *s); struct symtable *findmember(char *s); struct symtable *findstruct(char *s); struct symtable *findunion(char *s); struct symtable *findenumtype(char *s); struct symtable *findenumval(char *s); struct symtable *findtypedef(char *s); void clear_symtable(void); void freeloclsyms(void); // decl.c int parse_type(struct symtable **ctype, int *class); int parse_stars(int type); int parse_cast(void); int declaration_list(struct symtable **ctype, int class, int et1, int et2, struct ASTnode **gluetree); void global_declarations(void); // types.c int inttype(int type); int ptrtype(int type); int pointer_to(int type); int value_at(int type); int typesize(int type, struct symtable *ctype); struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op); ================================================ FILE: 42_Casting/defs.h 
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>

// Structure and enum definitions
// Copyright (c) 2019 Warren Toomey, GPL3

enum {
  TEXTLEN = 512                 // Length of identifiers in input
};

// Commands and default filenames
#define AOUT "a.out"
#ifdef __NASM__
#define ASCMD "nasm -f elf64 -o "
#define LDCMD "cc -no-pie -fno-plt -Wall -o "
#else
#define ASCMD "as -o "
#define LDCMD "cc -o "
#endif
#define CPPCMD "cpp -nostdinc -isystem "

// Token types. The binary operators T_ASSIGN .. T_SLASH must stay
// in this order: they line up one-to-one with the first AST node types.
enum {
  T_EOF,

  // Binary operators
  T_ASSIGN, T_LOGOR, T_LOGAND,
  T_OR, T_XOR, T_AMPER,
  T_EQ, T_NE,
  T_LT, T_GT, T_LE, T_GE,
  T_LSHIFT, T_RSHIFT,
  T_PLUS, T_MINUS, T_STAR, T_SLASH,

  // Other operators
  T_INC, T_DEC, T_INVERT, T_LOGNOT,

  // Type keywords
  T_VOID, T_CHAR, T_INT, T_LONG,

  // Other keywords
  T_IF, T_ELSE, T_WHILE, T_FOR, T_RETURN,
  T_STRUCT, T_UNION, T_ENUM, T_TYPEDEF,
  T_EXTERN, T_BREAK, T_CONTINUE,
  T_SWITCH, T_CASE, T_DEFAULT,

  // Structural tokens
  T_INTLIT, T_STRLIT, T_SEMI, T_IDENT,
  T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN,
  T_LBRACKET, T_RBRACKET, T_COMMA, T_DOT,
  T_ARROW, T_COLON
};

// Token structure
struct token {
  int token;                    // Token type, from the enum list above
  char *tokstr;                 // String version of the token
  int intvalue;                 // For T_INTLIT, the integer value
};

// AST node types. The first few line up
// with the related tokens
enum {
  A_ASSIGN = 1, A_LOGOR, A_LOGAND, A_OR, A_XOR, A_AND,
  A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_LSHIFT, A_RSHIFT,
  A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE,
  A_INTLIT, A_STRLIT, A_IDENT, A_GLUE,
  A_IF, A_WHILE, A_FUNCTION, A_WIDEN, A_RETURN,
  A_FUNCCALL, A_DEREF, A_ADDR, A_SCALE,
  A_PREINC, A_PREDEC, A_POSTINC, A_POSTDEC,
  A_NEGATE, A_INVERT, A_LOGNOT, A_TOBOOL, A_BREAK,
  A_CONTINUE, A_SWITCH, A_CASE, A_DEFAULT, A_CAST
};

// Primitive types. The bottom 4 bits is an integer
// value that represents the level of indirection,
// e.g. 0= no pointer, 1= pointer, 2= pointer pointer etc.
enum {
  P_NONE, P_VOID = 16, P_CHAR = 32, P_INT = 48, P_LONG = 64,
  P_STRUCT = 80, P_UNION = 96
};

// Structural types
enum {
  S_VARIABLE, S_FUNCTION, S_ARRAY
};

// Storage classes
enum {
  C_GLOBAL = 1,                 // Globally visible symbol
  C_LOCAL,                      // Locally visible symbol
  C_PARAM,                      // Locally visible function parameter
  C_EXTERN,                     // External globally visible symbol
  C_STRUCT,                     // A struct
  C_UNION,                      // A union
  C_MEMBER,                     // Member of a struct or union
  C_ENUMTYPE,                   // A named enumeration type
  C_ENUMVAL,                    // A named enumeration value
  C_TYPEDEF                     // A named typedef
};

// Symbol table structure
struct symtable {
  char *name;                   // Name of a symbol
  int type;                     // Primitive type for the symbol
  struct symtable *ctype;       // If struct/union, ptr to that type
  int stype;                    // Structural type for the symbol
  int class;                    // Storage class for the symbol
  int size;                     // Total size in bytes of this symbol
  int nelems;                   // Functions: # params. Arrays: # elements
  union {
    int endlabel;               // For functions, the end label
    int posn;                   // For locals, the negative offset
                                // from the stack base pointer
  };
  int *initlist;                // List of initial values
  struct symtable *next;        // Next symbol in one list
  struct symtable *member;      // First member of a function, struct,
};                              // union or enum

// Abstract Syntax Tree structure
struct ASTnode {
  int op;                       // "Operation" to be performed on this tree
  int type;                     // Type of any expression this tree generates
  int rvalue;                   // True if the node is an rvalue
  struct ASTnode *left;         // Left, middle and right child trees
  struct ASTnode *mid;
  struct ASTnode *right;
  struct symtable *sym;         // For many AST nodes, the pointer to
                                // the symbol in the symbol table
  union {
    int intvalue;               // For A_INTLIT, the integer value
    int size;                   // For A_SCALE, the size to scale by
  };
};

enum {
  NOREG = -1,                   // Use NOREG when the AST generation
                                // functions have no register to return
  NOLABEL = 0                   // Use NOLABEL when we have no label to
                                // pass to genAST()
};
================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of expressions // Copyright (c) 2019 Warren Toomey, GPL3 // expression_list: // | expression // | expression ',' expression_list // ; // Parse a list of zero or more comma-separated expressions and // return an AST composed of A_GLUE nodes with the left-hand child // being the sub-tree of previous expressions (or NULL) and the right-hand // child being the next expression. Each A_GLUE node will have size field // set to the number of expressions in the tree at this point. If no // expressions are parsed, NULL is returned struct ASTnode *expression_list(int endtoken) { struct ASTnode *tree = NULL; struct ASTnode *child = NULL; int exprcount = 0; // Loop until the end token while (Token.token != endtoken) { // Parse the next expression and increment the expression count child = binexpr(0); exprcount++; // Build an A_GLUE AST node with the previous tree as the left child // and the new expression as the right child. Store the expression count. tree = mkastnode(A_GLUE, P_NONE, tree, NULL, child, NULL, exprcount); // Stop when we reach the end token if (Token.token == endtoken) break; // Must have a ',' at this point match(T_COMMA, ","); } // Return the tree of expressions return (tree); } // Parse a function call and return its AST static struct ASTnode *funccall(void) { struct ASTnode *tree; struct symtable *funcptr; // Check that the identifier has been defined as a function, // then make a leaf node for it. if ((funcptr = findsymbol(Text)) == NULL || funcptr->stype != S_FUNCTION) { fatals("Undeclared function", Text); } // Get the '(' lparen(); // Parse the argument expression list tree = expression_list(T_RPAREN); // XXX Check type of each argument against the function's prototype // Build the function call AST node. Store the // function's return type as this node's type. 
// Also record the function's symbol-id tree = mkastunary(A_FUNCCALL, funcptr->type, tree, funcptr, 0); // Get the ')' rparen(); return (tree); } // Parse the index into an array and return an AST tree for it static struct ASTnode *array_access(void) { struct ASTnode *left, *right; struct symtable *aryptr; // Check that the identifier has been defined as an array // then make a leaf node for it that points at the base if ((aryptr = findsymbol(Text)) == NULL || aryptr->stype != S_ARRAY) { fatals("Undeclared array", Text); } left = mkastleaf(A_ADDR, aryptr->type, aryptr, 0); // Get the '[' scan(&Token); // Parse the following expression right = binexpr(0); // Get the ']' match(T_RBRACKET, "]"); // Ensure that this is of int type if (!inttype(right->type)) fatal("Array index is not of integer type"); // Scale the index by the size of the element's type right = modify_type(right, left->type, A_ADD); // Return an AST tree where the array's base has the offset // added to it, and dereference the element. Still an lvalue // at this point. left = mkastnode(A_ADD, aryptr->type, left, NULL, right, NULL, 0); left = mkastunary(A_DEREF, value_at(left->type), left, NULL, 0); return (left); } // Parse the member reference of a struct or union // and return an AST tree for it. If withpointer is true, // the access is through a pointer to the member. static struct ASTnode *member_access(int withpointer) { struct ASTnode *left, *right; struct symtable *compvar; struct symtable *typeptr; struct symtable *m; // Check that the identifier has been declared as a // struct/union or a struct/union pointer if ((compvar = findsymbol(Text)) == NULL) fatals("Undeclared variable", Text); if (withpointer && compvar->type != pointer_to(P_STRUCT) && compvar->type != pointer_to(P_UNION)) fatals("Undeclared variable", Text); if (!withpointer && compvar->type != P_STRUCT && compvar->type != P_UNION) fatals("Undeclared variable", Text); // If a pointer to a struct/union, get the pointer's value. 
// Otherwise, make a leaf node that points at the base // Either way, it's an rvalue if (withpointer) { left = mkastleaf(A_IDENT, pointer_to(compvar->type), compvar, 0); } else left = mkastleaf(A_ADDR, compvar->type, compvar, 0); left->rvalue = 1; // Get the details of the composite type typeptr = compvar->ctype; // Skip the '.' or '->' token and get the member's name scan(&Token); ident(); // Find the matching member's name in the type // Die if we can't find it for (m = typeptr->member; m != NULL; m = m->next) if (!strcmp(m->name, Text)) break; if (m == NULL) fatals("No member found in struct/union: ", Text); // Build an A_INTLIT node with the offset right = mkastleaf(A_INTLIT, P_INT, NULL, m->posn); // Add the member's offset to the base of the struct/union // and dereference it. Still an lvalue at this point left = mkastnode(A_ADD, pointer_to(m->type), left, NULL, right, NULL, 0); left = mkastunary(A_DEREF, m->type, left, NULL, 0); return (left); } // Parse a postfix expression and return // an AST node representing it. The // identifier is already in Text. static struct ASTnode *postfix(void) { struct ASTnode *n; struct symtable *varptr; struct symtable *enumptr; // If the identifier matches an enum value, // return an A_INTLIT node if ((enumptr = findenumval(Text)) != NULL) { scan(&Token); return (mkastleaf(A_INTLIT, P_INT, NULL, enumptr->posn)); } // Scan in the next token to see if we have a postfix expression scan(&Token); // Function call if (Token.token == T_LPAREN) return (funccall()); // An array reference if (Token.token == T_LBRACKET) return (array_access()); // Access into a struct or union if (Token.token == T_DOT) return (member_access(0)); if (Token.token == T_ARROW) return (member_access(1)); // A variable. Check that the variable exists. 
if ((varptr = findsymbol(Text)) == NULL || varptr->stype != S_VARIABLE) fatals("Unknown variable", Text); switch (Token.token) { // Post-increment: skip over the token case T_INC: scan(&Token); n = mkastleaf(A_POSTINC, varptr->type, varptr, 0); break; // Post-decrement: skip over the token case T_DEC: scan(&Token); n = mkastleaf(A_POSTDEC, varptr->type, varptr, 0); break; // Just a variable reference default: n = mkastleaf(A_IDENT, varptr->type, varptr, 0); } return (n); } // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; int id; int type=0; switch (Token.token) { case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. // Make it a P_CHAR if it's within the P_CHAR range if (Token.intvalue >= 0 && Token.intvalue < 256) n = mkastleaf(A_INTLIT, P_CHAR, NULL, Token.intvalue); else n = mkastleaf(A_INTLIT, P_INT, NULL, Token.intvalue); break; case T_STRLIT: // For a STRLIT token, generate the assembly for it. // Then make a leaf AST node for it. id is the string's label. id = genglobstr(Text); n = mkastleaf(A_STRLIT, pointer_to(P_CHAR), NULL, id); break; case T_IDENT: return (postfix()); case T_LPAREN: // Beginning of a parenthesised expression, skip the '('. scan(&Token); // If the token after is a type identifier, this is a cast expression switch (Token.token) { case T_IDENT: // We have to see if the identifier matches a typedef. // If not, treat it as an expression. if (findtypedef(Text) == NULL) { n = binexpr(0); break; } case T_VOID: case T_CHAR: case T_INT: case T_LONG: case T_STRUCT: case T_UNION: case T_ENUM: // Get the type inside the parentheses type= parse_cast(); // Skip the closing ')' and then parse the following expression rparen(); default: n = binexpr(0); // Scan in the expression } // We now have at least an expression in n, and possibly a non-zero type in type // if there was a cast. Skip the closing ')' if there was no cast. 
if (type == 0) rparen(); else // Otherwise, make a unary AST node for the cast n= mkastunary(A_CAST, type, n, NULL, 0); return (n); default: fatals("Expecting a primary expression, got token", Token.tokstr); } // Scan in the next token and return the leaf node scan(&Token); return (n); } // Convert a binary operator token into a binary AST operation. // We rely on a 1:1 mapping from token to AST operation static int binastop(int tokentype) { if (tokentype > T_EOF && tokentype <= T_SLASH) return (tokentype); fatald("Syntax error, token", tokentype); return (0); // Keep -Wall happy } // Return true if a token is right-associative, // false otherwise. static int rightassoc(int tokentype) { if (tokentype == T_ASSIGN) return (1); return (0); } // Operator precedence for each token. Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 20, 30, // T_EOF, T_ASSIGN, T_LOGOR, T_LOGAND 40, 50, 60, // T_OR, T_XOR, T_AMPER 70, 70, // T_EQ, T_NE 80, 80, 80, 80, // T_LT, T_GT, T_LE, T_GE 90, 90, // T_LSHIFT, T_RSHIFT 100, 100, // T_PLUS, T_MINUS 110, 110 // T_STAR, T_SLASH }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec; if (tokentype > T_SLASH) fatald("Token with no precedence in op_precedence:", tokentype); prec = OpPrec[tokentype]; if (prec == 0) fatald("Syntax error, token", tokentype); return (prec); } // prefix_expression: primary // | '*' prefix_expression // | '&' prefix_expression // | '-' prefix_expression // | '++' prefix_expression // | '--' prefix_expression // ; // Parse a prefix expression and return // a sub-tree representing it. 
struct ASTnode *prefix(void) { struct ASTnode *tree; switch (Token.token) { case T_AMPER: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Ensure that it's an identifier if (tree->op != A_IDENT) fatal("& operator must be followed by an identifier"); // Now change the operator to A_ADDR and the type to // a pointer to the original type tree->op = A_ADDR; tree->type = pointer_to(tree->type); break; case T_STAR: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's either another deref or an // identifier if (tree->op != A_IDENT && tree->op != A_DEREF) fatal("* operator must be followed by an identifier or *"); // Prepend an A_DEREF operation to the tree tree = mkastunary(A_DEREF, value_at(tree->type), tree, NULL, 0); break; case T_MINUS: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_NEGATE operation to the tree and // make the child an rvalue. Because chars are unsigned, // also widen this to int so that it's signed tree->rvalue = 1; tree = modify_type(tree, P_INT, 0); tree = mkastunary(A_NEGATE, tree->type, tree, NULL, 0); break; case T_INVERT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_INVERT operation to the tree and // make the child an rvalue. tree->rvalue = 1; tree = mkastunary(A_INVERT, tree->type, tree, NULL, 0); break; case T_LOGNOT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_LOGNOT operation to the tree and // make the child an rvalue. 
tree->rvalue = 1; tree = mkastunary(A_LOGNOT, tree->type, tree, NULL, 0); break; case T_INC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("++ operator must be followed by an identifier"); // Prepend an A_PREINC operation to the tree tree = mkastunary(A_PREINC, tree->type, tree, NULL, 0); break; case T_DEC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("-- operator must be followed by an identifier"); // Prepend an A_PREDEC operation to the tree tree = mkastunary(A_PREDEC, tree->type, tree, NULL, 0); break; default: tree = primary(); } return (tree); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; struct ASTnode *ltemp, *rtemp; int ASTop; int tokentype; // Get the tree on the left. // Fetch the next token at the same time. 
left = prefix(); // If we hit one of several terminating tokens, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON) { left->rvalue = 1; return (left); } // While the precedence of this token is more than that of the // previous token precedence, or it's right associative and // equal to the previous token's precedence while ((op_precedence(tokentype) > ptp) || (rightassoc(tokentype) && op_precedence(tokentype) == ptp)) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Determine the operation to be performed on the sub-trees ASTop = binastop(tokentype); if (ASTop == A_ASSIGN) { // Assignment // Make the right tree into an rvalue right->rvalue = 1; // Ensure the right's type matches the left right = modify_type(right, left->type, 0); if (right == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree. However, switch // left and right around, so that the right expression's // code will be generated before the left expression ltemp = left; left = right; right = ltemp; } else { // We are not doing an assignment, so both trees should be rvalues // Convert both trees into rvalue if they are lvalue trees left->rvalue = 1; right->rvalue = 1; // Ensure the two types are compatible by trying // to modify each tree to match the other's type. ltemp = modify_type(left, right->type, ASTop); rtemp = modify_type(right, left->type, ASTop); if (ltemp == NULL && rtemp == NULL) fatal("Incompatible types in binary expression"); if (ltemp != NULL) left = ltemp; if (rtemp != NULL) right = rtemp; } // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. left = mkastnode(binastop(tokentype), left->type, left, NULL, right, NULL, 0); // Update the details of the current token. 
// If we hit a terminating token, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON) { left->rvalue = 1; return (left); } } // Return the tree we have when the precedence // is the same or lower left->rvalue = 1; return (left); } ================================================ FILE: 42_Casting/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number int genlabel(void) { static int id = 1; return (id++); } // Generate the code for an IF statement // and an optional ELSE clause static int genIF(struct ASTnode *n, int looptoplabel, int loopendlabel) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. // When there is no ELSE clause, Lfalse _is_ // the ending label! Lfalse = genlabel(); if (n->right) Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. 
genAST(n->left, Lfalse, NOLABEL, NOLABEL, n->op); genfreeregs(); // Generate the true compound statement genAST(n->mid, NOLABEL, looptoplabel, loopendlabel, n->op); genfreeregs(); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); genfreeregs(); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = genlabel(); Lend = genlabel(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. genAST(n->left, Lend, Lstart, Lend, n->op); genfreeregs(); // Generate the compound statement for the body genAST(n->right, NOLABEL, Lstart, Lend, n->op); genfreeregs(); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Generate the code for a SWITCH statement static int genSWITCH(struct ASTnode *n) { int *caseval, *caselabel; int Ljumptop, Lend; int i, reg, defaultlabel = 0, casecount = 0; struct ASTnode *c; // Create arrays for the case values and associated labels. // Ensure that we have at least one position in each array. caseval = (int *) malloc((n->intvalue + 1) * sizeof(int)); caselabel = (int *) malloc((n->intvalue + 1) * sizeof(int)); // Generate labels for the top of the jump table, and the // end of the switch statement. Set a default label for // the end of the switch, in case we don't have a default. 
Ljumptop = genlabel(); Lend = genlabel(); defaultlabel = Lend; // Output the code to calculate the switch condition reg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, 0); cgjump(Ljumptop); genfreeregs(); // Walk the right-child linked list to // generate the code for each case for (i = 0, c = n->right; c != NULL; i++, c = c->right) { // Get a label for this case. Store it // and the case value in the arrays. // Record if it is the default case. caselabel[i] = genlabel(); caseval[i] = c->intvalue; cglabel(caselabel[i]); if (c->op == A_DEFAULT) defaultlabel = caselabel[i]; else casecount++; // Generate the case code. Pass in the end label for the breaks genAST(c->left, NOLABEL, NOLABEL, Lend, 0); genfreeregs(); } // Ensure the last case jumps past the switch table cgjump(Lend); // Now output the switch table and the end label. cgswitch(reg, casecount, Ljumptop, caselabel, caseval, defaultlabel); cglabel(Lend); return (NOREG); } // Generate the code to copy the arguments of a // function call to its parameters, then call the // function itself. Return the register that holds // the function's return value. static int gen_funccall(struct ASTnode *n) { struct ASTnode *gluetree = n->left; int reg; int numargs = 0; // If there is a list of arguments, walk this list // from the last argument (right-hand child) to the // first while (gluetree) { // Calculate the expression's value reg = genAST(gluetree->right, NOLABEL, NOLABEL, NOLABEL, gluetree->op); // Copy this into the n'th function parameter: size is 1, 2, 3, ... cgcopyarg(reg, gluetree->size); // Keep the first (highest) number of arguments if (numargs == 0) numargs = gluetree->size; genfreeregs(); gluetree = gluetree->left; } // Call the function, clean up the stack (based on numargs), // and return its result return (cgcall(n->sym, numargs)); } // Given an AST, an optional label, and the AST op // of the parent, generate assembly code recursively. // Return the register id with the tree's final value. 
int genAST(struct ASTnode *n, int iflabel, int looptoplabel, int loopendlabel, int parentASTop) { int leftreg, rightreg; // We have some specific AST node handling at the top // so that we don't evaluate the child sub-trees immediately switch (n->op) { case A_IF: return (genIF(n, looptoplabel, loopendlabel)); case A_WHILE: return (genWHILE(n)); case A_SWITCH: return (genSWITCH(n)); case A_FUNCCALL: return (gen_funccall(n)); case A_GLUE: // Do each child statement, and free the // registers after each child if (n->left != NULL) genAST(n->left, iflabel, looptoplabel, loopendlabel, n->op); genfreeregs(); if (n->right != NULL) genAST(n->right, iflabel, looptoplabel, loopendlabel, n->op); genfreeregs(); return (NOREG); case A_FUNCTION: // Generate the function's preamble before the code // in the child sub-tree cgfuncpreamble(n->sym); genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op); cgfuncpostamble(n->sym); return (NOREG); } // General AST node handling below // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op); if (n->right) rightreg = genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdiv(leftreg, rightreg)); case A_AND: return (cgand(leftreg, rightreg)); case A_OR: return (cgor(leftreg, rightreg)); case A_XOR: return (cgxor(leftreg, rightreg)); case A_LSHIFT: return (cgshl(leftreg, rightreg)); case A_RSHIFT: return (cgshr(leftreg, rightreg)); case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, compare registers // and set one to 1 or 0 based on the comparison. 
if (parentASTop == A_IF || parentASTop == A_WHILE) return (cgcompare_and_jump(n->op, leftreg, rightreg, iflabel)); else return (cgcompare_and_set(n->op, leftreg, rightreg)); case A_INTLIT: return (cgloadint(n->intvalue, n->type)); case A_STRLIT: return (cgloadglobstr(n->intvalue)); case A_IDENT: // Load our value if we are an rvalue // or we are being dereferenced if (n->rvalue || parentASTop == A_DEREF) { if (n->sym->class == C_GLOBAL) { return (cgloadglob(n->sym, n->op)); } else { return (cgloadlocal(n->sym, n->op)); } } else return (NOREG); case A_ASSIGN: // Are we assigning to an identifier or through a pointer? switch (n->right->op) { case A_IDENT: if (n->right->sym->class == C_GLOBAL) return (cgstorglob(leftreg, n->right->sym)); else return (cgstorlocal(leftreg, n->right->sym)); case A_DEREF: return (cgstorderef(leftreg, rightreg, n->right->type)); default: fatald("Can't A_ASSIGN in genAST(), op", n->op); } case A_WIDEN: // Widen the child's type to the parent's type return (cgwiden(leftreg, n->left->type, n->type)); case A_RETURN: cgreturn(leftreg, Functionid); return (NOREG); case A_ADDR: return (cgaddress(n->sym)); case A_DEREF: // If we are an rvalue, dereference to get the value we point at, // otherwise leave it for A_ASSIGN to store through the pointer if (n->rvalue) return (cgderef(leftreg, n->left->type)); else return (leftreg); case A_SCALE: // Small optimisation: use shift if the // scale value is a known power of two switch (n->size) { case 2: return (cgshlconst(leftreg, 1)); case 4: return (cgshlconst(leftreg, 2)); case 8: return (cgshlconst(leftreg, 3)); default: // Load a register with the size and // multiply the leftreg by this size rightreg = cgloadint(n->size, P_INT); return (cgmul(leftreg, rightreg)); } case A_POSTINC: case A_POSTDEC: // Load and decrement the variable's value into a register // and post increment/decrement it if (n->sym->class == C_GLOBAL) return (cgloadglob(n->sym, n->op)); else return (cgloadlocal(n->sym, n->op)); case 
A_PREINC: case A_PREDEC: // Load and decrement the variable's value into a register // and pre increment/decrement it if (n->left->sym->class == C_GLOBAL) return (cgloadglob(n->left->sym, n->op)); else return (cgloadlocal(n->left->sym, n->op)); case A_NEGATE: return (cgnegate(leftreg)); case A_INVERT: return (cginvert(leftreg)); case A_LOGNOT: return (cglognot(leftreg)); case A_TOBOOL: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, set the register // to 0 or 1 based on it's zeroeness or non-zeroeness return (cgboolean(leftreg, parentASTop, iflabel)); case A_BREAK: cgjump(loopendlabel); return (NOREG); case A_CONTINUE: cgjump(looptoplabel); return (NOREG); case A_CAST: return (leftreg); // Not much to do default: fatald("Unknown AST operator", n->op); } return (NOREG); // Keep -Wall happy } void genpreamble() { cgpreamble(); } void genpostamble() { cgpostamble(); } void genfreeregs() { freeall_registers(); } void genglobsym(struct symtable *node) { cgglobsym(node); } int genglobstr(char *strvalue) { int l = genlabel(); cgglobstr(l, strvalue); return (l); } int genprimsize(int type) { return (cgprimsize(type)); } int genalign(int type, int offset, int direction) { return (cgalign(type, offset, direction)); } ================================================ FILE: 42_Casting/include/ctype.h ================================================ #ifndef _CTYPE_H_ # define _CTYPE_H_ #endif // _CTYPE_H_ ================================================ FILE: 42_Casting/include/fcntl.h ================================================ #ifndef _FCNTL_H_ # define _FCNTL_H_ #define O_RDONLY 00 #define O_WRONLY 01 #define O_RDWR 02 int open(char *pathname, int flags); #endif // _FCNTL_H_ ================================================ FILE: 42_Casting/include/stddef.h ================================================ #ifndef _STDDEF_H_ # define _STDDEF_H_ #ifndef NULL # define NULL (void *)0 #endif typedef long size_t; #endif 
//_STDDEF_H_ ================================================ FILE: 42_Casting/include/stdio.h ================================================ #ifndef _STDIO_H_ # define _STDIO_H_ #include #ifndef NULL # define NULL (void *)0 #endif // This FILE definition will do for now typedef char * FILE; FILE *fopen(char *pathname, char *mode); size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream); size_t fwrite(void *ptr, size_t size, size_t nmemb, FILE *stream); int fclose(FILE *stream); int printf(char *format); int fprintf(FILE *stream, char *format); #endif // _STDIO_H_ ================================================ FILE: 42_Casting/include/stdlib.h ================================================ #ifndef _STDLIB_H_ # define _STDLIB_H_ #endif // _STDLIB_H_ ================================================ FILE: 42_Casting/include/string.h ================================================ #ifndef _STRING_H_ # define _STRING_H_ #endif // _STRING_H_ ================================================ FILE: 42_Casting/include/unistd.h ================================================ #ifndef _UNISTD_H_ # define _UNISTD_H_ #endif // _UNISTD_H_ ================================================ FILE: 42_Casting/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Given a string with a '.' and at least a 1-character suffix // after the '.', change the suffix to be the given character. // Return the new string or NULL if the original string could // not be modified char *alter_suffix(char *str, char suffix) { char *posn; char *newstr; // Clone the string if ((newstr = strdup(str)) == NULL) return (NULL); // Find the '.' 
if ((posn = strrchr(newstr, '.')) == NULL) return (NULL); // Ensure there is a suffix posn++; if (*posn == '\0') return (NULL); // Change the suffix and NUL-terminate the string *posn++ = suffix; *posn = '\0'; return (newstr); } // Given an input filename, compile that file // down to assembly code. Return the new file's name static char *do_compile(char *filename) { char cmd[TEXTLEN]; // Change the input file's suffix to .s Outfilename = alter_suffix(filename, 's'); if (Outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .c on the end\n", filename); exit(1); } // Generate the pre-processor command snprintf(cmd, TEXTLEN, "%s %s %s", CPPCMD, INCDIR, filename); // Open up the pre-processor pipe if ((Infile = popen(cmd, "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", filename, strerror(errno)); exit(1); } Infilename = filename; // Create the output file if ((Outfile = fopen(Outfilename, "w")) == NULL) { fprintf(stderr, "Unable to create %s: %s\n", Outfilename, strerror(errno)); exit(1); } Line = 1; // Reset the scanner Putback = '\n'; clear_symtable(); // Clear the symbol table if (O_verbose) printf("compiling %s\n", filename); scan(&Token); // Get the first token from the input genpreamble(); // Output the preamble global_declarations(); // Parse the global declarations genpostamble(); // Output the postamble fclose(Outfile); // Close the output file return (Outfilename); } // Given an input filename, assemble that file // down to object code. 
Return the object filename char *do_assemble(char *filename) { char cmd[TEXTLEN]; int err; char *outfilename = alter_suffix(filename, 'o'); if (outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .s on the end\n", filename); exit(1); } // Build the assembly command and run it snprintf(cmd, TEXTLEN, "%s %s %s", ASCMD, outfilename, filename); if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Assembly of %s failed\n", filename); exit(1); } return (outfilename); } // Given a list of object files and an output filename, // link all of the object filenames together. void do_link(char *outfilename, char *objlist[]) { int cnt, size = TEXTLEN; char cmd[TEXTLEN], *cptr; int err; // Start with the linker command and the output file cptr = cmd; cnt = snprintf(cptr, size, "%s %s ", LDCMD, outfilename); cptr += cnt; size -= cnt; // Now append each object file while (*objlist != NULL) { cnt = snprintf(cptr, size, "%s ", *objlist); cptr += cnt; size -= cnt; objlist++; } if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Linking failed\n"); exit(1); } } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s [-vcST] [-o outfile] file [file ...]\n", prog); fprintf(stderr, " -v give verbose output of the compilation stages\n"); fprintf(stderr, " -c generate object files but don't link them\n"); fprintf(stderr, " -S generate assembly files but don't link them\n"); fprintf(stderr, " -T dump the AST trees for each input file\n"); fprintf(stderr, " -o outfile, produce the outfile executable file\n"); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. 
enum { MAXOBJ = 100 }; int main(int argc, char *argv[]) { char *outfilename = AOUT; char *asmfile, *objfile; char *objlist[MAXOBJ]; int i, objcnt = 0; // Initialise our variables O_dumpAST = 0; O_keepasm = 0; O_assemble = 0; O_verbose = 0; O_dolink = 1; // Scan for command-line options for (i = 1; i < argc; i++) { // No leading '-', stop scanning for options if (*argv[i] != '-') break; // For each option in this argument for (int j = 1; (*argv[i] == '-') && argv[i][j]; j++) { switch (argv[i][j]) { case 'o': outfilename = argv[++i]; // Save & skip to next argument break; case 'T': O_dumpAST = 1; break; case 'c': O_assemble = 1; O_keepasm = 0; O_dolink = 0; break; case 'S': O_keepasm = 1; O_assemble = 0; O_dolink = 0; break; case 'v': O_verbose = 1; break; default: usage(argv[0]); } } } // Ensure we have at lease one input file argument if (i >= argc) usage(argv[0]); // Work on each input file in turn while (i < argc) { asmfile = do_compile(argv[i]); // Compile the source file if (O_dolink || O_assemble) { objfile = do_assemble(asmfile); // Assemble it to object forma if (objcnt == (MAXOBJ - 2)) { fprintf(stderr, "Too many object files for the compiler to handle\n"); exit(1); } objlist[objcnt++] = objfile; // Add the object file's name objlist[objcnt] = NULL; // to the list of object files } if (!O_keepasm) // Remove the assembly file if unlink(asmfile); // we don't need to keep it i++; } // Now link all the object files together if (O_dolink) { do_link(outfilename, objlist); // If we don't need to keep the object // files, then remove them if (!O_assemble) { for (i = 0; objlist[i] != NULL; i++) unlink(objlist[i]); } } return (0); } ================================================ FILE: 42_Casting/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" #include // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current token is t, // and fetch the next token. 
Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Match a comma and fetch the next token void comma(void) { match(T_COMMA, "comma"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d of %s\n", s, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d of %s\n", s1, s2, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d of %s\n", s, d, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d of %s\n", s, c, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } ================================================ FILE: 42_Casting/scan.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { char *p; p = strchr(s, c); return (p ? p - s : -1); } // Get the next character from the input file. 
static int next(void) { int c, l; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return (c); } c = fgetc(Infile); // Read from input file while (c == '#') { // We've hit a pre-processor statement scan(&Token); // Get the line number into l if (Token.token != T_INTLIT) fatals("Expecting pre-processor line number, got:", Text); l = Token.intvalue; scan(&Token); // Get the filename in Text if (Token.token != T_STRLIT) fatals("Expecting pre-processor file name, got:", Text); if (Text[0] != '<') { // If this is a real filename if (strcmp(Text, Infilename)) // and not the one we have now Infilename = strdup(Text); // save it. Then update the line num Line = l; } while ((c = fgetc(Infile)) != '\n'); // Skip to the end of the line c = fgetc(Infile); // and get the next character } if ('\n' == c) Line++; // Increment line count return (c); } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. static int skip(void) { int c; c = next(); while (' ' == c || '\t' == c || '\n' == c || '\r' == c || '\f' == c) { c = next(); } return (c); } // Return the next character from a character // or string literal static int scanch(void) { int c; // Get the next input character and interpret // metacharacters that start with a backslash c = next(); if (c == '\\') { switch (c = next()) { case 'a': return '\a'; case 'b': return '\b'; case 'f': return '\f'; case 'n': return '\n'; case 'r': return '\r'; case 't': return '\t'; case 'v': return '\v'; case '\\': return '\\'; case '"': return '"'; case '\'': return '\''; default: fatalc("unknown escape sequence", c); } } return (c); // Just an ordinary old character! } // Scan and return an integer literal // value from the input file. 
static int scanint(int c) { int k, val = 0; // Convert each character into an int value while ((k = chrpos("0123456789", c)) >= 0) { val = val * 10 + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan in a string literal from the input file, // and store it in buf[]. Return the length of // the string. static int scanstr(char *buf) { int i, c; // Loop while we have enough buffer space for (i = 0; i < TEXTLEN - 1; i++) { // Get the next char and append to buf // Return when we hit the ending double quote if ((c = scanch()) == '"') { buf[i] = 0; return (i); } buf[i] = c; } // Ran out of buf[] space fatal("String literal too long"); return (0); } // Scan an identifier from the input file and // store it in buf[]. Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { fatal("Identifier too long"); } else if (i < lim - 1) { buf[i++] = c; } c = next(); } // We hit a non-valid character, put it back. // NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. 
static int keyword(char *s) { switch (*s) { case 'b': if (!strcmp(s, "break")) return (T_BREAK); break; case 'c': if (!strcmp(s, "case")) return (T_CASE); if (!strcmp(s, "char")) return (T_CHAR); if (!strcmp(s, "continue")) return (T_CONTINUE); break; case 'd': if (!strcmp(s, "default")) return (T_DEFAULT); break; case 'e': if (!strcmp(s, "else")) return (T_ELSE); if (!strcmp(s, "enum")) return (T_ENUM); if (!strcmp(s, "extern")) return (T_EXTERN); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'l': if (!strcmp(s, "long")) return (T_LONG); break; case 'r': if (!strcmp(s, "return")) return (T_RETURN); break; case 's': if (!strcmp(s, "struct")) return (T_STRUCT); if (!strcmp(s, "switch")) return (T_SWITCH); break; case 't': if (!strcmp(s, "typedef")) return (T_TYPEDEF); break; case 'u': if (!strcmp(s, "union")) return (T_UNION); break; case 'v': if (!strcmp(s, "void")) return (T_VOID); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; } return (0); } // A pointer to a rejected token static struct token *Rejtoken = NULL; // Reject the token that we just scanned void reject_token(struct token *t) { if (Rejtoken != NULL) fatal("Can't reject token twice"); Rejtoken = t; } // List of token strings, for debugging purposes char *Tstring[] = { "EOF", "=", "||", "&&", "|", "^", "&", "==", "!=", ",", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", "default", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":" }; // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. 
int scan(struct token *t) { int c, tokentype; // If we have any rejected token, return it if (Rejtoken != NULL) { t = Rejtoken; Rejtoken = NULL; return (1); } // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': if ((c = next()) == '+') { t->token = T_INC; } else { putback(c); t->token = T_PLUS; } break; case '-': if ((c = next()) == '-') { t->token = T_DEC; } else if (c == '>') { t->token = T_ARROW; } else { putback(c); t->token = T_MINUS; } break; case '*': t->token = T_STAR; break; case '/': t->token = T_SLASH; break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case '[': t->token = T_LBRACKET; break; case ']': t->token = T_RBRACKET; break; case '~': t->token = T_INVERT; break; case '^': t->token = T_XOR; break; case ',': t->token = T_COMMA; break; case '.': t->token = T_DOT; break; case ':': t->token = T_COLON; break; case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { putback(c); t->token = T_LOGNOT; } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else if (c == '<') { t->token = T_LSHIFT; } else { putback(c); t->token = T_LT; } break; case '>': if ((c = next()) == '=') { t->token = T_GE; } else if (c == '>') { t->token = T_RSHIFT; } else { putback(c); t->token = T_GT; } break; case '&': if ((c = next()) == '&') { t->token = T_LOGAND; } else { putback(c); t->token = T_AMPER; } break; case '|': if ((c = next()) == '|') { t->token = T_LOGOR; } else { putback(c); t->token = T_OR; } break; case '\'': // If it's a quote, scan in the // literal character value and // the trailing quote t->intvalue = scanch(); t->token = T_INTLIT; if (next() != '\'') fatal("Expected '\\'' at end of char literal"); 
break; case '"': // Scan in a literal string scanstr(Text); t->token = T_STRLIT; break; default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if ((tokentype = keyword(Text)) != 0) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token t->tokstr = Tstring[t->token]; return (1); } ================================================ FILE: 42_Casting/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // Prototypes static struct ASTnode *single_statement(void); // compound_statement: // empty, i.e. no statement // | statement // | statement statements // ; // // statement: declaration // | expression_statement // | function_call // | if_statement // | while_statement // | for_statement // | return_statement // ; // if_statement: if_head // | if_head 'else' statement // ; // // if_head: 'if' '(' true_false_expression ')' statement ; // // Parse an IF statement including any // optional ELSE clause and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. 
condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); rparen(); // Get the AST for the statement trueAST = single_statement(); // If we have an 'else', skip it // and get the AST for the statement if (Token.token == T_ELSE) { scan(&Token); falseAST = single_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, P_NONE, condAST, trueAST, falseAST, NULL, 0)); } // while_statement: 'while' '(' true_false_expression ')' statement ; // // Parse a WHILE statement and return its AST static struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); rparen(); // Get the AST for the statement. // Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Build and return the AST for this statement return (mkastnode(A_WHILE, P_NONE, condAST, NULL, bodyAST, NULL, 0)); } // for_statement: 'for' '(' expression_list ';' // true_false_expression ';' // expression_list ')' statement ; // // Parse a FOR statement and return its AST static struct ASTnode *for_statement(void) { struct ASTnode *condAST, *bodyAST; struct ASTnode *preopAST, *postopAST; struct ASTnode *tree; // Ensure we have 'for' '(' match(T_FOR, "for"); lparen(); // Get the pre_op expression and the ';' preopAST = expression_list(T_SEMI); semi(); // Get the condition and the ';'. // Force a non-comparison to be boolean // the tree's operation is a comparison. 
condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); semi(); // Get the post_op expression and the ')' postopAST = expression_list(T_RPAREN); rparen(); // Get the statement which is the body // Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Glue the statement and the postop tree tree = mkastnode(A_GLUE, P_NONE, bodyAST, NULL, postopAST, NULL, 0); // Make a WHILE loop with the condition and this new body tree = mkastnode(A_WHILE, P_NONE, condAST, NULL, tree, NULL, 0); // And glue the preop tree to the A_WHILE tree return (mkastnode(A_GLUE, P_NONE, preopAST, NULL, tree, NULL, 0)); } // return_statement: 'return' '(' expression ')' ; // // Parse a return statement and return its AST static struct ASTnode *return_statement(void) { struct ASTnode *tree; // Can't return a value if function returns P_VOID if (Functionid->type == P_VOID) fatal("Can't return from a void function"); // Ensure we have 'return' '(' match(T_RETURN, "return"); lparen(); // Parse the following expression tree = binexpr(0); // Ensure this is compatible with the function's type tree = modify_type(tree, Functionid->type, 0); if (tree == NULL) fatal("Incompatible type to return"); // Add on the A_RETURN node tree = mkastunary(A_RETURN, P_NONE, tree, NULL, 0); // Get the ')' and ';' rparen(); semi(); return (tree); } // break_statement: 'break' ; // // Parse a break statement and return its AST static struct ASTnode *break_statement(void) { if (Looplevel == 0 && Switchlevel == 0) fatal("no loop or switch to break out from"); scan(&Token); semi(); return (mkastleaf(A_BREAK, 0, NULL, 0)); } // continue_statement: 'continue' ; // // Parse a continue statement and return its AST static struct ASTnode *continue_statement(void) { if (Looplevel == 0) fatal("no loop to continue to"); scan(&Token); semi(); return (mkastleaf(A_CONTINUE, 0, NULL, 0)); } // Parse a switch statement and 
return its AST static struct ASTnode *switch_statement(void) { struct ASTnode *left, *n, *c, *casetree= NULL, *casetail; int inloop=1, casecount=0; int seendefault=0; int ASTop, casevalue; // Skip the 'switch' and '(' scan(&Token); lparen(); // Get the switch expression, the ')' and the '{' left= binexpr(0); rparen(); lbrace(); // Ensure that this is of int type if (!inttype(left->type)) fatal("Switch expression is not of integer type"); // Build an A_SWITCH subtree with the expression as // the child n= mkastunary(A_SWITCH, 0, left, NULL, 0); // Now parse the cases Switchlevel++; while (inloop) { switch(Token.token) { // Leave the loop when we hit a '}' case T_RBRACE: if (casecount==0) fatal("No cases in switch"); inloop=0; break; case T_CASE: case T_DEFAULT: // Ensure this isn't after a previous 'default' if (seendefault) fatal("case or default after existing default"); // Set the AST operation. Scan the case value if required if (Token.token==T_DEFAULT) { ASTop= A_DEFAULT; seendefault= 1; scan(&Token); } else { ASTop= A_CASE; scan(&Token); left= binexpr(0); // Ensure the case value is an integer literal if (left->op != A_INTLIT) fatal("Expecting integer literal for case value"); casevalue= left->intvalue; // Walk the list of existing case values to ensure // that there isn't a duplicate case value for (c= casetree; c != NULL; c= c -> right) if (casevalue == c->intvalue) fatal("Duplicate case value"); } // Scan the ':' and get the compound expression match(T_COLON, ":"); left= compound_statement(1); casecount++; // Build a sub-tree with the compound statement as the left child // and link it in to the growing A_CASE tree if (casetree==NULL) { casetree= casetail= mkastunary(ASTop, 0, left, NULL, casevalue); } else { casetail->right= mkastunary(ASTop, 0, left, NULL, casevalue); casetail= casetail->right; } break; default: fatals("Unexpected token in switch", Token.tokstr); } } Switchlevel--; // We have a sub-tree with the cases and any default. 
Put the // case count into the A_SWITCH node and attach the case tree. n->intvalue= casecount; n->right= casetree; rbrace(); return(n); } // Parse a single statement and return its AST. static struct ASTnode *single_statement(void) { struct ASTnode *stmt; struct symtable *ctype; switch (Token.token) { case T_LBRACE: // We have a '{', so this is a compound statement lbrace(); stmt = compound_statement(0); rbrace(); return(stmt); case T_IDENT: // We have to see if the identifier matches a typedef. // If not, treat it as an expression. // Otherwise, fall down to the parse_type() call. if (findtypedef(Text) == NULL) { stmt= binexpr(0); semi(); return(stmt); } case T_CHAR: case T_INT: case T_LONG: case T_STRUCT: case T_UNION: case T_ENUM: case T_TYPEDEF: // The beginning of a variable declaration list. declaration_list(&ctype, C_LOCAL, T_SEMI, T_EOF, &stmt); semi(); return (stmt); // Any assignments from the declarations case T_IF: return (if_statement()); case T_WHILE: return (while_statement()); case T_FOR: return (for_statement()); case T_RETURN: return (return_statement()); case T_BREAK: return (break_statement()); case T_CONTINUE: return (continue_statement()); case T_SWITCH: return (switch_statement()); default: // For now, see if this is an expression. // This catches assignment statements. stmt= binexpr(0); semi(); return(stmt); } return (NULL); // Keep -Wall happy } // Parse a compound statement // and return its AST. If inswitch is true, // we look for a '}', 'case' or 'default' token // to end the parsing. Otherwise, look for // just a '}' to end the parsing. 
struct ASTnode *compound_statement(int inswitch) { struct ASTnode *left = NULL; struct ASTnode *tree; while (1) { // Parse a single statement tree = single_statement(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, P_NONE, left, NULL, tree, NULL, 0); } // Leave if we've hit the end token if (Token.token == T_RBRACE) return(left); if (inswitch && (Token.token == T_CASE || Token.token == T_DEFAULT)) return(left); } } ================================================ FILE: 42_Casting/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 // Append a node to the singly-linked list pointed to by head or tail void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node) { // Check for valid pointers if (head == NULL || tail == NULL || node == NULL) fatal("Either head, tail or node is NULL in appendsym"); // Append to the list if (*tail) { (*tail)->next = node; *tail = node; } else *head = *tail = node; node->next = NULL; } // Create a symbol node to be added to a symbol table list. // Set up the node's: // + type: char, int etc. // + ctype: composite type pointer for struct/union // + structural type: var, function, array etc. 
// + size: number of elements, or endlabel: end label for a function // + posn: Position information for local symbols // Return a pointer to the new node struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn) { // Get a new node struct symtable *node = (struct symtable *) malloc(sizeof(struct symtable)); if (node == NULL) fatal("Unable to malloc a symbol table node in newsym"); // Fill in the values if (name == NULL) node->name = NULL; else node->name = strdup(name); node->type = type; node->ctype = ctype; node->stype = stype; node->class = class; node->nelems = nelems; // For pointers and integer types, set the size // of the symbol. structs and union declarations // manually set this up themselves. if (ptrtype(type) || inttype(type)) node->size = nelems * typesize(type, ctype); node->posn = posn; node->next = NULL; node->member = NULL; node->initlist = NULL; return (node); } // Add a symbol to the global symbol list struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn) { struct symtable *sym = newsym(name, type, ctype, stype, class, nelems, posn); // For structs and unions, copy the size from the type node if (type== P_STRUCT || type== P_UNION) sym->size = ctype->size; appendsym(&Globhead, &Globtail, sym); return (sym); } // Add a symbol to the local symbol list struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int nelems) { struct symtable *sym = newsym(name, type, ctype, stype, C_LOCAL, nelems, 0); // For structs and unions, copy the size from the type node if (type== P_STRUCT || type== P_UNION) sym->size = ctype->size; appendsym(&Loclhead, &Locltail, sym); return (sym); } // Add a symbol to the parameter list struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype) { struct symtable *sym = newsym(name, type, ctype, stype, C_PARAM, 1, 0); appendsym(&Parmhead, &Parmtail, sym); return 
(sym); } // Add a symbol to the temporary member list struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int nelems) { struct symtable *sym = newsym(name, type, ctype, stype, C_MEMBER, nelems, 0); // For structs and unions, copy the size from the type node if (type== P_STRUCT || type== P_UNION) sym->size = ctype->size; appendsym(&Membhead, &Membtail, sym); return (sym); } // Add a struct to the struct list struct symtable *addstruct(char *name) { struct symtable *sym = newsym(name, P_STRUCT, NULL, 0, C_STRUCT, 0, 0); appendsym(&Structhead, &Structtail, sym); return (sym); } // Add a struct to the union list struct symtable *addunion(char *name) { struct symtable *sym = newsym(name, P_UNION, NULL, 0, C_UNION, 0, 0); appendsym(&Unionhead, &Uniontail, sym); return (sym); } // Add an enum type or value to the enum list. // Class is C_ENUMTYPE or C_ENUMVAL. // Use posn to store the int value. struct symtable *addenum(char *name, int class, int value) { struct symtable *sym = newsym(name, P_INT, NULL, 0, class, 0, value); appendsym(&Enumhead, &Enumtail, sym); return (sym); } // Add a typedef to the typedef list struct symtable *addtypedef(char *name, int type, struct symtable *ctype) { struct symtable *sym = newsym(name, type, ctype, 0, C_TYPEDEF, 0, 0); appendsym(&Typehead, &Typetail, sym); return (sym); } // Search for a symbol in a specific list. // Return a pointer to the found node or NULL if not found. // If class is not zero, also match on the given class static struct symtable *findsyminlist(char *s, struct symtable *list, int class) { for (; list != NULL; list = list->next) if ((list->name != NULL) && !strcmp(s, list->name)) if (class==0 || class== list->class) return (list); return (NULL); } // Determine if the symbol s is in the global symbol table. // Return a pointer to the found node or NULL if not found. 
struct symtable *findglob(char *s) { return (findsyminlist(s, Globhead, 0)); } // Determine if the symbol s is in the local symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findlocl(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } return (findsyminlist(s, Loclhead, 0)); } // Determine if the symbol s is in the symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findsymbol(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } // Otherwise, try the local and global symbol lists node = findsyminlist(s, Loclhead, 0); if (node) return (node); return (findsyminlist(s, Globhead, 0)); } // Find a member in the member list // Return a pointer to the found node or NULL if not found. struct symtable *findmember(char *s) { return (findsyminlist(s, Membhead, 0)); } // Find a struct in the struct list // Return a pointer to the found node or NULL if not found. struct symtable *findstruct(char *s) { return (findsyminlist(s, Structhead, 0)); } // Find a struct in the union list // Return a pointer to the found node or NULL if not found. struct symtable *findunion(char *s) { return (findsyminlist(s, Unionhead, 0)); } // Find an enum type in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumtype(char *s) { return (findsyminlist(s, Enumhead, C_ENUMTYPE)); } // Find an enum value in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumval(char *s) { return (findsyminlist(s, Enumhead, C_ENUMVAL)); } // Find a type in the tyedef list // Return a pointer to the found node or NULL if not found. 
struct symtable *findtypedef(char *s) { return (findsyminlist(s, Typehead, 0)); } // Reset the contents of the symbol table void clear_symtable(void) { Globhead = Globtail = NULL; Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Membhead = Membtail = NULL; Structhead = Structtail = NULL; Unionhead = Uniontail = NULL; Enumhead = Enumtail = NULL; Typehead = Typetail = NULL; } // Clear all the entries in the local symbol table void freeloclsyms(void) { Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Functionid = NULL; } ================================================ FILE: 42_Casting/tests/err.input031.c ================================================ Expecting a primary expression, got token:+ on line 5 of input031.c ================================================ FILE: 42_Casting/tests/err.input032.c ================================================ Unknown variable:cow on line 4 of input032.c ================================================ FILE: 42_Casting/tests/err.input033.c ================================================ Incompatible type to return on line 4 of input033.c ================================================ FILE: 42_Casting/tests/err.input034.c ================================================ For now, declaration of non-global arrays is not implemented on line 4 of input034.c ================================================ FILE: 42_Casting/tests/err.input035.c ================================================ Duplicate local variable declaration:a on line 4 of input035.c ================================================ FILE: 42_Casting/tests/err.input036.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input036.c ================================================ FILE: 42_Casting/tests/err.input037.c ================================================ Expected:comma on line 3 of input037.c ================================================ FILE: 42_Casting/tests/err.input038.c 
================================================ Type doesn't match prototype for parameter:2 on line 4 of input038.c ================================================ FILE: 42_Casting/tests/err.input039.c ================================================ No statements in function with non-void type on line 4 of input039.c ================================================ FILE: 42_Casting/tests/err.input040.c ================================================ No return for function with non-void type on line 4 of input040.c ================================================ FILE: 42_Casting/tests/err.input041.c ================================================ Can't return from a void function on line 3 of input041.c ================================================ FILE: 42_Casting/tests/err.input042.c ================================================ Undeclared function:fred on line 3 of input042.c ================================================ FILE: 42_Casting/tests/err.input043.c ================================================ Undeclared array:b on line 3 of input043.c ================================================ FILE: 42_Casting/tests/err.input044.c ================================================ Unknown variable:z on line 3 of input044.c ================================================ FILE: 42_Casting/tests/err.input045.c ================================================ & operator must be followed by an identifier on line 3 of input045.c ================================================ FILE: 42_Casting/tests/err.input046.c ================================================ * operator must be followed by an identifier or * on line 3 of input046.c ================================================ FILE: 42_Casting/tests/err.input047.c ================================================ ++ operator must be followed by an identifier on line 3 of input047.c ================================================ FILE: 42_Casting/tests/err.input048.c 
================================================ -- operator must be followed by an identifier on line 3 of input048.c ================================================ FILE: 42_Casting/tests/err.input049.c ================================================ Incompatible expression in assignment on line 6 of input049.c ================================================ FILE: 42_Casting/tests/err.input050.c ================================================ Incompatible types in binary expression on line 6 of input050.c ================================================ FILE: 42_Casting/tests/err.input051.c ================================================ Expected '\'' at end of char literal on line 4 of input051.c ================================================ FILE: 42_Casting/tests/err.input052.c ================================================ Unrecognised character:$ on line 5 of input052.c ================================================ FILE: 42_Casting/tests/err.input056.c ================================================ unknown struct/union type:var1 on line 2 of input056.c ================================================ FILE: 42_Casting/tests/err.input057.c ================================================ previously defined struct/union:fred on line 2 of input057.c ================================================ FILE: 42_Casting/tests/err.input059.c ================================================ Undeclared variable:y on line 3 of input059.c ================================================ FILE: 42_Casting/tests/err.input060.c ================================================ Undeclared variable:x on line 3 of input060.c ================================================ FILE: 42_Casting/tests/err.input061.c ================================================ Undeclared variable:x on line 3 of input061.c ================================================ FILE: 42_Casting/tests/err.input064.c ================================================ undeclared enum type::fred on 
line 1 of input064.c ================================================ FILE: 42_Casting/tests/err.input065.c ================================================ enum type redeclared::fred on line 2 of input065.c ================================================ FILE: 42_Casting/tests/err.input066.c ================================================ enum value redeclared::z on line 2 of input066.c ================================================ FILE: 42_Casting/tests/err.input068.c ================================================ redefinition of typedef:FOO on line 2 of input068.c ================================================ FILE: 42_Casting/tests/err.input069.c ================================================ unknown type:FLOO on line 2 of input069.c ================================================ FILE: 42_Casting/tests/err.input072.c ================================================ no loop or switch to break out from on line 1 of input072.c ================================================ FILE: 42_Casting/tests/err.input073.c ================================================ no loop to continue to on line 1 of input073.c ================================================ FILE: 42_Casting/tests/err.input075.c ================================================ Unexpected token in switch:if on line 4 of input075.c ================================================ FILE: 42_Casting/tests/err.input076.c ================================================ No cases in switch on line 3 of input076.c ================================================ FILE: 42_Casting/tests/err.input077.c ================================================ case or default after existing default on line 6 of input077.c ================================================ FILE: 42_Casting/tests/err.input078.c ================================================ case or default after existing default on line 6 of input078.c ================================================ FILE: 42_Casting/tests/err.input079.c 
================================================ Duplicate case value on line 6 of input079.c ================================================ FILE: 42_Casting/tests/err.input085.c ================================================ Bad type in parameter list on line 1 of input085.c ================================================ FILE: 42_Casting/tests/err.input086.c ================================================ Function definition not at global level on line 3 of input086.c ================================================ FILE: 42_Casting/tests/err.input087.c ================================================ Bad type in member list on line 4 of input087.c ================================================ FILE: 42_Casting/tests/err.input092.c ================================================ Integer literal value too big for char type on line 1 of input092.c ================================================ FILE: 42_Casting/tests/err.input093.c ================================================ Expecting an integer literal value on line 1 of input093.c ================================================ FILE: 42_Casting/tests/err.input094.c ================================================ Type mismatch: integer literal vs. 
variable on line 1 of input094.c ================================================ FILE: 42_Casting/tests/err.input095.c ================================================ Variable can not be initialised:x on line 1 of input095.c ================================================ FILE: 42_Casting/tests/err.input096.c ================================================ Array size is illegal:0 on line 1 of input096.c ================================================ FILE: 42_Casting/tests/err.input097.c ================================================ For now, declaration of non-global arrays is not implemented on line 2 of input097.c ================================================ FILE: 42_Casting/tests/err.input098.c ================================================ Too many values in initialisation list on line 1 of input098.c ================================================ FILE: 42_Casting/tests/err.input102.c ================================================ Cannot cast to a struct, union or void type on line 3 of input102.c ================================================ FILE: 42_Casting/tests/err.input103.c ================================================ Cannot cast to a struct, union or void type on line 3 of input103.c ================================================ FILE: 42_Casting/tests/err.input104.c ================================================ Cannot cast to a struct, union or void type on line 2 of input104.c ================================================ FILE: 42_Casting/tests/err.input105.c ================================================ Incompatible expression in assignment on line 4 of input105.c ================================================ FILE: 42_Casting/tests/input001.c ================================================ int printf(char *fmt); void main() { printf("%d\n", 12 * 3); printf("%d\n", 18 - 2 * 4); printf("%d\n", 1 + 2 + 9 - 5/2 + 3*5); } ================================================ FILE: 42_Casting/tests/input002.c 
================================================ int printf(char *fmt); void main() { int fred; int jim; fred= 5; jim= 12; printf("%d\n", fred + jim); } ================================================ FILE: 42_Casting/tests/input003.c ================================================ int printf(char *fmt); void main() { int x; x= 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); } ================================================ FILE: 42_Casting/tests/input004.c ================================================ int printf(char *fmt); void main() { int x; x= 7 < 9; printf("%d\n", x); x= 7 <= 9; printf("%d\n", x); x= 7 != 9; printf("%d\n", x); x= 7 == 7; printf("%d\n", x); x= 7 >= 7; printf("%d\n", x); x= 7 <= 7; printf("%d\n", x); x= 9 > 7; printf("%d\n", x); x= 9 >= 7; printf("%d\n", x); x= 9 != 7; printf("%d\n", x); } ================================================ FILE: 42_Casting/tests/input005.c ================================================ int printf(char *fmt); void main() { int i; int j; i=6; j=12; if (i < j) { printf("%d\n", i); } else { printf("%d\n", j); } } ================================================ FILE: 42_Casting/tests/input006.c ================================================ int printf(char *fmt); void main() { int i; i=1; while (i <= 10) { printf("%d\n", i); i= i + 1; } } ================================================ FILE: 42_Casting/tests/input007.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 42_Casting/tests/input008.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 42_Casting/tests/input009.c 
================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { printf("%d\n", 2 * b - a); } } ================================================ FILE: 42_Casting/tests/input010.c ================================================ int printf(char *fmt); void main() { int i; char j; j= 20; printf("%d\n", j); i= 10; printf("%d\n", i); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 2; j= j + 1) { printf("%d\n", j); } } ================================================ FILE: 42_Casting/tests/input011.c ================================================ int printf(char *fmt); int main() { int i; char j; long k; i= 10; printf("%d\n", i); j= 20; printf("%d\n", j); k= 30; printf("%d\n", k); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 4; j= j + 1) { printf("%d\n", j); } for (k= 1; k <= 5; k= k + 1) { printf("%d\n", k); } return(i); printf("%d\n", 12345); return(3); } ================================================ FILE: 42_Casting/tests/input012.c ================================================ int printf(char *fmt); int fred() { return(5); } void main() { int x; x= fred(2); printf("%d\n", x); } ================================================ FILE: 42_Casting/tests/input013.c ================================================ int printf(char *fmt); int fred() { return(56); } void main() { int dummy; int result; dummy= printf("%d\n", 23); result= fred(10); dummy= printf("%d\n", result); } ================================================ FILE: 42_Casting/tests/input014.c ================================================ int printf(char *fmt); int fred() { return(20); } int main() { int result; printf("%d\n", 10); result= fred(15); printf("%d\n", result); printf("%d\n", fred(15)+10); return(0); } ================================================ FILE: 42_Casting/tests/input015.c 
================================================ int printf(char *fmt); int main() { char a; char *b; char c; int d; int *e; int f; a= 18; printf("%d\n", a); b= &a; c= *b; printf("%d\n", c); d= 12; printf("%d\n", d); e= &d; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 42_Casting/tests/input016.c ================================================ int printf(char *fmt); int c; int d; int *e; int f; int main() { c= 12; d=18; printf("%d\n", c); e= &c + 1; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 42_Casting/tests/input017.c ================================================ int printf(char *fmt); int main() { char a; char *b; int d; int *e; b= &a; *b= 19; printf("%d\n", a); e= &d; *e= 12; printf("%d\n", d); return(0); } ================================================ FILE: 42_Casting/tests/input018.c ================================================ int printf(char *fmt); int main() { int a; int b; a= b= 34; printf("%d\n", a); printf("%d\n", b); return(0); } ================================================ FILE: 42_Casting/tests/input018a.c ================================================ int printf(char *fmt); int a; int *b; char c; char *d; int main() { b= &a; *b= 15; printf("%d\n", a); d= &c; *d= 16; printf("%d\n", c); return(0); } ================================================ FILE: 42_Casting/tests/input019.c ================================================ int printf(char *fmt); int a; int b; int c; int d; int e; int main() { a= 2; b= 4; c= 3; d= 2; e= (a+b) * (c+d); printf("%d\n", e); return(0); } ================================================ FILE: 42_Casting/tests/input020.c ================================================ int printf(char *fmt); int a; int b[25]; int main() { b[3]= 12; a= b[3]; printf("%d\n", a); return(0); } ================================================ FILE: 42_Casting/tests/input021.c ================================================ int 
printf(char *fmt); char c; char *str; int main() { c= '\n'; printf("%d\n", c); for (str= "Hello world\n"; *str != 0; str= str + 1) { printf("%c", *str); } return(0); } ================================================ FILE: 42_Casting/tests/input022.c ================================================ int printf(char *fmt); char a; char b; char c; int d; int e; int f; long g; long h; long i; int main() { b= 5; c= 7; a= b + c++; printf("%d\n", a); e= 5; f= 7; d= e + f++; printf("%d\n", d); h= 5; i= 7; g= h + i++; printf("%d\n", g); a= b-- + c; printf("%d\n", a); d= e-- + f; printf("%d\n", d); g= h-- + i; printf("%d\n", g); a= ++b + c; printf("%d\n", a); d= ++e + f; printf("%d\n", d); g= ++h + i; printf("%d\n", g); a= b * --c; printf("%d\n", a); d= e * --f; printf("%d\n", d); g= h * --i; printf("%d\n", g); return(0); } ================================================ FILE: 42_Casting/tests/input023.c ================================================ int printf(char *fmt); char *str; int x; int main() { x= -23; printf("%d\n", x); printf("%d\n", -10 * -10); x= 1; x= ~x; printf("%d\n", x); x= 2 > 5; printf("%d\n", x); x= !x; printf("%d\n", x); x= !x; printf("%d\n", x); x= 13; if (x) { printf("%d\n", 13); } x= 0; if (!x) { printf("%d\n", 14); } for (str= "Hello world\n"; *str; str++) { printf("%c", *str); } return(0); } ================================================ FILE: 42_Casting/tests/input024.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { a= 42; b= 19; printf("%d\n", a & b); printf("%d\n", a | b); printf("%d\n", a ^ b); printf("%d\n", 1 << 3); printf("%d\n", 63 >> 3); return(0); } ================================================ FILE: 42_Casting/tests/input025.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { char z; int y; int x; x= 10; y= 20; z= 30; printf("%d\n", x); printf("%d\n", y); printf("%d\n", z); a= 5; b= 15; c= 25; printf("%d\n", a); 
printf("%d\n", b); printf("%d\n", c); return(0); } ================================================ FILE: 42_Casting/tests/input026.c ================================================ int printf(char *fmt); int main(int a, char b, long c, int d, int e, int f, int g, int h) { int i; int j; int k; a= 13; printf("%d\n", a); b= 23; printf("%d\n", b); c= 34; printf("%d\n", c); d= 44; printf("%d\n", d); e= 54; printf("%d\n", e); f= 64; printf("%d\n", f); g= 74; printf("%d\n", g); h= 84; printf("%d\n", h); i= 94; printf("%d\n", i); j= 95; printf("%d\n", j); k= 96; printf("%d\n", k); return(0); } ================================================ FILE: 42_Casting/tests/input027.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int param5(int a, int b, int c, int d, int e) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param2(int a, int b) { int c; int d; int e; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param0() { int a; int b; int c; int d; int e; a= 1; b= 2; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int main() { param8(1,2,3,4,5,6,7,8); param5(1,2,3,4,5); param2(1,2); param0(); return(0); } ================================================ FILE: 42_Casting/tests/input028.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, 
int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 42_Casting/tests/input029.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h); int fred(int a, int b, int c); int main(); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 42_Casting/tests/input030.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input030.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 42_Casting/tests/input031.c ================================================ int printf(char *fmt); int main() { int x; x= 2 + + 3 - * / ; } ================================================ FILE: 42_Casting/tests/input032.c ================================================ int printf(char *fmt); int main() { pizza cow llama sausage; } ================================================ FILE: 42_Casting/tests/input033.c ================================================ int printf(char *fmt); int main() { char *z; return(z); } ================================================ FILE: 42_Casting/tests/input034.c ================================================ 
int printf(char *fmt); int main() { int a[12]; return(0); } ================================================ FILE: 42_Casting/tests/input035.c ================================================ int printf(char *fmt); int fred(int a, int b) { int a; return(a); } ================================================ FILE: 42_Casting/tests/input036.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c); ================================================ FILE: 42_Casting/tests/input037.c ================================================ int printf(char *fmt); int fred(int a, char b +, int z); ================================================ FILE: 42_Casting/tests/input038.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c, int g); ================================================ FILE: 42_Casting/tests/input039.c ================================================ int printf(char *fmt); int main() { int a; } ================================================ FILE: 42_Casting/tests/input040.c ================================================ int printf(char *fmt); int main() { int a; a= 5; } ================================================ FILE: 42_Casting/tests/input041.c ================================================ int printf(char *fmt); void fred() { return(5); } ================================================ FILE: 42_Casting/tests/input042.c ================================================ int printf(char *fmt); int main() { fred(5); } ================================================ FILE: 42_Casting/tests/input043.c ================================================ int printf(char *fmt); int main() { int a; a= b[4]; } ================================================ FILE: 42_Casting/tests/input044.c ================================================ int printf(char *fmt); int main() { int a; a= z; } 
================================================ FILE: 42_Casting/tests/input045.c ================================================ int printf(char *fmt); int main() { int a; a= &5; } ================================================ FILE: 42_Casting/tests/input046.c ================================================ int printf(char *fmt); int main() { int a; a= *5; } ================================================ FILE: 42_Casting/tests/input047.c ================================================ int printf(char *fmt); int main() { int a; a= ++5; } ================================================ FILE: 42_Casting/tests/input048.c ================================================ int printf(char *fmt); int main() { int a; a= --5; } ================================================ FILE: 42_Casting/tests/input049.c ================================================ int printf(char *fmt); int main() { int x; char y; y= x; } ================================================ FILE: 42_Casting/tests/input050.c ================================================ int printf(char *fmt); int main() { char *a; char *b; a= a + b; } ================================================ FILE: 42_Casting/tests/input051.c ================================================ int printf(char *fmt); int main() { char a; a= 'fred'; } ================================================ FILE: 42_Casting/tests/input052.c ================================================ int printf(char *fmt); int main() { int a; a= $5.00; } ================================================ FILE: 42_Casting/tests/input053.c ================================================ int printf(char *fmt); int main() { printf("Hello world, %d\n", 23); return(0); } ================================================ FILE: 42_Casting/tests/input054.c ================================================ int printf(char *fmt); int main() { int i; for (i=0; i < 20; i++) { printf("Hello world, %d\n", i); } return(0); } 
================================================ FILE: 42_Casting/tests/input055.c ================================================ int printf(char *fmt); int main(int argc, char **argv) { int i; char *argument; printf("Hello world\n"); for (i=0; i < argc; i++) { argument= *argv; argv= argv + 1; printf("Argument %d is %s\n", i, argument); } return(0); } ================================================ FILE: 42_Casting/tests/input056.c ================================================ struct foo { int x; }; struct mary var1; ================================================ FILE: 42_Casting/tests/input057.c ================================================ struct fred { int x; } ; struct fred { char y; } ; ================================================ FILE: 42_Casting/tests/input058.c ================================================ int printf(char *fmt); struct fred { int x; char y; long z; }; struct fred var2; struct fred *varptr; int main() { long result; var2.x= 12; printf("%d\n", var2.x); var2.y= 'c'; printf("%d\n", var2.y); var2.z= 4005; printf("%d\n", var2.z); result= var2.x + var2.y + var2.z; printf("%d\n", result); varptr= &var2; result= varptr->x + varptr->y + varptr->z; printf("%d\n", result); return(0); } ================================================ FILE: 42_Casting/tests/input059.c ================================================ int main() { int x; x= y.foo; } ================================================ FILE: 42_Casting/tests/input060.c ================================================ int main() { int x; x= x.foo; } ================================================ FILE: 42_Casting/tests/input061.c ================================================ int main() { int x; x= x->foo; } ================================================ FILE: 42_Casting/tests/input062.c ================================================ int printf(char *fmt); union fred { char w; int x; int y; long z; }; union fred var1; union fred *varptr; int main() { var1.x= 65; 
printf("%d\n", var1.x); var1.x= 66; printf("%d\n", var1.x); printf("%d\n", var1.y); printf("The next two depend on the endian of the platform\n"); printf("%d\n", var1.w); printf("%d\n", var1.z); varptr= &var1; varptr->x= 67; printf("%d\n", varptr->x); printf("%d\n", varptr->y); return(0); } ================================================ FILE: 42_Casting/tests/input063.c ================================================ int printf(char *fmt); enum fred { apple=1, banana, carrot, pear=10, peach, mango, papaya }; enum jane { aple=1, bnana, crrot, par=10, pech, mago, paaya }; enum fred var1; enum jane var2; enum fred var3; int main() { var1= carrot + pear + mango; printf("%d\n", var1); return(0); } ================================================ FILE: 42_Casting/tests/input064.c ================================================ enum fred var3; ================================================ FILE: 42_Casting/tests/input065.c ================================================ enum fred { x, y, z }; enum fred { a, b }; ================================================ FILE: 42_Casting/tests/input066.c ================================================ enum fred { x, y, z }; enum mary { a, b, z }; ================================================ FILE: 42_Casting/tests/input067.c ================================================ int printf(char *fmt); typedef int FOO; FOO var1; struct bar { int x; int y} ; typedef struct bar BAR; BAR var2; int main() { var1= 5; printf("%d\n", var1); var2.x= 7; var2.y= 10; printf("%d\n", var2.x + var2.y); return(0); } ================================================ FILE: 42_Casting/tests/input068.c ================================================ typedef int FOO; typedef char FOO; ================================================ FILE: 42_Casting/tests/input069.c ================================================ typedef int FOO; FLOO y; ================================================ FILE: 42_Casting/tests/input070.c 
================================================ #include typedef int FOO; int main() { FOO x; x= 56; printf("%d\n", x); return(0); } ================================================ FILE: 42_Casting/tests/input071.c ================================================ #include int main() { int x; x = 0; while (x < 100) { if (x == 5) { x = x + 2; continue; } printf("%d\n", x); if (x == 14) { break; } x = x + 1; } printf("Done\n"); return (0); } ================================================ FILE: 42_Casting/tests/input072.c ================================================ int main() { break; } ================================================ FILE: 42_Casting/tests/input073.c ================================================ int main() { continue; } ================================================ FILE: 42_Casting/tests/input074.c ================================================ #include int main() { int x; int y; y= 0; for (x=0; x < 5; x++) { switch(x) { case 1: { y= 5; break; } case 2: { y= 7; break; } case 3: { y= 9; } default: { y= 100; } } printf("%d\n", y); } return(0); } ================================================ FILE: 42_Casting/tests/input075.c ================================================ int main() { int x; switch(x) { if (x<5); } } ================================================ FILE: 42_Casting/tests/input076.c ================================================ int main() { int x; switch(x) { } } ================================================ FILE: 42_Casting/tests/input077.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } case 4: { x= 2; } } } ================================================ FILE: 42_Casting/tests/input078.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } default: { x= 2; } } } ================================================ FILE: 42_Casting/tests/input079.c 
================================================ int main() { int x; switch(x) { case 1: { x= 2; } case 2: { x= 2; } case 1: { x= 2; } default: { x= 2; } } } ================================================ FILE: 42_Casting/tests/input080.c ================================================ #include int x; int y; int main() { for (x=0, y=1; x < 6; x++, y=y+2) { printf("%d %d\n", x, y); } return(0); } ================================================ FILE: 42_Casting/tests/input081.c ================================================ #include int x; int y; int main() { x= 0; y=1; for (;x<5;) { printf("%d %d\n", x, y); x=x+1; y=y+2; } return(0); } ================================================ FILE: 42_Casting/tests/input082.c ================================================ #include int main() { int x; x= 10; // Dangling else test if (x > 5) if (x > 15) printf("x > 15\n"); else printf("15 >= x > 5\n"); else printf("5 >= x\n"); return(0); } ================================================ FILE: 42_Casting/tests/input083.c ================================================ #include int main() { int x; // Dangling else test. 
// We should not print anything for x<= 5 for (x=0; x < 12; x++) if (x > 5) if (x > 10) printf("10 < %2d\n", x); else printf(" 5 < %2d <= 10\n", x); return(0); } ================================================ FILE: 42_Casting/tests/input084.c ================================================ #include int main() { int x, y; x=2; y=3; printf("%d %d\n", x, y); char a, *b; a= 'f'; b= &a; printf("%c %c\n", a, *b); return(0); } ================================================ FILE: 42_Casting/tests/input085.c ================================================ int main(struct foo { int x; }; , int a) { return(0); } ================================================ FILE: 42_Casting/tests/input086.c ================================================ int main() { int fred() { return(5); } int x; x=2; return(x); } ================================================ FILE: 42_Casting/tests/input087.c ================================================ struct foo { int x; int y; struct blah { int g; }; }; ================================================ FILE: 42_Casting/tests/input088.c ================================================ #include struct foo { int x; int y; } fred, mary; struct foo james; int main() { fred.x= 5; mary.y= 6; printf("%d %d\n", fred.x, mary.y); return(0); } ================================================ FILE: 42_Casting/tests/input089.c ================================================ #include int x= 23; char y= 'H'; char *z= "Hello world"; int main() { printf("%d %c %s\n", x, y, z); return(0); } ================================================ FILE: 42_Casting/tests/input090.c ================================================ #include int a = 23, b = 100; char y = 'H', *z = "Hello world"; int main() { printf("%d %d %c %s\n", a, b, y, z); return (0); } ================================================ FILE: 42_Casting/tests/input091.c ================================================ #include int fred[] = { 1, 2, 3, 4, 5 }; int jim[10] = { 1, 2, 3, 4, 5 }; int 
main() { int i; for (i=0; i < 5; i++) printf("%d\n", fred[i]); for (i=0; i < 10; i++) printf("%d\n", jim[i]); return(0); } ================================================ FILE: 42_Casting/tests/input092.c ================================================ char x= 3000; ================================================ FILE: 42_Casting/tests/input093.c ================================================ char x= fred; ================================================ FILE: 42_Casting/tests/input094.c ================================================ char *s= 54; ================================================ FILE: 42_Casting/tests/input095.c ================================================ int fred(int x=2) { return(x); } ================================================ FILE: 42_Casting/tests/input096.c ================================================ int fred[0]; ================================================ FILE: 42_Casting/tests/input097.c ================================================ int main() { int x[45]; return(0); } ================================================ FILE: 42_Casting/tests/input098.c ================================================ int fred[3]= { 1, 2, 3, 4, 5 }; ================================================ FILE: 42_Casting/tests/input099.c ================================================ #include // List of token strings, for debugging purposes. 
// As yet, we can't store a NULL into the list char *Tstring[] = { "EOF", "=", "||", "&&", "|", "^", "&", "==", "!=", ",", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", "default", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":", "" }; int main() { int i; char *str; i=0; while (1) { str= Tstring[i]; if (*str == 0) break; printf("%s\n", str); i++; } return(0); } ================================================ FILE: 42_Casting/tests/input100.c ================================================ #include int main() { int x= 3, y=14; int z= 2 * x + y; char *str= "Hello world"; printf("%s %d %d\n", str, x+y, z); return(0); } ================================================ FILE: 42_Casting/tests/input101.c ================================================ #include int main() { int x= 65535; char y; char *str; y= (char )x; printf("0x%x\n", y); str= (char *)0; printf("0x%lx\n", (long)str); return(0); } ================================================ FILE: 42_Casting/tests/input102.c ================================================ int main() { struct foo { int p; }; int y= (struct foo) x; } ================================================ FILE: 42_Casting/tests/input103.c ================================================ int main() { union foo { int p; }; int y= (union foo) x; } ================================================ FILE: 42_Casting/tests/input104.c ================================================ int main() { int y= (void) x; } ================================================ FILE: 42_Casting/tests/input105.c ================================================ int main() { int x; char *y; y= (char) x; } ================================================ FILE: 42_Casting/tests/input106.c ================================================ #include char 
*y= (char *)0; int main() { printf("0x%lx\n", (long)y); return(0); } ================================================ FILE: 42_Casting/tests/input107.c ================================================ #include char *y[] = { "fish", "cow", NULL }; char *z= NULL; int main() { int i; char *ptr; for (i=0; i < 3; i++) { ptr= y[i]; if (ptr != (char *)0) printf("%s\n", y[i]); else printf("NULL\n"); } return(0); } ================================================ FILE: 42_Casting/tests/input108.c ================================================ int main() { char *str= (void *)0; return(0); } ================================================ FILE: 42_Casting/tests/mktests ================================================ #!/bin/sh # Make the output files for each test # Build our compiler if needed if [ ! -f ../cwj ] then (cd ..; make install) fi for i in input*c do if [ ! -f "out.$i" -a ! -f "err.$i" ] then ../cwj -o out $i 2> "err.$i" # If the err file is empty if [ ! -s "err.$i" ] then rm -f "err.$i" cc -o out $i ./out > "out.$i" fi fi rm -f out out.s done ================================================ FILE: 42_Casting/tests/out.input001.c ================================================ 36 10 25 ================================================ FILE: 42_Casting/tests/out.input002.c ================================================ 17 ================================================ FILE: 42_Casting/tests/out.input003.c ================================================ 1 2 3 4 5 ================================================ FILE: 42_Casting/tests/out.input004.c ================================================ 1 1 1 1 1 1 1 1 1 ================================================ FILE: 42_Casting/tests/out.input005.c ================================================ 6 ================================================ FILE: 42_Casting/tests/out.input006.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 
42_Casting/tests/out.input007.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 42_Casting/tests/out.input008.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 42_Casting/tests/out.input009.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 42_Casting/tests/out.input010.c ================================================ 20 10 1 2 3 4 5 253 254 255 0 1 ================================================ FILE: 42_Casting/tests/out.input011.c ================================================ 10 20 30 1 2 3 4 5 253 254 255 0 1 2 3 1 2 3 4 5 ================================================ FILE: 42_Casting/tests/out.input012.c ================================================ 5 ================================================ FILE: 42_Casting/tests/out.input013.c ================================================ 23 56 ================================================ FILE: 42_Casting/tests/out.input014.c ================================================ 10 20 30 ================================================ FILE: 42_Casting/tests/out.input015.c ================================================ 18 18 12 12 ================================================ FILE: 42_Casting/tests/out.input016.c ================================================ 12 18 ================================================ FILE: 42_Casting/tests/out.input017.c ================================================ 19 12 ================================================ FILE: 42_Casting/tests/out.input018.c ================================================ 34 34 ================================================ FILE: 42_Casting/tests/out.input018a.c ================================================ 15 16 ================================================ FILE: 42_Casting/tests/out.input019.c 
================================================ 30 ================================================ FILE: 42_Casting/tests/out.input020.c ================================================ 12 ================================================ FILE: 42_Casting/tests/out.input021.c ================================================ 10 Hello world ================================================ FILE: 42_Casting/tests/out.input022.c ================================================ 12 12 12 13 13 13 13 13 13 35 35 35 ================================================ FILE: 42_Casting/tests/out.input023.c ================================================ -23 100 -2 0 1 0 13 14 Hello world ================================================ FILE: 42_Casting/tests/out.input024.c ================================================ 2 59 57 8 7 ================================================ FILE: 42_Casting/tests/out.input025.c ================================================ 10 20 30 5 15 25 ================================================ FILE: 42_Casting/tests/out.input026.c ================================================ 13 23 34 44 54 64 74 84 94 95 96 ================================================ FILE: 42_Casting/tests/out.input027.c ================================================ 1 2 3 4 5 6 7 8 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 ================================================ FILE: 42_Casting/tests/out.input028.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 42_Casting/tests/out.input029.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 42_Casting/tests/out.input030.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; 
zin = open("input030.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 42_Casting/tests/out.input053.c ================================================ Hello world, 23 ================================================ FILE: 42_Casting/tests/out.input054.c ================================================ Hello world, 0 Hello world, 1 Hello world, 2 Hello world, 3 Hello world, 4 Hello world, 5 Hello world, 6 Hello world, 7 Hello world, 8 Hello world, 9 Hello world, 10 Hello world, 11 Hello world, 12 Hello world, 13 Hello world, 14 Hello world, 15 Hello world, 16 Hello world, 17 Hello world, 18 Hello world, 19 ================================================ FILE: 42_Casting/tests/out.input055.c ================================================ Hello world Argument 0 is ./out ================================================ FILE: 42_Casting/tests/out.input058.c ================================================ 12 99 4005 4116 4116 ================================================ FILE: 42_Casting/tests/out.input062.c ================================================ 65 66 66 The next two depend on the endian of the platform 66 66 67 67 ================================================ FILE: 42_Casting/tests/out.input063.c ================================================ 25 ================================================ FILE: 42_Casting/tests/out.input067.c ================================================ 5 17 ================================================ FILE: 42_Casting/tests/out.input070.c ================================================ 56 ================================================ FILE: 42_Casting/tests/out.input071.c ================================================ 0 1 2 3 4 7 8 9 10 11 12 13 14 Done ================================================ FILE: 42_Casting/tests/out.input074.c 
================================================ 100 5 7 100 100 ================================================ FILE: 42_Casting/tests/out.input080.c ================================================ 0 1 1 3 2 5 3 7 4 9 5 11 ================================================ FILE: 42_Casting/tests/out.input081.c ================================================ 0 1 1 3 2 5 3 7 4 9 ================================================ FILE: 42_Casting/tests/out.input082.c ================================================ 15 >= x > 5 ================================================ FILE: 42_Casting/tests/out.input083.c ================================================ 5 < 6 <= 10 5 < 7 <= 10 5 < 8 <= 10 5 < 9 <= 10 5 < 10 <= 10 10 < 11 ================================================ FILE: 42_Casting/tests/out.input084.c ================================================ 2 3 f f ================================================ FILE: 42_Casting/tests/out.input088.c ================================================ 5 6 ================================================ FILE: 42_Casting/tests/out.input089.c ================================================ 23 H Hello world ================================================ FILE: 42_Casting/tests/out.input090.c ================================================ 23 100 H Hello world ================================================ FILE: 42_Casting/tests/out.input091.c ================================================ 1 2 3 4 5 1 2 3 4 5 0 0 0 0 0 ================================================ FILE: 42_Casting/tests/out.input099.c ================================================ EOF = || && | ^ & == != , > <= >= << >> + - * / ++ -- ~ ! void char int long if else while for return struct union enum typedef extern break continue switch case default intlit strlit ; identifier { } ( ) [ ] , . 
-> : ================================================ FILE: 42_Casting/tests/out.input100.c ================================================ Hello world 17 20 ================================================ FILE: 42_Casting/tests/out.input101.c ================================================ 0xff 0x0 ================================================ FILE: 42_Casting/tests/out.input106.c ================================================ 0x0 ================================================ FILE: 42_Casting/tests/out.input107.c ================================================ fish cow NULL ================================================ FILE: 42_Casting/tests/out.input108.c ================================================ ================================================ FILE: 42_Casting/tests/runtests ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! -f ../cwj ] then (cd ..; make install) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" # Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../cwj -o out $i ./out > trial.$i # Compare this against the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../cwj $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?"
-eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.s "trial.$i" done ================================================ FILE: 42_Casting/tests/runtestsn ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! -f ../compn ] then (cd ..; make install) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" # Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../compn -o out $i ./out > trial.$i # Compare this against the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../compn $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?"
-eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.o out.s "trial.$i" done ================================================ FILE: 42_Casting/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->type = type; n->left = left; n->mid = mid; n->right = right; n->sym = sym; n->intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, struct symtable *sym, int intvalue) { return (mkastnode(op, type, NULL, NULL, NULL, sym, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, struct symtable *sym, int intvalue) { return (mkastnode(op, type, left, NULL, NULL, sym, intvalue)); } // Generate and return a new label number // just for AST dumping purposes static int gendumplabel(void) { static int id = 1; return (id++); } // Given an AST tree, print it out and follow the // traversal of the tree that genAST() follows void dumpAST(struct ASTnode *n, int label, int level) { int Lfalse, Lstart, Lend; switch (n->op) { case A_IF: Lfalse = gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_IF"); if (n->right) { Lend = gendumplabel(); fprintf(stdout, ", end L%d", Lend); } fprintf(stdout, "\n"); dumpAST(n->left, Lfalse, level + 2); dumpAST(n->mid, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); return; case A_WHILE: Lstart = gendumplabel(); for 
(int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_WHILE, start L%d\n", Lstart); Lend = gendumplabel(); dumpAST(n->left, Lend, level + 2); dumpAST(n->right, NOLABEL, level + 2); return; } // Reset level to -2 for A_GLUE if (n->op == A_GLUE) level = -2; // General AST node handling if (n->left) dumpAST(n->left, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); for (int i = 0; i < level; i++) fprintf(stdout, " "); switch (n->op) { case A_GLUE: fprintf(stdout, "\n\n"); return; case A_FUNCTION: fprintf(stdout, "A_FUNCTION %s\n", n->sym->name); return; case A_ADD: fprintf(stdout, "A_ADD\n"); return; case A_SUBTRACT: fprintf(stdout, "A_SUBTRACT\n"); return; case A_MULTIPLY: fprintf(stdout, "A_MULTIPLY\n"); return; case A_DIVIDE: fprintf(stdout, "A_DIVIDE\n"); return; case A_EQ: fprintf(stdout, "A_EQ\n"); return; case A_NE: fprintf(stdout, "A_NE\n"); return; case A_LT: fprintf(stdout, "A_LE\n"); return; case A_GT: fprintf(stdout, "A_GT\n"); return; case A_LE: fprintf(stdout, "A_LE\n"); return; case A_GE: fprintf(stdout, "A_GE\n"); return; case A_INTLIT: fprintf(stdout, "A_INTLIT %d\n", n->intvalue); return; case A_STRLIT: fprintf(stdout, "A_STRLIT rval label L%d\n", n->intvalue); return; case A_IDENT: if (n->rvalue) fprintf(stdout, "A_IDENT rval %s\n", n->sym->name); else fprintf(stdout, "A_IDENT %s\n", n->sym->name); return; case A_ASSIGN: fprintf(stdout, "A_ASSIGN\n"); return; case A_WIDEN: fprintf(stdout, "A_WIDEN\n"); return; case A_RETURN: fprintf(stdout, "A_RETURN\n"); return; case A_FUNCCALL: fprintf(stdout, "A_FUNCCALL %s\n", n->sym->name); return; case A_ADDR: fprintf(stdout, "A_ADDR %s\n", n->sym->name); return; case A_DEREF: if (n->rvalue) fprintf(stdout, "A_DEREF rval\n"); else fprintf(stdout, "A_DEREF\n"); return; case A_SCALE: fprintf(stdout, "A_SCALE %d\n", n->size); return; case A_PREINC: fprintf(stdout, "A_PREINC %s\n", n->sym->name); return; case A_PREDEC: fprintf(stdout, "A_PREDEC %s\n", n->sym->name); return; 
case A_POSTINC: fprintf(stdout, "A_POSTINC\n"); return; case A_POSTDEC: fprintf(stdout, "A_POSTDEC\n"); return; case A_NEGATE: fprintf(stdout, "A_NEGATE\n"); return; case A_BREAK: fprintf(stdout, "A_BREAK\n"); return; case A_CONTINUE: fprintf(stdout, "A_CONTINUE\n"); return; case A_CASE: fprintf(stdout, "A_CASE %d\n", n->intvalue); return; case A_DEFAULT: fprintf(stdout, "A_DEFAULT\n"); return; case A_SWITCH: fprintf(stdout, "A_SWITCH\n"); return; case A_CAST: fprintf(stdout, "A_CAST %d\n", n->type); return; default: fatald("Unknown dumpAST operator", n->op); } } ================================================ FILE: 42_Casting/types.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Types and type handling // Copyright (c) 2019 Warren Toomey, GPL3 // Return true if a type is an int type // of any size, false otherwise int inttype(int type) { return (((type & 0xf) == 0) && (type >= P_CHAR && type <= P_LONG)); } // Return true if a type is of pointer type int ptrtype(int type) { return ((type & 0xf) != 0); } // Given a primitive type, return // the type which is a pointer to it int pointer_to(int type) { if ((type & 0xf) == 0xf) fatald("Unrecognised in pointer_to: type", type); return (type + 1); } // Given a primitive pointer type, return // the type which it points to int value_at(int type) { if ((type & 0xf) == 0x0) fatald("Unrecognised in value_at: type", type); return (type - 1); } // Given a type and a composite type pointer, return // the size of this type in bytes int typesize(int type, struct symtable *ctype) { if (type == P_STRUCT || type == P_UNION) return (ctype->size); return (genprimsize(type)); } // Given an AST tree and a type which we want it to become, // possibly modify the tree by widening or scaling so that // it is compatible with this type. Return the original tree // if no changes occurred, a modified tree, or NULL if the // tree is not compatible with the given type. 
// If this will be part of a binary operation, the AST op is not zero. struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op) { int ltype; int lsize, rsize; ltype = tree->type; // XXX No idea on these yet if (ltype == P_STRUCT || ltype == P_UNION) fatal("Don't know how to do this yet"); if (rtype == P_STRUCT || rtype == P_UNION) fatal("Don't know how to do this yet"); // Compare scalar int types if (inttype(ltype) && inttype(rtype)) { // Both types same, nothing to do if (ltype == rtype) return (tree); // Get the sizes for each type lsize = typesize(ltype, NULL); rsize = typesize(rtype, NULL); // Tree's size is too big if (lsize > rsize) return (NULL); // Widen to the right if (rsize > lsize) return (mkastunary(A_WIDEN, rtype, tree, NULL, 0)); } // For pointers if (ptrtype(ltype) && ptrtype(rtype)) { // We can compare them if (op >= A_EQ && op <= A_GE) return(tree); // A comparison of the same type for a non-binary operation is OK, // or when the left tree is of `void *` type. 
if (op == 0 && (ltype == rtype || ltype == pointer_to(P_VOID))) return (tree); } // We can scale only on A_ADD or A_SUBTRACT operation if (op == A_ADD || op == A_SUBTRACT) { // Left is int type, right is pointer type and the size // of the original type is >1: scale the left if (inttype(ltype) && ptrtype(rtype)) { rsize = genprimsize(value_at(rtype)); if (rsize > 1) return (mkastunary(A_SCALE, rtype, tree, NULL, rsize)); else return (tree); // Size 1, no need to scale } } // If we get here, the types are not compatible return (NULL); } ================================================ FILE: 43_More_Operators/Makefile ================================================ # Define the location of the include directory # and the location to install the compiler binary INCDIR=/tmp/include BINDIR=/tmp HSRCS= data.h decl.h defs.h SRCS= cg.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c ARMSRCS= cg_arm.c decl.c expr.c gen.c main.c misc.c \ scan.c stmt.c sym.c tree.c types.c cwj: $(SRCS) $(HSRCS) cc -o cwj -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCS) compn: $(SRCN) $(HSRCS) cc -D__NASM__ -o compn -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCN) cwjarm: $(ARMSRCS) $(HSRCS) cc -o cwjarm -g -Wall $(ARMSRCS) cp cwjarm cwj install: cwj mkdir -p $(INCDIR) rsync -a include/. $(INCDIR) cp cwj $(BINDIR) chmod +x $(BINDIR)/cwj installn: compn mkdir -p $(INCDIR) rsync -a include/. 
$(INCDIR) cp compn $(BINDIR) chmod +x $(BINDIR)/compn clean: rm -f cwj cwjarm compn *.o *.s out a.out test: install tests/runtests (cd tests; chmod +x runtests; ./runtests) armtest: cwjarm tests/runtests (cd tests; chmod +x runtests; ./runtests) testn: installn tests/runtestsn (cd tests; chmod +x runtestsn; ./runtestsn) ================================================ FILE: 43_More_Operators/Readme.md ================================================ # Part 43: Bugfixes and More Operators I've started to pass some of the source code of our compiler as input to itself, as this is how we are going to get it to eventually compile itself. The first big hurdle is to get the compiler to parse and recognise its source code. The second big hurdle will be to get the compiler to generate correct, working, code from its source code. This is also the first time that the compiler has been given some substantial input to chew on, and it's going to reveal a bunch of bugs, misfeatures and missing features. ## Bugfixes I started with `cwj -S defs.h` and found several header files missing. For now they exist but are empty. With these in place, the compiler crashes with a segfault. I had a few pointers which should be initialised to NULL and places where I wasn't checking for a NULL pointer. ## Missing Features Next up, I hit `enum { NOREG = -1 ...` in `defs.h` and realised that the scanner wasn't dealing with integer literals which start with a minus sign. So I've added this code to `scan()` in `scan.c`: ```c case '-': if ((c = next()) == '-') { t->token = T_DEC; } else if (c == '>') { t->token = T_ARROW; } else if (isdigit(c)) { // Negative int literal t->intvalue = -scanint(c); t->token = T_INTLIT; } else { putback(c); t->token = T_MINUS; } ``` If a '-' is followed by a digit, scan in the integer literal and negate its value. 
At first I was worried that the expression `1 - 1` would be treated as the two tokens '1', 'int literal -1', but I forgot that `next()` doesn't skip the space. So, by having a space between the '-' and the '1', the expression `1 - 1` is correctly parsed as '1', '-', '1'. However, as [Luke Gruber](https://github.com/luke-gru) has pointed out, this also means that the input `1-1` **is** treated as `1 -1` instead of `1 - 1`. In other words, the scanner is too greedy and forces `-1` to always be treated as a T_INTLIT when sometimes it shouldn't be. I'm going to leave this for now, as we can work around it when writing our source code. Obviously, in a production compiler this would have to be fixed. ## Misfeatures In the AST node and symbol table node structures, I've been using unions to try and keep the size of each node down. I guess I'm a bit old school and I worry about wasting memory. An example is the AST node structure: ```c struct ASTnode { int op; // "Operation" to be performed on this tree ... union { // the symbol in the symbol table int intvalue; // For A_INTLIT, the integer value int size; // For A_SCALE, the size to scale by }; }; ``` But the compiler isn't able to parse and work with a union inside a struct, and especially an unnamed union inside a struct. I could add this functionality, but it will be easier to redo the two structs where I do this. So, I've made these changes: ```c // Symbol table structure struct symtable { char *name; // Name of a symbol ... #define st_endlabel st_posn // For functions, the end label int st_posn; // For locals, the negative offset // from the stack base pointer ... }; // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree ... 
#define a_intvalue a_size // For A_INTLIT, the integer value int a_size; // For A_SCALE, the size to scale by }; ``` This way, I still have two named fields sharing the same location in each struct, but the compiler will see only the one field name in each struct. I've given each `#define` a different prefix to prevent pollution of the global namespace. A consequence of this is that I've had to rename the `endlabel`, `posn`, `intvalue` and `size` fields across half a dozen source files. C'est la vie. So now the compiler, when doing `cwj -S misc.c` gets up to: ``` Expected:] on line 16 of data.h, where the line is extern char Text[TEXTLEN + 1]; ``` This fails as the compiler as it stands does not parse expressions in a global variable declaration. I'm going to have to rethink this. My thoughts so far are to use `binexpr()` to parse the expression, and to add some optimisation code to perform [constant folding](https://en.wikipedia.org/wiki/Constant_folding) on the resulting AST tree. This should result in a single A_INTLIT node from which I can extract the literal value. I could even let `binexpr()` parse any casts, e.g. ```c char x= (char)('a' + 1024); ``` Anyway, that's something for the future. I was going to do constant folding at some point, but I thought it would be further down the track. What I will do in this part of the journey is add some more operators: specifically, '+=', '-=', '*=' and '/='. We currently use the first two operators in the compiler's source code. ## New Tokens, Scanning and Parsing Adding new keywords to our compiler is easy: a new token and a change to the scanner. Adding new operators is much harder as we have to: + align the token with the AST operation + deal with precedence and associativity. We are adding four operators: '+=', '-=', '*=' and '/='. They have matching tokens: T_ASPLUS, T_ASMINUS, T_ASSTAR and T_ASSLASH. These have corresponding AST operations: A_ASPLUS, A_ASMINUS, A_ASSTAR, A_ASSLASH. 
The AST operations **must** have the same enum value as the tokens because of this function in `expr.c`: ```c // Convert a binary operator token into a binary AST operation. // We rely on a 1:1 mapping from token to AST operation static int binastop(int tokentype) { if (tokentype > T_EOF && tokentype <= T_SLASH) return (tokentype); fatald("Syntax error, token", tokentype); return (0); // Keep -Wall happy } ``` We also need to configure the precedence of the new operators. According to [this list of C operators](https://en.cppreference.com/w/c/language/operator_precedence), these new operators have the same precedence as our existing assignment operator, so we can modify the `OpPrec[]` table in `expr.c` as follows: ```c // Operator precedence for each token. Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 10, // T_EOF, T_ASSIGN, T_ASPLUS, 10, 10, 10, // T_ASMINUS, T_ASSTAR, T_ASSLASH, 20, 30, // T_LOGOR, T_LOGAND ... }; ``` But that list of C operators also notes that the assignment operators are *right-associative*. This means, for example, that: ```c a += b + c; // needs to be parsed as a += (b + c); // not (a += b) + c; ``` So we also need to update this function in `expr.c` to do this: ```c // Return true if a token is right-associative, // false otherwise. static int rightassoc(int tokentype) { if (tokentype >= T_ASSIGN && tokentype <= T_ASSLASH) return (1); return (0); } ``` Fortunately, these are the only changes we need to make to our scanner and expression parser: the Pratt parser for binary expressions is now primed to deal with the new operators. ## Dealing with the AST Tree Now that we can parse expressions with the four new operators, we need to deal with the AST that is created for each expression. One thing we need to do is dump the AST tree.
So, in `dumpAST()` in `tree.c`, I added this code: ```c case A_ASPLUS: fprintf(stdout, "A_ASPLUS\n"); return; case A_ASMINUS: fprintf(stdout, "A_ASMINUS\n"); return; case A_ASSTAR: fprintf(stdout, "A_ASSTAR\n"); return; case A_ASSLASH: fprintf(stdout, "A_ASSLASH\n"); return; ``` Now when I run `cwj -T input.c` with the expression `a += b + c`, I see: ``` A_IDENT rval a A_IDENT rval b A_IDENT rval c A_ADD A_ASPLUS ``` which we can redraw as: ``` A_ASPLUS / \ A_IDENT A_ADD rval a / \ A_IDENT A_IDENT rval b rval c ``` ## Generating the Assembly For the Operators Well, in `gen.c` we already walk the AST tree and deal with A_ADD and A_ASSIGN. Is there a way to use the existing code to make implementing the new A_ASPLUS operator a bit easier? Yes! We can rewrite the above AST tree to look like this: ``` A_ASSIGN / \ A_ADD lval a / \ A_IDENT A_ADD rval a / \ A_IDENT A_IDENT rval b rval c ``` Now, we don't *have* to rewrite the tree as long as we perform the tree walking *as if* the tree had been rewritten like this. So in `genAST()`, we have: ```c int genAST(...) { ... // Get the left and right sub-tree values. This code already here. if (n->left) leftreg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op); if (n->right) rightreg = genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); } ``` From the perspective of doing the work for the A_ASPLUS node, we have evaluated the left-hand child (e.g. `a`'s value) and the right-hand child (e.g. `b+c`) and we have the values in two registers. If this was an A_ADD operation, we would `cgadd(leftreg, rightreg)` at this point. Well, it is an A_ADD operation on these children, then followed by an assignment back into `a`. So, the `genAST()` code now has this: ```c switch (n->op) { ... case A_ASPLUS: case A_ASMINUS: case A_ASSTAR: case A_ASSLASH: case A_ASSIGN: // For the '+=' and friends operators, generate suitable code // and get the register with the result. 
Then take the left child, // make it the right child so that we can fall into the assignment code. switch (n->op) { case A_ASPLUS: leftreg= cgadd(leftreg, rightreg); n->right= n->left; break; case A_ASMINUS: leftreg= cgsub(leftreg, rightreg); n->right= n->left; break; case A_ASSTAR: leftreg= cgmul(leftreg, rightreg); n->right= n->left; break; case A_ASSLASH: leftreg= cgdiv(leftreg, rightreg); n->right= n->left; break; } // And the existing code to do A_ASSIGN is here ... } ``` In other words, for each new operator, we perform the correct maths operation on the children. But before we can drop into the A_ASSIGN code we have to move the left-child pointer over to be the right child. Why? Because the A_ASSIGN code expects the destination to be the right child: ```c return (cgstorlocal(leftreg, n->right->sym)); ``` And that's it. We were lucky to have code which we could adapt to add in these four new operators. There are more assignment operators which I haven't implemented: '%=', '<<=', '>>=', '&=', '^=' and '|='. They should also be as easy to add as the four we just added. ## Example Code The `tests/input110.c` program is our testing program: ```c #include <stdio.h> int x; int y; int main() { x= 3; y= 15; y += x; printf("%d\n", y); x= 3; y= 15; y -= x; printf("%d\n", y); x= 3; y= 15; y *= x; printf("%d\n", y); x= 3; y= 15; y /= x; printf("%d\n", y); return(0); } ``` and produces these results: ``` 18 12 45 5 ``` ## Conclusion and What's Next We've added some more operators, and the hardest part really was aligning all the tokens, the AST operators and setting the precedence levels and right-associativity. After that, we could reuse some of the code generation code in `genAST()` to make our lives a bit easier. In the next part of our compiler writing journey, it looks like I'll be adding constant folding to the compiler.
[Next step](../44_Fold_Optimisation/Readme.md) ================================================ FILE: 43_More_Operators/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\t.text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\t.data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch (type) { case P_CHAR: return (offset); case P_INT: case P_LONG: break; default: if (!ptrtype(type)) fatald("Bad type in cg_align:", type); } // Here we have an int or a long. Align it on a 4-byte offset // I put the generic code here so it can be reused elsewhere. alignment = 4; offset = (offset + direction * (alignment - 1)) & ~(alignment - 1); return (offset); } // Position of next local variable relative to stack base pointer. 
// We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. static int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. // We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "%r10", "%r11", "%r12", "%r13", "%r9", "%r8", "%rcx", "%rdx", "%rsi", "%rdi" }; static char *breglist[] = { "%r10b", "%r11b", "%r12b", "%r13b", "%r9b", "%r8b", "%cl", "%dl", "%sil", "%dil" }; static char *dreglist[] = { "%r10d", "%r11d", "%r12d", "%r13d", "%r9d", "%r8d", "%ecx", "%edx", "%esi", "%edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); cgtextseg(); fprintf(Outfile, "# internal switch(expr) routine\n" "# %%rsi = switch table, %%rax = expr\n" "# from SubC: http://www.t3x.org/subc/\n" "\n" "switch:\n" " pushq %%rsi\n" " movq %%rdx, %%rsi\n" " movq %%rax, %%rbx\n" " cld\n" " lodsq\n" " movq %%rax, %%rcx\n" "next:\n" " lodsq\n" " movq %%rax, %%rdx\n" " lodsq\n" " cmpq %%rdx, %%rbx\n" " jnz no\n" " popq %%rsi\n" " jmp *%%rax\n" "no:\n" " loop next\n" " lodsq\n" " popq %%rsi\n" " jmp *%%rax\n" "\n"); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(struct symtable *sym) { char *name = sym->name; struct symtable *parm, *locvar; int cnt; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the %rsp and %rsp fprintf(Outfile, "\t.globl\t%s\n" "\t.type\t%s, @function\n" "%s:\n" "\tpushq\t%%rbp\n" "\tmovq\t%%rsp, %%rbp\n", name, name, name); // Copy any in-register parameters to the stack, up to six of them // The remaining parameters are already on the stack for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) { if (cnt > 6) { parm->st_posn = paramOffset; paramOffset += 8; } else { parm->st_posn = newlocaloffset(parm->type); cgstorlocal(paramReg--, parm); } } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. 
for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) { locvar->st_posn = newlocaloffset(locvar->type); } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\taddq\t$%d,%%rsp\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->st_endlabel); fprintf(Outfile, "\taddq\t$%d,%%rsp\n", stackOffset); fputs("\tpopq %rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadglob(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name); } else // Print out the code to initialise it switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovzbq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name); if (op == 
A_PREDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovslq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadlocal(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->st_posn); fprintf(Outfile, "\tmovq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->st_posn); } else switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->st_posn); fprintf(Outfile, "\tmovzbq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->st_posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->st_posn); fprintf(Outfile, "\tmovslq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->st_posn); break; default: fatald("Bad type in cgloadlocal:", sym->type); } return (r); } // Given the label number of a 
global string, // load its address into a new register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tleaq\tL%d(%%rip), %s\n", label, reglist[r]); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tandq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\torq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txorq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshlq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshrq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { 
fprintf(Outfile, "\tnegq\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnotq\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); return (r); } // Convert an integer value to a boolean value. Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s@PLT\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\taddq\t$%d, %%rsp\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpushq\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmovq\t%s, %s\n", reglist[r], reglist[FIRSTPARAMREG - argposn + 1]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsalq\t$%d, %s\n", val, reglist[r]); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %s(%%rip)\n", reglist[r], sym->name); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %s(%%rip)\n", breglist[r], sym->name); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %s(%%rip)\n", dreglist[r], sym->name); break; default: fatald("Bad type in cgstorglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %d(%%rbp)\n", reglist[r], sym->st_posn); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %d(%%rbp)\n", breglist[r], sym->st_posn); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %d(%%rbp)\n", dreglist[r], sym->st_posn); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size, type; int initvalue; int i; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the variable (or its elements if an array) // and the type of the variable if (node->stype == S_ARRAY) { size= typesize(value_at(node->type), node->ctype); type= value_at(node->type); } else { size = node->size; type= node->type; } // Generate the global identity and the label cgdataseg(); fprintf(Outfile, "\t.globl\t%s\n", node->name); 
fprintf(Outfile, "%s:\n", node->name); // Output space for one or more elements for (i=0; i < node->nelems; i++) { // Get any initial value initvalue= 0; if (node->initlist != NULL) initvalue= node->initlist[i]; // Generate the space for this type switch (size) { case 1: fprintf(Outfile, "\t.byte\t%d\n", initvalue); break; case 4: fprintf(Outfile, "\t.long\t%d\n", initvalue); break; case 8: // Generate the pointer to a string literal. Treat a zero value // as actually zero, not the label L0 if (node->initlist != NULL && type== pointer_to(P_CHAR) && initvalue != 0) fprintf(Outfile, "\t.quad\tL%d\n", initvalue); else fprintf(Outfile, "\t.quad\t%d\n", initvalue); break; default: for (int i = 0; i < size; i++) fprintf(Outfile, "\t.byte\t0\n"); } } } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\t.byte\t%d\n", *cptr); } fprintf(Outfile, "\t.byte\t0\n"); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzbl\t%s, %%eax\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %%eax\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } cgjump(sym->st_endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL) fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", sym->name, reglist[r]); else fprintf(Outfile, "\tleaq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzbq\t(%s), %s\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovslq\t(%s), %s\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { // Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmovb\t%s, (%s)\n", breglist[r1], reglist[r2]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } // Generate a switch jump table and the code to // load the registers and call the switch() code void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; // Get a label for the switch table label = genlabel(); cglabel(label); // Heuristic. If we have no cases, create one case // which points to the default case if (casecount == 0) { caseval[0] = 0; caselabel[0] = defaultlabel; casecount = 1; } // Generate the switch jump table. 
fprintf(Outfile, "\t.quad\t%d\n", casecount); for (i = 0; i < casecount; i++) fprintf(Outfile, "\t.quad\t%d, L%d\n", caseval[i], caselabel[i]); fprintf(Outfile, "\t.quad\tL%d\n", defaultlabel); // Load the specific registers cglabel(toplabel); fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); fprintf(Outfile, "\tleaq\tL%d(%%rip), %%rdx\n", label); fprintf(Outfile, "\tjmp\tswitch\n"); } ================================================ FILE: 43_More_Operators/cg_arm.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for ARMv6 on Raspberry Pi // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. static int freereg[4]; static char *reglist[4] = { "r4", "r5", "r6", "r7" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // We have to store large integer literal values in memory. // Keep a list of them which will be output in the postamble #define MAXINTS 1024 int Intlist[MAXINTS]; static int Intslot = 0; // Determine the offset of a large integer // literal from the .L3 label. If the integer // isn't in the list, add it. 
static void set_int_offset(int val) { int offset = -1; // See if it is already there for (int i = 0; i < Intslot; i++) { if (Intlist[i] == val) { offset = 4 * i; break; } } // Not in the list, so add it if (offset == -1) { offset = 4 * Intslot; if (Intslot == MAXINTS) fatal("Out of int slots in set_int_offset()"); Intlist[Intslot++] = val; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L3+%d\n", offset); } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Print out the assembly postamble void cgpostamble() { // Print out the global variables fprintf(Outfile, ".L2:\n"); for (int i = 0; i < Globs; i++) { if (Symtable[i].stype == S_VARIABLE) fprintf(Outfile, "\t.word %s\n", Symtable[i].name); } // Print out the integer literals fprintf(Outfile, ".L3:\n"); for (int i = 0; i < Intslot; i++) { fprintf(Outfile, "\t.word %d\n", Intlist[i]); } } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, \%%function\n" "%s:\n" "\tpush\t{fp, lr}\n" "\tadd\tfp, sp, #4\n" "\tsub\tsp, sp, #8\n" "\tstr\tr0, [fp, #-8]\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Symtable[id].endlabel); fputs("\tsub\tsp, fp, #4\n" "\tpop\t{fp, pc}\n" "\t.align\t2\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); // If the literal value is small, do it with one instruction if (value <= 1000) fprintf(Outfile, "\tmov\t%s, #%d\n", reglist[r], value); else { set_int_offset(value); fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); } return (r); } // Determine the offset of a variable from the .L2 // label. Yes, this is inefficient code. static void set_var_offset(int id) { int offset = 0; // Walk the symbol table up to id. 
// Find S_VARIABLEs and add on 4 until // we get to our variable for (int i = 0; i < id; i++) { if (Symtable[i].stype == S_VARIABLE) offset += 4; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L2+%d\n", offset); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tldrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s, %s\n", reglist[r1], reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\tmul\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { // To do a divide: r0 holds the dividend, r1 holds the divisor. // The quotient is in r0. 
fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r1]); fprintf(Outfile, "\tmov\tr1, %s\n", reglist[r2]); fprintf(Outfile, "\tbl\t__aeabi_idiv\n"); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r1]); free_register(r2); return (r1); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]); fprintf(Outfile, "\tbl\t%s\n", Symtable[id].name); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r]); return (r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tlsl\t%s, %s, #%d\n", reglist[r], reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tstr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgstorglob:", Symtable[id].type); } return (r); } // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { if (ptrtype(type)) return (4); switch (type) { case P_CHAR: return (1); case P_INT: case P_LONG: return (4); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Symtable[id].type); fprintf(Outfile, "\t.data\n" "\t.globl\t%s\n", Symtable[id].name); switch (typesize) { case 1: fprintf(Outfile, "%s:\t.byte\t0\n", Symtable[id].name); break; case 4: fprintf(Outfile, "%s:\t.long\t0\n", Symtable[id].name); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "moveq", "movne", "movlt", "movgt", "movle", "movge" }; // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "movne", "moveq", "movge", "movle", "movgt", "movlt" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #1\n", cmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #0\n", invcmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\tuxtb\t%s, %s\n", reglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tb\tL%d\n", l); } // List of inverted branch instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *brlist[] = { "bne", "beq", "bge", "ble", "bgt", "blt" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", brlist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[reg]); cgjump(Symtable[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. Return a new register int cgaddress(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); fprintf(Outfile, "\tmov\t%s, r3\n", reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tldrb\t%s, [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [%s]\n", reglist[r1], reglist[r2]); break; case P_INT: case P_LONG: fprintf(Outfile, "\tstr\t%s, [%s]\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 43_More_Operators/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { 
no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\tsection .text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\tsection .data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch (type) { case P_CHAR: return (offset); case P_INT: case P_LONG: break; default: if (!ptrtype(type)) fatald("Bad type in cg_align:", type); } // Here we have an int or a long. Align it on a 4-byte offset // I put the generic code here so it can be reused elsewhere. alignment = 4; offset = (offset + direction * (alignment - 1)) & ~(alignment - 1); return (offset); } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. 
// We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "r10", "r11", "r12", "r13", "r9", "r8", "rcx", "rdx", "rsi", "rdi" }; static char *breglist[] = { "r10b", "r11b", "r12b", "r13b", "r9b", "r8b", "cl", "dl", "sil", "dil" }; static char *dreglist[] = { "r10d", "r11d", "r12d", "r13d", "r9d", "r8d", "ecx", "edx", "esi", "edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\textern\tprintint\n", Outfile); fputs("\textern\tprintchar\n", Outfile); fputs("\textern\topen\n", Outfile); fputs("\textern\tclose\n", Outfile); fputs("\textern\tread\n", Outfile); fputs("\textern\twrite\n", Outfile); fputs("\textern\tprintf\n", Outfile); cgtextseg(); fprintf(Outfile, "; internal switch(expr) routine\n" "; rsi = switch table, rax = expr\n" "; from SubC: http://www.t3x.org/subc/\n" "\n" "switch:\n" " push rsi\n" " mov rsi, rdx\n" " mov rbx, rax\n" " cld\n" " lodsq\n" " mov rcx, rax\n" "next:\n" " lodsq\n" " mov rdx, rax\n" " lodsq\n" " cmp rbx, rdx\n" " jnz no\n" " pop rsi\n" " jmp rax\n" "no:\n" " loop next\n" " lodsq\n" " pop rsi\n" " jmp rax\n" "\n"); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(struct symtable *sym) { char *name = sym->name; struct symtable *parm, *locvar; int cnt; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the rsp and rbp fprintf(Outfile, "\tglobal\t%s\n" "%s:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n", name, name); // Copy any in-register parameters to the stack, up to six of them // The remaining parameters are already on the stack for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) { if (cnt > 6) { parm->st_posn = paramOffset; paramOffset += 8; } else { parm->st_posn = newlocaloffset(parm->type); cgstorlocal(paramReg--, parm); } } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. 
for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) { locvar->st_posn = newlocaloffset(locvar->type); } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\tadd\trsp, %d\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->st_endlabel); fprintf(Outfile, "\tadd\trsp, %d\n", stackOffset); fputs("\tpop rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadglob(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); } else // Print out the code to initialise it switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_PREDEC) 
fprintf(Outfile, "\tdec\tdword [%s]\n", sym->name); fprintf(Outfile, "\tmovsx\t%s, word [%s]\n", dreglist[r], sym->name); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword [%s]\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadlocal(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->st_posn); fprintf(Outfile, "\tmov\t%s, [rbp+%d]\n", reglist[r], sym->st_posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->st_posn); } else switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->st_posn); fprintf(Outfile, "\tmovzx\t%s, byte [rbp+%d]\n", reglist[r], sym->st_posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->st_posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", sym->st_posn); fprintf(Outfile, "\tmovsx\t%s, dword [rbp+%d]\n", reglist[r], sym->st_posn); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", 
sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", sym->st_posn); break; default: fatald("Bad type in cgloadlocal:", sym->type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, L%d\n", reglist[r], label); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tand\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\tor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshl\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, 
"\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshr\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tneg\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnot\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r], breglist[r]); return (r); } // Convert an integer value to a boolean value. Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, byte %s\n", reglist[r], breglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\tadd\trsp, %d\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmov\t%s, rax\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpush\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmov\t%s, %s\n", reglist[FIRSTPARAMREG - argposn + 1], reglist[r]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsal\t%s, %d\n", reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, dreglist[r]); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\tqword\t[rbp+%d], %s\n", sym->st_posn, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\tbyte\t[rbp+%d], %s\n", sym->st_posn, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\tdword\t[rbp+%d], %s\n", sym->st_posn, dreglist[r]); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size, type; int initvalue; int i; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the variable (or its elements if an array) // and the type of the variable if (node->stype == S_ARRAY) { size= typesize(value_at(node->type), node->ctype); type= value_at(node->type); } else { size = node->size; type= node->type; } // Generate the global identity and the label cgdataseg(); fprintf(Outfile, "\tsection\t.data\n" "\tglobal\t%s\n", node->name); 
fprintf(Outfile, "%s:\n", node->name); // Output space for one or more elements for (i = 0; i < node->nelems; i++) { // Get any initial value initvalue = 0; if (node->initlist != NULL) initvalue = node->initlist[i]; // Generate the space for this type // original version switch(size) { case 1: fprintf(Outfile, "\tdb\t%d\n", initvalue); break; case 4: fprintf(Outfile, "\tdd\t%d\n", initvalue); break; case 8: // Generate the pointer to a string literal. Treat a zero value // as actually zero, not the label L0 if (node->initlist != NULL && type == pointer_to(P_CHAR) && initvalue != 0) fprintf(Outfile, "\tdq\tL%d\n", initvalue); else fprintf(Outfile, "\tdq\t%d\n", initvalue); break; default: for (int i = 0; i < size; i++) fprintf(Outfile, "\tdb\t0\n"); /* compact version using times instead of loop fprintf(Outfile, "\ttimes\t%d\tdb\t0\n", size); */ } } } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\tdb\t%d\n", *cptr); } fprintf(Outfile, "\tdb\t0\n"); /* put string in readable format on a single line // probably went overboard with error checking int comma = 0, quote = 0, start = 1; fprintf(Outfile, "\tdb\t"); for (cptr=strvalue; *cptr; cptr++) { if ( ! isprint(*cptr) ) if (comma || start) { fprintf(Outfile, "%d, ", *cptr); start = 0; comma = 1; } else if (quote) { fprintf(Outfile, "\', %d, ", *cptr); comma = 1; quote = 0; } else { fprintf(Outfile, "%d, ", *cptr); comma = 1; quote = 0; } else if (start || comma) { fprintf(Outfile, "\'%c", *cptr); start = comma = 0; quote = 1; } else { fprintf(Outfile, "%c", *cptr); comma = 0; quote = 1; } } if (comma || start) fprintf(Outfile, "0\n"); else fprintf(Outfile, "\', 0\n"); */ } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzx\teax, %s\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmov\teax, %s\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } cgjump(sym->st_endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL) fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r], sym->name); else fprintf(Outfile, "\tlea\t%s, [rbp+%d]\n", reglist[r], sym->st_posn); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovsx\t%s, dword [%s]\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { //Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmov\t[%s], byte %s\n", reglist[r2], breglist[r1]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } // Generate a switch jump table and the code to // load the registers and call the switch() code void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; // Get a label for the switch table label = genlabel(); cglabel(label); // Heuristic. If we have no cases, create one case // which points to the default case if (casecount == 0) { caseval[0] = 0; caselabel[0] = defaultlabel; casecount = 1; } // Generate the switch jump table. 
fprintf(Outfile, "\tdq\t%d\n", casecount); for (i = 0; i < casecount; i++) fprintf(Outfile, "\tdq\t%d, L%d\n", caseval[i], caselabel[i]); fprintf(Outfile, "\tdq\tL%d\n", defaultlabel); // Load the specific registers cglabel(toplabel); fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); fprintf(Outfile, "\tmov\trdx, L%d\n", label); fprintf(Outfile, "\tjmp\tswitch\n"); } ================================================ FILE: 43_More_Operators/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Putback; // Character put back by scanner extern_ struct symtable *Functionid; // Symbol ptr of the current function extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ char *Infilename; // Name of file we are parsing extern_ char *Outfilename; // Name of file we opened as Outfile extern_ struct token Token; // Last token scanned extern_ char Text[TEXTLEN + 1]; // Last identifier scanned extern_ int Looplevel; // Depth of nested loops extern_ int Switchlevel; // Depth of nested switches // Symbol table lists extern_ struct symtable *Globhead, *Globtail; // Global variables and functions extern_ struct symtable *Loclhead, *Locltail; // Local variables extern_ struct symtable *Parmhead, *Parmtail; // Local parameters extern_ struct symtable *Membhead, *Membtail; // Temp list of struct/union members extern_ struct symtable *Structhead, *Structtail; // List of struct types extern_ struct symtable *Unionhead, *Uniontail; // List of union types extern_ struct symtable *Enumhead, *Enumtail; // List of enum types and values extern_ struct symtable *Typehead, *Typetail; // List of typedefs // Command-line flags extern_ int O_dumpAST; // If true, dump the AST trees extern_ int O_keepasm; // If true, keep any assembly files extern_ int O_assemble; // If true, assemble the assembly files extern_ int 
O_dolink; // If true, link the object files extern_ int O_verbose; // If true, print info on compilation stages ================================================ FILE: 43_More_Operators/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 static struct symtable *composite_declaration(int type); static int typedef_declaration(struct symtable **ctype); static int type_of_typedef(char *name, struct symtable **ctype); static void enum_declaration(void); // Parse the current token and return a primitive type enum value, // a pointer to any composite type and possibly modify // the class of the type. int parse_type(struct symtable **ctype, int *class) { int type, exstatic = 1; // See if the class has been changed to extern (later, static) while (exstatic) { switch (Token.token) { case T_EXTERN: *class = C_EXTERN; scan(&Token); break; default: exstatic = 0; } } // Now work on the actual type keyword switch (Token.token) { case T_VOID: type = P_VOID; scan(&Token); break; case T_CHAR: type = P_CHAR; scan(&Token); break; case T_INT: type = P_INT; scan(&Token); break; case T_LONG: type = P_LONG; scan(&Token); break; // For the following, if we have a ';' after the // parsing then there is no type, so return -1. 
// Example: struct x {int y; int z}; case T_STRUCT: type = P_STRUCT; *ctype = composite_declaration(P_STRUCT); if (Token.token == T_SEMI) type = -1; break; case T_UNION: type = P_UNION; *ctype = composite_declaration(P_UNION); if (Token.token == T_SEMI) type = -1; break; case T_ENUM: type = P_INT; // Enums are really ints enum_declaration(); if (Token.token == T_SEMI) type = -1; break; case T_TYPEDEF: type = typedef_declaration(ctype); if (Token.token == T_SEMI) type = -1; break; case T_IDENT: type = type_of_typedef(Text, ctype); break; default: fatals("Illegal type, token", Token.tokstr); } return (type); } // Given a type parsed by parse_type(), scan in any following // '*' tokens and return the new type int parse_stars(int type) { while (1) { if (Token.token != T_STAR) break; type = pointer_to(type); scan(&Token); } return (type); } // Parse a type which appears inside a cast int parse_cast(void) { int type, class; struct symtable *ctype; // Get the type inside the parentheses type= parse_stars(parse_type(&ctype, &class)); // Do some error checking. I'm sure more can be done if (type == P_STRUCT || type == P_UNION || type == P_VOID) fatal("Cannot cast to a struct, union or void type"); return(type); } // Given a type, check that the latest token is a literal // of that type. If an integer literal, return this value. // If a string literal, return the label number of the string. // Do not scan the next token. If type is P_NONE, relax all // parse restrictions int parse_literal(int type) { // We have a string literal. Store in memory and return the label if (Token.token == T_STRLIT) { if (type == pointer_to(P_CHAR) || type == P_NONE) return(genglobstr(Text)); } // We have an integer literal. Do some range checking. 
if (Token.token == T_INTLIT) { switch(type) { case P_CHAR: if (Token.intvalue < 0 || Token.intvalue > 255) fatal("Integer literal value too big for char type"); case P_NONE: case P_INT: case P_LONG: break; default: fatal("Type mismatch: integer literal vs. variable"); } } else fatal("Expecting an integer literal value"); return(Token.intvalue); } // Given the type, name and class of a scalar variable, // parse any initialisation value and allocate storage for it. // Return the variable's symbol table entry. static struct symtable *scalar_declaration(char *varname, int type, struct symtable *ctype, int class, struct ASTnode **tree) { struct symtable *sym=NULL; struct ASTnode *varnode, *exprnode; int casttype; *tree= NULL; // Add this as a known scalar switch (class) { case C_EXTERN: case C_GLOBAL: sym= addglob(varname, type, ctype, S_VARIABLE, class, 1, 0); break; case C_LOCAL: sym= addlocl(varname, type, ctype, S_VARIABLE, 1); break; case C_PARAM: sym= addparm(varname, type, ctype, S_VARIABLE); break; case C_MEMBER: sym= addmemb(varname, type, ctype, S_VARIABLE, 1); break; } // The variable is being initialised if (Token.token == T_ASSIGN) { // Only possible for a global or local if (class != C_GLOBAL && class != C_LOCAL) fatals("Variable can not be initialised", varname); scan(&Token); // Globals must be assigned a literal value if (class == C_GLOBAL) { // If there is a cast if (Token.token == T_LPAREN) { // Get the type in the cast scan(&Token); casttype= parse_cast(); rparen(); // Check that the two types are compatible. Change // the new type so that the literal parse below works. // A 'void *' casstype can be assigned to any pointer type. 
if (casttype == type || (casttype== pointer_to(P_VOID) && ptrtype(type))) type= P_NONE; else fatal("Type mismatch"); } // Create one initial value for the variable and // parse this value sym->initlist= (int *)malloc(sizeof(int)); sym->initlist[0]= parse_literal(type); scan(&Token); } if (class == C_LOCAL) { // Make an A_IDENT AST node with the variable varnode = mkastleaf(A_IDENT, sym->type, sym, 0); // Get the expression for the assignment, make into a rvalue exprnode = binexpr(0); exprnode->rvalue = 1; // Ensure the expression's type matches the variable exprnode = modify_type(exprnode, varnode->type, 0); if (exprnode == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree *tree = mkastnode(A_ASSIGN, exprnode->type, exprnode, NULL, varnode, NULL, 0); } } // Generate any global space if (class == C_GLOBAL) genglobsym(sym); return (sym); } // Given the type, name and class of an variable, parse // the size of the array, if any. Then parse any initialisation // value and allocate storage for it. // Return the variable's symbol table entry. static struct symtable *array_declaration(char *varname, int type, struct symtable *ctype, int class) { struct symtable *sym; // New symbol table entry int nelems= -1; // Assume the number of elements won't be given int maxelems; // The maximum number of elements in the init list int *initlist; // The list of initial elements int i=0, j; int casttype, newtype; // Skip past the '[' scan(&Token); // See we have an array size if (Token.token == T_INTLIT) { if (Token.intvalue <= 0) fatald("Array size is illegal", Token.intvalue); nelems= Token.intvalue; scan(&Token); } // Ensure we have a following ']' match(T_RBRACKET, "]"); // Add this as a known array. 
We treat the // array as a pointer to its elements' type switch (class) { case C_EXTERN: case C_GLOBAL: sym = addglob(varname, pointer_to(type), ctype, S_ARRAY, class, 0, 0); break; default: fatal("For now, declaration of non-global arrays is not implemented"); } // Array initialisation if (Token.token == T_ASSIGN) { if (class != C_GLOBAL) fatals("Variable can not be initialised", varname); scan(&Token); // Get the following left curly bracket match(T_LBRACE, "{"); #define TABLE_INCREMENT 10 // If the array already has nelems, allocate that many elements // in the list. Otherwise, start with TABLE_INCREMENT. if (nelems != -1) maxelems= nelems; else maxelems= TABLE_INCREMENT; initlist= (int *)malloc(maxelems *sizeof(int)); // Loop getting a new literal value from the list while (1) { // Get the original type newtype= type; // Check we can add the next value, then parse and add it if (nelems != -1 && i == maxelems) fatal("Too many values in initialisation list"); if (Token.token == T_LPAREN) { // Get the type in the cast scan(&Token); casttype= parse_cast(); rparen(); // Check that the two types are compatible. Change // the new type so that the literal parse below works. // A 'void *' casstype can be assigned to any pointer type. if (casttype == type || (casttype== pointer_to(P_VOID) && ptrtype(type))) newtype= P_NONE; else fatal("Type mismatch"); newtype= P_NONE; } initlist[i++]= parse_literal(newtype); scan(&Token); // Increase the list size if the original size was // not set and we have hit the end of the current list if (nelems == -1 && i == maxelems) { maxelems += TABLE_INCREMENT; initlist= (int *)realloc(initlist, maxelems *sizeof(int)); } // Leave when we hit the right curly bracket if (Token.token == T_RBRACE) { scan(&Token); break; } // Next token must be a comma, then comma(); } // Zero any unused elements in the initlist. 
// Attach the list to the symbol table entry for (j=i; j < sym->nelems; j++) initlist[j]=0; if (i > nelems) nelems = i; sym->initlist= initlist; } // Set the size of the array and the number of elements sym->nelems= nelems; sym->size= sym->nelems * typesize(type, ctype); // Generate any global space if (class == C_GLOBAL) genglobsym(sym); return (sym); } // Given a pointer to the new function being declared and // a possibly NULL pointer to the function's previous declaration, // parse a list of parameters and cross-check them against the // previous declaration. Return the count of parameters static int param_declaration_list(struct symtable *oldfuncsym, struct symtable *newfuncsym) { int type, paramcnt = 0; struct symtable *ctype; struct symtable *protoptr = NULL; struct ASTnode *unused; // Get the pointer to the first prototype parameter if (oldfuncsym != NULL) protoptr = oldfuncsym->member; // Loop getting any parameters while (Token.token != T_RPAREN) { // Get the type of the next parameter type = declaration_list(&ctype, C_PARAM, T_COMMA, T_RPAREN, &unused); if (type == -1) fatal("Bad type in parameter list"); // Ensure the type of this parameter matches the prototype if (protoptr != NULL) { if (type != protoptr->type) fatald("Type doesn't match prototype for parameter", paramcnt + 1); protoptr = protoptr->next; } paramcnt++; // Stop when we hit the right parenthesis if (Token.token == T_RPAREN) break; // We need a comma as separator comma(); } if (oldfuncsym != NULL && paramcnt != oldfuncsym->nelems) fatals("Parameter count mismatch for function", oldfuncsym->name); // Return the count of parameters return (paramcnt); } // // function_declaration: type identifier '(' parameter_list ')' ; // | type identifier '(' parameter_list ')' compound_statement ; // // Parse the declaration of function. 
static struct symtable *function_declaration(char *funcname, int type, struct symtable *ctype, int class) { struct ASTnode *tree, *finalstmt; struct symtable *oldfuncsym, *newfuncsym = NULL; int endlabel, paramcnt; // Text has the identifier's name. If this exists and is a // function, get the id. Otherwise, set oldfuncsym to NULL. if ((oldfuncsym = findsymbol(funcname)) != NULL) if (oldfuncsym->stype != S_FUNCTION) oldfuncsym = NULL; // If this is a new function declaration, get a // label-id for the end label, and add the function // to the symbol table, if (oldfuncsym == NULL) { endlabel = genlabel(); // Assumtion: functions only return scalar types, so NULL below newfuncsym = addglob(funcname, type, NULL, S_FUNCTION, C_GLOBAL, 0, endlabel); } // Scan in the '(', any parameters and the ')'. // Pass in any existing function prototype pointer lparen(); paramcnt = param_declaration_list(oldfuncsym, newfuncsym); rparen(); // If this is a new function declaration, update the // function symbol entry with the number of parameters. // Also copy the parameter list into the function's node. if (newfuncsym) { newfuncsym->nelems = paramcnt; newfuncsym->member = Parmhead; oldfuncsym = newfuncsym; } // Clear out the parameter list Parmhead = Parmtail = NULL; // Declaration ends in a semicolon, only a prototype. if (Token.token == T_SEMI) return (oldfuncsym); // This is not just a prototype. // Set the Functionid global to the function's symbol pointer Functionid = oldfuncsym; // Get the AST tree for the compound statement and mark // that we have parsed no loops or switches yet Looplevel = 0; Switchlevel = 0; lbrace(); tree = compound_statement(0); rbrace(); // If the function type isn't P_VOID .. if (type != P_VOID) { // Error if no statements in the function if (tree == NULL) fatal("No statements in function with non-void type"); // Check that the last AST operation in the // compound statement was a return statement finalstmt = (tree->op == A_GLUE) ? 
tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); } // Build the A_FUNCTION node which has the function's symbol pointer // and the compound statement sub-tree tree = mkastunary(A_FUNCTION, type, tree, oldfuncsym, endlabel); // Generate the assembly code for it if (O_dumpAST) { dumpAST(tree, NOLABEL, 0); fprintf(stdout, "\n\n"); } genAST(tree, NOLABEL, NOLABEL, NOLABEL, 0); // Now free the symbols associated // with this function freeloclsyms(); return (oldfuncsym); } // Parse composite type declarations: structs or unions. // Either find an existing struct/union declaration, or build // a struct/union symbol table entry and return its pointer. static struct symtable *composite_declaration(int type) { struct symtable *ctype = NULL; struct symtable *m; struct ASTnode *unused; int offset; int t; // Skip the struct/union keyword scan(&Token); // See if there is a following struct/union name if (Token.token == T_IDENT) { // Find any matching composite type if (type == P_STRUCT) ctype = findstruct(Text); else ctype = findunion(Text); scan(&Token); } // If the next token isn't an LBRACE , this is // the usage of an existing struct/union type. // Return the pointer to the type. if (Token.token != T_LBRACE) { if (ctype == NULL) fatals("unknown struct/union type", Text); return (ctype); } // Ensure this struct/union type hasn't been // previously defined if (ctype) fatals("previously defined struct/union", Text); // Build the composite type and skip the left brace if (type == P_STRUCT) ctype = addstruct(Text); else ctype = addunion(Text); scan(&Token); // Scan in the list of members while (1) { // Get the next member. 
m is used as a dummy t= declaration_list(&m, C_MEMBER, T_SEMI, T_RBRACE, &unused); if (t== -1) fatal("Bad type in member list"); if (Token.token == T_SEMI) scan(&Token); if (Token.token == T_RBRACE) break; } // Attach to the struct type's node rbrace(); if (Membhead==NULL) fatals("No members in struct", ctype->name); ctype->member = Membhead; Membhead = Membtail = NULL; // Set the offset of the initial member // and find the first free byte after it m = ctype->member; m->st_posn = 0; offset = typesize(m->type, m->ctype); // Set the position of each successive member in the composite type // Unions are easy. For structs, align the member and find the next free byte for (m = m->next; m != NULL; m = m->next) { // Set the offset for this member if (type == P_STRUCT) m->st_posn = genalign(m->type, offset, 1); else m->st_posn = 0; // Get the offset of the next free byte after this member offset += typesize(m->type, m->ctype); } // Set the overall size of the composite type ctype->size = offset; return (ctype); } // Parse an enum declaration static void enum_declaration(void) { struct symtable *etype = NULL; char *name= NULL; int intval = 0; // Skip the enum keyword. scan(&Token); // If there's a following enum type name, get a // pointer to any existing enum type node. if (Token.token == T_IDENT) { etype = findenumtype(Text); name = strdup(Text); // As it gets tromped soon scan(&Token); } // If the next token isn't a LBRACE, check // that we have an enum type name, then return if (Token.token != T_LBRACE) { if (etype == NULL) fatals("undeclared enum type:", name); return; } // We do have an LBRACE. Skip it scan(&Token); // If we have an enum type name, ensure that it // hasn't been declared before. 
if (etype != NULL) fatals("enum type redeclared:", etype->name); else // Build an enum type node for this identifier etype = addenum(name, C_ENUMTYPE, 0); // Loop to get all the enum values while (1) { // Ensure we have an identifier // Copy it in case there's an int literal coming up ident(); name = strdup(Text); // Ensure this enum value hasn't been declared before etype = findenumval(name); if (etype != NULL) fatals("enum value redeclared:", Text); // If the next token is an '=', skip it and // get the following int literal if (Token.token == T_ASSIGN) { scan(&Token); if (Token.token != T_INTLIT) fatal("Expected int literal after '='"); intval = Token.intvalue; scan(&Token); } // Build an enum value node for this identifier. // Increment the value for the next enum identifier. etype = addenum(name, C_ENUMVAL, intval++); // Bail out on a right curly bracket, else get a comma if (Token.token == T_RBRACE) break; comma(); } scan(&Token); // Skip over the right curly bracket } // Parse a typedef declaration and return the type // and ctype that it represents static int typedef_declaration(struct symtable **ctype) { int type, class = 0; // Skip the typedef keyword. scan(&Token); // Get the actual type following the keyword type = parse_type(ctype, &class); if (class != 0) fatal("Can't have extern in a typedef declaration"); // See if the typedef identifier already exists if (findtypedef(Text) != NULL) fatals("redefinition of typedef", Text); // Get any following '*' tokens type = parse_stars(type); // It doesn't exist so add it to the typedef list addtypedef(Text, type, *ctype); scan(&Token); return (type); } // Given a typedef name, return the type it represents static int type_of_typedef(char *name, struct symtable **ctype) { struct symtable *t; // Look up the typedef in the list t = findtypedef(name); if (t == NULL) fatals("unknown type", name); scan(&Token); *ctype = t->ctype; return (t->type); } // Parse the declaration of a variable or function. 
// The type and any following '*'s have been scanned, and we // have the identifier in the Token variable. // The class argument is the variable's class. // Return a pointer to the symbol's entry in the symbol table static struct symtable *symbol_declaration(int type, struct symtable *ctype, int class, struct ASTnode **tree) { struct symtable *sym = NULL; char *varname = strdup(Text); // Ensure that we have an identifier. // We copied it above so we can scan more tokens in, e.g. // an assignment expression for a local variable. ident(); // Deal with function declarations if (Token.token == T_LPAREN) { return (function_declaration(varname, type, ctype, class)); } // See if this array or scalar variable has already been declared switch (class) { case C_EXTERN: case C_GLOBAL: if (findglob(varname) != NULL) fatals("Duplicate global variable declaration", varname); case C_LOCAL: case C_PARAM: if (findlocl(varname) != NULL) fatals("Duplicate local variable declaration", varname); case C_MEMBER: if (findmember(varname) != NULL) fatals("Duplicate struct/union member declaration", varname); } // Add the array or scalar variable to the symbol table if (Token.token == T_LBRACKET) sym = array_declaration(varname, type, ctype, class); else sym = scalar_declaration(varname, type, ctype, class, tree); return (sym); } // Parse a list of symbols where there is an initial type. // Return the type of the symbols. et1 and et2 are end tokens. int declaration_list(struct symtable **ctype, int class, int et1, int et2, struct ASTnode **gluetree) { int inittype, type; struct symtable *sym; struct ASTnode *tree; *gluetree= NULL; // Get the initial type. 
If -1, it was // a composite type definition, return this if ((inittype = parse_type(ctype, &class)) == -1) return (inittype); // Now parse the list of symbols while (1) { // See if this symbol is a pointer type = parse_stars(inittype); // Parse this symbol sym = symbol_declaration(type, *ctype, class, &tree); // We parsed a function, there is no list so leave if (sym->stype == S_FUNCTION) { if (class != C_GLOBAL) fatal("Function definition not at global level"); return (type); } // Glue any AST tree from a local declaration // to build a sequence of assignments to perform if (*gluetree== NULL) *gluetree= tree; else *gluetree = mkastnode(A_GLUE, P_NONE, *gluetree, NULL, tree, NULL, 0); // We are at the end of the list, leave if (Token.token == et1 || Token.token == et2) return (type); // Otherwise, we need a comma as separator comma(); } } // Parse one or more global declarations, either // variables, functions or structs void global_declarations(void) { struct symtable *ctype; struct ASTnode *unused; while (Token.token != T_EOF) { declaration_list(&ctype, C_GLOBAL, T_SEMI, T_EOF, &unused); // Skip any semicolons and right curly brackets if (Token.token == T_SEMI) scan(&Token); } } ================================================ FILE: 43_More_Operators/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c void reject_token(struct token *t); int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue); struct ASTnode *mkastleaf(int op, int type, struct symtable *sym, int intvalue); struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, struct symtable *sym, int intvalue); void dumpAST(struct ASTnode *n, int label, int parentASTop); // gen.c int genlabel(void); int genAST(struct ASTnode *n, int iflabel, int looptoplabel, int 
loopendlabel, int parentASTop); void genpreamble(); void genpostamble(); void genfreeregs(); void genglobsym(struct symtable *node); int genglobstr(char *strvalue); int genprimsize(int type); int genalign(int type, int offset, int direction); void genreturn(int reg, int id); // cg.c int cgprimsize(int type); int cgalign(int type, int offset, int direction); void cgtextseg(); void cgdataseg(); void freeall_registers(void); void cgpreamble(); void cgpostamble(); void cgfuncpreamble(struct symtable *sym); void cgfuncpostamble(struct symtable *sym); int cgloadint(int value, int type); int cgloadglob(struct symtable *sym, int op); int cgloadlocal(struct symtable *sym, int op); int cgloadglobstr(int label); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdiv(int r1, int r2); int cgshlconst(int r, int val); int cgcall(struct symtable *sym, int numargs); void cgcopyarg(int r, int argposn); int cgstorglob(int r, struct symtable *sym); int cgstorlocal(int r, struct symtable *sym); void cgglobsym(struct symtable *node); void cgglobstr(int l, char *strvalue); int cgcompare_and_set(int ASTop, int r1, int r2); int cgcompare_and_jump(int ASTop, int r1, int r2, int label); void cglabel(int l); void cgjump(int l); int cgwiden(int r, int oldtype, int newtype); void cgreturn(int reg, struct symtable *sym); int cgaddress(struct symtable *sym); int cgderef(int r, int type); int cgstorderef(int r1, int r2, int type); int cgnegate(int r); int cginvert(int r); int cglognot(int r); int cgboolean(int r, int op, int label); int cgand(int r1, int r2); int cgor(int r1, int r2); int cgxor(int r1, int r2); int cgshl(int r1, int r2); int cgshr(int r1, int r2); void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel); // expr.c struct ASTnode *expression_list(int endtoken); struct ASTnode *binexpr(int ptp); // stmt.c struct ASTnode *compound_statement(int inswitch); // misc.c void match(int t, char *what); void 
semi(void); void lbrace(void); void rbrace(void); void lparen(void); void rparen(void); void ident(void); void comma(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node); struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn); struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn); struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int nelems); struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype); struct symtable *addstruct(char *name); struct symtable *addunion(char *name); struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int nelems); struct symtable *addenum(char *name, int class, int value); struct symtable *addtypedef(char *name, int type, struct symtable *ctype); struct symtable *findglob(char *s); struct symtable *findlocl(char *s); struct symtable *findsymbol(char *s); struct symtable *findmember(char *s); struct symtable *findstruct(char *s); struct symtable *findunion(char *s); struct symtable *findenumtype(char *s); struct symtable *findenumval(char *s); struct symtable *findtypedef(char *s); void clear_symtable(void); void freeloclsyms(void); // decl.c int parse_type(struct symtable **ctype, int *class); int parse_stars(int type); int parse_cast(void); int declaration_list(struct symtable **ctype, int class, int et1, int et2, struct ASTnode **gluetree); void global_declarations(void); // types.c int inttype(int type); int ptrtype(int type); int pointer_to(int type); int value_at(int type); int typesize(int type, struct symtable *ctype); struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op); ================================================ FILE: 43_More_Operators/defs.h 
================================================ #include #include #include #include // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 enum { TEXTLEN = 512 // Length of identifiers in input }; // Commands and default filenames #define AOUT "a.out" #ifdef __NASM__ #define ASCMD "nasm -f elf64 -o " #define LDCMD "cc -no-pie -fno-plt -Wall -o " #else #define ASCMD "as -o " #define LDCMD "cc -o " #endif #define CPPCMD "cpp -nostdinc -isystem " // Token types enum { T_EOF, // Binary operators T_ASSIGN, T_ASPLUS, T_ASMINUS, T_ASSTAR, T_ASSLASH, T_LOGOR, T_LOGAND, T_OR, T_XOR, T_AMPER, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, T_LSHIFT, T_RSHIFT, T_PLUS, T_MINUS, T_STAR, T_SLASH, // Other operators T_INC, T_DEC, T_INVERT, T_LOGNOT, // Type keywords T_VOID, T_CHAR, T_INT, T_LONG, // Other keywords T_IF, T_ELSE, T_WHILE, T_FOR, T_RETURN, T_STRUCT, T_UNION, T_ENUM, T_TYPEDEF, T_EXTERN, T_BREAK, T_CONTINUE, T_SWITCH, T_CASE, T_DEFAULT, // Structural tokens T_INTLIT, T_STRLIT, T_SEMI, T_IDENT, T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, T_LBRACKET, T_RBRACKET, T_COMMA, T_DOT, T_ARROW, T_COLON }; // Token structure struct token { int token; // Token type, from the enum list above char *tokstr; // String version of the token int intvalue; // For T_INTLIT, the integer value }; // AST node types. The first few line up // with the related tokens enum { A_ASSIGN = 1, A_ASPLUS, A_ASMINUS, A_ASSTAR, A_ASSLASH, A_LOGOR, A_LOGAND, A_OR, A_XOR, A_AND, A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_LSHIFT, A_RSHIFT, A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_INTLIT, A_STRLIT, A_IDENT, A_GLUE, A_IF, A_WHILE, A_FUNCTION, A_WIDEN, A_RETURN, A_FUNCCALL, A_DEREF, A_ADDR, A_SCALE, A_PREINC, A_PREDEC, A_POSTINC, A_POSTDEC, A_NEGATE, A_INVERT, A_LOGNOT, A_TOBOOL, A_BREAK, A_CONTINUE, A_SWITCH, A_CASE, A_DEFAULT, A_CAST }; // Primitive types. The bottom 4 bits is an integer // value that represents the level of indirection, // e.g. 0= no pointer, 1= pointer, 2= pointer pointer etc. 
enum { P_NONE, P_VOID = 16, P_CHAR = 32, P_INT = 48, P_LONG = 64, P_STRUCT=80, P_UNION=96 }; // Structural types enum { S_VARIABLE, S_FUNCTION, S_ARRAY }; // Storage classes enum { C_GLOBAL = 1, // Globally visible symbol C_LOCAL, // Locally visible symbol C_PARAM, // Locally visible function parameter C_EXTERN, // External globally visible symbol C_STRUCT, // A struct C_UNION, // A union C_MEMBER, // Member of a struct or union C_ENUMTYPE, // A named enumeration type C_ENUMVAL, // A named enumeration value C_TYPEDEF // A named typedef }; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol struct symtable *ctype; // If struct/union, ptr to that type int stype; // Structural type for the symbol int class; // Storage class for the symbol int size; // Total size in bytes of this symbol int nelems; // Functions: # params. Arrays: # elements #define st_endlabel st_posn // For functions, the end label int st_posn; // For locals, the negative offset // from the stack base pointer int *initlist; // List of initial values struct symtable *next; // Next symbol in one list struct symtable *member; // First member of a function, struct, }; // union or enum // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates int rvalue; // True if the node is an rvalue struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; struct symtable *sym; // For many AST nodes, the pointer to // the symbol in the symbol table #define a_intvalue a_size // For A_INTLIT, the integer value int a_size; // For A_SCALE, the size to scale by }; enum { NOREG = -1, // Use NOREG when the AST generation // functions have no register to return NOLABEL = 0 // Use NOLABEL when we have no label to // pass to genAST() }; ================================================ FILE: 43_More_Operators/expr.c 
================================================
#include "defs.h"
#include "data.h"
#include "decl.h"

// Parsing of expressions
// Copyright (c) 2019 Warren Toomey, GPL3

// expression_list: <null>
//        | expression
//        | expression ',' expression_list
//        ;

// Parse a list of zero or more comma-separated expressions and
// return an AST composed of A_GLUE nodes with the left-hand child
// being the sub-tree of previous expressions (or NULL) and the right-hand
// child being the next expression. Each A_GLUE node will have size field
// set to the number of expressions in the tree at this point. If no
// expressions are parsed, NULL is returned.
// endtoken is the token that terminates the list; it is not consumed.
struct ASTnode *expression_list(int endtoken) {
  struct ASTnode *tree = NULL;
  struct ASTnode *child = NULL;
  int exprcount = 0;

  // Loop until the end token
  while (Token.token != endtoken) {

    // Parse the next expression and increment the expression count
    child = binexpr(0);
    exprcount++;

    // Build an A_GLUE AST node with the previous tree as the left child
    // and the new expression as the right child. Store the expression count.
    tree = mkastnode(A_GLUE, P_NONE, tree, NULL, child, NULL, exprcount);

    // Stop when we reach the end token
    if (Token.token == endtoken)
      break;

    // Must have a ',' at this point
    match(T_COMMA, ",");
  }

  // Return the tree of expressions
  return (tree);
}

// Parse a function call and return its AST.
// The function's name is in Text and the current token is the '('.
static struct ASTnode *funccall(void) {
  struct ASTnode *tree;
  struct symtable *funcptr;

  // Check that the identifier has been defined as a function,
  // then make a leaf node for it.
  if ((funcptr = findsymbol(Text)) == NULL || funcptr->stype != S_FUNCTION) {
    fatals("Undeclared function", Text);
  }
  // Get the '('
  lparen();

  // Parse the argument expression list
  tree = expression_list(T_RPAREN);

  // XXX Check type of each argument against the function's prototype

  // Build the function call AST node. Store the
  // function's return type as this node's type.
  // Also record the function's symbol-id
  tree = mkastunary(A_FUNCCALL, funcptr->type, tree, funcptr, 0);

  // Get the ')'
  rparen();
  return (tree);
}

// Parse the index into an array and return an AST tree for it.
// The array's name is in Text and the current token is the '['.
static struct ASTnode *array_access(void) {
  struct ASTnode *left, *right;
  struct symtable *aryptr;

  // Check that the identifier has been defined as an array
  // then make a leaf node for it that points at the base
  if ((aryptr = findsymbol(Text)) == NULL || aryptr->stype != S_ARRAY) {
    fatals("Undeclared array", Text);
  }
  left = mkastleaf(A_ADDR, aryptr->type, aryptr, 0);

  // Get the '['
  scan(&Token);

  // Parse the following expression
  right = binexpr(0);

  // Get the ']'
  match(T_RBRACKET, "]");

  // Ensure that this is of int type
  if (!inttype(right->type))
    fatal("Array index is not of integer type");

  // Scale the index by the size of the element's type
  right = modify_type(right, left->type, A_ADD);

  // Return an AST tree where the array's base has the offset
  // added to it, and dereference the element. Still an lvalue
  // at this point.
  left = mkastnode(A_ADD, aryptr->type, left, NULL, right, NULL, 0);
  left = mkastunary(A_DEREF, value_at(left->type), left, NULL, 0);
  return (left);
}

// Parse the member reference of a struct or union
// and return an AST tree for it. If withpointer is true,
// the access is through a pointer to the member.
// The variable's name is in Text and the current token
// is the '.' or '->'.
static struct ASTnode *member_access(int withpointer) {
  struct ASTnode *left, *right;
  struct symtable *compvar;
  struct symtable *typeptr;
  struct symtable *m;

  // Check that the identifier has been declared as a
  // struct/union or a struct/union pointer
  if ((compvar = findsymbol(Text)) == NULL)
    fatals("Undeclared variable", Text);
  if (withpointer && compvar->type != pointer_to(P_STRUCT)
      && compvar->type != pointer_to(P_UNION))
    fatals("Undeclared variable", Text);
  if (!withpointer && compvar->type != P_STRUCT && compvar->type != P_UNION)
    fatals("Undeclared variable", Text);

  // If a pointer to a struct/union, get the pointer's value.
  // Otherwise, make a leaf node that points at the base
  // Either way, it's an rvalue
  if (withpointer) {
    left = mkastleaf(A_IDENT, pointer_to(compvar->type), compvar, 0);
  } else
    left = mkastleaf(A_ADDR, compvar->type, compvar, 0);
  left->rvalue = 1;

  // Get the details of the composite type
  typeptr = compvar->ctype;

  // Skip the '.' or '->' token and get the member's name
  scan(&Token);
  ident();

  // Find the matching member's name in the type
  // Die if we can't find it
  for (m = typeptr->member; m != NULL; m = m->next)
    if (!strcmp(m->name, Text))
      break;

  if (m == NULL)
    fatals("No member found in struct/union: ", Text);

  // Build an A_INTLIT node with the offset
  right = mkastleaf(A_INTLIT, P_INT, NULL, m->st_posn);

  // Add the member's offset to the base of the struct/union
  // and dereference it. Still an lvalue at this point
  left = mkastnode(A_ADD, pointer_to(m->type), left, NULL, right, NULL, 0);
  left = mkastunary(A_DEREF, m->type, left, NULL, 0);
  return (left);
}

// Parse a postfix expression and return
// an AST node representing it. The
// identifier is already in Text.
// Handles enum values, function calls, array indexing, member
// access, post-increment/decrement and plain variable references.
static struct ASTnode *postfix(void) {
  struct ASTnode *n;
  struct symtable *varptr;
  struct symtable *enumptr;

  // If the identifier matches an enum value,
  // return an A_INTLIT node
  if ((enumptr = findenumval(Text)) != NULL) {
    scan(&Token);
    return (mkastleaf(A_INTLIT, P_INT, NULL, enumptr->st_posn));
  }

  // Scan in the next token to see if we have a postfix expression
  scan(&Token);

  // Function call
  if (Token.token == T_LPAREN)
    return (funccall());

  // An array reference
  if (Token.token == T_LBRACKET)
    return (array_access());

  // Access into a struct or union
  if (Token.token == T_DOT)
    return (member_access(0));
  if (Token.token == T_ARROW)
    return (member_access(1));

  // A variable. Check that the variable exists.
  if ((varptr = findsymbol(Text)) == NULL || varptr->stype != S_VARIABLE)
    fatals("Unknown variable", Text);

  switch (Token.token) {
      // Post-increment: skip over the token
    case T_INC:
      scan(&Token);
      n = mkastleaf(A_POSTINC, varptr->type, varptr, 0);
      break;

      // Post-decrement: skip over the token
    case T_DEC:
      scan(&Token);
      n = mkastleaf(A_POSTDEC, varptr->type, varptr, 0);
      break;

      // Just a variable reference
    default:
      n = mkastleaf(A_IDENT, varptr->type, varptr, 0);
  }
  return (n);
}

// Parse a primary factor and return an
// AST node representing it.
static struct ASTnode *primary(void) {
  struct ASTnode *n;
  int id;
  int type=0;

  switch (Token.token) {
  case T_INTLIT:
    // For an INTLIT token, make a leaf AST node for it.
    // Make it a P_CHAR if it's within the P_CHAR range
    if (Token.intvalue >= 0 && Token.intvalue < 256)
      n = mkastleaf(A_INTLIT, P_CHAR, NULL, Token.intvalue);
    else
      n = mkastleaf(A_INTLIT, P_INT, NULL, Token.intvalue);
    break;

  case T_STRLIT:
    // For a STRLIT token, generate the assembly for it.
    // Then make a leaf AST node for it. id is the string's label.
    id = genglobstr(Text);
    n = mkastleaf(A_STRLIT, pointer_to(P_CHAR), NULL, id);
    break;

  case T_IDENT:
    return (postfix());

  case T_LPAREN:
    // Beginning of a parenthesised expression, skip the '('.
    scan(&Token);

    // If the token after is a type identifier, this is a cast expression
    switch (Token.token) {
    case T_IDENT:
      // We have to see if the identifier matches a typedef.
      // If not, treat it as an expression.
      if (findtypedef(Text) == NULL) {
        n = binexpr(0); break;
      }
    case T_VOID:
    case T_CHAR:
    case T_INT:
    case T_LONG:
    case T_STRUCT:
    case T_UNION:
    case T_ENUM:
      // Get the type inside the parentheses
      type= parse_cast();

      // Skip the closing ')' and then parse the following expression
      rparen();

    default: n = binexpr(0); // Scan in the expression
    }

    // We now have at least an expression in n, and possibly a non-zero type in type
    // if there was a cast. Skip the closing ')' if there was no cast.
if (type == 0) rparen(); else // Otherwise, make a unary AST node for the cast n= mkastunary(A_CAST, type, n, NULL, 0); return (n); default: fatals("Expecting a primary expression, got token", Token.tokstr); } // Scan in the next token and return the leaf node scan(&Token); return (n); } // Convert a binary operator token into a binary AST operation. // We rely on a 1:1 mapping from token to AST operation static int binastop(int tokentype) { if (tokentype > T_EOF && tokentype <= T_SLASH) return (tokentype); fatald("Syntax error, token", tokentype); return (0); // Keep -Wall happy } // Return true if a token is right-associative, // false otherwise. static int rightassoc(int tokentype) { if (tokentype >= T_ASSIGN && tokentype <= T_ASSLASH) return (1); return (0); } // Operator precedence for each token. Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 10, // T_EOF, T_ASSIGN, T_ASPLUS, 10, 10, 10, // T_ASMINUS, T_ASSTAR, T_ASSLASH, 20, 30, // T_LOGOR, T_LOGAND 40, 50, 60, // T_OR, T_XOR, T_AMPER 70, 70, // T_EQ, T_NE 80, 80, 80, 80, // T_LT, T_GT, T_LE, T_GE 90, 90, // T_LSHIFT, T_RSHIFT 100, 100, // T_PLUS, T_MINUS 110, 110 // T_STAR, T_SLASH }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec; if (tokentype > T_SLASH) fatald("Token with no precedence in op_precedence:", tokentype); prec = OpPrec[tokentype]; if (prec == 0) fatald("Syntax error, token", tokentype); return (prec); } // prefix_expression: primary // | '*' prefix_expression // | '&' prefix_expression // | '-' prefix_expression // | '++' prefix_expression // | '--' prefix_expression // ; // Parse a prefix expression and return // a sub-tree representing it. 
struct ASTnode *prefix(void) { struct ASTnode *tree; switch (Token.token) { case T_AMPER: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Ensure that it's an identifier if (tree->op != A_IDENT) fatal("& operator must be followed by an identifier"); // Now change the operator to A_ADDR and the type to // a pointer to the original type tree->op = A_ADDR; tree->type = pointer_to(tree->type); break; case T_STAR: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's either another deref or an // identifier if (tree->op != A_IDENT && tree->op != A_DEREF) fatal("* operator must be followed by an identifier or *"); // Prepend an A_DEREF operation to the tree tree = mkastunary(A_DEREF, value_at(tree->type), tree, NULL, 0); break; case T_MINUS: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_NEGATE operation to the tree and // make the child an rvalue. Because chars are unsigned, // also widen this to int so that it's signed tree->rvalue = 1; tree = modify_type(tree, P_INT, 0); tree = mkastunary(A_NEGATE, tree->type, tree, NULL, 0); break; case T_INVERT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_INVERT operation to the tree and // make the child an rvalue. tree->rvalue = 1; tree = mkastunary(A_INVERT, tree->type, tree, NULL, 0); break; case T_LOGNOT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_LOGNOT operation to the tree and // make the child an rvalue. 
tree->rvalue = 1; tree = mkastunary(A_LOGNOT, tree->type, tree, NULL, 0); break; case T_INC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("++ operator must be followed by an identifier"); // Prepend an A_PREINC operation to the tree tree = mkastunary(A_PREINC, tree->type, tree, NULL, 0); break; case T_DEC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("-- operator must be followed by an identifier"); // Prepend an A_PREDEC operation to the tree tree = mkastunary(A_PREDEC, tree->type, tree, NULL, 0); break; default: tree = primary(); } return (tree); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; struct ASTnode *ltemp, *rtemp; int ASTop; int tokentype; // Get the tree on the left. // Fetch the next token at the same time. 
left = prefix(); // If we hit one of several terminating tokens, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON) { left->rvalue = 1; return (left); } // While the precedence of this token is more than that of the // previous token precedence, or it's right associative and // equal to the previous token's precedence while ((op_precedence(tokentype) > ptp) || (rightassoc(tokentype) && op_precedence(tokentype) == ptp)) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Determine the operation to be performed on the sub-trees ASTop = binastop(tokentype); if (ASTop == A_ASSIGN) { // Assignment // Make the right tree into an rvalue right->rvalue = 1; // Ensure the right's type matches the left right = modify_type(right, left->type, 0); if (right == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree. However, switch // left and right around, so that the right expression's // code will be generated before the left expression ltemp = left; left = right; right = ltemp; } else { // We are not doing an assignment, so both trees should be rvalues // Convert both trees into rvalue if they are lvalue trees left->rvalue = 1; right->rvalue = 1; // Ensure the two types are compatible by trying // to modify each tree to match the other's type. ltemp = modify_type(left, right->type, ASTop); rtemp = modify_type(right, left->type, ASTop); if (ltemp == NULL && rtemp == NULL) fatal("Incompatible types in binary expression"); if (ltemp != NULL) left = ltemp; if (rtemp != NULL) right = rtemp; } // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. left = mkastnode(binastop(tokentype), left->type, left, NULL, right, NULL, 0); // Update the details of the current token. 
// If we hit a terminating token, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON) { left->rvalue = 1; return (left); } } // Return the tree we have when the precedence // is the same or lower left->rvalue = 1; return (left); } ================================================ FILE: 43_More_Operators/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number int genlabel(void) { static int id = 1; return (id++); } // Generate the code for an IF statement // and an optional ELSE clause static int genIF(struct ASTnode *n, int looptoplabel, int loopendlabel) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. // When there is no ELSE clause, Lfalse _is_ // the ending label! Lfalse = genlabel(); if (n->right) Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. 
genAST(n->left, Lfalse, NOLABEL, NOLABEL, n->op); genfreeregs(); // Generate the true compound statement genAST(n->mid, NOLABEL, looptoplabel, loopendlabel, n->op); genfreeregs(); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); genfreeregs(); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = genlabel(); Lend = genlabel(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. genAST(n->left, Lend, Lstart, Lend, n->op); genfreeregs(); // Generate the compound statement for the body genAST(n->right, NOLABEL, Lstart, Lend, n->op); genfreeregs(); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Generate the code for a SWITCH statement static int genSWITCH(struct ASTnode *n) { int *caseval, *caselabel; int Ljumptop, Lend; int i, reg, defaultlabel = 0, casecount = 0; struct ASTnode *c; // Create arrays for the case values and associated labels. // Ensure that we have at least one position in each array. caseval = (int *) malloc((n->a_intvalue + 1) * sizeof(int)); caselabel = (int *) malloc((n->a_intvalue + 1) * sizeof(int)); // Generate labels for the top of the jump table, and the // end of the switch statement. Set a default label for // the end of the switch, in case we don't have a default. 
Ljumptop = genlabel(); Lend = genlabel(); defaultlabel = Lend; // Output the code to calculate the switch condition reg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, 0); cgjump(Ljumptop); genfreeregs(); // Walk the right-child linked list to // generate the code for each case for (i = 0, c = n->right; c != NULL; i++, c = c->right) { // Get a label for this case. Store it // and the case value in the arrays. // Record if it is the default case. caselabel[i] = genlabel(); caseval[i] = c->a_intvalue; cglabel(caselabel[i]); if (c->op == A_DEFAULT) defaultlabel = caselabel[i]; else casecount++; // Generate the case code. Pass in the end label for the breaks genAST(c->left, NOLABEL, NOLABEL, Lend, 0); genfreeregs(); } // Ensure the last case jumps past the switch table cgjump(Lend); // Now output the switch table and the end label. cgswitch(reg, casecount, Ljumptop, caselabel, caseval, defaultlabel); cglabel(Lend); return (NOREG); } // Generate the code to copy the arguments of a // function call to its parameters, then call the // function itself. Return the register that holds // the function's return value. static int gen_funccall(struct ASTnode *n) { struct ASTnode *gluetree = n->left; int reg; int numargs = 0; // If there is a list of arguments, walk this list // from the last argument (right-hand child) to the // first while (gluetree) { // Calculate the expression's value reg = genAST(gluetree->right, NOLABEL, NOLABEL, NOLABEL, gluetree->op); // Copy this into the n'th function parameter: size is 1, 2, 3, ... cgcopyarg(reg, gluetree->a_size); // Keep the first (highest) number of arguments if (numargs == 0) numargs = gluetree->a_size; genfreeregs(); gluetree = gluetree->left; } // Call the function, clean up the stack (based on numargs), // and return its result return (cgcall(n->sym, numargs)); } // Given an AST, an optional label, and the AST op // of the parent, generate assembly code recursively. // Return the register id with the tree's final value. 
// Parameters:
//   n            - the AST node to generate code for
//   iflabel      - label to jump to when a comparison/boolean test is
//                  false and the parent is an A_IF or A_WHILE
//   looptoplabel - label that A_CONTINUE jumps to
//   loopendlabel - label that A_BREAK jumps to
//   parentASTop  - the op of the parent AST node
int genAST(struct ASTnode *n, int iflabel, int looptoplabel,
           int loopendlabel, int parentASTop) {
  int leftreg, rightreg;

  // We have some specific AST node handling at the top
  // so that we don't evaluate the child sub-trees immediately
  switch (n->op) {
    case A_IF:
      return (genIF(n, looptoplabel, loopendlabel));
    case A_WHILE:
      return (genWHILE(n));
    case A_SWITCH:
      return (genSWITCH(n));
    case A_FUNCCALL:
      return (gen_funccall(n));
    case A_GLUE:
      // Do each child statement, and free the
      // registers after each child
      if (n->left != NULL)
        genAST(n->left, iflabel, looptoplabel, loopendlabel, n->op);
      genfreeregs();
      if (n->right != NULL)
        genAST(n->right, iflabel, looptoplabel, loopendlabel, n->op);
      genfreeregs();
      return (NOREG);
    case A_FUNCTION:
      // Generate the function's preamble before the code
      // in the child sub-tree
      cgfuncpreamble(n->sym);
      genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op);
      cgfuncpostamble(n->sym);
      return (NOREG);
  }

  // General AST node handling below

  // Get the left and right sub-tree values.
  // leftreg/rightreg stay unset when the matching child is missing.
  if (n->left)
    leftreg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op);
  if (n->right)
    rightreg = genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op);

  switch (n->op) {
    case A_ADD:
      return (cgadd(leftreg, rightreg));
    case A_SUBTRACT:
      return (cgsub(leftreg, rightreg));
    case A_MULTIPLY:
      return (cgmul(leftreg, rightreg));
    case A_DIVIDE:
      return (cgdiv(leftreg, rightreg));
    case A_AND:
      return (cgand(leftreg, rightreg));
    case A_OR:
      return (cgor(leftreg, rightreg));
    case A_XOR:
      return (cgxor(leftreg, rightreg));
    case A_LSHIFT:
      return (cgshl(leftreg, rightreg));
    case A_RSHIFT:
      return (cgshr(leftreg, rightreg));
    case A_EQ:
    case A_NE:
    case A_LT:
    case A_GT:
    case A_LE:
    case A_GE:
      // If the parent AST node is an A_IF or A_WHILE, generate
      // a compare followed by a jump. Otherwise, compare registers
      // and set one to 1 or 0 based on the comparison.
      if (parentASTop == A_IF || parentASTop == A_WHILE)
        return (cgcompare_and_jump(n->op, leftreg, rightreg, iflabel));
      else
        return (cgcompare_and_set(n->op, leftreg, rightreg));
    case A_INTLIT:
      return (cgloadint(n->a_intvalue, n->type));
    case A_STRLIT:
      return (cgloadglobstr(n->a_intvalue));
    case A_IDENT:
      // Load our value if we are an rvalue
      // or we are being dereferenced
      if (n->rvalue || parentASTop == A_DEREF) {
        if (n->sym->class == C_GLOBAL) {
          return (cgloadglob(n->sym, n->op));
        } else {
          return (cgloadlocal(n->sym, n->op));
        }
      } else
        return (NOREG);
    case A_ASPLUS:
    case A_ASMINUS:
    case A_ASSTAR:
    case A_ASSLASH:
    case A_ASSIGN:

      // For the '+=' and friends operators, generate suitable code
      // and get the register with the result. Then take the left child,
      // make it the right child so that we can fall into the assignment code.
      // Note that this rewrites n->right in the AST itself.
      switch (n->op) {
        case A_ASPLUS:
          leftreg= cgadd(leftreg, rightreg);
          n->right= n->left;
          break;
        case A_ASMINUS:
          leftreg= cgsub(leftreg, rightreg);
          n->right= n->left;
          break;
        case A_ASSTAR:
          leftreg= cgmul(leftreg, rightreg);
          n->right= n->left;
          break;
        case A_ASSLASH:
          leftreg= cgdiv(leftreg, rightreg);
          n->right= n->left;
          break;
      }

      // Now into the assignment code
      // Are we assigning to an identifier or through a pointer?
      switch (n->right->op) {
        case A_IDENT:
          if (n->right->sym->class == C_GLOBAL)
            return (cgstorglob(leftreg, n->right->sym));
          else
            return (cgstorlocal(leftreg, n->right->sym));
        case A_DEREF:
          return (cgstorderef(leftreg, rightreg, n->right->type));
        default:
          // NOTE(review): the value printed is n->op, not the
          // offending n->right->op - confirm which one is wanted.
          fatald("Can't A_ASSIGN in genAST(), op", n->op);
      }
    case A_WIDEN:
      // Widen the child's type to the parent's type
      return (cgwiden(leftreg, n->left->type, n->type));
    case A_RETURN:
      cgreturn(leftreg, Functionid);
      return (NOREG);
    case A_ADDR:
      return (cgaddress(n->sym));
    case A_DEREF:
      // If we are an rvalue, dereference to get the value we point at,
      // otherwise leave it for A_ASSIGN to store through the pointer
      if (n->rvalue)
        return (cgderef(leftreg, n->left->type));
      else
        return (leftreg);
    case A_SCALE:
      // Small optimisation: use shift if the
      // scale value is a known power of two
      switch (n->a_size) {
        case 2:
          return (cgshlconst(leftreg, 1));
        case 4:
          return (cgshlconst(leftreg, 2));
        case 8:
          return (cgshlconst(leftreg, 3));
        default:
          // Load a register with the size and
          // multiply the leftreg by this size
          rightreg = cgloadint(n->a_size, P_INT);
          return (cgmul(leftreg, rightreg));
      }
    case A_POSTINC:
    case A_POSTDEC:
      // Load the variable's value into a register; n->op is passed
      // to the loader so it can do the post increment/decrement
      if (n->sym->class == C_GLOBAL)
        return (cgloadglob(n->sym, n->op));
      else
        return (cgloadlocal(n->sym, n->op));
    case A_PREINC:
    case A_PREDEC:
      // Load the variable's value into a register; n->op is passed
      // to the loader so it can do the pre increment/decrement.
      // The variable is the left child here, not this node.
      if (n->left->sym->class == C_GLOBAL)
        return (cgloadglob(n->left->sym, n->op));
      else
        return (cgloadlocal(n->left->sym, n->op));
    case A_NEGATE:
      return (cgnegate(leftreg));
    case A_INVERT:
      return (cginvert(leftreg));
    case A_LOGNOT:
      return (cglognot(leftreg));
    case A_TOBOOL:
      // If the parent AST node is an A_IF or A_WHILE, generate
      // a compare followed by a jump. Otherwise, set the register
      // to 0 or 1 based on its zeroness or non-zeroness
      return (cgboolean(leftreg, parentASTop, iflabel));
    case A_BREAK:
      cgjump(loopendlabel);
      return (NOREG);
    case A_CONTINUE:
      cgjump(looptoplabel);
      return (NOREG);
    case A_CAST:
      return (leftreg);         // Not much to do
    default:
      fatald("Unknown AST operator", n->op);
  }
  return (NOREG);               // Keep -Wall happy
}

// The functions below are thin wrappers that forward
// to the code generator's cg*() functions.

// Output the assembly preamble
void genpreamble() {
  cgpreamble();
}

// Output the assembly postamble
void genpostamble() {
  cgpostamble();
}

// Release all of the registers
void genfreeregs() {
  freeall_registers();
}

// Generate the storage for a global symbol
void genglobsym(struct symtable *node) {
  cgglobsym(node);
}

// Generate a global string under a fresh
// label and return that label's number
int genglobstr(char *strvalue) {
  int l = genlabel();
  cgglobstr(l, strvalue);
  return (l);
}

// Return the size of a primitive type, as reported by cgprimsize()
int genprimsize(int type) {
  return (cgprimsize(type));
}

// Return the result of cgalign() for the given
// type, offset and direction
int genalign(int type, int offset, int direction) {
  return (cgalign(type, offset, direction));
}

================================================
FILE: 43_More_Operators/include/ctype.h
================================================
#ifndef _CTYPE_H_
# define _CTYPE_H_

#endif  // _CTYPE_H_

================================================
FILE: 43_More_Operators/include/fcntl.h
================================================
#ifndef _FCNTL_H_
# define _FCNTL_H_

// Access-mode flags for open()
#define O_RDONLY 00
#define O_WRONLY 01
#define O_RDWR 02

int open(char *pathname, int flags);

#endif  // _FCNTL_H_

================================================
FILE: 43_More_Operators/include/stddef.h
================================================
#ifndef _STDDEF_H_
# define _STDDEF_H_

#ifndef NULL
# define NULL (void *)0
#endif

typedef long size_t;

#endif  //_STDDEF_H_

================================================
FILE: 43_More_Operators/include/stdio.h
================================================
#ifndef _STDIO_H_
# define _STDIO_H_

// NOTE(review): the header name on the next line is missing in this
// copy; size_t is used below, so presumably <stddef.h> - confirm.
#include
#ifndef NULL
# define NULL (void *)0
#endif

// This FILE definition will do for now
typedef char * FILE;

FILE *fopen(char *pathname, char *mode);
size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream);
size_t fwrite(void *ptr, size_t size, size_t nmemb,
FILE *stream); int fclose(FILE *stream); int printf(char *format); int fprintf(FILE *stream, char *format); #endif // _STDIO_H_ ================================================ FILE: 43_More_Operators/include/stdlib.h ================================================ #ifndef _STDLIB_H_ # define _STDLIB_H_ #endif // _STDLIB_H_ ================================================ FILE: 43_More_Operators/include/string.h ================================================ #ifndef _STRING_H_ # define _STRING_H_ #endif // _STRING_H_ ================================================ FILE: 43_More_Operators/include/unistd.h ================================================ #ifndef _UNISTD_H_ # define _UNISTD_H_ #endif // _UNISTD_H_ ================================================ FILE: 43_More_Operators/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Given a string with a '.' and at least a 1-character suffix // after the '.', change the suffix to be the given character. // Return the new string or NULL if the original string could // not be modified char *alter_suffix(char *str, char suffix) { char *posn; char *newstr; // Clone the string if ((newstr = strdup(str)) == NULL) return (NULL); // Find the '.' if ((posn = strrchr(newstr, '.')) == NULL) return (NULL); // Ensure there is a suffix posn++; if (*posn == '\0') return (NULL); // Change the suffix and NUL-terminate the string *posn++ = suffix; *posn = '\0'; return (newstr); } // Given an input filename, compile that file // down to assembly code. 
Return the new file's name static char *do_compile(char *filename) { char cmd[TEXTLEN]; // Change the input file's suffix to .s Outfilename = alter_suffix(filename, 's'); if (Outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .c on the end\n", filename); exit(1); } // Generate the pre-processor command snprintf(cmd, TEXTLEN, "%s %s %s", CPPCMD, INCDIR, filename); // Open up the pre-processor pipe if ((Infile = popen(cmd, "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", filename, strerror(errno)); exit(1); } Infilename = filename; // Create the output file if ((Outfile = fopen(Outfilename, "w")) == NULL) { fprintf(stderr, "Unable to create %s: %s\n", Outfilename, strerror(errno)); exit(1); } Line = 1; // Reset the scanner Putback = '\n'; clear_symtable(); // Clear the symbol table if (O_verbose) printf("compiling %s\n", filename); scan(&Token); // Get the first token from the input genpreamble(); // Output the preamble global_declarations(); // Parse the global declarations genpostamble(); // Output the postamble fclose(Outfile); // Close the output file return (Outfilename); } // Given an input filename, assemble that file // down to object code. Return the object filename char *do_assemble(char *filename) { char cmd[TEXTLEN]; int err; char *outfilename = alter_suffix(filename, 'o'); if (outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .s on the end\n", filename); exit(1); } // Build the assembly command and run it snprintf(cmd, TEXTLEN, "%s %s %s", ASCMD, outfilename, filename); if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Assembly of %s failed\n", filename); exit(1); } return (outfilename); } // Given a list of object files and an output filename, // link all of the object filenames together. 
void do_link(char *outfilename, char *objlist[]) { int cnt, size = TEXTLEN; char cmd[TEXTLEN], *cptr; int err; // Start with the linker command and the output file cptr = cmd; cnt = snprintf(cptr, size, "%s %s ", LDCMD, outfilename); cptr += cnt; size -= cnt; // Now append each object file while (*objlist != NULL) { cnt = snprintf(cptr, size, "%s ", *objlist); cptr += cnt; size -= cnt; objlist++; } if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Linking failed\n"); exit(1); } } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s [-vcST] [-o outfile] file [file ...]\n", prog); fprintf(stderr, " -v give verbose output of the compilation stages\n"); fprintf(stderr, " -c generate object files but don't link them\n"); fprintf(stderr, " -S generate assembly files but don't link them\n"); fprintf(stderr, " -T dump the AST trees for each input file\n"); fprintf(stderr, " -o outfile, produce the outfile executable file\n"); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. 
enum { MAXOBJ = 100 }; int main(int argc, char *argv[]) { char *outfilename = AOUT; char *asmfile, *objfile; char *objlist[MAXOBJ]; int i, objcnt = 0; // Initialise our variables O_dumpAST = 0; O_keepasm = 0; O_assemble = 0; O_verbose = 0; O_dolink = 1; // Scan for command-line options for (i = 1; i < argc; i++) { // No leading '-', stop scanning for options if (*argv[i] != '-') break; // For each option in this argument for (int j = 1; (*argv[i] == '-') && argv[i][j]; j++) { switch (argv[i][j]) { case 'o': outfilename = argv[++i]; // Save & skip to next argument break; case 'T': O_dumpAST = 1; break; case 'c': O_assemble = 1; O_keepasm = 0; O_dolink = 0; break; case 'S': O_keepasm = 1; O_assemble = 0; O_dolink = 0; break; case 'v': O_verbose = 1; break; default: usage(argv[0]); } } } // Ensure we have at lease one input file argument if (i >= argc) usage(argv[0]); // Work on each input file in turn while (i < argc) { asmfile = do_compile(argv[i]); // Compile the source file if (O_dolink || O_assemble) { objfile = do_assemble(asmfile); // Assemble it to object forma if (objcnt == (MAXOBJ - 2)) { fprintf(stderr, "Too many object files for the compiler to handle\n"); exit(1); } objlist[objcnt++] = objfile; // Add the object file's name objlist[objcnt] = NULL; // to the list of object files } if (!O_keepasm) // Remove the assembly file if unlink(asmfile); // we don't need to keep it i++; } // Now link all the object files together if (O_dolink) { do_link(outfilename, objlist); // If we don't need to keep the object // files, then remove them if (!O_assemble) { for (i = 0; objlist[i] != NULL; i++) unlink(objlist[i]); } } return (0); } ================================================ FILE: 43_More_Operators/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" #include // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current token is t, // and fetch the next token. 
Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Match a comma and fetch the next token void comma(void) { match(T_COMMA, "comma"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d of %s\n", s, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d of %s\n", s1, s2, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d of %s\n", s, d, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d of %s\n", s, c, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } ================================================ FILE: 43_More_Operators/scan.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { char *p; p = strchr(s, c); return (p ? p - s : -1); } // Get the next character from the input file. 
static int next(void) { int c, l; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return (c); } c = fgetc(Infile); // Read from input file while (c == '#') { // We've hit a pre-processor statement scan(&Token); // Get the line number into l if (Token.token != T_INTLIT) fatals("Expecting pre-processor line number, got:", Text); l = Token.intvalue; scan(&Token); // Get the filename in Text if (Token.token != T_STRLIT) fatals("Expecting pre-processor file name, got:", Text); if (Text[0] != '<') { // If this is a real filename if (strcmp(Text, Infilename)) // and not the one we have now Infilename = strdup(Text); // save it. Then update the line num Line = l; } while ((c = fgetc(Infile)) != '\n'); // Skip to the end of the line c = fgetc(Infile); // and get the next character } if ('\n' == c) Line++; // Increment line count return (c); } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. static int skip(void) { int c; c = next(); while (' ' == c || '\t' == c || '\n' == c || '\r' == c || '\f' == c) { c = next(); } return (c); } // Return the next character from a character // or string literal static int scanch(void) { int c; // Get the next input character and interpret // metacharacters that start with a backslash c = next(); if (c == '\\') { switch (c = next()) { case 'a': return '\a'; case 'b': return '\b'; case 'f': return '\f'; case 'n': return '\n'; case 'r': return '\r'; case 't': return '\t'; case 'v': return '\v'; case '\\': return '\\'; case '"': return '"'; case '\'': return '\''; default: fatalc("unknown escape sequence", c); } } return (c); // Just an ordinary old character! } // Scan and return an integer literal // value from the input file. 
static int scanint(int c) { int k, val = 0; // Convert each character into an int value while ((k = chrpos("0123456789", c)) >= 0) { val = val * 10 + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan in a string literal from the input file, // and store it in buf[]. Return the length of // the string. static int scanstr(char *buf) { int i, c; // Loop while we have enough buffer space for (i = 0; i < TEXTLEN - 1; i++) { // Get the next char and append to buf // Return when we hit the ending double quote if ((c = scanch()) == '"') { buf[i] = 0; return (i); } buf[i] = c; } // Ran out of buf[] space fatal("String literal too long"); return (0); } // Scan an identifier from the input file and // store it in buf[]. Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { fatal("Identifier too long"); } else if (i < lim - 1) { buf[i++] = c; } c = next(); } // We hit a non-valid character, put it back. // NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. 
static int keyword(char *s) { switch (*s) { case 'b': if (!strcmp(s, "break")) return (T_BREAK); break; case 'c': if (!strcmp(s, "case")) return (T_CASE); if (!strcmp(s, "char")) return (T_CHAR); if (!strcmp(s, "continue")) return (T_CONTINUE); break; case 'd': if (!strcmp(s, "default")) return (T_DEFAULT); break; case 'e': if (!strcmp(s, "else")) return (T_ELSE); if (!strcmp(s, "enum")) return (T_ENUM); if (!strcmp(s, "extern")) return (T_EXTERN); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'l': if (!strcmp(s, "long")) return (T_LONG); break; case 'r': if (!strcmp(s, "return")) return (T_RETURN); break; case 's': if (!strcmp(s, "struct")) return (T_STRUCT); if (!strcmp(s, "switch")) return (T_SWITCH); break; case 't': if (!strcmp(s, "typedef")) return (T_TYPEDEF); break; case 'u': if (!strcmp(s, "union")) return (T_UNION); break; case 'v': if (!strcmp(s, "void")) return (T_VOID); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; } return (0); } // A pointer to a rejected token static struct token *Rejtoken = NULL; // Reject the token that we just scanned void reject_token(struct token *t) { if (Rejtoken != NULL) fatal("Can't reject token twice"); Rejtoken = t; } // List of token strings, for debugging purposes char *Tstring[] = { "EOF", "=", "+=", "-=", "*=", "/=", "||", "&&", "|", "^", "&", "==", "!=", ",", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", "default", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":" }; // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. 
int scan(struct token *t) { int c, tokentype; // If we have any rejected token, return it if (Rejtoken != NULL) { t = Rejtoken; Rejtoken = NULL; return (1); } // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': if ((c = next()) == '+') { t->token = T_INC; } else if (c == '=') { t->token = T_ASPLUS; } else { putback(c); t->token = T_PLUS; } break; case '-': if ((c = next()) == '-') { t->token = T_DEC; } else if (c == '>') { t->token = T_ARROW; } else if (c == '=') { t->token = T_ASMINUS; } else if (isdigit(c)) { // Negative int literal t->intvalue = -scanint(c); t->token = T_INTLIT; } else { putback(c); t->token = T_MINUS; } break; case '*': if ((c = next()) == '=') { t->token = T_ASSTAR; } else { putback(c); t->token = T_STAR; } break; case '/': if ((c = next()) == '=') { t->token = T_ASSLASH; } else { putback(c); t->token = T_SLASH; } break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case '[': t->token = T_LBRACKET; break; case ']': t->token = T_RBRACKET; break; case '~': t->token = T_INVERT; break; case '^': t->token = T_XOR; break; case ',': t->token = T_COMMA; break; case '.': t->token = T_DOT; break; case ':': t->token = T_COLON; break; case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { putback(c); t->token = T_LOGNOT; } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else if (c == '<') { t->token = T_LSHIFT; } else { putback(c); t->token = T_LT; } break; case '>': if ((c = next()) == '=') { t->token = T_GE; } else if (c == '>') { t->token = T_RSHIFT; } else { putback(c); t->token = T_GT; } break; case '&': if ((c = next()) == '&') { t->token = T_LOGAND; } else { putback(c); t->token = 
T_AMPER; } break; case '|': if ((c = next()) == '|') { t->token = T_LOGOR; } else { putback(c); t->token = T_OR; } break; case '\'': // If it's a quote, scan in the // literal character value and // the trailing quote t->intvalue = scanch(); t->token = T_INTLIT; if (next() != '\'') fatal("Expected '\\'' at end of char literal"); break; case '"': // Scan in a literal string scanstr(Text); t->token = T_STRLIT; break; default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if ((tokentype = keyword(Text)) != 0) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token t->tokstr = Tstring[t->token]; return (1); } ================================================ FILE: 43_More_Operators/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // Prototypes static struct ASTnode *single_statement(void); // compound_statement: // empty, i.e. 
no statement // | statement // | statement statements // ; // // statement: declaration // | expression_statement // | function_call // | if_statement // | while_statement // | for_statement // | return_statement // ; // if_statement: if_head // | if_head 'else' statement // ; // // if_head: 'if' '(' true_false_expression ')' statement ; // // Parse an IF statement including any // optional ELSE clause and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); rparen(); // Get the AST for the statement trueAST = single_statement(); // If we have an 'else', skip it // and get the AST for the statement if (Token.token == T_ELSE) { scan(&Token); falseAST = single_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, P_NONE, condAST, trueAST, falseAST, NULL, 0)); } // while_statement: 'while' '(' true_false_expression ')' statement ; // // Parse a WHILE statement and return its AST static struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); rparen(); // Get the AST for the statement. 
// Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Build and return the AST for this statement return (mkastnode(A_WHILE, P_NONE, condAST, NULL, bodyAST, NULL, 0)); } // for_statement: 'for' '(' expression_list ';' // true_false_expression ';' // expression_list ')' statement ; // // Parse a FOR statement and return its AST static struct ASTnode *for_statement(void) { struct ASTnode *condAST, *bodyAST; struct ASTnode *preopAST, *postopAST; struct ASTnode *tree; // Ensure we have 'for' '(' match(T_FOR, "for"); lparen(); // Get the pre_op expression and the ';' preopAST = expression_list(T_SEMI); semi(); // Get the condition and the ';'. // Force a non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); semi(); // Get the post_op expression and the ')' postopAST = expression_list(T_RPAREN); rparen(); // Get the statement which is the body // Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Glue the statement and the postop tree tree = mkastnode(A_GLUE, P_NONE, bodyAST, NULL, postopAST, NULL, 0); // Make a WHILE loop with the condition and this new body tree = mkastnode(A_WHILE, P_NONE, condAST, NULL, tree, NULL, 0); // And glue the preop tree to the A_WHILE tree return (mkastnode(A_GLUE, P_NONE, preopAST, NULL, tree, NULL, 0)); } // return_statement: 'return' '(' expression ')' ; // // Parse a return statement and return its AST static struct ASTnode *return_statement(void) { struct ASTnode *tree; // Can't return a value if function returns P_VOID if (Functionid->type == P_VOID) fatal("Can't return from a void function"); // Ensure we have 'return' '(' match(T_RETURN, "return"); lparen(); // Parse the following expression tree = binexpr(0); // Ensure this is compatible with the function's type tree = modify_type(tree, 
Functionid->type, 0); if (tree == NULL) fatal("Incompatible type to return"); // Add on the A_RETURN node tree = mkastunary(A_RETURN, P_NONE, tree, NULL, 0); // Get the ')' and ';' rparen(); semi(); return (tree); } // break_statement: 'break' ; // // Parse a break statement and return its AST static struct ASTnode *break_statement(void) { if (Looplevel == 0 && Switchlevel == 0) fatal("no loop or switch to break out from"); scan(&Token); semi(); return (mkastleaf(A_BREAK, 0, NULL, 0)); } // continue_statement: 'continue' ; // // Parse a continue statement and return its AST static struct ASTnode *continue_statement(void) { if (Looplevel == 0) fatal("no loop to continue to"); scan(&Token); semi(); return (mkastleaf(A_CONTINUE, 0, NULL, 0)); } // Parse a switch statement and return its AST static struct ASTnode *switch_statement(void) { struct ASTnode *left, *n, *c, *casetree= NULL, *casetail; int inloop=1, casecount=0; int seendefault=0; int ASTop, casevalue; // Skip the 'switch' and '(' scan(&Token); lparen(); // Get the switch expression, the ')' and the '{' left= binexpr(0); rparen(); lbrace(); // Ensure that this is of int type if (!inttype(left->type)) fatal("Switch expression is not of integer type"); // Build an A_SWITCH subtree with the expression as // the child n= mkastunary(A_SWITCH, 0, left, NULL, 0); // Now parse the cases Switchlevel++; while (inloop) { switch(Token.token) { // Leave the loop when we hit a '}' case T_RBRACE: if (casecount==0) fatal("No cases in switch"); inloop=0; break; case T_CASE: case T_DEFAULT: // Ensure this isn't after a previous 'default' if (seendefault) fatal("case or default after existing default"); // Set the AST operation. 
Scan the case value if required if (Token.token==T_DEFAULT) { ASTop= A_DEFAULT; seendefault= 1; scan(&Token); } else { ASTop= A_CASE; scan(&Token); left= binexpr(0); // Ensure the case value is an integer literal if (left->op != A_INTLIT) fatal("Expecting integer literal for case value"); casevalue= left->a_intvalue; // Walk the list of existing case values to ensure // that there isn't a duplicate case value for (c= casetree; c != NULL; c= c -> right) if (casevalue == c->a_intvalue) fatal("Duplicate case value"); } // Scan the ':' and get the compound expression match(T_COLON, ":"); left= compound_statement(1); casecount++; // Build a sub-tree with the compound statement as the left child // and link it in to the growing A_CASE tree if (casetree==NULL) { casetree= casetail= mkastunary(ASTop, 0, left, NULL, casevalue); } else { casetail->right= mkastunary(ASTop, 0, left, NULL, casevalue); casetail= casetail->right; } break; default: fatals("Unexpected token in switch", Token.tokstr); } } Switchlevel--; // We have a sub-tree with the cases and any default. Put the // case count into the A_SWITCH node and attach the case tree. n->a_intvalue= casecount; n->right= casetree; rbrace(); return(n); } // Parse a single statement and return its AST. static struct ASTnode *single_statement(void) { struct ASTnode *stmt; struct symtable *ctype; switch (Token.token) { case T_LBRACE: // We have a '{', so this is a compound statement lbrace(); stmt = compound_statement(0); rbrace(); return(stmt); case T_IDENT: // We have to see if the identifier matches a typedef. // If not, treat it as an expression. // Otherwise, fall down to the parse_type() call. if (findtypedef(Text) == NULL) { stmt= binexpr(0); semi(); return(stmt); } case T_CHAR: case T_INT: case T_LONG: case T_STRUCT: case T_UNION: case T_ENUM: case T_TYPEDEF: // The beginning of a variable declaration list. 
declaration_list(&ctype, C_LOCAL, T_SEMI, T_EOF, &stmt); semi(); return (stmt); // Any assignments from the declarations case T_IF: return (if_statement()); case T_WHILE: return (while_statement()); case T_FOR: return (for_statement()); case T_RETURN: return (return_statement()); case T_BREAK: return (break_statement()); case T_CONTINUE: return (continue_statement()); case T_SWITCH: return (switch_statement()); default: // For now, see if this is an expression. // This catches assignment statements. stmt= binexpr(0); semi(); return(stmt); } return (NULL); // Keep -Wall happy } // Parse a compound statement // and return its AST. If inswitch is true, // we look for a '}', 'case' or 'default' token // to end the parsing. Otherwise, look for // just a '}' to end the parsing. struct ASTnode *compound_statement(int inswitch) { struct ASTnode *left = NULL; struct ASTnode *tree; while (1) { // Parse a single statement tree = single_statement(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, P_NONE, left, NULL, tree, NULL, 0); } // Leave if we've hit the end token if (Token.token == T_RBRACE) return(left); if (inswitch && (Token.token == T_CASE || Token.token == T_DEFAULT)) return(left); } } ================================================ FILE: 43_More_Operators/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 // Append a node to the singly-linked list pointed to by head or tail void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node) { // Check for valid pointers if (head == NULL || tail == NULL || node == NULL) fatal("Either head, tail or node is NULL in appendsym"); // Append to the list if (*tail) { (*tail)->next = node; *tail = node; } else *head = *tail = node; 
node->next = NULL; } // Create a symbol node to be added to a symbol table list. // Set up the node's: // + type: char, int etc. // + ctype: composite type pointer for struct/union // + structural type: var, function, array etc. // + size: number of elements, or endlabel: end label for a function // + posn: Position information for local symbols // Return a pointer to the new node struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn) { // Get a new node struct symtable *node = (struct symtable *) malloc(sizeof(struct symtable)); if (node == NULL) fatal("Unable to malloc a symbol table node in newsym"); // Fill in the values if (name == NULL) node->name = NULL; else node->name = strdup(name); node->type = type; node->ctype = ctype; node->stype = stype; node->class = class; node->nelems = nelems; // For pointers and integer types, set the size // of the symbol. structs and union declarations // manually set this up themselves. if (ptrtype(type) || inttype(type)) node->size = nelems * typesize(type, ctype); node->st_posn = posn; node->next = NULL; node->member = NULL; node->initlist = NULL; return (node); } // Add a symbol to the global symbol list struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn) { struct symtable *sym = newsym(name, type, ctype, stype, class, nelems, posn); // For structs and unions, copy the size from the type node if (type== P_STRUCT || type== P_UNION) sym->size = ctype->size; appendsym(&Globhead, &Globtail, sym); return (sym); } // Add a symbol to the local symbol list struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int nelems) { struct symtable *sym = newsym(name, type, ctype, stype, C_LOCAL, nelems, 0); // For structs and unions, copy the size from the type node if (type== P_STRUCT || type== P_UNION) sym->size = ctype->size; appendsym(&Loclhead, &Locltail, sym); return (sym); } // Add a 
symbol to the parameter list struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype) { struct symtable *sym = newsym(name, type, ctype, stype, C_PARAM, 1, 0); appendsym(&Parmhead, &Parmtail, sym); return (sym); } // Add a symbol to the temporary member list struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int nelems) { struct symtable *sym = newsym(name, type, ctype, stype, C_MEMBER, nelems, 0); // For structs and unions, copy the size from the type node if (type== P_STRUCT || type== P_UNION) sym->size = ctype->size; appendsym(&Membhead, &Membtail, sym); return (sym); } // Add a struct to the struct list struct symtable *addstruct(char *name) { struct symtable *sym = newsym(name, P_STRUCT, NULL, 0, C_STRUCT, 0, 0); appendsym(&Structhead, &Structtail, sym); return (sym); } // Add a struct to the union list struct symtable *addunion(char *name) { struct symtable *sym = newsym(name, P_UNION, NULL, 0, C_UNION, 0, 0); appendsym(&Unionhead, &Uniontail, sym); return (sym); } // Add an enum type or value to the enum list. // Class is C_ENUMTYPE or C_ENUMVAL. // Use posn to store the int value. struct symtable *addenum(char *name, int class, int value) { struct symtable *sym = newsym(name, P_INT, NULL, 0, class, 0, value); appendsym(&Enumhead, &Enumtail, sym); return (sym); } // Add a typedef to the typedef list struct symtable *addtypedef(char *name, int type, struct symtable *ctype) { struct symtable *sym = newsym(name, type, ctype, 0, C_TYPEDEF, 0, 0); appendsym(&Typehead, &Typetail, sym); return (sym); } // Search for a symbol in a specific list. // Return a pointer to the found node or NULL if not found. 
// If class is not zero, also match on the given class static struct symtable *findsyminlist(char *s, struct symtable *list, int class) { for (; list != NULL; list = list->next) if ((list->name != NULL) && !strcmp(s, list->name)) if (class==0 || class== list->class) return (list); return (NULL); } // Determine if the symbol s is in the global symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findglob(char *s) { return (findsyminlist(s, Globhead, 0)); } // Determine if the symbol s is in the local symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findlocl(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } return (findsyminlist(s, Loclhead, 0)); } // Determine if the symbol s is in the symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findsymbol(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } // Otherwise, try the local and global symbol lists node = findsyminlist(s, Loclhead, 0); if (node) return (node); return (findsyminlist(s, Globhead, 0)); } // Find a member in the member list // Return a pointer to the found node or NULL if not found. struct symtable *findmember(char *s) { return (findsyminlist(s, Membhead, 0)); } // Find a struct in the struct list // Return a pointer to the found node or NULL if not found. struct symtable *findstruct(char *s) { return (findsyminlist(s, Structhead, 0)); } // Find a struct in the union list // Return a pointer to the found node or NULL if not found. struct symtable *findunion(char *s) { return (findsyminlist(s, Unionhead, 0)); } // Find an enum type in the enum list // Return a pointer to the found node or NULL if not found. 
struct symtable *findenumtype(char *s) { return (findsyminlist(s, Enumhead, C_ENUMTYPE)); } // Find an enum value in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumval(char *s) { return (findsyminlist(s, Enumhead, C_ENUMVAL)); } // Find a type in the tyedef list // Return a pointer to the found node or NULL if not found. struct symtable *findtypedef(char *s) { return (findsyminlist(s, Typehead, 0)); } // Reset the contents of the symbol table void clear_symtable(void) { Globhead = Globtail = NULL; Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Membhead = Membtail = NULL; Structhead = Structtail = NULL; Unionhead = Uniontail = NULL; Enumhead = Enumtail = NULL; Typehead = Typetail = NULL; } // Clear all the entries in the local symbol table void freeloclsyms(void) { Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Functionid = NULL; } ================================================ FILE: 43_More_Operators/tests/err.input031.c ================================================ Expecting a primary expression, got token:+ on line 5 of input031.c ================================================ FILE: 43_More_Operators/tests/err.input032.c ================================================ Unknown variable:cow on line 4 of input032.c ================================================ FILE: 43_More_Operators/tests/err.input033.c ================================================ Incompatible type to return on line 4 of input033.c ================================================ FILE: 43_More_Operators/tests/err.input034.c ================================================ For now, declaration of non-global arrays is not implemented on line 4 of input034.c ================================================ FILE: 43_More_Operators/tests/err.input035.c ================================================ Duplicate local variable declaration:a on line 4 of input035.c ================================================ FILE: 
43_More_Operators/tests/err.input036.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input036.c ================================================ FILE: 43_More_Operators/tests/err.input037.c ================================================ Expected:comma on line 3 of input037.c ================================================ FILE: 43_More_Operators/tests/err.input038.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input038.c ================================================ FILE: 43_More_Operators/tests/err.input039.c ================================================ No statements in function with non-void type on line 4 of input039.c ================================================ FILE: 43_More_Operators/tests/err.input040.c ================================================ No return for function with non-void type on line 4 of input040.c ================================================ FILE: 43_More_Operators/tests/err.input041.c ================================================ Can't return from a void function on line 3 of input041.c ================================================ FILE: 43_More_Operators/tests/err.input042.c ================================================ Undeclared function:fred on line 3 of input042.c ================================================ FILE: 43_More_Operators/tests/err.input043.c ================================================ Undeclared array:b on line 3 of input043.c ================================================ FILE: 43_More_Operators/tests/err.input044.c ================================================ Unknown variable:z on line 3 of input044.c ================================================ FILE: 43_More_Operators/tests/err.input045.c ================================================ & operator must be followed by an identifier on line 3 of input045.c ================================================ 
FILE: 43_More_Operators/tests/err.input046.c ================================================ * operator must be followed by an identifier or * on line 3 of input046.c ================================================ FILE: 43_More_Operators/tests/err.input047.c ================================================ ++ operator must be followed by an identifier on line 3 of input047.c ================================================ FILE: 43_More_Operators/tests/err.input048.c ================================================ -- operator must be followed by an identifier on line 3 of input048.c ================================================ FILE: 43_More_Operators/tests/err.input049.c ================================================ Incompatible expression in assignment on line 6 of input049.c ================================================ FILE: 43_More_Operators/tests/err.input050.c ================================================ Incompatible types in binary expression on line 6 of input050.c ================================================ FILE: 43_More_Operators/tests/err.input051.c ================================================ Expected '\'' at end of char literal on line 4 of input051.c ================================================ FILE: 43_More_Operators/tests/err.input052.c ================================================ Unrecognised character:$ on line 5 of input052.c ================================================ FILE: 43_More_Operators/tests/err.input056.c ================================================ unknown struct/union type:var1 on line 2 of input056.c ================================================ FILE: 43_More_Operators/tests/err.input057.c ================================================ previously defined struct/union:fred on line 2 of input057.c ================================================ FILE: 43_More_Operators/tests/err.input059.c ================================================ Undeclared variable:y on line 3 of input059.c 
================================================ FILE: 43_More_Operators/tests/err.input060.c ================================================ Undeclared variable:x on line 3 of input060.c ================================================ FILE: 43_More_Operators/tests/err.input061.c ================================================ Undeclared variable:x on line 3 of input061.c ================================================ FILE: 43_More_Operators/tests/err.input064.c ================================================ undeclared enum type::fred on line 1 of input064.c ================================================ FILE: 43_More_Operators/tests/err.input065.c ================================================ enum type redeclared::fred on line 2 of input065.c ================================================ FILE: 43_More_Operators/tests/err.input066.c ================================================ enum value redeclared::z on line 2 of input066.c ================================================ FILE: 43_More_Operators/tests/err.input068.c ================================================ redefinition of typedef:FOO on line 2 of input068.c ================================================ FILE: 43_More_Operators/tests/err.input069.c ================================================ unknown type:FLOO on line 2 of input069.c ================================================ FILE: 43_More_Operators/tests/err.input072.c ================================================ no loop or switch to break out from on line 1 of input072.c ================================================ FILE: 43_More_Operators/tests/err.input073.c ================================================ no loop to continue to on line 1 of input073.c ================================================ FILE: 43_More_Operators/tests/err.input075.c ================================================ Unexpected token in switch:if on line 4 of input075.c ================================================ FILE: 
43_More_Operators/tests/err.input076.c ================================================ No cases in switch on line 3 of input076.c ================================================ FILE: 43_More_Operators/tests/err.input077.c ================================================ case or default after existing default on line 6 of input077.c ================================================ FILE: 43_More_Operators/tests/err.input078.c ================================================ case or default after existing default on line 6 of input078.c ================================================ FILE: 43_More_Operators/tests/err.input079.c ================================================ Duplicate case value on line 6 of input079.c ================================================ FILE: 43_More_Operators/tests/err.input085.c ================================================ Bad type in parameter list on line 1 of input085.c ================================================ FILE: 43_More_Operators/tests/err.input086.c ================================================ Function definition not at global level on line 3 of input086.c ================================================ FILE: 43_More_Operators/tests/err.input087.c ================================================ Bad type in member list on line 4 of input087.c ================================================ FILE: 43_More_Operators/tests/err.input092.c ================================================ Integer literal value too big for char type on line 1 of input092.c ================================================ FILE: 43_More_Operators/tests/err.input093.c ================================================ Expecting an integer literal value on line 1 of input093.c ================================================ FILE: 43_More_Operators/tests/err.input094.c ================================================ Type mismatch: integer literal vs. 
variable on line 1 of input094.c ================================================ FILE: 43_More_Operators/tests/err.input095.c ================================================ Variable can not be initialised:x on line 1 of input095.c ================================================ FILE: 43_More_Operators/tests/err.input096.c ================================================ Array size is illegal:0 on line 1 of input096.c ================================================ FILE: 43_More_Operators/tests/err.input097.c ================================================ For now, declaration of non-global arrays is not implemented on line 2 of input097.c ================================================ FILE: 43_More_Operators/tests/err.input098.c ================================================ Too many values in initialisation list on line 1 of input098.c ================================================ FILE: 43_More_Operators/tests/err.input102.c ================================================ Cannot cast to a struct, union or void type on line 3 of input102.c ================================================ FILE: 43_More_Operators/tests/err.input103.c ================================================ Cannot cast to a struct, union or void type on line 3 of input103.c ================================================ FILE: 43_More_Operators/tests/err.input104.c ================================================ Cannot cast to a struct, union or void type on line 2 of input104.c ================================================ FILE: 43_More_Operators/tests/err.input105.c ================================================ Incompatible expression in assignment on line 4 of input105.c ================================================ FILE: 43_More_Operators/tests/input001.c ================================================ int printf(char *fmt); void main() { printf("%d\n", 12 * 3); printf("%d\n", 18 - 2 * 4); printf("%d\n", 1 + 2 + 9 - 5/2 + 3*5); } 
================================================ FILE: 43_More_Operators/tests/input002.c ================================================ int printf(char *fmt); void main() { int fred; int jim; fred= 5; jim= 12; printf("%d\n", fred + jim); } ================================================ FILE: 43_More_Operators/tests/input003.c ================================================ int printf(char *fmt); void main() { int x; x= 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); } ================================================ FILE: 43_More_Operators/tests/input004.c ================================================ int printf(char *fmt); void main() { int x; x= 7 < 9; printf("%d\n", x); x= 7 <= 9; printf("%d\n", x); x= 7 != 9; printf("%d\n", x); x= 7 == 7; printf("%d\n", x); x= 7 >= 7; printf("%d\n", x); x= 7 <= 7; printf("%d\n", x); x= 9 > 7; printf("%d\n", x); x= 9 >= 7; printf("%d\n", x); x= 9 != 7; printf("%d\n", x); } ================================================ FILE: 43_More_Operators/tests/input005.c ================================================ int printf(char *fmt); void main() { int i; int j; i=6; j=12; if (i < j) { printf("%d\n", i); } else { printf("%d\n", j); } } ================================================ FILE: 43_More_Operators/tests/input006.c ================================================ int printf(char *fmt); void main() { int i; i=1; while (i <= 10) { printf("%d\n", i); i= i + 1; } } ================================================ FILE: 43_More_Operators/tests/input007.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 43_More_Operators/tests/input008.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } 
} ================================================ FILE: 43_More_Operators/tests/input009.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { printf("%d\n", 2 * b - a); } } ================================================ FILE: 43_More_Operators/tests/input010.c ================================================ int printf(char *fmt); void main() { int i; char j; j= 20; printf("%d\n", j); i= 10; printf("%d\n", i); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 2; j= j + 1) { printf("%d\n", j); } } ================================================ FILE: 43_More_Operators/tests/input011.c ================================================ int printf(char *fmt); int main() { int i; char j; long k; i= 10; printf("%d\n", i); j= 20; printf("%d\n", j); k= 30; printf("%d\n", k); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 4; j= j + 1) { printf("%d\n", j); } for (k= 1; k <= 5; k= k + 1) { printf("%d\n", k); } return(i); printf("%d\n", 12345); return(3); } ================================================ FILE: 43_More_Operators/tests/input012.c ================================================ int printf(char *fmt); int fred() { return(5); } void main() { int x; x= fred(2); printf("%d\n", x); } ================================================ FILE: 43_More_Operators/tests/input013.c ================================================ int printf(char *fmt); int fred() { return(56); } void main() { int dummy; int result; dummy= printf("%d\n", 23); result= fred(10); dummy= printf("%d\n", result); } ================================================ FILE: 43_More_Operators/tests/input014.c ================================================ int printf(char *fmt); int fred() { return(20); } int main() { int result; printf("%d\n", 10); result= fred(15); printf("%d\n", result); printf("%d\n", 
fred(15)+10); return(0); } ================================================ FILE: 43_More_Operators/tests/input015.c ================================================ int printf(char *fmt); int main() { char a; char *b; char c; int d; int *e; int f; a= 18; printf("%d\n", a); b= &a; c= *b; printf("%d\n", c); d= 12; printf("%d\n", d); e= &d; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 43_More_Operators/tests/input016.c ================================================ int printf(char *fmt); int c; int d; int *e; int f; int main() { c= 12; d=18; printf("%d\n", c); e= &c + 1; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 43_More_Operators/tests/input017.c ================================================ int printf(char *fmt); int main() { char a; char *b; int d; int *e; b= &a; *b= 19; printf("%d\n", a); e= &d; *e= 12; printf("%d\n", d); return(0); } ================================================ FILE: 43_More_Operators/tests/input018.c ================================================ int printf(char *fmt); int main() { int a; int b; a= b= 34; printf("%d\n", a); printf("%d\n", b); return(0); } ================================================ FILE: 43_More_Operators/tests/input018a.c ================================================ int printf(char *fmt); int a; int *b; char c; char *d; int main() { b= &a; *b= 15; printf("%d\n", a); d= &c; *d= 16; printf("%d\n", c); return(0); } ================================================ FILE: 43_More_Operators/tests/input019.c ================================================ int printf(char *fmt); int a; int b; int c; int d; int e; int main() { a= 2; b= 4; c= 3; d= 2; e= (a+b) * (c+d); printf("%d\n", e); return(0); } ================================================ FILE: 43_More_Operators/tests/input020.c ================================================ int printf(char *fmt); int a; int b[25]; int main() { b[3]= 12; a= b[3]; 
printf("%d\n", a); return(0); } ================================================ FILE: 43_More_Operators/tests/input021.c ================================================ int printf(char *fmt); char c; char *str; int main() { c= '\n'; printf("%d\n", c); for (str= "Hello world\n"; *str != 0; str= str + 1) { printf("%c", *str); } return(0); } ================================================ FILE: 43_More_Operators/tests/input022.c ================================================ int printf(char *fmt); char a; char b; char c; int d; int e; int f; long g; long h; long i; int main() { b= 5; c= 7; a= b + c++; printf("%d\n", a); e= 5; f= 7; d= e + f++; printf("%d\n", d); h= 5; i= 7; g= h + i++; printf("%d\n", g); a= b-- + c; printf("%d\n", a); d= e-- + f; printf("%d\n", d); g= h-- + i; printf("%d\n", g); a= ++b + c; printf("%d\n", a); d= ++e + f; printf("%d\n", d); g= ++h + i; printf("%d\n", g); a= b * --c; printf("%d\n", a); d= e * --f; printf("%d\n", d); g= h * --i; printf("%d\n", g); return(0); } ================================================ FILE: 43_More_Operators/tests/input023.c ================================================ int printf(char *fmt); char *str; int x; int main() { x= -23; printf("%d\n", x); printf("%d\n", -10 * -10); x= 1; x= ~x; printf("%d\n", x); x= 2 > 5; printf("%d\n", x); x= !x; printf("%d\n", x); x= !x; printf("%d\n", x); x= 13; if (x) { printf("%d\n", 13); } x= 0; if (!x) { printf("%d\n", 14); } for (str= "Hello world\n"; *str; str++) { printf("%c", *str); } return(0); } ================================================ FILE: 43_More_Operators/tests/input024.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { a= 42; b= 19; printf("%d\n", a & b); printf("%d\n", a | b); printf("%d\n", a ^ b); printf("%d\n", 1 << 3); printf("%d\n", 63 >> 3); return(0); } ================================================ FILE: 43_More_Operators/tests/input025.c 
================================================ int printf(char *fmt); int a; int b; int c; int main() { char z; int y; int x; x= 10; y= 20; z= 30; printf("%d\n", x); printf("%d\n", y); printf("%d\n", z); a= 5; b= 15; c= 25; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); return(0); } ================================================ FILE: 43_More_Operators/tests/input026.c ================================================ int printf(char *fmt); int main(int a, char b, long c, int d, int e, int f, int g, int h) { int i; int j; int k; a= 13; printf("%d\n", a); b= 23; printf("%d\n", b); c= 34; printf("%d\n", c); d= 44; printf("%d\n", d); e= 54; printf("%d\n", e); f= 64; printf("%d\n", f); g= 74; printf("%d\n", g); h= 84; printf("%d\n", h); i= 94; printf("%d\n", i); j= 95; printf("%d\n", j); k= 96; printf("%d\n", k); return(0); } ================================================ FILE: 43_More_Operators/tests/input027.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int param5(int a, int b, int c, int d, int e) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param2(int a, int b) { int c; int d; int e; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param0() { int a; int b; int c; int d; int e; a= 1; b= 2; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int main() { param8(1,2,3,4,5,6,7,8); param5(1,2,3,4,5); param2(1,2); param0(); return(0); } ================================================ FILE: 43_More_Operators/tests/input028.c ================================================ int 
printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 43_More_Operators/tests/input029.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h); int fred(int a, int b, int c); int main(); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 43_More_Operators/tests/input030.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input030.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 43_More_Operators/tests/input031.c ================================================ int printf(char *fmt); int main() { int x; x= 2 + + 3 - * / ; } ================================================ FILE: 43_More_Operators/tests/input032.c ================================================ int printf(char *fmt); int main() { pizza cow llama sausage; } 
================================================ FILE: 43_More_Operators/tests/input033.c ================================================ int printf(char *fmt); int main() { char *z; return(z); } ================================================ FILE: 43_More_Operators/tests/input034.c ================================================ int printf(char *fmt); int main() { int a[12]; return(0); } ================================================ FILE: 43_More_Operators/tests/input035.c ================================================ int printf(char *fmt); int fred(int a, int b) { int a; return(a); } ================================================ FILE: 43_More_Operators/tests/input036.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c); ================================================ FILE: 43_More_Operators/tests/input037.c ================================================ int printf(char *fmt); int fred(int a, char b +, int z); ================================================ FILE: 43_More_Operators/tests/input038.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c, int g); ================================================ FILE: 43_More_Operators/tests/input039.c ================================================ int printf(char *fmt); int main() { int a; } ================================================ FILE: 43_More_Operators/tests/input040.c ================================================ int printf(char *fmt); int main() { int a; a= 5; } ================================================ FILE: 43_More_Operators/tests/input041.c ================================================ int printf(char *fmt); void fred() { return(5); } ================================================ FILE: 43_More_Operators/tests/input042.c ================================================ int printf(char *fmt); int main() { 
fred(5); } ================================================ FILE: 43_More_Operators/tests/input043.c ================================================ int printf(char *fmt); int main() { int a; a= b[4]; } ================================================ FILE: 43_More_Operators/tests/input044.c ================================================ int printf(char *fmt); int main() { int a; a= z; } ================================================ FILE: 43_More_Operators/tests/input045.c ================================================ int printf(char *fmt); int main() { int a; a= &5; } ================================================ FILE: 43_More_Operators/tests/input046.c ================================================ int printf(char *fmt); int main() { int a; a= *5; } ================================================ FILE: 43_More_Operators/tests/input047.c ================================================ int printf(char *fmt); int main() { int a; a= ++5; } ================================================ FILE: 43_More_Operators/tests/input048.c ================================================ int printf(char *fmt); int main() { int a; a= --5; } ================================================ FILE: 43_More_Operators/tests/input049.c ================================================ int printf(char *fmt); int main() { int x; char y; y= x; } ================================================ FILE: 43_More_Operators/tests/input050.c ================================================ int printf(char *fmt); int main() { char *a; char *b; a= a + b; } ================================================ FILE: 43_More_Operators/tests/input051.c ================================================ int printf(char *fmt); int main() { char a; a= 'fred'; } ================================================ FILE: 43_More_Operators/tests/input052.c ================================================ int printf(char *fmt); int main() { int a; a= $5.00; } 
================================================ FILE: 43_More_Operators/tests/input053.c ================================================ int printf(char *fmt); int main() { printf("Hello world, %d\n", 23); return(0); } ================================================ FILE: 43_More_Operators/tests/input054.c ================================================ int printf(char *fmt); int main() { int i; for (i=0; i < 20; i++) { printf("Hello world, %d\n", i); } return(0); } ================================================ FILE: 43_More_Operators/tests/input055.c ================================================ int printf(char *fmt); int main(int argc, char **argv) { int i; char *argument; printf("Hello world\n"); for (i=0; i < argc; i++) { argument= *argv; argv= argv + 1; printf("Argument %d is %s\n", i, argument); } return(0); } ================================================ FILE: 43_More_Operators/tests/input056.c ================================================ struct foo { int x; }; struct mary var1; ================================================ FILE: 43_More_Operators/tests/input057.c ================================================ struct fred { int x; } ; struct fred { char y; } ; ================================================ FILE: 43_More_Operators/tests/input058.c ================================================ int printf(char *fmt); struct fred { int x; char y; long z; }; struct fred var2; struct fred *varptr; int main() { long result; var2.x= 12; printf("%d\n", var2.x); var2.y= 'c'; printf("%d\n", var2.y); var2.z= 4005; printf("%d\n", var2.z); result= var2.x + var2.y + var2.z; printf("%d\n", result); varptr= &var2; result= varptr->x + varptr->y + varptr->z; printf("%d\n", result); return(0); } ================================================ FILE: 43_More_Operators/tests/input059.c ================================================ int main() { int x; x= y.foo; } ================================================ FILE: 43_More_Operators/tests/input060.c 
================================================ int main() { int x; x= x.foo; } ================================================ FILE: 43_More_Operators/tests/input061.c ================================================ int main() { int x; x= x->foo; } ================================================ FILE: 43_More_Operators/tests/input062.c ================================================ int printf(char *fmt); union fred { char w; int x; int y; long z; }; union fred var1; union fred *varptr; int main() { var1.x= 65; printf("%d\n", var1.x); var1.x= 66; printf("%d\n", var1.x); printf("%d\n", var1.y); printf("The next two depend on the endian of the platform\n"); printf("%d\n", var1.w); printf("%d\n", var1.z); varptr= &var1; varptr->x= 67; printf("%d\n", varptr->x); printf("%d\n", varptr->y); return(0); } ================================================ FILE: 43_More_Operators/tests/input063.c ================================================ int printf(char *fmt); enum fred { apple=1, banana, carrot, pear=10, peach, mango, papaya }; enum jane { aple=1, bnana, crrot, par=10, pech, mago, paaya }; enum fred var1; enum jane var2; enum fred var3; int main() { var1= carrot + pear + mango; printf("%d\n", var1); return(0); } ================================================ FILE: 43_More_Operators/tests/input064.c ================================================ enum fred var3; ================================================ FILE: 43_More_Operators/tests/input065.c ================================================ enum fred { x, y, z }; enum fred { a, b }; ================================================ FILE: 43_More_Operators/tests/input066.c ================================================ enum fred { x, y, z }; enum mary { a, b, z }; ================================================ FILE: 43_More_Operators/tests/input067.c ================================================ int printf(char *fmt); typedef int FOO; FOO var1; struct bar { int x; int y} ; typedef struct bar BAR; 
BAR var2; int main() { var1= 5; printf("%d\n", var1); var2.x= 7; var2.y= 10; printf("%d\n", var2.x + var2.y); return(0); } ================================================ FILE: 43_More_Operators/tests/input068.c ================================================ typedef int FOO; typedef char FOO; ================================================ FILE: 43_More_Operators/tests/input069.c ================================================ typedef int FOO; FLOO y; ================================================ FILE: 43_More_Operators/tests/input070.c ================================================ #include typedef int FOO; int main() { FOO x; x= 56; printf("%d\n", x); return(0); } ================================================ FILE: 43_More_Operators/tests/input071.c ================================================ #include int main() { int x; x = 0; while (x < 100) { if (x == 5) { x = x + 2; continue; } printf("%d\n", x); if (x == 14) { break; } x = x + 1; } printf("Done\n"); return (0); } ================================================ FILE: 43_More_Operators/tests/input072.c ================================================ int main() { break; } ================================================ FILE: 43_More_Operators/tests/input073.c ================================================ int main() { continue; } ================================================ FILE: 43_More_Operators/tests/input074.c ================================================ #include int main() { int x; int y; y= 0; for (x=0; x < 5; x++) { switch(x) { case 1: { y= 5; break; } case 2: { y= 7; break; } case 3: { y= 9; } default: { y= 100; } } printf("%d\n", y); } return(0); } ================================================ FILE: 43_More_Operators/tests/input075.c ================================================ int main() { int x; switch(x) { if (x<5); } } ================================================ FILE: 43_More_Operators/tests/input076.c ================================================ int 
main() { int x; switch(x) { } } ================================================ FILE: 43_More_Operators/tests/input077.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } case 4: { x= 2; } } } ================================================ FILE: 43_More_Operators/tests/input078.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } default: { x= 2; } } } ================================================ FILE: 43_More_Operators/tests/input079.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } case 2: { x= 2; } case 1: { x= 2; } default: { x= 2; } } } ================================================ FILE: 43_More_Operators/tests/input080.c ================================================ #include int x; int y; int main() { for (x=0, y=1; x < 6; x++, y=y+2) { printf("%d %d\n", x, y); } return(0); } ================================================ FILE: 43_More_Operators/tests/input081.c ================================================ #include int x; int y; int main() { x= 0; y=1; for (;x<5;) { printf("%d %d\n", x, y); x=x+1; y=y+2; } return(0); } ================================================ FILE: 43_More_Operators/tests/input082.c ================================================ #include int main() { int x; x= 10; // Dangling else test if (x > 5) if (x > 15) printf("x > 15\n"); else printf("15 >= x > 5\n"); else printf("5 >= x\n"); return(0); } ================================================ FILE: 43_More_Operators/tests/input083.c ================================================ #include int main() { int x; // Dangling else test. 
// We should not print anything for x<= 5 for (x=0; x < 12; x++) if (x > 5) if (x > 10) printf("10 < %2d\n", x); else printf(" 5 < %2d <= 10\n", x); return(0); } ================================================ FILE: 43_More_Operators/tests/input084.c ================================================ #include int main() { int x, y; x=2; y=3; printf("%d %d\n", x, y); char a, *b; a= 'f'; b= &a; printf("%c %c\n", a, *b); return(0); } ================================================ FILE: 43_More_Operators/tests/input085.c ================================================ int main(struct foo { int x; }; , int a) { return(0); } ================================================ FILE: 43_More_Operators/tests/input086.c ================================================ int main() { int fred() { return(5); } int x; x=2; return(x); } ================================================ FILE: 43_More_Operators/tests/input087.c ================================================ struct foo { int x; int y; struct blah { int g; }; }; ================================================ FILE: 43_More_Operators/tests/input088.c ================================================ #include struct foo { int x; int y; } fred, mary; struct foo james; int main() { fred.x= 5; mary.y= 6; printf("%d %d\n", fred.x, mary.y); return(0); } ================================================ FILE: 43_More_Operators/tests/input089.c ================================================ #include int x= 23; char y= 'H'; char *z= "Hello world"; int main() { printf("%d %c %s\n", x, y, z); return(0); } ================================================ FILE: 43_More_Operators/tests/input090.c ================================================ #include int a = 23, b = 100; char y = 'H', *z = "Hello world"; int main() { printf("%d %d %c %s\n", a, b, y, z); return (0); } ================================================ FILE: 43_More_Operators/tests/input091.c ================================================ #include int fred[] = { 
1, 2, 3, 4, 5 }; int jim[10] = { 1, 2, 3, 4, 5 }; int main() { int i; for (i=0; i < 5; i++) printf("%d\n", fred[i]); for (i=0; i < 10; i++) printf("%d\n", jim[i]); return(0); } ================================================ FILE: 43_More_Operators/tests/input092.c ================================================ char x= 3000; ================================================ FILE: 43_More_Operators/tests/input093.c ================================================ char x= fred; ================================================ FILE: 43_More_Operators/tests/input094.c ================================================ char *s= 54; ================================================ FILE: 43_More_Operators/tests/input095.c ================================================ int fred(int x=2) { return(x); } ================================================ FILE: 43_More_Operators/tests/input096.c ================================================ int fred[0]; ================================================ FILE: 43_More_Operators/tests/input097.c ================================================ int main() { int x[45]; return(0); } ================================================ FILE: 43_More_Operators/tests/input098.c ================================================ int fred[3]= { 1, 2, 3, 4, 5 }; ================================================ FILE: 43_More_Operators/tests/input099.c ================================================ #include // List of token strings, for debugging purposes. 
// As yet, we can't store a NULL into the list char *Tstring[] = { "EOF", "=", "||", "&&", "|", "^", "&", "==", "!=", ",", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", "default", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":", "" }; int main() { int i; char *str; i=0; while (1) { str= Tstring[i]; if (*str == 0) break; printf("%s\n", str); i++; } return(0); } ================================================ FILE: 43_More_Operators/tests/input100.c ================================================ #include int main() { int x= 3, y=14; int z= 2 * x + y; char *str= "Hello world"; printf("%s %d %d\n", str, x+y, z); return(0); } ================================================ FILE: 43_More_Operators/tests/input101.c ================================================ #include int main() { int x= 65535; char y; char *str; y= (char )x; printf("0x%x\n", y); str= (char *)0; printf("0x%lx\n", (long)str); return(0); } ================================================ FILE: 43_More_Operators/tests/input102.c ================================================ int main() { struct foo { int p; }; int y= (struct foo) x; } ================================================ FILE: 43_More_Operators/tests/input103.c ================================================ int main() { union foo { int p; }; int y= (union foo) x; } ================================================ FILE: 43_More_Operators/tests/input104.c ================================================ int main() { int y= (void) x; } ================================================ FILE: 43_More_Operators/tests/input105.c ================================================ int main() { int x; char *y; y= (char) x; } ================================================ FILE: 43_More_Operators/tests/input106.c 
================================================ #include char *y= (char *)0; int main() { printf("0x%lx\n", (long)y); return(0); } ================================================ FILE: 43_More_Operators/tests/input107.c ================================================ #include char *y[] = { "fish", "cow", NULL }; char *z= NULL; int main() { int i; char *ptr; for (i=0; i < 3; i++) { ptr= y[i]; if (ptr != (char *)0) printf("%s\n", y[i]); else printf("NULL\n"); } return(0); } ================================================ FILE: 43_More_Operators/tests/input108.c ================================================ int main() { char *str= (void *)0; return(0); } ================================================ FILE: 43_More_Operators/tests/input109.c ================================================ #include int main() { int x= 17 - 1; printf("%d\n", x); return(0); } ================================================ FILE: 43_More_Operators/tests/input110.c ================================================ #include int x; int y; int main() { x= 3; y= 15; y += x; printf("%d\n", y); x= 3; y= 15; y -= x; printf("%d\n", y); x= 3; y= 15; y *= x; printf("%d\n", y); x= 3; y= 15; y /= x; printf("%d\n", y); return(0); } ================================================ FILE: 43_More_Operators/tests/mktests ================================================ #!/bin/sh # Make the output files for each test # Build our compiler if needed if [ ! -f ../cwj ] then (cd ..; make install) fi for i in input*c do if [ ! -f "out.$i" -a ! -f "err.$i" ] then ../cwj -o out $i 2> "err.$i" # If the err file is empty if [ ! 
-s "err.$i" ] then rm -f "err.$i" cc -o out $i ./out > "out.$i" fi fi rm -f out out.s done ================================================ FILE: 43_More_Operators/tests/out.input001.c ================================================ 36 10 25 ================================================ FILE: 43_More_Operators/tests/out.input002.c ================================================ 17 ================================================ FILE: 43_More_Operators/tests/out.input003.c ================================================ 1 2 3 4 5 ================================================ FILE: 43_More_Operators/tests/out.input004.c ================================================ 1 1 1 1 1 1 1 1 1 ================================================ FILE: 43_More_Operators/tests/out.input005.c ================================================ 6 ================================================ FILE: 43_More_Operators/tests/out.input006.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 43_More_Operators/tests/out.input007.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 43_More_Operators/tests/out.input008.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 43_More_Operators/tests/out.input009.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 43_More_Operators/tests/out.input010.c ================================================ 20 10 1 2 3 4 5 253 254 255 0 1 ================================================ FILE: 43_More_Operators/tests/out.input011.c ================================================ 10 20 30 1 2 3 4 5 253 254 255 0 1 2 3 1 2 3 4 5 ================================================ FILE: 43_More_Operators/tests/out.input012.c 
================================================ 5 ================================================ FILE: 43_More_Operators/tests/out.input013.c ================================================ 23 56 ================================================ FILE: 43_More_Operators/tests/out.input014.c ================================================ 10 20 30 ================================================ FILE: 43_More_Operators/tests/out.input015.c ================================================ 18 18 12 12 ================================================ FILE: 43_More_Operators/tests/out.input016.c ================================================ 12 18 ================================================ FILE: 43_More_Operators/tests/out.input017.c ================================================ 19 12 ================================================ FILE: 43_More_Operators/tests/out.input018.c ================================================ 34 34 ================================================ FILE: 43_More_Operators/tests/out.input018a.c ================================================ 15 16 ================================================ FILE: 43_More_Operators/tests/out.input019.c ================================================ 30 ================================================ FILE: 43_More_Operators/tests/out.input020.c ================================================ 12 ================================================ FILE: 43_More_Operators/tests/out.input021.c ================================================ 10 Hello world ================================================ FILE: 43_More_Operators/tests/out.input022.c ================================================ 12 12 12 13 13 13 13 13 13 35 35 35 ================================================ FILE: 43_More_Operators/tests/out.input023.c ================================================ -23 100 -2 0 1 0 13 14 Hello world ================================================ FILE: 
43_More_Operators/tests/out.input024.c ================================================ 2 59 57 8 7 ================================================ FILE: 43_More_Operators/tests/out.input025.c ================================================ 10 20 30 5 15 25 ================================================ FILE: 43_More_Operators/tests/out.input026.c ================================================ 13 23 34 44 54 64 74 84 94 95 96 ================================================ FILE: 43_More_Operators/tests/out.input027.c ================================================ 1 2 3 4 5 6 7 8 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 ================================================ FILE: 43_More_Operators/tests/out.input028.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 43_More_Operators/tests/out.input029.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 43_More_Operators/tests/out.input030.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input030.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 43_More_Operators/tests/out.input053.c ================================================ Hello world, 23 ================================================ FILE: 43_More_Operators/tests/out.input054.c ================================================ Hello world, 0 Hello world, 1 Hello world, 2 Hello world, 3 Hello world, 4 Hello world, 5 Hello world, 6 Hello world, 7 Hello world, 8 Hello world, 9 Hello world, 10 Hello world, 11 Hello world, 12 Hello world, 13 Hello world, 14 Hello 
world, 15 Hello world, 16 Hello world, 17 Hello world, 18 Hello world, 19 ================================================ FILE: 43_More_Operators/tests/out.input055.c ================================================ Hello world Argument 0 is ./out ================================================ FILE: 43_More_Operators/tests/out.input058.c ================================================ 12 99 4005 4116 4116 ================================================ FILE: 43_More_Operators/tests/out.input062.c ================================================ 65 66 66 The next two depend on the endian of the platform 66 66 67 67 ================================================ FILE: 43_More_Operators/tests/out.input063.c ================================================ 25 ================================================ FILE: 43_More_Operators/tests/out.input067.c ================================================ 5 17 ================================================ FILE: 43_More_Operators/tests/out.input070.c ================================================ 56 ================================================ FILE: 43_More_Operators/tests/out.input071.c ================================================ 0 1 2 3 4 7 8 9 10 11 12 13 14 Done ================================================ FILE: 43_More_Operators/tests/out.input074.c ================================================ 100 5 7 100 100 ================================================ FILE: 43_More_Operators/tests/out.input080.c ================================================ 0 1 1 3 2 5 3 7 4 9 5 11 ================================================ FILE: 43_More_Operators/tests/out.input081.c ================================================ 0 1 1 3 2 5 3 7 4 9 ================================================ FILE: 43_More_Operators/tests/out.input082.c ================================================ 15 >= x > 5 ================================================ FILE: 43_More_Operators/tests/out.input083.c 
================================================ 5 < 6 <= 10 5 < 7 <= 10 5 < 8 <= 10 5 < 9 <= 10 5 < 10 <= 10 10 < 11 ================================================ FILE: 43_More_Operators/tests/out.input084.c ================================================ 2 3 f f ================================================ FILE: 43_More_Operators/tests/out.input088.c ================================================ 5 6 ================================================ FILE: 43_More_Operators/tests/out.input089.c ================================================ 23 H Hello world ================================================ FILE: 43_More_Operators/tests/out.input090.c ================================================ 23 100 H Hello world ================================================ FILE: 43_More_Operators/tests/out.input091.c ================================================ 1 2 3 4 5 1 2 3 4 5 0 0 0 0 0 ================================================ FILE: 43_More_Operators/tests/out.input099.c ================================================ EOF = || && | ^ & == != , > <= >= << >> + - * / ++ -- ~ ! void char int long if else while for return struct union enum typedef extern break continue switch case default intlit strlit ; identifier { } ( ) [ ] , . 
-> : ================================================ FILE: 43_More_Operators/tests/out.input100.c ================================================ Hello world 17 20 ================================================ FILE: 43_More_Operators/tests/out.input101.c ================================================ 0xff 0x0 ================================================ FILE: 43_More_Operators/tests/out.input106.c ================================================ 0x0 ================================================ FILE: 43_More_Operators/tests/out.input107.c ================================================ fish cow NULL ================================================ FILE: 43_More_Operators/tests/out.input108.c ================================================ ================================================ FILE: 43_More_Operators/tests/out.input109.c ================================================ 16 ================================================ FILE: 43_More_Operators/tests/out.input110.c ================================================ 18 12 45 5 ================================================ FILE: 43_More_Operators/tests/runtests ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! -f ../cwj ] then (cd ..; make install) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" # Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../cwj -o out $i ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" 
-eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../cwj $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.s "trial.$i" done ================================================ FILE: 43_More_Operators/tests/runtestsn ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! -f ../compn ] then (cd ..; make install) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" # Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../compn -o out $i ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../compn $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" 
-eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.o out.s "trial.$i" done ================================================ FILE: 43_More_Operators/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->type = type; n->left = left; n->mid = mid; n->right = right; n->sym = sym; n->a_intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, struct symtable *sym, int intvalue) { return (mkastnode(op, type, NULL, NULL, NULL, sym, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, struct symtable *sym, int intvalue) { return (mkastnode(op, type, left, NULL, NULL, sym, intvalue)); } // Generate and return a new label number // just for AST dumping purposes static int gendumplabel(void) { static int id = 1; return (id++); } // Given an AST tree, print it out and follow the // traversal of the tree that genAST() follows void dumpAST(struct ASTnode *n, int label, int level) { int Lfalse, Lstart, Lend; switch (n->op) { case A_IF: Lfalse = gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_IF"); if (n->right) { Lend = gendumplabel(); fprintf(stdout, ", end L%d", Lend); } fprintf(stdout, "\n"); dumpAST(n->left, Lfalse, level + 2); dumpAST(n->mid, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); return; case A_WHILE: Lstart = 
gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_WHILE, start L%d\n", Lstart); Lend = gendumplabel(); dumpAST(n->left, Lend, level + 2); dumpAST(n->right, NOLABEL, level + 2); return; } // Reset level to -2 for A_GLUE if (n->op == A_GLUE) level = -2; // General AST node handling if (n->left) dumpAST(n->left, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); for (int i = 0; i < level; i++) fprintf(stdout, " "); switch (n->op) { case A_GLUE: fprintf(stdout, "\n\n"); return; case A_FUNCTION: fprintf(stdout, "A_FUNCTION %s\n", n->sym->name); return; case A_ADD: fprintf(stdout, "A_ADD\n"); return; case A_SUBTRACT: fprintf(stdout, "A_SUBTRACT\n"); return; case A_MULTIPLY: fprintf(stdout, "A_MULTIPLY\n"); return; case A_DIVIDE: fprintf(stdout, "A_DIVIDE\n"); return; case A_EQ: fprintf(stdout, "A_EQ\n"); return; case A_NE: fprintf(stdout, "A_NE\n"); return; case A_LT: fprintf(stdout, "A_LE\n"); return; case A_GT: fprintf(stdout, "A_GT\n"); return; case A_LE: fprintf(stdout, "A_LE\n"); return; case A_GE: fprintf(stdout, "A_GE\n"); return; case A_INTLIT: fprintf(stdout, "A_INTLIT %d\n", n->a_intvalue); return; case A_STRLIT: fprintf(stdout, "A_STRLIT rval label L%d\n", n->a_intvalue); return; case A_IDENT: if (n->rvalue) fprintf(stdout, "A_IDENT rval %s\n", n->sym->name); else fprintf(stdout, "A_IDENT %s\n", n->sym->name); return; case A_ASSIGN: fprintf(stdout, "A_ASSIGN\n"); return; case A_WIDEN: fprintf(stdout, "A_WIDEN\n"); return; case A_RETURN: fprintf(stdout, "A_RETURN\n"); return; case A_FUNCCALL: fprintf(stdout, "A_FUNCCALL %s\n", n->sym->name); return; case A_ADDR: fprintf(stdout, "A_ADDR %s\n", n->sym->name); return; case A_DEREF: if (n->rvalue) fprintf(stdout, "A_DEREF rval\n"); else fprintf(stdout, "A_DEREF\n"); return; case A_SCALE: fprintf(stdout, "A_SCALE %d\n", n->a_size); return; case A_PREINC: fprintf(stdout, "A_PREINC %s\n", n->sym->name); return; case A_PREDEC: fprintf(stdout, "A_PREDEC 
%s\n", n->sym->name); return; case A_POSTINC: fprintf(stdout, "A_POSTINC\n"); return; case A_POSTDEC: fprintf(stdout, "A_POSTDEC\n"); return; case A_NEGATE: fprintf(stdout, "A_NEGATE\n"); return; case A_BREAK: fprintf(stdout, "A_BREAK\n"); return; case A_CONTINUE: fprintf(stdout, "A_CONTINUE\n"); return; case A_CASE: fprintf(stdout, "A_CASE %d\n", n->a_intvalue); return; case A_DEFAULT: fprintf(stdout, "A_DEFAULT\n"); return; case A_SWITCH: fprintf(stdout, "A_SWITCH\n"); return; case A_CAST: fprintf(stdout, "A_CAST %d\n", n->type); return; case A_ASPLUS: fprintf(stdout, "A_ASPLUS\n"); return; case A_ASMINUS: fprintf(stdout, "A_ASMINUS\n"); return; case A_ASSTAR: fprintf(stdout, "A_ASSTAR\n"); return; case A_ASSLASH: fprintf(stdout, "A_ASSLASH\n"); return; default: fatald("Unknown dumpAST operator", n->op); } } ================================================ FILE: 43_More_Operators/types.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Types and type handling // Copyright (c) 2019 Warren Toomey, GPL3 // Return true if a type is an int type // of any size, false otherwise int inttype(int type) { return (((type & 0xf) == 0) && (type >= P_CHAR && type <= P_LONG)); } // Return true if a type is of pointer type int ptrtype(int type) { return ((type & 0xf) != 0); } // Given a primitive type, return // the type which is a pointer to it int pointer_to(int type) { if ((type & 0xf) == 0xf) fatald("Unrecognised in pointer_to: type", type); return (type + 1); } // Given a primitive pointer type, return // the type which it points to int value_at(int type) { if ((type & 0xf) == 0x0) fatald("Unrecognised in value_at: type", type); return (type - 1); } // Given a type and a composite type pointer, return // the size of this type in bytes int typesize(int type, struct symtable *ctype) { if (type == P_STRUCT || type == P_UNION) return (ctype->size); return (genprimsize(type)); } // Given an AST tree and a type which we want 
it to become, // possibly modify the tree by widening or scaling so that // it is compatible with this type. Return the original tree // if no changes occurred, a modified tree, or NULL if the // tree is not compatible with the given type. // If this will be part of a binary operation, the AST op is not zero. struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op) { int ltype; int lsize, rsize; ltype = tree->type; // XXX No idea on these yet if (ltype == P_STRUCT || ltype == P_UNION) fatal("Don't know how to do this yet"); if (rtype == P_STRUCT || rtype == P_UNION) fatal("Don't know how to do this yet"); // Compare scalar int types if (inttype(ltype) && inttype(rtype)) { // Both types same, nothing to do if (ltype == rtype) return (tree); // Get the sizes for each type lsize = typesize(ltype, NULL); rsize = typesize(rtype, NULL); // Tree's size is too big if (lsize > rsize) return (NULL); // Widen to the right if (rsize > lsize) return (mkastunary(A_WIDEN, rtype, tree, NULL, 0)); } // For pointers if (ptrtype(ltype) && ptrtype(rtype)) { // We can compare them if (op >= A_EQ && op <= A_GE) return(tree); // A comparison of the same type for a non-binary operation is OK, // or when the left tree is of `void *` type. 
if (op == 0 && (ltype == rtype || ltype == pointer_to(P_VOID))) return (tree); } // We can scale only on A_ADD or A_SUBTRACT operation if (op == A_ADD || op == A_SUBTRACT) { // Left is int type, right is pointer type and the size // of the original type is >1: scale the left if (inttype(ltype) && ptrtype(rtype)) { rsize = genprimsize(value_at(rtype)); if (rsize > 1) return (mkastunary(A_SCALE, rtype, tree, NULL, rsize)); else return (tree); // Size 1, no need to scale } } // If we get here, the types are not compatible return (NULL); } ================================================ FILE: 44_Fold_Optimisation/Makefile ================================================ # Define the location of the include directory # and the location to install the compiler binary INCDIR=/tmp/include BINDIR=/tmp HSRCS= data.h decl.h defs.h SRCS= cg.c decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c ARMSRCS= cg_arm.c decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c cwj: $(SRCS) $(HSRCS) cc -o cwj -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCS) compn: $(SRCN) $(HSRCS) cc -D__NASM__ -o compn -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCN) cwjarm: $(ARMSRCS) $(HSRCS) cc -o cwjarm -g -Wall $(ARMSRCS) cp cwjarm cwj install: cwj mkdir -p $(INCDIR) rsync -a include/. $(INCDIR) cp cwj $(BINDIR) chmod +x $(BINDIR)/cwj installn: compn mkdir -p $(INCDIR) rsync -a include/. 
$(INCDIR) cp compn $(BINDIR) chmod +x $(BINDIR)/compn clean: rm -f cwj cwjarm compn *.o *.s out a.out test: install tests/runtests (cd tests; chmod +x runtests; ./runtests) armtest: cwjarm tests/runtests (cd tests; chmod +x runtests; ./runtests) testn: installn tests/runtestsn (cd tests; chmod +x runtestsn; ./runtestsn) ================================================ FILE: 44_Fold_Optimisation/Readme.md ================================================ # Part 44: Constant Folding In the last part of our compiler writing journey, I realised that I'd have to add [constant folding](https://en.wikipedia.org/wiki/Constant_folding) as an optimisation, so that I could parse expressions as part of doing global variable declarations. So, in this part, I've added the constant folding optimisation for general expressions and in the next part I'll rewrite the code for global variable declarations. ## What is Constant Folding? Constant folding is a form of optimisation where an expression can be evaluated by the compiler at compile time, instead of generating code to evaluate the expression at run time. For example, we can see that `x= 5 + 4 * 5;` is really the same as `x= 25;`, so we can let the compiler evaluate the expression and just output the assembly code for `x= 25;`. ## So How Do We Do It? The answer is: look for sub-trees in an AST tree where the leaves are integer literals. If there is a binary operation which has two integer literals leaves, the compiler can evaluate the expression and replace the sub-tree with a single integer literal node. Similarly, if there is a unary operation with an integer literal leaf child, then the compiler can also evaluate the expression and replace the sub-tree with a single integer literal node. Once we can do this for sub-trees, we can write a function to traverse the entire tree looking for sub-trees to fold. At any node, we can do this algorithm: 1. Try to fold and replace the left child, i.e. recursively. 1. 
Try to fold and replace the right child, i.e. recursively. 1. If it's a binary operation with two literal child leaves, fold that. 1. If it's a unary operation with one literal child leaf, fold that. The fact that we replace the sub-trees means we recursively optimise the edges of the tree first, then work back up the tree to the root of the tree. An example:
```
     *         *         *     50
   /   \     /   \     /   \
  +     -   10    -   10    5
 / \   / \       / \
6   4 8   3     8   3
```
## A New File, `opt.c` I've created a new source file for our compiler, `opt.c` and in it I've rewritten the same three functions, `fold2()`, `fold1()` and `fold()` that are in the [SubC](http://www.t3x.org/subc/) compiler written by Nils M Holm. One thing that Nils spends a lot of time on in his code is getting the calculations correct. This is important when the compiler is a cross-compiler. For example, if we do the constant folding on a 64-bit machine, then the range we have for integer literals is much bigger than for 32-bit machines. Any constant folding we do on the 64-bit machine may not give the same result (due to lack of truncation) as the calculation of the same expression on a 32-bit machine. I know that this is an important concern, but I will stick with our "KISS principle" and write simple code for now. As required, I'll go back and fix it. ## Folding Binary Operations Here is the code to fold AST sub-trees which are binary operations on two children. I'm only folding a few operations; there are many more in `expr.c` that we could also fold. ```c // Fold an AST tree with a binary operator // and two A_INTLIT children. Return either // the original tree or a new leaf node. static struct ASTnode *fold2(struct ASTnode *n) { int val, leftval, rightval; // Get the values from each child leftval = n->left->a_intvalue; rightval = n->right->a_intvalue; ``` Another function will call `fold2()` and this ensures that both `n->left` and `n->right` are non-NULL pointers to A_INTLIT leaf nodes.
Now that we have the values from both children, we can get to work. ```c // Perform some of the binary operations. // For any AST op we can't do, return // the original tree. switch (n->op) { case A_ADD: val = leftval + rightval; break; case A_SUBTRACT: val = leftval - rightval; break; case A_MULTIPLY: val = leftval * rightval; break; case A_DIVIDE: // Don't try to divide by zero. if (rightval == 0) return (n); val = leftval / rightval; break; default: return (n); } ``` We fold the normal four maths operations. Note the special code for division: if we try to divide by zero, the compiler will crash. Instead, we leave the sub-tree intact and let the code crash once it becomes an executable! Obviously, there is opportunity for a `fatal()` call here. We leave the switch statement with a single value `val` that represents the calculated value of the sub-tree. Time to replace the sub-tree. ```c // Return a leaf node with the new value return (mkastleaf(A_INTLIT, n->type, NULL, val)); } ``` So a binary AST tree goes in, and a leaf AST node (hopefully) comes out. ## Folding Unary Operations Now that you've seen folding on binary operations, the code for unary operations should be straight forward. I am only folding two unary operations, but there is room to add more. ```c // Fold an AST tree with a unary operator // and one INTLIT children. Return either // the original tree or a new leaf node. static struct ASTnode *fold1(struct ASTnode *n) { int val; // Get the child value. Do the // operation if recognised. // Return the new leaf node. val = n->left->a_intvalue; switch (n->op) { case A_WIDEN: break; case A_INVERT: val = ~val; break; case A_LOGNOT: val = !val; break; default: return (n); } // Return a leaf node with the new value return (mkastleaf(A_INTLIT, n->type, NULL, val)); } ``` There is one small wrinkle with implementing `fold1()` in our compiler, and that is we have AST nodes to widen values from one type to another. 
For example, in this expression `x= 3000 + 1;`, the '1' is parsed as a `char` literal. It needs to be widened to be of type `int` so that it can be added to the '3000'. The compiler without optimisation generates this AST tree: ``` A_ADD / \ A_INTLIT A_WIDEN 3000 \ A_INTLIT 1 ``` What we do here is treat the A_WIDEN as a unary AST operation and copy the literal value from the child and return a leaf node with the widened type and with the literal value. ## Recursively Folding a Whole AST Tree We have two functions to deal with the edges of the tree. Now we can code up the recursive function to optimise the edges and work from the edges back up to the root of the tree. ```c // Attempt to do constant folding on // the AST tree with the root node n static struct ASTnode *fold(struct ASTnode *n) { if (n == NULL) return (NULL); // Fold on the left child, then // do the same on the right child n->left = fold(n->left); n->right = fold(n->right); // If both children are A_INTLITs, do a fold2() if (n->left && n->left->op == A_INTLIT) { if (n->right && n->right->op == A_INTLIT) n = fold2(n); else // If only the left is A_INTLIT, do a fold1() n = fold1(n); } // Return the possibly modified tree return (n); } ``` The first thing to do is return NULL on a NULL tree. This allows us to recursively call `fold()` on this node's children on the following two lines of code. We have just optimised the sub-trees below us. Now, for an AST node with two integer literal leaf children, call `fold2()` to optimise them away (if possible). And if we have only one integer literal leaf child, call `fold1()` to do the same to it. We either have trimmed the tree, or the tree is unchanged. Either way, we can now return it to the recursion level above us. ## A Generic Optimisation Function Constant folding is only one optimisation we can do on our AST tree; there will be others later. Thus, it makes sense to write a front-end function that applies all the optimisations to the tree. 
Here it is with just constant folding: ```c // Optimise an AST tree by // constant folding in all sub-trees struct ASTnode *optimise(struct ASTnode *n) { n = fold(n); return (n); } ``` We can extend it later. This gets called in `function_declaration()` in `decl.c`. Once we have parsed a function and its body, we put the A_FUNCTION node on the top of the tree, and: ```c // Build the A_FUNCTION node which has the function's symbol pointer // and the compound statement sub-tree tree = mkastunary(A_FUNCTION, type, tree, oldfuncsym, endlabel); // Do optimisations on the AST tree tree= optimise(tree); ``` ## An Example Function The following program, `tests/input111.c`, should put the folding code through its paces. ```c #include <stdio.h> int main() { int x= 2000 + 3 + 4 * 5 + 6; printf("%d\n", x); return(0); } ``` The compiler should replace the initialisation with `x=2029;`. Let's do a `cwj -T -S tests/input111.c` and see: ``` $ ./cwj -T -S tests/input111.c A_INTLIT 2029 A_WIDEN A_IDENT x A_ASSIGN ... $ ./cwj -o tests/input111 tests/input111.c $ ./tests/input111 2029 ``` It seems to work, and the compiler still passes all 110 previous tests, so for now it does its job. ## Conclusion and What's Next I was going to leave optimisation to the end of our journey, but I think it's good to see one type of optimisation now. In the next part of our compiler writing journey, we will replace our current global declaration parser with code that evaluates expressions using `binexpr()` and this new constant folding code. 
[Next step](../45_Globals_Again/Readme.md) ================================================ FILE: 44_Fold_Optimisation/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\t.text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\t.data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch (type) { case P_CHAR: return (offset); case P_INT: case P_LONG: break; default: if (!ptrtype(type)) fatald("Bad type in cg_align:", type); } // Here we have an int or a long. Align it on a 4-byte offset // I put the generic code here so it can be reused elsewhere. alignment = 4; offset = (offset + direction * (alignment - 1)) & ~(alignment - 1); return (offset); } // Position of next local variable relative to stack base pointer. 
// We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. static int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. // We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "%r10", "%r11", "%r12", "%r13", "%r9", "%r8", "%rcx", "%rdx", "%rsi", "%rdi" }; static char *breglist[] = { "%r10b", "%r11b", "%r12b", "%r13b", "%r9b", "%r8b", "%cl", "%dl", "%sil", "%dil" }; static char *dreglist[] = { "%r10d", "%r11d", "%r12d", "%r13d", "%r9d", "%r8d", "%ecx", "%edx", "%esi", "%edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); cgtextseg(); fprintf(Outfile, "# internal switch(expr) routine\n" "# %%rsi = switch table, %%rax = expr\n" "# from SubC: http://www.t3x.org/subc/\n" "\n" "switch:\n" " pushq %%rsi\n" " movq %%rdx, %%rsi\n" " movq %%rax, %%rbx\n" " cld\n" " lodsq\n" " movq %%rax, %%rcx\n" "next:\n" " lodsq\n" " movq %%rax, %%rdx\n" " lodsq\n" " cmpq %%rdx, %%rbx\n" " jnz no\n" " popq %%rsi\n" " jmp *%%rax\n" "no:\n" " loop next\n" " lodsq\n" " popq %%rsi\n" " jmp *%%rax\n" "\n"); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(struct symtable *sym) { char *name = sym->name; struct symtable *parm, *locvar; int cnt; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the %rsp and %rsp fprintf(Outfile, "\t.globl\t%s\n" "\t.type\t%s, @function\n" "%s:\n" "\tpushq\t%%rbp\n" "\tmovq\t%%rsp, %%rbp\n", name, name, name); // Copy any in-register parameters to the stack, up to six of them // The remaining parameters are already on the stack for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) { if (cnt > 6) { parm->st_posn = paramOffset; paramOffset += 8; } else { parm->st_posn = newlocaloffset(parm->type); cgstorlocal(paramReg--, parm); } } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. 
for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) { locvar->st_posn = newlocaloffset(locvar->type); } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\taddq\t$%d,%%rsp\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->st_endlabel); fprintf(Outfile, "\taddq\t$%d,%%rsp\n", stackOffset); fputs("\tpopq %rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadglob(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name); } else // Print out the code to initialise it switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovzbq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name); if (op == 
A_PREDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovslq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadlocal(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->st_posn); fprintf(Outfile, "\tmovq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->st_posn); } else switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->st_posn); fprintf(Outfile, "\tmovzbq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->st_posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->st_posn); fprintf(Outfile, "\tmovslq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->st_posn); break; default: fatald("Bad type in cgloadlocal:", sym->type); } return (r); } // Given the label number of a 
global string, // load its address into a new register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tleaq\tL%d(%%rip), %s\n", label, reglist[r]); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tandq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\torq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txorq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshlq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshrq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { 
fprintf(Outfile, "\tnegq\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnotq\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); return (r); } // Convert an integer value to a boolean value. Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s@PLT\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\taddq\t$%d, %%rsp\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpushq\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmovq\t%s, %s\n", reglist[r], reglist[FIRSTPARAMREG - argposn + 1]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsalq\t$%d, %s\n", val, reglist[r]); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %s(%%rip)\n", reglist[r], sym->name); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %s(%%rip)\n", breglist[r], sym->name); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %s(%%rip)\n", dreglist[r], sym->name); break; default: fatald("Bad type in cgstorglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %d(%%rbp)\n", reglist[r], sym->st_posn); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %d(%%rbp)\n", breglist[r], sym->st_posn); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %d(%%rbp)\n", dreglist[r], sym->st_posn); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size, type; int initvalue; int i; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the variable (or its elements if an array) // and the type of the variable if (node->stype == S_ARRAY) { size= typesize(value_at(node->type), node->ctype); type= value_at(node->type); } else { size = node->size; type= node->type; } // Generate the global identity and the label cgdataseg(); fprintf(Outfile, "\t.globl\t%s\n", node->name); 
fprintf(Outfile, "%s:\n", node->name); // Output space for one or more elements for (i=0; i < node->nelems; i++) { // Get any initial value initvalue= 0; if (node->initlist != NULL) initvalue= node->initlist[i]; // Generate the space for this type switch (size) { case 1: fprintf(Outfile, "\t.byte\t%d\n", initvalue); break; case 4: fprintf(Outfile, "\t.long\t%d\n", initvalue); break; case 8: // Generate the pointer to a string literal. Treat a zero value // as actually zero, not the label L0 if (node->initlist != NULL && type== pointer_to(P_CHAR) && initvalue != 0) fprintf(Outfile, "\t.quad\tL%d\n", initvalue); else fprintf(Outfile, "\t.quad\t%d\n", initvalue); break; default: for (int i = 0; i < size; i++) fprintf(Outfile, "\t.byte\t0\n"); } } } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\t.byte\t%d\n", *cptr); } fprintf(Outfile, "\t.byte\t0\n"); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzbl\t%s, %%eax\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %%eax\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } cgjump(sym->st_endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL) fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", sym->name, reglist[r]); else fprintf(Outfile, "\tleaq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzbq\t(%s), %s\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovslq\t(%s), %s\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { // Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmovb\t%s, (%s)\n", breglist[r1], reglist[r2]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } // Generate a switch jump table and the code to // load the registers and call the switch() code void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; // Get a label for the switch table label = genlabel(); cglabel(label); // Heuristic. If we have no cases, create one case // which points to the default case if (casecount == 0) { caseval[0] = 0; caselabel[0] = defaultlabel; casecount = 1; } // Generate the switch jump table. 
fprintf(Outfile, "\t.quad\t%d\n", casecount); for (i = 0; i < casecount; i++) fprintf(Outfile, "\t.quad\t%d, L%d\n", caseval[i], caselabel[i]); fprintf(Outfile, "\t.quad\tL%d\n", defaultlabel); // Load the specific registers cglabel(toplabel); fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); fprintf(Outfile, "\tleaq\tL%d(%%rip), %%rdx\n", label); fprintf(Outfile, "\tjmp\tswitch\n"); } ================================================ FILE: 44_Fold_Optimisation/cg_arm.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for ARMv6 on Raspberry Pi // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. static int freereg[4]; static char *reglist[4] = { "r4", "r5", "r6", "r7" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // We have to store large integer literal values in memory. // Keep a list of them which will be output in the postamble #define MAXINTS 1024 int Intlist[MAXINTS]; static int Intslot = 0; // Determine the offset of a large integer // literal from the .L3 label. If the integer // isn't in the list, add it. 
static void set_int_offset(int val) { int offset = -1; // See if it is already there for (int i = 0; i < Intslot; i++) { if (Intlist[i] == val) { offset = 4 * i; break; } } // Not in the list, so add it if (offset == -1) { offset = 4 * Intslot; if (Intslot == MAXINTS) fatal("Out of int slots in set_int_offset()"); Intlist[Intslot++] = val; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L3+%d\n", offset); } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Print out the assembly postamble void cgpostamble() { // Print out the global variables fprintf(Outfile, ".L2:\n"); for (int i = 0; i < Globs; i++) { if (Symtable[i].stype == S_VARIABLE) fprintf(Outfile, "\t.word %s\n", Symtable[i].name); } // Print out the integer literals fprintf(Outfile, ".L3:\n"); for (int i = 0; i < Intslot; i++) { fprintf(Outfile, "\t.word %d\n", Intlist[i]); } } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, \%%function\n" "%s:\n" "\tpush\t{fp, lr}\n" "\tadd\tfp, sp, #4\n" "\tsub\tsp, sp, #8\n" "\tstr\tr0, [fp, #-8]\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Symtable[id].endlabel); fputs("\tsub\tsp, fp, #4\n" "\tpop\t{fp, pc}\n" "\t.align\t2\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); // If the literal value is small, do it with one instruction if (value <= 1000) fprintf(Outfile, "\tmov\t%s, #%d\n", reglist[r], value); else { set_int_offset(value); fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); } return (r); } // Determine the offset of a variable from the .L2 // label. Yes, this is inefficient code. static void set_var_offset(int id) { int offset = 0; // Walk the symbol table up to id. 
// Find S_VARIABLEs and add on 4 until // we get to our variable for (int i = 0; i < id; i++) { if (Symtable[i].stype == S_VARIABLE) offset += 4; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L2+%d\n", offset); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tldrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s, %s\n", reglist[r1], reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\tmul\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { // To do a divide: r0 holds the dividend, r1 holds the divisor. // The quotient is in r0. 
fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r1]); fprintf(Outfile, "\tmov\tr1, %s\n", reglist[r2]); fprintf(Outfile, "\tbl\t__aeabi_idiv\n"); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r1]); free_register(r2); return (r1); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]); fprintf(Outfile, "\tbl\t%s\n", Symtable[id].name); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r]); return (r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tlsl\t%s, %s, #%d\n", reglist[r], reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tstr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgstorglob:", Symtable[id].type); } return (r); } // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { if (ptrtype(type)) return (4); switch (type) { case P_CHAR: return (1); case P_INT: case P_LONG: return (4); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Symtable[id].type); fprintf(Outfile, "\t.data\n" "\t.globl\t%s\n", Symtable[id].name); switch (typesize) { case 1: fprintf(Outfile, "%s:\t.byte\t0\n", Symtable[id].name); break; case 4: fprintf(Outfile, "%s:\t.long\t0\n", Symtable[id].name); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "moveq", "movne", "movlt", "movgt", "movle", "movge" }; // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "movne", "moveq", "movge", "movle", "movgt", "movlt" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #1\n", cmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #0\n", invcmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\tuxtb\t%s, %s\n", reglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tb\tL%d\n", l); } // List of inverted branch instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *brlist[] = { "bne", "beq", "bge", "ble", "bgt", "blt" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", brlist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[reg]); cgjump(Symtable[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. Return a new register int cgaddress(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); fprintf(Outfile, "\tmov\t%s, r3\n", reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tldrb\t%s, [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [%s]\n", reglist[r1], reglist[r2]); break; case P_INT: case P_LONG: fprintf(Outfile, "\tstr\t%s, [%s]\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 44_Fold_Optimisation/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { 
no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\tsection .text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\tsection .data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch (type) { case P_CHAR: return (offset); case P_INT: case P_LONG: break; default: if (!ptrtype(type)) fatald("Bad type in cg_align:", type); } // Here we have an int or a long. Align it on a 4-byte offset // I put the generic code here so it can be reused elsewhere. alignment = 4; offset = (offset + direction * (alignment - 1)) & ~(alignment - 1); return (offset); } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. 
// We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "r10", "r11", "r12", "r13", "r9", "r8", "rcx", "rdx", "rsi", "rdi" }; static char *breglist[] = { "r10b", "r11b", "r12b", "r13b", "r9b", "r8b", "cl", "dl", "sil", "dil" }; static char *dreglist[] = { "r10d", "r11d", "r12d", "r13d", "r9d", "r8d", "ecx", "edx", "esi", "edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\textern\tprintint\n", Outfile); fputs("\textern\tprintchar\n", Outfile); fputs("\textern\topen\n", Outfile); fputs("\textern\tclose\n", Outfile); fputs("\textern\tread\n", Outfile); fputs("\textern\twrite\n", Outfile); fputs("\textern\tprintf\n", Outfile); cgtextseg(); fprintf(Outfile, "; internal switch(expr) routine\n" "; rsi = switch table, rax = expr\n" "; from SubC: http://www.t3x.org/subc/\n" "\n" "switch:\n" " push rsi\n" " mov rsi, rdx\n" " mov rbx, rax\n" " cld\n" " lodsq\n" " mov rcx, rax\n" "next:\n" " lodsq\n" " mov rdx, rax\n" " lodsq\n" " cmp rbx, rdx\n" " jnz no\n" " pop rsi\n" " jmp rax\n" "no:\n" " loop next\n" " lodsq\n" " pop rsi\n" " jmp rax\n" "\n"); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(struct symtable *sym) { char *name = sym->name; struct symtable *parm, *locvar; int cnt; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the rsp and rbp fprintf(Outfile, "\tglobal\t%s\n" "%s:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n", name, name); // Copy any in-register parameters to the stack, up to six of them // The remaining parameters are already on the stack for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) { if (cnt > 6) { parm->st_posn = paramOffset; paramOffset += 8; } else { parm->st_posn = newlocaloffset(parm->type); cgstorlocal(paramReg--, parm); } } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. 
for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) { locvar->st_posn = newlocaloffset(locvar->type); } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\tadd\trsp, %d\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->st_endlabel); fprintf(Outfile, "\tadd\trsp, %d\n", stackOffset); fputs("\tpop rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadglob(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); } else // Print out the code to initialise it switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_PREDEC) 
fprintf(Outfile, "\tdec\tdword [%s]\n", sym->name); fprintf(Outfile, "\tmovsx\t%s, word [%s]\n", dreglist[r], sym->name); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword [%s]\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadlocal(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->st_posn); fprintf(Outfile, "\tmov\t%s, [rbp+%d]\n", reglist[r], sym->st_posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->st_posn); } else switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->st_posn); fprintf(Outfile, "\tmovzx\t%s, byte [rbp+%d]\n", reglist[r], sym->st_posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->st_posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", sym->st_posn); fprintf(Outfile, "\tmovsx\t%s, dword [rbp+%d]\n", reglist[r], sym->st_posn); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", 
sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", sym->st_posn); break; default: fatald("Bad type in cgloadlocal:", sym->type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, L%d\n", reglist[r], label); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tand\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\tor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshl\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, 
"\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshr\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tneg\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnot\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r], breglist[r]); return (r); } // Convert an integer value to a boolean value. Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, byte %s\n", reglist[r], breglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\tadd\trsp, %d\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmov\t%s, rax\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpush\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmov\t%s, %s\n", reglist[FIRSTPARAMREG - argposn + 1], reglist[r]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsal\t%s, %d\n", reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, dreglist[r]); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\tqword\t[rbp+%d], %s\n", sym->st_posn, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\tbyte\t[rbp+%d], %s\n", sym->st_posn, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\tdword\t[rbp+%d], %s\n", sym->st_posn, dreglist[r]); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size, type; int initvalue; int i; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the variable (or its elements if an array) // and the type of the variable if (node->stype == S_ARRAY) { size= typesize(value_at(node->type), node->ctype); type= value_at(node->type); } else { size = node->size; type= node->type; } // Generate the global identity and the label cgdataseg(); fprintf(Outfile, "\tsection\t.data\n" "\tglobal\t%s\n", node->name); 
fprintf(Outfile, "%s:\n", node->name); // Output space for one or more elements for (i = 0; i < node->nelems; i++) { // Get any initial value initvalue = 0; if (node->initlist != NULL) initvalue = node->initlist[i]; // Generate the space for this type // original version switch(size) { case 1: fprintf(Outfile, "\tdb\t%d\n", initvalue); break; case 4: fprintf(Outfile, "\tdd\t%d\n", initvalue); break; case 8: // Generate the pointer to a string literal. Treat a zero value // as actually zero, not the label L0 if (node->initlist != NULL && type == pointer_to(P_CHAR) && initvalue != 0) fprintf(Outfile, "\tdq\tL%d\n", initvalue); else fprintf(Outfile, "\tdq\t%d\n", initvalue); break; default: for (int i = 0; i < size; i++) fprintf(Outfile, "\tdb\t0\n"); /* compact version using times instead of loop fprintf(Outfile, "\ttimes\t%d\tdb\t0\n", size); */ } } } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\tdb\t%d\n", *cptr); } fprintf(Outfile, "\tdb\t0\n"); /* put string in readable format on a single line // probably went overboard with error checking int comma = 0, quote = 0, start = 1; fprintf(Outfile, "\tdb\t"); for (cptr=strvalue; *cptr; cptr++) { if ( ! isprint(*cptr) ) if (comma || start) { fprintf(Outfile, "%d, ", *cptr); start = 0; comma = 1; } else if (quote) { fprintf(Outfile, "\', %d, ", *cptr); comma = 1; quote = 0; } else { fprintf(Outfile, "%d, ", *cptr); comma = 1; quote = 0; } else if (start || comma) { fprintf(Outfile, "\'%c", *cptr); start = comma = 0; quote = 1; } else { fprintf(Outfile, "%c", *cptr); comma = 0; quote = 1; } } if (comma || start) fprintf(Outfile, "0\n"); else fprintf(Outfile, "\', 0\n"); */ } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzx\teax, %s\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmov\teax, %s\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } cgjump(sym->st_endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL) fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r], sym->name); else fprintf(Outfile, "\tlea\t%s, [rbp+%d]\n", reglist[r], sym->st_posn); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovsx\t%s, dword [%s]\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { //Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmov\t[%s], byte %s\n", reglist[r2], breglist[r1]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } // Generate a switch jump table and the code to // load the registers and call the switch() code void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; // Get a label for the switch table label = genlabel(); cglabel(label); // Heuristic. If we have no cases, create one case // which points to the default case if (casecount == 0) { caseval[0] = 0; caselabel[0] = defaultlabel; casecount = 1; } // Generate the switch jump table. 
fprintf(Outfile, "\tdq\t%d\n", casecount); for (i = 0; i < casecount; i++) fprintf(Outfile, "\tdq\t%d, L%d\n", caseval[i], caselabel[i]); fprintf(Outfile, "\tdq\tL%d\n", defaultlabel); // Load the specific registers cglabel(toplabel); fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); fprintf(Outfile, "\tmov\trdx, L%d\n", label); fprintf(Outfile, "\tjmp\tswitch\n"); } ================================================ FILE: 44_Fold_Optimisation/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Putback; // Character put back by scanner extern_ struct symtable *Functionid; // Symbol ptr of the current function extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ char *Infilename; // Name of file we are parsing extern_ char *Outfilename; // Name of file we opened as Outfile extern_ struct token Token; // Last token scanned extern_ char Text[TEXTLEN + 1]; // Last identifier scanned extern_ int Looplevel; // Depth of nested loops extern_ int Switchlevel; // Depth of nested switches // Symbol table lists extern_ struct symtable *Globhead, *Globtail; // Global variables and functions extern_ struct symtable *Loclhead, *Locltail; // Local variables extern_ struct symtable *Parmhead, *Parmtail; // Local parameters extern_ struct symtable *Membhead, *Membtail; // Temp list of struct/union members extern_ struct symtable *Structhead, *Structtail; // List of struct types extern_ struct symtable *Unionhead, *Uniontail; // List of union types extern_ struct symtable *Enumhead, *Enumtail; // List of enum types and values extern_ struct symtable *Typehead, *Typetail; // List of typedefs // Command-line flags extern_ int O_dumpAST; // If true, dump the AST trees extern_ int O_keepasm; // If true, keep any assembly files extern_ int O_assemble; // If true, assemble the assembly files extern_ 
int O_dolink; // If true, link the object files extern_ int O_verbose; // If true, print info on compilation stages ================================================ FILE: 44_Fold_Optimisation/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 static struct symtable *composite_declaration(int type); static int typedef_declaration(struct symtable **ctype); static int type_of_typedef(char *name, struct symtable **ctype); static void enum_declaration(void); // Parse the current token and return a primitive type enum value, // a pointer to any composite type and possibly modify // the class of the type. int parse_type(struct symtable **ctype, int *class) { int type, exstatic = 1; // See if the class has been changed to extern (later, static) while (exstatic) { switch (Token.token) { case T_EXTERN: *class = C_EXTERN; scan(&Token); break; default: exstatic = 0; } } // Now work on the actual type keyword switch (Token.token) { case T_VOID: type = P_VOID; scan(&Token); break; case T_CHAR: type = P_CHAR; scan(&Token); break; case T_INT: type = P_INT; scan(&Token); break; case T_LONG: type = P_LONG; scan(&Token); break; // For the following, if we have a ';' after the // parsing then there is no type, so return -1. 
// Example: struct x {int y; int z}; case T_STRUCT: type = P_STRUCT; *ctype = composite_declaration(P_STRUCT); if (Token.token == T_SEMI) type = -1; break; case T_UNION: type = P_UNION; *ctype = composite_declaration(P_UNION); if (Token.token == T_SEMI) type = -1; break; case T_ENUM: type = P_INT; // Enums are really ints enum_declaration(); if (Token.token == T_SEMI) type = -1; break; case T_TYPEDEF: type = typedef_declaration(ctype); if (Token.token == T_SEMI) type = -1; break; case T_IDENT: type = type_of_typedef(Text, ctype); break; default: fatals("Illegal type, token", Token.tokstr); } return (type); } // Given a type parsed by parse_type(), scan in any following // '*' tokens and return the new type int parse_stars(int type) { while (1) { if (Token.token != T_STAR) break; type = pointer_to(type); scan(&Token); } return (type); } // Parse a type which appears inside a cast int parse_cast(void) { int type, class; struct symtable *ctype; // Get the type inside the parentheses type= parse_stars(parse_type(&ctype, &class)); // Do some error checking. I'm sure more can be done if (type == P_STRUCT || type == P_UNION || type == P_VOID) fatal("Cannot cast to a struct, union or void type"); return(type); } // Given a type, check that the latest token is a literal // of that type. If an integer literal, return this value. // If a string literal, return the label number of the string. // Do not scan the next token. If type is P_NONE, relax all // parse restrictions int parse_literal(int type) { // We have a string literal. Store in memory and return the label if (Token.token == T_STRLIT) { if (type == pointer_to(P_CHAR) || type == P_NONE) return(genglobstr(Text)); } // We have an integer literal. Do some range checking. 
if (Token.token == T_INTLIT) { switch(type) { case P_CHAR: if (Token.intvalue < 0 || Token.intvalue > 255) fatal("Integer literal value too big for char type"); case P_NONE: case P_INT: case P_LONG: break; default: fatal("Type mismatch: integer literal vs. variable"); } } else fatal("Expecting an integer literal value"); return(Token.intvalue); } // Given the type, name and class of a scalar variable, // parse any initialisation value and allocate storage for it. // Return the variable's symbol table entry. static struct symtable *scalar_declaration(char *varname, int type, struct symtable *ctype, int class, struct ASTnode **tree) { struct symtable *sym=NULL; struct ASTnode *varnode, *exprnode; int casttype; *tree= NULL; // Add this as a known scalar switch (class) { case C_EXTERN: case C_GLOBAL: sym= addglob(varname, type, ctype, S_VARIABLE, class, 1, 0); break; case C_LOCAL: sym= addlocl(varname, type, ctype, S_VARIABLE, 1); break; case C_PARAM: sym= addparm(varname, type, ctype, S_VARIABLE); break; case C_MEMBER: sym= addmemb(varname, type, ctype, S_VARIABLE, 1); break; } // The variable is being initialised if (Token.token == T_ASSIGN) { // Only possible for a global or local if (class != C_GLOBAL && class != C_LOCAL) fatals("Variable can not be initialised", varname); scan(&Token); // Globals must be assigned a literal value if (class == C_GLOBAL) { // If there is a cast if (Token.token == T_LPAREN) { // Get the type in the cast scan(&Token); casttype= parse_cast(); rparen(); // Check that the two types are compatible. Change // the new type so that the literal parse below works. // A 'void *' casstype can be assigned to any pointer type. 
if (casttype == type || (casttype== pointer_to(P_VOID) && ptrtype(type))) type= P_NONE; else fatal("Type mismatch"); } // Create one initial value for the variable and // parse this value sym->initlist= (int *)malloc(sizeof(int)); sym->initlist[0]= parse_literal(type); scan(&Token); } if (class == C_LOCAL) { // Make an A_IDENT AST node with the variable varnode = mkastleaf(A_IDENT, sym->type, sym, 0); // Get the expression for the assignment, make into a rvalue exprnode = binexpr(0); exprnode->rvalue = 1; // Ensure the expression's type matches the variable exprnode = modify_type(exprnode, varnode->type, 0); if (exprnode == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree *tree = mkastnode(A_ASSIGN, exprnode->type, exprnode, NULL, varnode, NULL, 0); } } // Generate any global space if (class == C_GLOBAL) genglobsym(sym); return (sym); } // Given the type, name and class of an variable, parse // the size of the array, if any. Then parse any initialisation // value and allocate storage for it. // Return the variable's symbol table entry. static struct symtable *array_declaration(char *varname, int type, struct symtable *ctype, int class) { struct symtable *sym; // New symbol table entry int nelems= -1; // Assume the number of elements won't be given int maxelems; // The maximum number of elements in the init list int *initlist; // The list of initial elements int i=0, j; int casttype, newtype; // Skip past the '[' scan(&Token); // See we have an array size if (Token.token == T_INTLIT) { if (Token.intvalue <= 0) fatald("Array size is illegal", Token.intvalue); nelems= Token.intvalue; scan(&Token); } // Ensure we have a following ']' match(T_RBRACKET, "]"); // Add this as a known array. 
We treat the // array as a pointer to its elements' type switch (class) { case C_EXTERN: case C_GLOBAL: sym = addglob(varname, pointer_to(type), ctype, S_ARRAY, class, 0, 0); break; default: fatal("For now, declaration of non-global arrays is not implemented"); } // Array initialisation if (Token.token == T_ASSIGN) { if (class != C_GLOBAL) fatals("Variable can not be initialised", varname); scan(&Token); // Get the following left curly bracket match(T_LBRACE, "{"); #define TABLE_INCREMENT 10 // If the array already has nelems, allocate that many elements // in the list. Otherwise, start with TABLE_INCREMENT. if (nelems != -1) maxelems= nelems; else maxelems= TABLE_INCREMENT; initlist= (int *)malloc(maxelems *sizeof(int)); // Loop getting a new literal value from the list while (1) { // Get the original type newtype= type; // Check we can add the next value, then parse and add it if (nelems != -1 && i == maxelems) fatal("Too many values in initialisation list"); if (Token.token == T_LPAREN) { // Get the type in the cast scan(&Token); casttype= parse_cast(); rparen(); // Check that the two types are compatible. Change // the new type so that the literal parse below works. // A 'void *' casstype can be assigned to any pointer type. if (casttype == type || (casttype== pointer_to(P_VOID) && ptrtype(type))) newtype= P_NONE; else fatal("Type mismatch"); newtype= P_NONE; } initlist[i++]= parse_literal(newtype); scan(&Token); // Increase the list size if the original size was // not set and we have hit the end of the current list if (nelems == -1 && i == maxelems) { maxelems += TABLE_INCREMENT; initlist= (int *)realloc(initlist, maxelems *sizeof(int)); } // Leave when we hit the right curly bracket if (Token.token == T_RBRACE) { scan(&Token); break; } // Next token must be a comma, then comma(); } // Zero any unused elements in the initlist. 
// Attach the list to the symbol table entry for (j=i; j < sym->nelems; j++) initlist[j]=0; if (i > nelems) nelems = i; sym->initlist= initlist; } // Set the size of the array and the number of elements sym->nelems= nelems; sym->size= sym->nelems * typesize(type, ctype); // Generate any global space if (class == C_GLOBAL) genglobsym(sym); return (sym); } // Given a pointer to the new function being declared and // a possibly NULL pointer to the function's previous declaration, // parse a list of parameters and cross-check them against the // previous declaration. Return the count of parameters static int param_declaration_list(struct symtable *oldfuncsym, struct symtable *newfuncsym) { int type, paramcnt = 0; struct symtable *ctype; struct symtable *protoptr = NULL; struct ASTnode *unused; // Get the pointer to the first prototype parameter if (oldfuncsym != NULL) protoptr = oldfuncsym->member; // Loop getting any parameters while (Token.token != T_RPAREN) { // Get the type of the next parameter type = declaration_list(&ctype, C_PARAM, T_COMMA, T_RPAREN, &unused); if (type == -1) fatal("Bad type in parameter list"); // Ensure the type of this parameter matches the prototype if (protoptr != NULL) { if (type != protoptr->type) fatald("Type doesn't match prototype for parameter", paramcnt + 1); protoptr = protoptr->next; } paramcnt++; // Stop when we hit the right parenthesis if (Token.token == T_RPAREN) break; // We need a comma as separator comma(); } if (oldfuncsym != NULL && paramcnt != oldfuncsym->nelems) fatals("Parameter count mismatch for function", oldfuncsym->name); // Return the count of parameters return (paramcnt); } // // function_declaration: type identifier '(' parameter_list ')' ; // | type identifier '(' parameter_list ')' compound_statement ; // // Parse the declaration of function. 
static struct symtable *function_declaration(char *funcname, int type, struct symtable *ctype, int class) { struct ASTnode *tree, *finalstmt; struct symtable *oldfuncsym, *newfuncsym = NULL; int endlabel, paramcnt; // Text has the identifier's name. If this exists and is a // function, get the id. Otherwise, set oldfuncsym to NULL. if ((oldfuncsym = findsymbol(funcname)) != NULL) if (oldfuncsym->stype != S_FUNCTION) oldfuncsym = NULL; // If this is a new function declaration, get a // label-id for the end label, and add the function // to the symbol table, if (oldfuncsym == NULL) { endlabel = genlabel(); // Assumtion: functions only return scalar types, so NULL below newfuncsym = addglob(funcname, type, NULL, S_FUNCTION, C_GLOBAL, 0, endlabel); } // Scan in the '(', any parameters and the ')'. // Pass in any existing function prototype pointer lparen(); paramcnt = param_declaration_list(oldfuncsym, newfuncsym); rparen(); // If this is a new function declaration, update the // function symbol entry with the number of parameters. // Also copy the parameter list into the function's node. if (newfuncsym) { newfuncsym->nelems = paramcnt; newfuncsym->member = Parmhead; oldfuncsym = newfuncsym; } // Clear out the parameter list Parmhead = Parmtail = NULL; // Declaration ends in a semicolon, only a prototype. if (Token.token == T_SEMI) return (oldfuncsym); // This is not just a prototype. // Set the Functionid global to the function's symbol pointer Functionid = oldfuncsym; // Get the AST tree for the compound statement and mark // that we have parsed no loops or switches yet Looplevel = 0; Switchlevel = 0; lbrace(); tree = compound_statement(0); rbrace(); // If the function type isn't P_VOID .. if (type != P_VOID) { // Error if no statements in the function if (tree == NULL) fatal("No statements in function with non-void type"); // Check that the last AST operation in the // compound statement was a return statement finalstmt = (tree->op == A_GLUE) ? 
tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); } // Build the A_FUNCTION node which has the function's symbol pointer // and the compound statement sub-tree tree = mkastunary(A_FUNCTION, type, tree, oldfuncsym, endlabel); // Do optimisations on the AST tree tree= optimise(tree); // Dump the AST tree if requested if (O_dumpAST) { dumpAST(tree, NOLABEL, 0); fprintf(stdout, "\n\n"); } // Generate the assembly code for it genAST(tree, NOLABEL, NOLABEL, NOLABEL, 0); // Now free the symbols associated // with this function freeloclsyms(); return (oldfuncsym); } // Parse composite type declarations: structs or unions. // Either find an existing struct/union declaration, or build // a struct/union symbol table entry and return its pointer. static struct symtable *composite_declaration(int type) { struct symtable *ctype = NULL; struct symtable *m; struct ASTnode *unused; int offset; int t; // Skip the struct/union keyword scan(&Token); // See if there is a following struct/union name if (Token.token == T_IDENT) { // Find any matching composite type if (type == P_STRUCT) ctype = findstruct(Text); else ctype = findunion(Text); scan(&Token); } // If the next token isn't an LBRACE , this is // the usage of an existing struct/union type. // Return the pointer to the type. if (Token.token != T_LBRACE) { if (ctype == NULL) fatals("unknown struct/union type", Text); return (ctype); } // Ensure this struct/union type hasn't been // previously defined if (ctype) fatals("previously defined struct/union", Text); // Build the composite type and skip the left brace if (type == P_STRUCT) ctype = addstruct(Text); else ctype = addunion(Text); scan(&Token); // Scan in the list of members while (1) { // Get the next member. 
m is used as a dummy t= declaration_list(&m, C_MEMBER, T_SEMI, T_RBRACE, &unused); if (t== -1) fatal("Bad type in member list"); if (Token.token == T_SEMI) scan(&Token); if (Token.token == T_RBRACE) break; } // Attach to the struct type's node rbrace(); if (Membhead==NULL) fatals("No members in struct", ctype->name); ctype->member = Membhead; Membhead = Membtail = NULL; // Set the offset of the initial member // and find the first free byte after it m = ctype->member; m->st_posn = 0; offset = typesize(m->type, m->ctype); // Set the position of each successive member in the composite type // Unions are easy. For structs, align the member and find the next free byte for (m = m->next; m != NULL; m = m->next) { // Set the offset for this member if (type == P_STRUCT) m->st_posn = genalign(m->type, offset, 1); else m->st_posn = 0; // Get the offset of the next free byte after this member offset += typesize(m->type, m->ctype); } // Set the overall size of the composite type ctype->size = offset; return (ctype); } // Parse an enum declaration static void enum_declaration(void) { struct symtable *etype = NULL; char *name= NULL; int intval = 0; // Skip the enum keyword. scan(&Token); // If there's a following enum type name, get a // pointer to any existing enum type node. if (Token.token == T_IDENT) { etype = findenumtype(Text); name = strdup(Text); // As it gets tromped soon scan(&Token); } // If the next token isn't a LBRACE, check // that we have an enum type name, then return if (Token.token != T_LBRACE) { if (etype == NULL) fatals("undeclared enum type:", name); return; } // We do have an LBRACE. Skip it scan(&Token); // If we have an enum type name, ensure that it // hasn't been declared before. 
if (etype != NULL) fatals("enum type redeclared:", etype->name); else // Build an enum type node for this identifier etype = addenum(name, C_ENUMTYPE, 0); // Loop to get all the enum values while (1) { // Ensure we have an identifier // Copy it in case there's an int literal coming up ident(); name = strdup(Text); // Ensure this enum value hasn't been declared before etype = findenumval(name); if (etype != NULL) fatals("enum value redeclared:", Text); // If the next token is an '=', skip it and // get the following int literal if (Token.token == T_ASSIGN) { scan(&Token); if (Token.token != T_INTLIT) fatal("Expected int literal after '='"); intval = Token.intvalue; scan(&Token); } // Build an enum value node for this identifier. // Increment the value for the next enum identifier. etype = addenum(name, C_ENUMVAL, intval++); // Bail out on a right curly bracket, else get a comma if (Token.token == T_RBRACE) break; comma(); } scan(&Token); // Skip over the right curly bracket } // Parse a typedef declaration and return the type // and ctype that it represents static int typedef_declaration(struct symtable **ctype) { int type, class = 0; // Skip the typedef keyword. scan(&Token); // Get the actual type following the keyword type = parse_type(ctype, &class); if (class != 0) fatal("Can't have extern in a typedef declaration"); // See if the typedef identifier already exists if (findtypedef(Text) != NULL) fatals("redefinition of typedef", Text); // Get any following '*' tokens type = parse_stars(type); // It doesn't exist so add it to the typedef list addtypedef(Text, type, *ctype); scan(&Token); return (type); } // Given a typedef name, return the type it represents static int type_of_typedef(char *name, struct symtable **ctype) { struct symtable *t; // Look up the typedef in the list t = findtypedef(name); if (t == NULL) fatals("unknown type", name); scan(&Token); *ctype = t->ctype; return (t->type); } // Parse the declaration of a variable or function. 
// The type and any following '*'s have been scanned, and we // have the identifier in the Token variable. // The class argument is the variable's class. // Return a pointer to the symbol's entry in the symbol table static struct symtable *symbol_declaration(int type, struct symtable *ctype, int class, struct ASTnode **tree) { struct symtable *sym = NULL; char *varname = strdup(Text); // Ensure that we have an identifier. // We copied it above so we can scan more tokens in, e.g. // an assignment expression for a local variable. ident(); // Deal with function declarations if (Token.token == T_LPAREN) { return (function_declaration(varname, type, ctype, class)); } // See if this array or scalar variable has already been declared switch (class) { case C_EXTERN: case C_GLOBAL: if (findglob(varname) != NULL) fatals("Duplicate global variable declaration", varname); case C_LOCAL: case C_PARAM: if (findlocl(varname) != NULL) fatals("Duplicate local variable declaration", varname); case C_MEMBER: if (findmember(varname) != NULL) fatals("Duplicate struct/union member declaration", varname); } // Add the array or scalar variable to the symbol table if (Token.token == T_LBRACKET) sym = array_declaration(varname, type, ctype, class); else sym = scalar_declaration(varname, type, ctype, class, tree); return (sym); } // Parse a list of symbols where there is an initial type. // Return the type of the symbols. et1 and et2 are end tokens. int declaration_list(struct symtable **ctype, int class, int et1, int et2, struct ASTnode **gluetree) { int inittype, type; struct symtable *sym; struct ASTnode *tree; *gluetree= NULL; // Get the initial type. 
If -1, it was // a composite type definition, return this if ((inittype = parse_type(ctype, &class)) == -1) return (inittype); // Now parse the list of symbols while (1) { // See if this symbol is a pointer type = parse_stars(inittype); // Parse this symbol sym = symbol_declaration(type, *ctype, class, &tree); // We parsed a function, there is no list so leave if (sym->stype == S_FUNCTION) { if (class != C_GLOBAL) fatal("Function definition not at global level"); return (type); } // Glue any AST tree from a local declaration // to build a sequence of assignments to perform if (*gluetree== NULL) *gluetree= tree; else *gluetree = mkastnode(A_GLUE, P_NONE, *gluetree, NULL, tree, NULL, 0); // We are at the end of the list, leave if (Token.token == et1 || Token.token == et2) return (type); // Otherwise, we need a comma as separator comma(); } } // Parse one or more global declarations, either // variables, functions or structs void global_declarations(void) { struct symtable *ctype; struct ASTnode *unused; while (Token.token != T_EOF) { declaration_list(&ctype, C_GLOBAL, T_SEMI, T_EOF, &unused); // Skip any semicolons and right curly brackets if (Token.token == T_SEMI) scan(&Token); } } ================================================ FILE: 44_Fold_Optimisation/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c void reject_token(struct token *t); int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue); struct ASTnode *mkastleaf(int op, int type, struct symtable *sym, int intvalue); struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, struct symtable *sym, int intvalue); void dumpAST(struct ASTnode *n, int label, int parentASTop); // gen.c int genlabel(void); int genAST(struct ASTnode *n, int iflabel, int looptoplabel, int 
loopendlabel, int parentASTop); void genpreamble(); void genpostamble(); void genfreeregs(); void genglobsym(struct symtable *node); int genglobstr(char *strvalue); int genprimsize(int type); int genalign(int type, int offset, int direction); void genreturn(int reg, int id); // cg.c int cgprimsize(int type); int cgalign(int type, int offset, int direction); void cgtextseg(); void cgdataseg(); void freeall_registers(void); void cgpreamble(); void cgpostamble(); void cgfuncpreamble(struct symtable *sym); void cgfuncpostamble(struct symtable *sym); int cgloadint(int value, int type); int cgloadglob(struct symtable *sym, int op); int cgloadlocal(struct symtable *sym, int op); int cgloadglobstr(int label); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdiv(int r1, int r2); int cgshlconst(int r, int val); int cgcall(struct symtable *sym, int numargs); void cgcopyarg(int r, int argposn); int cgstorglob(int r, struct symtable *sym); int cgstorlocal(int r, struct symtable *sym); void cgglobsym(struct symtable *node); void cgglobstr(int l, char *strvalue); int cgcompare_and_set(int ASTop, int r1, int r2); int cgcompare_and_jump(int ASTop, int r1, int r2, int label); void cglabel(int l); void cgjump(int l); int cgwiden(int r, int oldtype, int newtype); void cgreturn(int reg, struct symtable *sym); int cgaddress(struct symtable *sym); int cgderef(int r, int type); int cgstorderef(int r1, int r2, int type); int cgnegate(int r); int cginvert(int r); int cglognot(int r); int cgboolean(int r, int op, int label); int cgand(int r1, int r2); int cgor(int r1, int r2); int cgxor(int r1, int r2); int cgshl(int r1, int r2); int cgshr(int r1, int r2); void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel); // expr.c struct ASTnode *expression_list(int endtoken); struct ASTnode *binexpr(int ptp); // stmt.c struct ASTnode *compound_statement(int inswitch); // misc.c void match(int t, char *what); void 
semi(void); void lbrace(void); void rbrace(void); void lparen(void); void rparen(void); void ident(void); void comma(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node); struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn); struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn); struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int nelems); struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype); struct symtable *addstruct(char *name); struct symtable *addunion(char *name); struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int nelems); struct symtable *addenum(char *name, int class, int value); struct symtable *addtypedef(char *name, int type, struct symtable *ctype); struct symtable *findglob(char *s); struct symtable *findlocl(char *s); struct symtable *findsymbol(char *s); struct symtable *findmember(char *s); struct symtable *findstruct(char *s); struct symtable *findunion(char *s); struct symtable *findenumtype(char *s); struct symtable *findenumval(char *s); struct symtable *findtypedef(char *s); void clear_symtable(void); void freeloclsyms(void); // decl.c int parse_type(struct symtable **ctype, int *class); int parse_stars(int type); int parse_cast(void); int declaration_list(struct symtable **ctype, int class, int et1, int et2, struct ASTnode **gluetree); void global_declarations(void); // types.c int inttype(int type); int ptrtype(int type); int pointer_to(int type); int value_at(int type); int typesize(int type, struct symtable *ctype); struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op); // opt.c struct ASTnode *optimise(struct ASTnode *n); 
================================================ FILE: 44_Fold_Optimisation/defs.h ================================================ #include #include #include #include // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 enum { TEXTLEN = 512 // Length of identifiers in input }; // Commands and default filenames #define AOUT "a.out" #ifdef __NASM__ #define ASCMD "nasm -f elf64 -o " #define LDCMD "cc -no-pie -fno-plt -Wall -o " #else #define ASCMD "as -o " #define LDCMD "cc -o " #endif #define CPPCMD "cpp -nostdinc -isystem " // Token types enum { T_EOF, // Binary operators T_ASSIGN, T_ASPLUS, T_ASMINUS, T_ASSTAR, T_ASSLASH, T_LOGOR, T_LOGAND, T_OR, T_XOR, T_AMPER, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, T_LSHIFT, T_RSHIFT, T_PLUS, T_MINUS, T_STAR, T_SLASH, // Other operators T_INC, T_DEC, T_INVERT, T_LOGNOT, // Type keywords T_VOID, T_CHAR, T_INT, T_LONG, // Other keywords T_IF, T_ELSE, T_WHILE, T_FOR, T_RETURN, T_STRUCT, T_UNION, T_ENUM, T_TYPEDEF, T_EXTERN, T_BREAK, T_CONTINUE, T_SWITCH, T_CASE, T_DEFAULT, // Structural tokens T_INTLIT, T_STRLIT, T_SEMI, T_IDENT, T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, T_LBRACKET, T_RBRACKET, T_COMMA, T_DOT, T_ARROW, T_COLON }; // Token structure struct token { int token; // Token type, from the enum list above char *tokstr; // String version of the token int intvalue; // For T_INTLIT, the integer value }; // AST node types. The first few line up // with the related tokens enum { A_ASSIGN = 1, A_ASPLUS, A_ASMINUS, A_ASSTAR, A_ASSLASH, A_LOGOR, A_LOGAND, A_OR, A_XOR, A_AND, A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_LSHIFT, A_RSHIFT, A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_INTLIT, A_STRLIT, A_IDENT, A_GLUE, A_IF, A_WHILE, A_FUNCTION, A_WIDEN, A_RETURN, A_FUNCCALL, A_DEREF, A_ADDR, A_SCALE, A_PREINC, A_PREDEC, A_POSTINC, A_POSTDEC, A_NEGATE, A_INVERT, A_LOGNOT, A_TOBOOL, A_BREAK, A_CONTINUE, A_SWITCH, A_CASE, A_DEFAULT, A_CAST }; // Primitive types. 
The bottom 4 bits is an integer // value that represents the level of indirection, // e.g. 0= no pointer, 1= pointer, 2= pointer pointer etc. enum { P_NONE, P_VOID = 16, P_CHAR = 32, P_INT = 48, P_LONG = 64, P_STRUCT=80, P_UNION=96 }; // Structural types enum { S_VARIABLE, S_FUNCTION, S_ARRAY }; // Storage classes enum { C_GLOBAL = 1, // Globally visible symbol C_LOCAL, // Locally visible symbol C_PARAM, // Locally visible function parameter C_EXTERN, // External globally visible symbol C_STRUCT, // A struct C_UNION, // A union C_MEMBER, // Member of a struct or union C_ENUMTYPE, // A named enumeration type C_ENUMVAL, // A named enumeration value C_TYPEDEF // A named typedef }; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol struct symtable *ctype; // If struct/union, ptr to that type int stype; // Structural type for the symbol int class; // Storage class for the symbol int size; // Total size in bytes of this symbol int nelems; // Functions: # params. 
Arrays: # elements #define st_endlabel st_posn // For functions, the end label int st_posn; // For locals, the negative offset // from the stack base pointer int *initlist; // List of initial values struct symtable *next; // Next symbol in one list struct symtable *member; // First member of a function, struct, }; // union or enum // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates int rvalue; // True if the node is an rvalue struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; struct symtable *sym; // For many AST nodes, the pointer to // the symbol in the symbol table #define a_intvalue a_size // For A_INTLIT, the integer value int a_size; // For A_SCALE, the size to scale by }; enum { NOREG = -1, // Use NOREG when the AST generation // functions have no register to return NOLABEL = 0 // Use NOLABEL when we have no label to // pass to genAST() }; ================================================ FILE: 44_Fold_Optimisation/expr.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of expressions // Copyright (c) 2019 Warren Toomey, GPL3 // expression_list: // | expression // | expression ',' expression_list // ; // Parse a list of zero or more comma-separated expressions and // return an AST composed of A_GLUE nodes with the left-hand child // being the sub-tree of previous expressions (or NULL) and the right-hand // child being the next expression. Each A_GLUE node will have size field // set to the number of expressions in the tree at this point. 
If no // expressions are parsed, NULL is returned struct ASTnode *expression_list(int endtoken) { struct ASTnode *tree = NULL; struct ASTnode *child = NULL; int exprcount = 0; // Loop until the end token while (Token.token != endtoken) { // Parse the next expression and increment the expression count child = binexpr(0); exprcount++; // Build an A_GLUE AST node with the previous tree as the left child // and the new expression as the right child. Store the expression count. tree = mkastnode(A_GLUE, P_NONE, tree, NULL, child, NULL, exprcount); // Stop when we reach the end token if (Token.token == endtoken) break; // Must have a ',' at this point match(T_COMMA, ","); } // Return the tree of expressions return (tree); } // Parse a function call and return its AST static struct ASTnode *funccall(void) { struct ASTnode *tree; struct symtable *funcptr; // Check that the identifier has been defined as a function, // then make a leaf node for it. if ((funcptr = findsymbol(Text)) == NULL || funcptr->stype != S_FUNCTION) { fatals("Undeclared function", Text); } // Get the '(' lparen(); // Parse the argument expression list tree = expression_list(T_RPAREN); // XXX Check type of each argument against the function's prototype // Build the function call AST node. Store the // function's return type as this node's type. 
// Also record the function's symbol-id tree = mkastunary(A_FUNCCALL, funcptr->type, tree, funcptr, 0); // Get the ')' rparen(); return (tree); } // Parse the index into an array and return an AST tree for it static struct ASTnode *array_access(void) { struct ASTnode *left, *right; struct symtable *aryptr; // Check that the identifier has been defined as an array // then make a leaf node for it that points at the base if ((aryptr = findsymbol(Text)) == NULL || aryptr->stype != S_ARRAY) { fatals("Undeclared array", Text); } left = mkastleaf(A_ADDR, aryptr->type, aryptr, 0); // Get the '[' scan(&Token); // Parse the following expression right = binexpr(0); // Get the ']' match(T_RBRACKET, "]"); // Ensure that this is of int type if (!inttype(right->type)) fatal("Array index is not of integer type"); // Scale the index by the size of the element's type right = modify_type(right, left->type, A_ADD); // Return an AST tree where the array's base has the offset // added to it, and dereference the element. Still an lvalue // at this point. left = mkastnode(A_ADD, aryptr->type, left, NULL, right, NULL, 0); left = mkastunary(A_DEREF, value_at(left->type), left, NULL, 0); return (left); } // Parse the member reference of a struct or union // and return an AST tree for it. If withpointer is true, // the access is through a pointer to the member. static struct ASTnode *member_access(int withpointer) { struct ASTnode *left, *right; struct symtable *compvar; struct symtable *typeptr; struct symtable *m; // Check that the identifier has been declared as a // struct/union or a struct/union pointer if ((compvar = findsymbol(Text)) == NULL) fatals("Undeclared variable", Text); if (withpointer && compvar->type != pointer_to(P_STRUCT) && compvar->type != pointer_to(P_UNION)) fatals("Undeclared variable", Text); if (!withpointer && compvar->type != P_STRUCT && compvar->type != P_UNION) fatals("Undeclared variable", Text); // If a pointer to a struct/union, get the pointer's value. 
// Otherwise, make a leaf node that points at the base // Either way, it's an rvalue if (withpointer) { left = mkastleaf(A_IDENT, pointer_to(compvar->type), compvar, 0); } else left = mkastleaf(A_ADDR, compvar->type, compvar, 0); left->rvalue = 1; // Get the details of the composite type typeptr = compvar->ctype; // Skip the '.' or '->' token and get the member's name scan(&Token); ident(); // Find the matching member's name in the type // Die if we can't find it for (m = typeptr->member; m != NULL; m = m->next) if (!strcmp(m->name, Text)) break; if (m == NULL) fatals("No member found in struct/union: ", Text); // Build an A_INTLIT node with the offset right = mkastleaf(A_INTLIT, P_INT, NULL, m->st_posn); // Add the member's offset to the base of the struct/union // and dereference it. Still an lvalue at this point left = mkastnode(A_ADD, pointer_to(m->type), left, NULL, right, NULL, 0); left = mkastunary(A_DEREF, m->type, left, NULL, 0); return (left); } // Parse a postfix expression and return // an AST node representing it. The // identifier is already in Text. static struct ASTnode *postfix(void) { struct ASTnode *n; struct symtable *varptr; struct symtable *enumptr; // If the identifier matches an enum value, // return an A_INTLIT node if ((enumptr = findenumval(Text)) != NULL) { scan(&Token); return (mkastleaf(A_INTLIT, P_INT, NULL, enumptr->st_posn)); } // Scan in the next token to see if we have a postfix expression scan(&Token); // Function call if (Token.token == T_LPAREN) return (funccall()); // An array reference if (Token.token == T_LBRACKET) return (array_access()); // Access into a struct or union if (Token.token == T_DOT) return (member_access(0)); if (Token.token == T_ARROW) return (member_access(1)); // A variable. Check that the variable exists. 
if ((varptr = findsymbol(Text)) == NULL || varptr->stype != S_VARIABLE) fatals("Unknown variable", Text); switch (Token.token) { // Post-increment: skip over the token case T_INC: scan(&Token); n = mkastleaf(A_POSTINC, varptr->type, varptr, 0); break; // Post-decrement: skip over the token case T_DEC: scan(&Token); n = mkastleaf(A_POSTDEC, varptr->type, varptr, 0); break; // Just a variable reference default: n = mkastleaf(A_IDENT, varptr->type, varptr, 0); } return (n); } // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; int id; int type=0; switch (Token.token) { case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. // Make it a P_CHAR if it's within the P_CHAR range if (Token.intvalue >= 0 && Token.intvalue < 256) n = mkastleaf(A_INTLIT, P_CHAR, NULL, Token.intvalue); else n = mkastleaf(A_INTLIT, P_INT, NULL, Token.intvalue); break; case T_STRLIT: // For a STRLIT token, generate the assembly for it. // Then make a leaf AST node for it. id is the string's label. id = genglobstr(Text); n = mkastleaf(A_STRLIT, pointer_to(P_CHAR), NULL, id); break; case T_IDENT: return (postfix()); case T_LPAREN: // Beginning of a parenthesised expression, skip the '('. scan(&Token); // If the token after is a type identifier, this is a cast expression switch (Token.token) { case T_IDENT: // We have to see if the identifier matches a typedef. // If not, treat it as an expression. if (findtypedef(Text) == NULL) { n = binexpr(0); break; } case T_VOID: case T_CHAR: case T_INT: case T_LONG: case T_STRUCT: case T_UNION: case T_ENUM: // Get the type inside the parentheses type= parse_cast(); // Skip the closing ')' and then parse the following expression rparen(); default: n = binexpr(0); // Scan in the expression } // We now have at least an expression in n, and possibly a non-zero type in type // if there was a cast. Skip the closing ')' if there was no cast. 
if (type == 0) rparen(); else // Otherwise, make a unary AST node for the cast n= mkastunary(A_CAST, type, n, NULL, 0); return (n); default: fatals("Expecting a primary expression, got token", Token.tokstr); } // Scan in the next token and return the leaf node scan(&Token); return (n); } // Convert a binary operator token into a binary AST operation. // We rely on a 1:1 mapping from token to AST operation static int binastop(int tokentype) { if (tokentype > T_EOF && tokentype <= T_SLASH) return (tokentype); fatald("Syntax error, token", tokentype); return (0); // Keep -Wall happy } // Return true if a token is right-associative, // false otherwise. static int rightassoc(int tokentype) { if (tokentype >= T_ASSIGN && tokentype <= T_ASSLASH) return (1); return (0); } // Operator precedence for each token. Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 10, // T_EOF, T_ASSIGN, T_ASPLUS, 10, 10, 10, // T_ASMINUS, T_ASSTAR, T_ASSLASH, 20, 30, // T_LOGOR, T_LOGAND 40, 50, 60, // T_OR, T_XOR, T_AMPER 70, 70, // T_EQ, T_NE 80, 80, 80, 80, // T_LT, T_GT, T_LE, T_GE 90, 90, // T_LSHIFT, T_RSHIFT 100, 100, // T_PLUS, T_MINUS 110, 110 // T_STAR, T_SLASH }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec; if (tokentype > T_SLASH) fatald("Token with no precedence in op_precedence:", tokentype); prec = OpPrec[tokentype]; if (prec == 0) fatald("Syntax error, token", tokentype); return (prec); } // prefix_expression: primary // | '*' prefix_expression // | '&' prefix_expression // | '-' prefix_expression // | '++' prefix_expression // | '--' prefix_expression // ; // Parse a prefix expression and return // a sub-tree representing it. 
struct ASTnode *prefix(void) { struct ASTnode *tree; switch (Token.token) { case T_AMPER: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Ensure that it's an identifier if (tree->op != A_IDENT) fatal("& operator must be followed by an identifier"); // Now change the operator to A_ADDR and the type to // a pointer to the original type tree->op = A_ADDR; tree->type = pointer_to(tree->type); break; case T_STAR: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's either another deref or an // identifier if (tree->op != A_IDENT && tree->op != A_DEREF) fatal("* operator must be followed by an identifier or *"); // Prepend an A_DEREF operation to the tree tree = mkastunary(A_DEREF, value_at(tree->type), tree, NULL, 0); break; case T_MINUS: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_NEGATE operation to the tree and // make the child an rvalue. Because chars are unsigned, // also widen this to int so that it's signed tree->rvalue = 1; tree = modify_type(tree, P_INT, 0); tree = mkastunary(A_NEGATE, tree->type, tree, NULL, 0); break; case T_INVERT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_INVERT operation to the tree and // make the child an rvalue. tree->rvalue = 1; tree = mkastunary(A_INVERT, tree->type, tree, NULL, 0); break; case T_LOGNOT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_LOGNOT operation to the tree and // make the child an rvalue. 
tree->rvalue = 1; tree = mkastunary(A_LOGNOT, tree->type, tree, NULL, 0); break; case T_INC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("++ operator must be followed by an identifier"); // Prepend an A_PREINC operation to the tree tree = mkastunary(A_PREINC, tree->type, tree, NULL, 0); break; case T_DEC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("-- operator must be followed by an identifier"); // Prepend an A_PREDEC operation to the tree tree = mkastunary(A_PREDEC, tree->type, tree, NULL, 0); break; default: tree = primary(); } return (tree); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; struct ASTnode *ltemp, *rtemp; int ASTop; int tokentype; // Get the tree on the left. // Fetch the next token at the same time. 
left = prefix(); // If we hit one of several terminating tokens, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON) { left->rvalue = 1; return (left); } // While the precedence of this token is more than that of the // previous token precedence, or it's right associative and // equal to the previous token's precedence while ((op_precedence(tokentype) > ptp) || (rightassoc(tokentype) && op_precedence(tokentype) == ptp)) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Determine the operation to be performed on the sub-trees ASTop = binastop(tokentype); if (ASTop == A_ASSIGN) { // Assignment // Make the right tree into an rvalue right->rvalue = 1; // Ensure the right's type matches the left right = modify_type(right, left->type, 0); if (right == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree. However, switch // left and right around, so that the right expression's // code will be generated before the left expression ltemp = left; left = right; right = ltemp; } else { // We are not doing an assignment, so both trees should be rvalues // Convert both trees into rvalue if they are lvalue trees left->rvalue = 1; right->rvalue = 1; // Ensure the two types are compatible by trying // to modify each tree to match the other's type. ltemp = modify_type(left, right->type, ASTop); rtemp = modify_type(right, left->type, ASTop); if (ltemp == NULL && rtemp == NULL) fatal("Incompatible types in binary expression"); if (ltemp != NULL) left = ltemp; if (rtemp != NULL) right = rtemp; } // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. left = mkastnode(binastop(tokentype), left->type, left, NULL, right, NULL, 0); // Update the details of the current token. 
// If we hit a terminating token, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON) { left->rvalue = 1; return (left); } } // Return the tree we have when the precedence // is the same or lower left->rvalue = 1; return (left); } ================================================ FILE: 44_Fold_Optimisation/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number int genlabel(void) { static int id = 1; return (id++); } // Generate the code for an IF statement // and an optional ELSE clause static int genIF(struct ASTnode *n, int looptoplabel, int loopendlabel) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. // When there is no ELSE clause, Lfalse _is_ // the ending label! Lfalse = genlabel(); if (n->right) Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. 
genAST(n->left, Lfalse, NOLABEL, NOLABEL, n->op); genfreeregs(); // Generate the true compound statement genAST(n->mid, NOLABEL, looptoplabel, loopendlabel, n->op); genfreeregs(); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); genfreeregs(); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = genlabel(); Lend = genlabel(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. genAST(n->left, Lend, Lstart, Lend, n->op); genfreeregs(); // Generate the compound statement for the body genAST(n->right, NOLABEL, Lstart, Lend, n->op); genfreeregs(); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Generate the code for a SWITCH statement static int genSWITCH(struct ASTnode *n) { int *caseval, *caselabel; int Ljumptop, Lend; int i, reg, defaultlabel = 0, casecount = 0; struct ASTnode *c; // Create arrays for the case values and associated labels. // Ensure that we have at least one position in each array. caseval = (int *) malloc((n->a_intvalue + 1) * sizeof(int)); caselabel = (int *) malloc((n->a_intvalue + 1) * sizeof(int)); // Generate labels for the top of the jump table, and the // end of the switch statement. Set a default label for // the end of the switch, in case we don't have a default. 
Ljumptop = genlabel(); Lend = genlabel(); defaultlabel = Lend; // Output the code to calculate the switch condition reg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, 0); cgjump(Ljumptop); genfreeregs(); // Walk the right-child linked list to // generate the code for each case for (i = 0, c = n->right; c != NULL; i++, c = c->right) { // Get a label for this case. Store it // and the case value in the arrays. // Record if it is the default case. caselabel[i] = genlabel(); caseval[i] = c->a_intvalue; cglabel(caselabel[i]); if (c->op == A_DEFAULT) defaultlabel = caselabel[i]; else casecount++; // Generate the case code. Pass in the end label for the breaks genAST(c->left, NOLABEL, NOLABEL, Lend, 0); genfreeregs(); } // Ensure the last case jumps past the switch table cgjump(Lend); // Now output the switch table and the end label. cgswitch(reg, casecount, Ljumptop, caselabel, caseval, defaultlabel); cglabel(Lend); return (NOREG); } // Generate the code to copy the arguments of a // function call to its parameters, then call the // function itself. Return the register that holds // the function's return value. static int gen_funccall(struct ASTnode *n) { struct ASTnode *gluetree = n->left; int reg; int numargs = 0; // If there is a list of arguments, walk this list // from the last argument (right-hand child) to the // first while (gluetree) { // Calculate the expression's value reg = genAST(gluetree->right, NOLABEL, NOLABEL, NOLABEL, gluetree->op); // Copy this into the n'th function parameter: size is 1, 2, 3, ... cgcopyarg(reg, gluetree->a_size); // Keep the first (highest) number of arguments if (numargs == 0) numargs = gluetree->a_size; genfreeregs(); gluetree = gluetree->left; } // Call the function, clean up the stack (based on numargs), // and return its result return (cgcall(n->sym, numargs)); } // Given an AST, an optional label, and the AST op // of the parent, generate assembly code recursively. // Return the register id with the tree's final value. 
int genAST(struct ASTnode *n, int iflabel, int looptoplabel, int loopendlabel, int parentASTop) { int leftreg, rightreg; // We have some specific AST node handling at the top // so that we don't evaluate the child sub-trees immediately switch (n->op) { case A_IF: return (genIF(n, looptoplabel, loopendlabel)); case A_WHILE: return (genWHILE(n)); case A_SWITCH: return (genSWITCH(n)); case A_FUNCCALL: return (gen_funccall(n)); case A_GLUE: // Do each child statement, and free the // registers after each child if (n->left != NULL) genAST(n->left, iflabel, looptoplabel, loopendlabel, n->op); genfreeregs(); if (n->right != NULL) genAST(n->right, iflabel, looptoplabel, loopendlabel, n->op); genfreeregs(); return (NOREG); case A_FUNCTION: // Generate the function's preamble before the code // in the child sub-tree cgfuncpreamble(n->sym); genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op); cgfuncpostamble(n->sym); return (NOREG); } // General AST node handling below // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op); if (n->right) rightreg = genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdiv(leftreg, rightreg)); case A_AND: return (cgand(leftreg, rightreg)); case A_OR: return (cgor(leftreg, rightreg)); case A_XOR: return (cgxor(leftreg, rightreg)); case A_LSHIFT: return (cgshl(leftreg, rightreg)); case A_RSHIFT: return (cgshr(leftreg, rightreg)); case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, compare registers // and set one to 1 or 0 based on the comparison. 
if (parentASTop == A_IF || parentASTop == A_WHILE) return (cgcompare_and_jump(n->op, leftreg, rightreg, iflabel)); else return (cgcompare_and_set(n->op, leftreg, rightreg)); case A_INTLIT: return (cgloadint(n->a_intvalue, n->type)); case A_STRLIT: return (cgloadglobstr(n->a_intvalue)); case A_IDENT: // Load our value if we are an rvalue // or we are being dereferenced if (n->rvalue || parentASTop == A_DEREF) { if (n->sym->class == C_GLOBAL) { return (cgloadglob(n->sym, n->op)); } else { return (cgloadlocal(n->sym, n->op)); } } else return (NOREG); case A_ASPLUS: case A_ASMINUS: case A_ASSTAR: case A_ASSLASH: case A_ASSIGN: // For the '+=' and friends operators, generate suitable code // and get the register with the result. Then take the left child, // make it the right child so that we can fall into the assignment code. switch (n->op) { case A_ASPLUS: leftreg= cgadd(leftreg, rightreg); n->right= n->left; break; case A_ASMINUS: leftreg= cgsub(leftreg, rightreg); n->right= n->left; break; case A_ASSTAR: leftreg= cgmul(leftreg, rightreg); n->right= n->left; break; case A_ASSLASH: leftreg= cgdiv(leftreg, rightreg); n->right= n->left; break; } // Now into the assignment code // Are we assigning to an identifier or through a pointer? 
switch (n->right->op) { case A_IDENT: if (n->right->sym->class == C_GLOBAL) return (cgstorglob(leftreg, n->right->sym)); else return (cgstorlocal(leftreg, n->right->sym)); case A_DEREF: return (cgstorderef(leftreg, rightreg, n->right->type)); default: fatald("Can't A_ASSIGN in genAST(), op", n->op); } case A_WIDEN: // Widen the child's type to the parent's type return (cgwiden(leftreg, n->left->type, n->type)); case A_RETURN: cgreturn(leftreg, Functionid); return (NOREG); case A_ADDR: return (cgaddress(n->sym)); case A_DEREF: // If we are an rvalue, dereference to get the value we point at, // otherwise leave it for A_ASSIGN to store through the pointer if (n->rvalue) return (cgderef(leftreg, n->left->type)); else return (leftreg); case A_SCALE: // Small optimisation: use shift if the // scale value is a known power of two switch (n->a_size) { case 2: return (cgshlconst(leftreg, 1)); case 4: return (cgshlconst(leftreg, 2)); case 8: return (cgshlconst(leftreg, 3)); default: // Load a register with the size and // multiply the leftreg by this size rightreg = cgloadint(n->a_size, P_INT); return (cgmul(leftreg, rightreg)); } case A_POSTINC: case A_POSTDEC: // Load and decrement the variable's value into a register // and post increment/decrement it if (n->sym->class == C_GLOBAL) return (cgloadglob(n->sym, n->op)); else return (cgloadlocal(n->sym, n->op)); case A_PREINC: case A_PREDEC: // Load and decrement the variable's value into a register // and pre increment/decrement it if (n->left->sym->class == C_GLOBAL) return (cgloadglob(n->left->sym, n->op)); else return (cgloadlocal(n->left->sym, n->op)); case A_NEGATE: return (cgnegate(leftreg)); case A_INVERT: return (cginvert(leftreg)); case A_LOGNOT: return (cglognot(leftreg)); case A_TOBOOL: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. 
Otherwise, set the register // to 0 or 1 based on it's zeroeness or non-zeroeness return (cgboolean(leftreg, parentASTop, iflabel)); case A_BREAK: cgjump(loopendlabel); return (NOREG); case A_CONTINUE: cgjump(looptoplabel); return (NOREG); case A_CAST: return (leftreg); // Not much to do default: fatald("Unknown AST operator", n->op); } return (NOREG); // Keep -Wall happy } void genpreamble() { cgpreamble(); } void genpostamble() { cgpostamble(); } void genfreeregs() { freeall_registers(); } void genglobsym(struct symtable *node) { cgglobsym(node); } int genglobstr(char *strvalue) { int l = genlabel(); cgglobstr(l, strvalue); return (l); } int genprimsize(int type) { return (cgprimsize(type)); } int genalign(int type, int offset, int direction) { return (cgalign(type, offset, direction)); } ================================================ FILE: 44_Fold_Optimisation/include/ctype.h ================================================ #ifndef _CTYPE_H_ # define _CTYPE_H_ #endif // _CTYPE_H_ ================================================ FILE: 44_Fold_Optimisation/include/fcntl.h ================================================ #ifndef _FCNTL_H_ # define _FCNTL_H_ #define O_RDONLY 00 #define O_WRONLY 01 #define O_RDWR 02 int open(char *pathname, int flags); #endif // _FCNTL_H_ ================================================ FILE: 44_Fold_Optimisation/include/stddef.h ================================================ #ifndef _STDDEF_H_ # define _STDDEF_H_ #ifndef NULL # define NULL (void *)0 #endif typedef long size_t; #endif //_STDDEF_H_ ================================================ FILE: 44_Fold_Optimisation/include/stdio.h ================================================ #ifndef _STDIO_H_ # define _STDIO_H_ #include #ifndef NULL # define NULL (void *)0 #endif // This FILE definition will do for now typedef char * FILE; FILE *fopen(char *pathname, char *mode); size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream); size_t fwrite(void *ptr, size_t size, 
size_t nmemb, FILE *stream); int fclose(FILE *stream); int printf(char *format); int fprintf(FILE *stream, char *format); #endif // _STDIO_H_ ================================================ FILE: 44_Fold_Optimisation/include/stdlib.h ================================================ #ifndef _STDLIB_H_ # define _STDLIB_H_ #endif // _STDLIB_H_ ================================================ FILE: 44_Fold_Optimisation/include/string.h ================================================ #ifndef _STRING_H_ # define _STRING_H_ #endif // _STRING_H_ ================================================ FILE: 44_Fold_Optimisation/include/unistd.h ================================================ #ifndef _UNISTD_H_ # define _UNISTD_H_ #endif // _UNISTD_H_ ================================================ FILE: 44_Fold_Optimisation/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Given a string with a '.' and at least a 1-character suffix // after the '.', change the suffix to be the given character. // Return the new string or NULL if the original string could // not be modified char *alter_suffix(char *str, char suffix) { char *posn; char *newstr; // Clone the string if ((newstr = strdup(str)) == NULL) return (NULL); // Find the '.' if ((posn = strrchr(newstr, '.')) == NULL) return (NULL); // Ensure there is a suffix posn++; if (*posn == '\0') return (NULL); // Change the suffix and NUL-terminate the string *posn++ = suffix; *posn = '\0'; return (newstr); } // Given an input filename, compile that file // down to assembly code. 
Return the new file's name static char *do_compile(char *filename) { char cmd[TEXTLEN]; // Change the input file's suffix to .s Outfilename = alter_suffix(filename, 's'); if (Outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .c on the end\n", filename); exit(1); } // Generate the pre-processor command snprintf(cmd, TEXTLEN, "%s %s %s", CPPCMD, INCDIR, filename); // Open up the pre-processor pipe if ((Infile = popen(cmd, "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", filename, strerror(errno)); exit(1); } Infilename = filename; // Create the output file if ((Outfile = fopen(Outfilename, "w")) == NULL) { fprintf(stderr, "Unable to create %s: %s\n", Outfilename, strerror(errno)); exit(1); } Line = 1; // Reset the scanner Putback = '\n'; clear_symtable(); // Clear the symbol table if (O_verbose) printf("compiling %s\n", filename); scan(&Token); // Get the first token from the input genpreamble(); // Output the preamble global_declarations(); // Parse the global declarations genpostamble(); // Output the postamble fclose(Outfile); // Close the output file return (Outfilename); } // Given an input filename, assemble that file // down to object code. Return the object filename char *do_assemble(char *filename) { char cmd[TEXTLEN]; int err; char *outfilename = alter_suffix(filename, 'o'); if (outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .s on the end\n", filename); exit(1); } // Build the assembly command and run it snprintf(cmd, TEXTLEN, "%s %s %s", ASCMD, outfilename, filename); if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Assembly of %s failed\n", filename); exit(1); } return (outfilename); } // Given a list of object files and an output filename, // link all of the object filenames together. 
void do_link(char *outfilename, char *objlist[]) { int cnt, size = TEXTLEN; char cmd[TEXTLEN], *cptr; int err; // Start with the linker command and the output file cptr = cmd; cnt = snprintf(cptr, size, "%s %s ", LDCMD, outfilename); cptr += cnt; size -= cnt; // Now append each object file while (*objlist != NULL) { cnt = snprintf(cptr, size, "%s ", *objlist); cptr += cnt; size -= cnt; objlist++; } if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Linking failed\n"); exit(1); } } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s [-vcST] [-o outfile] file [file ...]\n", prog); fprintf(stderr, " -v give verbose output of the compilation stages\n"); fprintf(stderr, " -c generate object files but don't link them\n"); fprintf(stderr, " -S generate assembly files but don't link them\n"); fprintf(stderr, " -T dump the AST trees for each input file\n"); fprintf(stderr, " -o outfile, produce the outfile executable file\n"); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. 
enum { MAXOBJ = 100 }; int main(int argc, char *argv[]) { char *outfilename = AOUT; char *asmfile, *objfile; char *objlist[MAXOBJ]; int i, objcnt = 0; // Initialise our variables O_dumpAST = 0; O_keepasm = 0; O_assemble = 0; O_verbose = 0; O_dolink = 1; // Scan for command-line options for (i = 1; i < argc; i++) { // No leading '-', stop scanning for options if (*argv[i] != '-') break; // For each option in this argument for (int j = 1; (*argv[i] == '-') && argv[i][j]; j++) { switch (argv[i][j]) { case 'o': outfilename = argv[++i]; // Save & skip to next argument break; case 'T': O_dumpAST = 1; break; case 'c': O_assemble = 1; O_keepasm = 0; O_dolink = 0; break; case 'S': O_keepasm = 1; O_assemble = 0; O_dolink = 0; break; case 'v': O_verbose = 1; break; default: usage(argv[0]); } } } // Ensure we have at lease one input file argument if (i >= argc) usage(argv[0]); // Work on each input file in turn while (i < argc) { asmfile = do_compile(argv[i]); // Compile the source file if (O_dolink || O_assemble) { objfile = do_assemble(asmfile); // Assemble it to object forma if (objcnt == (MAXOBJ - 2)) { fprintf(stderr, "Too many object files for the compiler to handle\n"); exit(1); } objlist[objcnt++] = objfile; // Add the object file's name objlist[objcnt] = NULL; // to the list of object files } if (!O_keepasm) // Remove the assembly file if unlink(asmfile); // we don't need to keep it i++; } // Now link all the object files together if (O_dolink) { do_link(outfilename, objlist); // If we don't need to keep the object // files, then remove them if (!O_assemble) { for (i = 0; objlist[i] != NULL; i++) unlink(objlist[i]); } } return (0); } ================================================ FILE: 44_Fold_Optimisation/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" #include // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current token is t, // and fetch the next token. 
Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Match a comma and fetch the next token void comma(void) { match(T_COMMA, "comma"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d of %s\n", s, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d of %s\n", s1, s2, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d of %s\n", s, d, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d of %s\n", s, c, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } ================================================ FILE: 44_Fold_Optimisation/opt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST Tree Optimisation Code // Copyright (c) 2019 Warren Toomey, GPL3 // Fold an AST tree with a binary operator // and two A_INTLIT children. Return either // the original tree or a new leaf node. 
static struct ASTnode *fold2(struct ASTnode *n) { int val, leftval, rightval; // Get the values from each child leftval = n->left->a_intvalue; rightval = n->right->a_intvalue; // Perform some of the binary operations. // For any AST op we can't do, return // the original tree. switch (n->op) { case A_ADD: val = leftval + rightval; break; case A_SUBTRACT: val = leftval - rightval; break; case A_MULTIPLY: val = leftval * rightval; break; case A_DIVIDE: // Don't try to divide by zero. if (rightval == 0) return (n); val = leftval / rightval; break; default: return (n); } // Return a leaf node with the new value return (mkastleaf(A_INTLIT, n->type, NULL, val)); } // Fold an AST tree with a unary operator // and one INTLIT children. Return either // the original tree or a new leaf node. static struct ASTnode *fold1(struct ASTnode *n) { int val; // Get the child value. Do the // operation if recognised. // Return the new leaf node. val = n->left->a_intvalue; switch (n->op) { case A_WIDEN: break; case A_INVERT: val = ~val; break; case A_LOGNOT: val = !val; break; default: return (n); } // Return a leaf node with the new value return (mkastleaf(A_INTLIT, n->type, NULL, val)); } // Attempt to do constant folding on // the AST tree with the root node n static struct ASTnode *fold(struct ASTnode *n) { if (n == NULL) return (NULL); // Fold on the left child, then // do the same on the right child n->left = fold(n->left); n->right = fold(n->right); // If both children are A_INTLITs, do a fold2() if (n->left && n->left->op == A_INTLIT) { if (n->right && n->right->op == A_INTLIT) n = fold2(n); else // If only the left is A_INTLIT, do a fold1() n = fold1(n); } // Return the possibly modified tree return (n); } // Optimise an AST tree by // constant folding in all sub-trees struct ASTnode *optimise(struct ASTnode *n) { n = fold(n); return (n); } ================================================ FILE: 44_Fold_Optimisation/scan.c ================================================ 
#include "defs.h"
#include "data.h"
#include "decl.h"

// Lexical scanning
// Copyright (c) 2019 Warren Toomey, GPL3

// Return the position of character c
// in string s, or -1 if c not found
static int chrpos(char *s, int c) {
  char *p;

  // strchr() would match the string's own terminator,
  // so a NUL character is never reported as found
  if (c == '\0')
    return (-1);
  p = strchr(s, c);
  return (p ? p - s : -1);
}

// Non-zero when the next character read from Infile begins a line:
// true before anything is read, after a newline and after EOF (so
// that the scanner is ready for the next input file).
static int AtLineStart = 1;

// Get the next character from the input file.
// A put-back character is returned first if there is one.
// Pre-processor line markers ('#' at the start of a line) are
// consumed here: they update Line and Infilename and are never
// returned to the caller.
static int next(void) {
  int c, l;

  if (Putback) {		// Use the character put
    c = Putback;		// back if there is one
    Putback = 0;
    return (c);
  }

  c = fgetc(Infile);		// Read from input file

  // Only a '#' that starts a line is a pre-processor statement;
  // a '#' inside a string or character literal is ordinary text
  while (AtLineStart && c == '#') {
    AtLineStart = 0;		// The scan() calls below read mid-line
    scan(&Token);		// Get the line number into l
    if (Token.token != T_INTLIT)
      fatals("Expecting pre-processor line number, got:", Text);
    l = Token.intvalue;

    scan(&Token);		// Get the filename in Text
    if (Token.token != T_STRLIT)
      fatals("Expecting pre-processor file name, got:", Text);

    if (Text[0] != '<') {	// If this is a real filename
      if (strcmp(Text, Infilename))	// and not the one we have now
	Infilename = strdup(Text);	// save it. Then update the line num
      Line = l;
    }

    // Skip to the end of the line, stopping at EOF
    // so that a final unterminated line can't hang us
    while ((c = fgetc(Infile)) != '\n' && c != EOF)
      ;
    c = fgetc(Infile);		// and get the next character
    AtLineStart = 1;		// which begins a new line
  }

  AtLineStart = 0;
  if ('\n' == c) {
    Line++;			// Increment line count
    AtLineStart = 1;
  } else if (EOF == c) {
    AtLineStart = 1;
  }
  return (c);
}

// Put back an unwanted character.
// Only one character can be held at a time.
static void putback(int c) {
  Putback = c;
}

// Skip past input that we don't need to deal with,
// i.e. whitespace, newlines. Return the first
// character we do need to deal with.
static int skip(void) { int c; c = next(); while (' ' == c || '\t' == c || '\n' == c || '\r' == c || '\f' == c) { c = next(); } return (c); } // Return the next character from a character // or string literal static int scanch(void) { int c; // Get the next input character and interpret // metacharacters that start with a backslash c = next(); if (c == '\\') { switch (c = next()) { case 'a': return '\a'; case 'b': return '\b'; case 'f': return '\f'; case 'n': return '\n'; case 'r': return '\r'; case 't': return '\t'; case 'v': return '\v'; case '\\': return '\\'; case '"': return '"'; case '\'': return '\''; default: fatalc("unknown escape sequence", c); } } return (c); // Just an ordinary old character! } // Scan and return an integer literal // value from the input file. static int scanint(int c) { int k, val = 0; // Convert each character into an int value while ((k = chrpos("0123456789", c)) >= 0) { val = val * 10 + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan in a string literal from the input file, // and store it in buf[]. Return the length of // the string. static int scanstr(char *buf) { int i, c; // Loop while we have enough buffer space for (i = 0; i < TEXTLEN - 1; i++) { // Get the next char and append to buf // Return when we hit the ending double quote if ((c = scanch()) == '"') { buf[i] = 0; return (i); } buf[i] = c; } // Ran out of buf[] space fatal("String literal too long"); return (0); } // Scan an identifier from the input file and // store it in buf[]. Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { fatal("Identifier too long"); } else if (i < lim - 1) { buf[i++] = c; } c = next(); } // We hit a non-valid character, put it back. 
// NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. static int keyword(char *s) { switch (*s) { case 'b': if (!strcmp(s, "break")) return (T_BREAK); break; case 'c': if (!strcmp(s, "case")) return (T_CASE); if (!strcmp(s, "char")) return (T_CHAR); if (!strcmp(s, "continue")) return (T_CONTINUE); break; case 'd': if (!strcmp(s, "default")) return (T_DEFAULT); break; case 'e': if (!strcmp(s, "else")) return (T_ELSE); if (!strcmp(s, "enum")) return (T_ENUM); if (!strcmp(s, "extern")) return (T_EXTERN); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'l': if (!strcmp(s, "long")) return (T_LONG); break; case 'r': if (!strcmp(s, "return")) return (T_RETURN); break; case 's': if (!strcmp(s, "struct")) return (T_STRUCT); if (!strcmp(s, "switch")) return (T_SWITCH); break; case 't': if (!strcmp(s, "typedef")) return (T_TYPEDEF); break; case 'u': if (!strcmp(s, "union")) return (T_UNION); break; case 'v': if (!strcmp(s, "void")) return (T_VOID); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; } return (0); } // A pointer to a rejected token static struct token *Rejtoken = NULL; // Reject the token that we just scanned void reject_token(struct token *t) { if (Rejtoken != NULL) fatal("Can't reject token twice"); Rejtoken = t; } // List of token strings, for debugging purposes char *Tstring[] = { "EOF", "=", "+=", "-=", "*=", "/=", "||", "&&", "|", "^", "&", "==", "!=", ",", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", 
"default", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":" }; // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. int scan(struct token *t) { int c, tokentype; // If we have any rejected token, return it if (Rejtoken != NULL) { t = Rejtoken; Rejtoken = NULL; return (1); } // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': if ((c = next()) == '+') { t->token = T_INC; } else if (c == '=') { t->token = T_ASPLUS; } else { putback(c); t->token = T_PLUS; } break; case '-': if ((c = next()) == '-') { t->token = T_DEC; } else if (c == '>') { t->token = T_ARROW; } else if (c == '=') { t->token = T_ASMINUS; } else if (isdigit(c)) { // Negative int literal t->intvalue = -scanint(c); t->token = T_INTLIT; } else { putback(c); t->token = T_MINUS; } break; case '*': if ((c = next()) == '=') { t->token = T_ASSTAR; } else { putback(c); t->token = T_STAR; } break; case '/': if ((c = next()) == '=') { t->token = T_ASSLASH; } else { putback(c); t->token = T_SLASH; } break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case '[': t->token = T_LBRACKET; break; case ']': t->token = T_RBRACKET; break; case '~': t->token = T_INVERT; break; case '^': t->token = T_XOR; break; case ',': t->token = T_COMMA; break; case '.': t->token = T_DOT; break; case ':': t->token = T_COLON; break; case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { putback(c); t->token = T_LOGNOT; } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else if (c == '<') { t->token = T_LSHIFT; } else { putback(c); t->token = T_LT; } break; case '>': if ((c = next()) == '=') { 
t->token = T_GE; } else if (c == '>') { t->token = T_RSHIFT; } else { putback(c); t->token = T_GT; } break; case '&': if ((c = next()) == '&') { t->token = T_LOGAND; } else { putback(c); t->token = T_AMPER; } break; case '|': if ((c = next()) == '|') { t->token = T_LOGOR; } else { putback(c); t->token = T_OR; } break; case '\'': // If it's a quote, scan in the // literal character value and // the trailing quote t->intvalue = scanch(); t->token = T_INTLIT; if (next() != '\'') fatal("Expected '\\'' at end of char literal"); break; case '"': // Scan in a literal string scanstr(Text); t->token = T_STRLIT; break; default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if ((tokentype = keyword(Text)) != 0) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token t->tokstr = Tstring[t->token]; return (1); } ================================================ FILE: 44_Fold_Optimisation/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // Prototypes static struct ASTnode *single_statement(void); // compound_statement: // empty, i.e. 
no statement // | statement // | statement statements // ; // // statement: declaration // | expression_statement // | function_call // | if_statement // | while_statement // | for_statement // | return_statement // ; // if_statement: if_head // | if_head 'else' statement // ; // // if_head: 'if' '(' true_false_expression ')' statement ; // // Parse an IF statement including any // optional ELSE clause and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); rparen(); // Get the AST for the statement trueAST = single_statement(); // If we have an 'else', skip it // and get the AST for the statement if (Token.token == T_ELSE) { scan(&Token); falseAST = single_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, P_NONE, condAST, trueAST, falseAST, NULL, 0)); } // while_statement: 'while' '(' true_false_expression ')' statement ; // // Parse a WHILE statement and return its AST static struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); rparen(); // Get the AST for the statement. 
// Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Build and return the AST for this statement return (mkastnode(A_WHILE, P_NONE, condAST, NULL, bodyAST, NULL, 0)); } // for_statement: 'for' '(' expression_list ';' // true_false_expression ';' // expression_list ')' statement ; // // Parse a FOR statement and return its AST static struct ASTnode *for_statement(void) { struct ASTnode *condAST, *bodyAST; struct ASTnode *preopAST, *postopAST; struct ASTnode *tree; // Ensure we have 'for' '(' match(T_FOR, "for"); lparen(); // Get the pre_op expression and the ';' preopAST = expression_list(T_SEMI); semi(); // Get the condition and the ';'. // Force a non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); semi(); // Get the post_op expression and the ')' postopAST = expression_list(T_RPAREN); rparen(); // Get the statement which is the body // Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Glue the statement and the postop tree tree = mkastnode(A_GLUE, P_NONE, bodyAST, NULL, postopAST, NULL, 0); // Make a WHILE loop with the condition and this new body tree = mkastnode(A_WHILE, P_NONE, condAST, NULL, tree, NULL, 0); // And glue the preop tree to the A_WHILE tree return (mkastnode(A_GLUE, P_NONE, preopAST, NULL, tree, NULL, 0)); } // return_statement: 'return' '(' expression ')' ; // // Parse a return statement and return its AST static struct ASTnode *return_statement(void) { struct ASTnode *tree; // Can't return a value if function returns P_VOID if (Functionid->type == P_VOID) fatal("Can't return from a void function"); // Ensure we have 'return' '(' match(T_RETURN, "return"); lparen(); // Parse the following expression tree = binexpr(0); // Ensure this is compatible with the function's type tree = modify_type(tree, 
Functionid->type, 0); if (tree == NULL) fatal("Incompatible type to return"); // Add on the A_RETURN node tree = mkastunary(A_RETURN, P_NONE, tree, NULL, 0); // Get the ')' and ';' rparen(); semi(); return (tree); } // break_statement: 'break' ; // // Parse a break statement and return its AST static struct ASTnode *break_statement(void) { if (Looplevel == 0 && Switchlevel == 0) fatal("no loop or switch to break out from"); scan(&Token); semi(); return (mkastleaf(A_BREAK, 0, NULL, 0)); } // continue_statement: 'continue' ; // // Parse a continue statement and return its AST static struct ASTnode *continue_statement(void) { if (Looplevel == 0) fatal("no loop to continue to"); scan(&Token); semi(); return (mkastleaf(A_CONTINUE, 0, NULL, 0)); } // Parse a switch statement and return its AST static struct ASTnode *switch_statement(void) { struct ASTnode *left, *n, *c, *casetree= NULL, *casetail; int inloop=1, casecount=0; int seendefault=0; int ASTop, casevalue; // Skip the 'switch' and '(' scan(&Token); lparen(); // Get the switch expression, the ')' and the '{' left= binexpr(0); rparen(); lbrace(); // Ensure that this is of int type if (!inttype(left->type)) fatal("Switch expression is not of integer type"); // Build an A_SWITCH subtree with the expression as // the child n= mkastunary(A_SWITCH, 0, left, NULL, 0); // Now parse the cases Switchlevel++; while (inloop) { switch(Token.token) { // Leave the loop when we hit a '}' case T_RBRACE: if (casecount==0) fatal("No cases in switch"); inloop=0; break; case T_CASE: case T_DEFAULT: // Ensure this isn't after a previous 'default' if (seendefault) fatal("case or default after existing default"); // Set the AST operation. 
Scan the case value if required if (Token.token==T_DEFAULT) { ASTop= A_DEFAULT; seendefault= 1; scan(&Token); } else { ASTop= A_CASE; scan(&Token); left= binexpr(0); // Ensure the case value is an integer literal if (left->op != A_INTLIT) fatal("Expecting integer literal for case value"); casevalue= left->a_intvalue; // Walk the list of existing case values to ensure // that there isn't a duplicate case value for (c= casetree; c != NULL; c= c -> right) if (casevalue == c->a_intvalue) fatal("Duplicate case value"); } // Scan the ':' and get the compound expression match(T_COLON, ":"); left= compound_statement(1); casecount++; // Build a sub-tree with the compound statement as the left child // and link it in to the growing A_CASE tree if (casetree==NULL) { casetree= casetail= mkastunary(ASTop, 0, left, NULL, casevalue); } else { casetail->right= mkastunary(ASTop, 0, left, NULL, casevalue); casetail= casetail->right; } break; default: fatals("Unexpected token in switch", Token.tokstr); } } Switchlevel--; // We have a sub-tree with the cases and any default. Put the // case count into the A_SWITCH node and attach the case tree. n->a_intvalue= casecount; n->right= casetree; rbrace(); return(n); } // Parse a single statement and return its AST. static struct ASTnode *single_statement(void) { struct ASTnode *stmt; struct symtable *ctype; switch (Token.token) { case T_LBRACE: // We have a '{', so this is a compound statement lbrace(); stmt = compound_statement(0); rbrace(); return(stmt); case T_IDENT: // We have to see if the identifier matches a typedef. // If not, treat it as an expression. // Otherwise, fall down to the parse_type() call. if (findtypedef(Text) == NULL) { stmt= binexpr(0); semi(); return(stmt); } case T_CHAR: case T_INT: case T_LONG: case T_STRUCT: case T_UNION: case T_ENUM: case T_TYPEDEF: // The beginning of a variable declaration list. 
declaration_list(&ctype, C_LOCAL, T_SEMI, T_EOF, &stmt); semi(); return (stmt); // Any assignments from the declarations case T_IF: return (if_statement()); case T_WHILE: return (while_statement()); case T_FOR: return (for_statement()); case T_RETURN: return (return_statement()); case T_BREAK: return (break_statement()); case T_CONTINUE: return (continue_statement()); case T_SWITCH: return (switch_statement()); default: // For now, see if this is an expression. // This catches assignment statements. stmt= binexpr(0); semi(); return(stmt); } return (NULL); // Keep -Wall happy } // Parse a compound statement // and return its AST. If inswitch is true, // we look for a '}', 'case' or 'default' token // to end the parsing. Otherwise, look for // just a '}' to end the parsing. struct ASTnode *compound_statement(int inswitch) { struct ASTnode *left = NULL; struct ASTnode *tree; while (1) { // Parse a single statement tree = single_statement(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, P_NONE, left, NULL, tree, NULL, 0); } // Leave if we've hit the end token if (Token.token == T_RBRACE) return(left); if (inswitch && (Token.token == T_CASE || Token.token == T_DEFAULT)) return(left); } } ================================================ FILE: 44_Fold_Optimisation/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 // Append a node to the singly-linked list pointed to by head or tail void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node) { // Check for valid pointers if (head == NULL || tail == NULL || node == NULL) fatal("Either head, tail or node is NULL in appendsym"); // Append to the list if (*tail) { (*tail)->next = node; *tail = node; } else *head = *tail = node; 
node->next = NULL; } // Create a symbol node to be added to a symbol table list. // Set up the node's: // + type: char, int etc. // + ctype: composite type pointer for struct/union // + structural type: var, function, array etc. // + size: number of elements, or endlabel: end label for a function // + posn: Position information for local symbols // Return a pointer to the new node struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn) { // Get a new node struct symtable *node = (struct symtable *) malloc(sizeof(struct symtable)); if (node == NULL) fatal("Unable to malloc a symbol table node in newsym"); // Fill in the values if (name == NULL) node->name = NULL; else node->name = strdup(name); node->type = type; node->ctype = ctype; node->stype = stype; node->class = class; node->nelems = nelems; // For pointers and integer types, set the size // of the symbol. structs and union declarations // manually set this up themselves. if (ptrtype(type) || inttype(type)) node->size = nelems * typesize(type, ctype); node->st_posn = posn; node->next = NULL; node->member = NULL; node->initlist = NULL; return (node); } // Add a symbol to the global symbol list struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn) { struct symtable *sym = newsym(name, type, ctype, stype, class, nelems, posn); // For structs and unions, copy the size from the type node if (type== P_STRUCT || type== P_UNION) sym->size = ctype->size; appendsym(&Globhead, &Globtail, sym); return (sym); } // Add a symbol to the local symbol list struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int nelems) { struct symtable *sym = newsym(name, type, ctype, stype, C_LOCAL, nelems, 0); // For structs and unions, copy the size from the type node if (type== P_STRUCT || type== P_UNION) sym->size = ctype->size; appendsym(&Loclhead, &Locltail, sym); return (sym); } // Add a 
symbol to the parameter list struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype) { struct symtable *sym = newsym(name, type, ctype, stype, C_PARAM, 1, 0); appendsym(&Parmhead, &Parmtail, sym); return (sym); } // Add a symbol to the temporary member list struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int nelems) { struct symtable *sym = newsym(name, type, ctype, stype, C_MEMBER, nelems, 0); // For structs and unions, copy the size from the type node if (type== P_STRUCT || type== P_UNION) sym->size = ctype->size; appendsym(&Membhead, &Membtail, sym); return (sym); } // Add a struct to the struct list struct symtable *addstruct(char *name) { struct symtable *sym = newsym(name, P_STRUCT, NULL, 0, C_STRUCT, 0, 0); appendsym(&Structhead, &Structtail, sym); return (sym); } // Add a struct to the union list struct symtable *addunion(char *name) { struct symtable *sym = newsym(name, P_UNION, NULL, 0, C_UNION, 0, 0); appendsym(&Unionhead, &Uniontail, sym); return (sym); } // Add an enum type or value to the enum list. // Class is C_ENUMTYPE or C_ENUMVAL. // Use posn to store the int value. struct symtable *addenum(char *name, int class, int value) { struct symtable *sym = newsym(name, P_INT, NULL, 0, class, 0, value); appendsym(&Enumhead, &Enumtail, sym); return (sym); } // Add a typedef to the typedef list struct symtable *addtypedef(char *name, int type, struct symtable *ctype) { struct symtable *sym = newsym(name, type, ctype, 0, C_TYPEDEF, 0, 0); appendsym(&Typehead, &Typetail, sym); return (sym); } // Search for a symbol in a specific list. // Return a pointer to the found node or NULL if not found. 
// If class is not zero, also match on the given class static struct symtable *findsyminlist(char *s, struct symtable *list, int class) { for (; list != NULL; list = list->next) if ((list->name != NULL) && !strcmp(s, list->name)) if (class==0 || class== list->class) return (list); return (NULL); } // Determine if the symbol s is in the global symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findglob(char *s) { return (findsyminlist(s, Globhead, 0)); } // Determine if the symbol s is in the local symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findlocl(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } return (findsyminlist(s, Loclhead, 0)); } // Determine if the symbol s is in the symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findsymbol(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } // Otherwise, try the local and global symbol lists node = findsyminlist(s, Loclhead, 0); if (node) return (node); return (findsyminlist(s, Globhead, 0)); } // Find a member in the member list // Return a pointer to the found node or NULL if not found. struct symtable *findmember(char *s) { return (findsyminlist(s, Membhead, 0)); } // Find a struct in the struct list // Return a pointer to the found node or NULL if not found. struct symtable *findstruct(char *s) { return (findsyminlist(s, Structhead, 0)); } // Find a struct in the union list // Return a pointer to the found node or NULL if not found. struct symtable *findunion(char *s) { return (findsyminlist(s, Unionhead, 0)); } // Find an enum type in the enum list // Return a pointer to the found node or NULL if not found. 
struct symtable *findenumtype(char *s) { return (findsyminlist(s, Enumhead, C_ENUMTYPE)); } // Find an enum value in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumval(char *s) { return (findsyminlist(s, Enumhead, C_ENUMVAL)); } // Find a type in the tyedef list // Return a pointer to the found node or NULL if not found. struct symtable *findtypedef(char *s) { return (findsyminlist(s, Typehead, 0)); } // Reset the contents of the symbol table void clear_symtable(void) { Globhead = Globtail = NULL; Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Membhead = Membtail = NULL; Structhead = Structtail = NULL; Unionhead = Uniontail = NULL; Enumhead = Enumtail = NULL; Typehead = Typetail = NULL; } // Clear all the entries in the local symbol table void freeloclsyms(void) { Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Functionid = NULL; } ================================================ FILE: 44_Fold_Optimisation/tests/err.input031.c ================================================ Expecting a primary expression, got token:+ on line 5 of input031.c ================================================ FILE: 44_Fold_Optimisation/tests/err.input032.c ================================================ Unknown variable:cow on line 4 of input032.c ================================================ FILE: 44_Fold_Optimisation/tests/err.input033.c ================================================ Incompatible type to return on line 4 of input033.c ================================================ FILE: 44_Fold_Optimisation/tests/err.input034.c ================================================ For now, declaration of non-global arrays is not implemented on line 4 of input034.c ================================================ FILE: 44_Fold_Optimisation/tests/err.input035.c ================================================ Duplicate local variable declaration:a on line 4 of input035.c 
================================================ FILE: 44_Fold_Optimisation/tests/err.input036.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input036.c ================================================ FILE: 44_Fold_Optimisation/tests/err.input037.c ================================================ Expected:comma on line 3 of input037.c ================================================ FILE: 44_Fold_Optimisation/tests/err.input038.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input038.c ================================================ FILE: 44_Fold_Optimisation/tests/err.input039.c ================================================ No statements in function with non-void type on line 4 of input039.c ================================================ FILE: 44_Fold_Optimisation/tests/err.input040.c ================================================ No return for function with non-void type on line 4 of input040.c ================================================ FILE: 44_Fold_Optimisation/tests/err.input041.c ================================================ Can't return from a void function on line 3 of input041.c ================================================ FILE: 44_Fold_Optimisation/tests/err.input042.c ================================================ Undeclared function:fred on line 3 of input042.c ================================================ FILE: 44_Fold_Optimisation/tests/err.input043.c ================================================ Undeclared array:b on line 3 of input043.c ================================================ FILE: 44_Fold_Optimisation/tests/err.input044.c ================================================ Unknown variable:z on line 3 of input044.c ================================================ FILE: 44_Fold_Optimisation/tests/err.input045.c ================================================ & operator must be followed by an 
identifier on line 3 of input045.c ================================================ FILE: 44_Fold_Optimisation/tests/err.input046.c ================================================ * operator must be followed by an identifier or * on line 3 of input046.c ================================================ FILE: 44_Fold_Optimisation/tests/err.input047.c ================================================ ++ operator must be followed by an identifier on line 3 of input047.c ================================================ FILE: 44_Fold_Optimisation/tests/err.input048.c ================================================ -- operator must be followed by an identifier on line 3 of input048.c ================================================ FILE: 44_Fold_Optimisation/tests/err.input049.c ================================================ Incompatible expression in assignment on line 6 of input049.c ================================================ FILE: 44_Fold_Optimisation/tests/err.input050.c ================================================ Incompatible types in binary expression on line 6 of input050.c ================================================ FILE: 44_Fold_Optimisation/tests/err.input051.c ================================================ Expected '\'' at end of char literal on line 4 of input051.c ================================================ FILE: 44_Fold_Optimisation/tests/err.input052.c ================================================ Unrecognised character:$ on line 5 of input052.c ================================================ FILE: 44_Fold_Optimisation/tests/err.input056.c ================================================ unknown struct/union type:var1 on line 2 of input056.c ================================================ FILE: 44_Fold_Optimisation/tests/err.input057.c ================================================ previously defined struct/union:fred on line 2 of input057.c ================================================ FILE: 
44_Fold_Optimisation/tests/err.input059.c ================================================ Undeclared variable:y on line 3 of input059.c ================================================ FILE: 44_Fold_Optimisation/tests/err.input060.c ================================================ Undeclared variable:x on line 3 of input060.c ================================================ FILE: 44_Fold_Optimisation/tests/err.input061.c ================================================ Undeclared variable:x on line 3 of input061.c ================================================ FILE: 44_Fold_Optimisation/tests/err.input064.c ================================================ undeclared enum type::fred on line 1 of input064.c ================================================ FILE: 44_Fold_Optimisation/tests/err.input065.c ================================================ enum type redeclared::fred on line 2 of input065.c ================================================ FILE: 44_Fold_Optimisation/tests/err.input066.c ================================================ enum value redeclared::z on line 2 of input066.c ================================================ FILE: 44_Fold_Optimisation/tests/err.input068.c ================================================ redefinition of typedef:FOO on line 2 of input068.c ================================================ FILE: 44_Fold_Optimisation/tests/err.input069.c ================================================ unknown type:FLOO on line 2 of input069.c ================================================ FILE: 44_Fold_Optimisation/tests/err.input072.c ================================================ no loop or switch to break out from on line 1 of input072.c ================================================ FILE: 44_Fold_Optimisation/tests/err.input073.c ================================================ no loop to continue to on line 1 of input073.c ================================================ FILE: 44_Fold_Optimisation/tests/err.input075.c 
================================================ Unexpected token in switch:if on line 4 of input075.c ================================================ FILE: 44_Fold_Optimisation/tests/err.input076.c ================================================ No cases in switch on line 3 of input076.c ================================================ FILE: 44_Fold_Optimisation/tests/err.input077.c ================================================ case or default after existing default on line 6 of input077.c ================================================ FILE: 44_Fold_Optimisation/tests/err.input078.c ================================================ case or default after existing default on line 6 of input078.c ================================================ FILE: 44_Fold_Optimisation/tests/err.input079.c ================================================ Duplicate case value on line 6 of input079.c ================================================ FILE: 44_Fold_Optimisation/tests/err.input085.c ================================================ Bad type in parameter list on line 1 of input085.c ================================================ FILE: 44_Fold_Optimisation/tests/err.input086.c ================================================ Function definition not at global level on line 3 of input086.c ================================================ FILE: 44_Fold_Optimisation/tests/err.input087.c ================================================ Bad type in member list on line 4 of input087.c ================================================ FILE: 44_Fold_Optimisation/tests/err.input092.c ================================================ Integer literal value too big for char type on line 1 of input092.c ================================================ FILE: 44_Fold_Optimisation/tests/err.input093.c ================================================ Expecting an integer literal value on line 1 of input093.c ================================================ FILE: 
44_Fold_Optimisation/tests/err.input094.c ================================================ Type mismatch: integer literal vs. variable on line 1 of input094.c ================================================ FILE: 44_Fold_Optimisation/tests/err.input095.c ================================================ Variable can not be initialised:x on line 1 of input095.c ================================================ FILE: 44_Fold_Optimisation/tests/err.input096.c ================================================ Array size is illegal:0 on line 1 of input096.c ================================================ FILE: 44_Fold_Optimisation/tests/err.input097.c ================================================ For now, declaration of non-global arrays is not implemented on line 2 of input097.c ================================================ FILE: 44_Fold_Optimisation/tests/err.input098.c ================================================ Too many values in initialisation list on line 1 of input098.c ================================================ FILE: 44_Fold_Optimisation/tests/err.input102.c ================================================ Cannot cast to a struct, union or void type on line 3 of input102.c ================================================ FILE: 44_Fold_Optimisation/tests/err.input103.c ================================================ Cannot cast to a struct, union or void type on line 3 of input103.c ================================================ FILE: 44_Fold_Optimisation/tests/err.input104.c ================================================ Cannot cast to a struct, union or void type on line 2 of input104.c ================================================ FILE: 44_Fold_Optimisation/tests/err.input105.c ================================================ Incompatible expression in assignment on line 4 of input105.c ================================================ FILE: 44_Fold_Optimisation/tests/input001.c ================================================ int 
printf(char *fmt); void main() { printf("%d\n", 12 * 3); printf("%d\n", 18 - 2 * 4); printf("%d\n", 1 + 2 + 9 - 5/2 + 3*5); } ================================================ FILE: 44_Fold_Optimisation/tests/input002.c ================================================ int printf(char *fmt); void main() { int fred; int jim; fred= 5; jim= 12; printf("%d\n", fred + jim); } ================================================ FILE: 44_Fold_Optimisation/tests/input003.c ================================================ int printf(char *fmt); void main() { int x; x= 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); } ================================================ FILE: 44_Fold_Optimisation/tests/input004.c ================================================ int printf(char *fmt); void main() { int x; x= 7 < 9; printf("%d\n", x); x= 7 <= 9; printf("%d\n", x); x= 7 != 9; printf("%d\n", x); x= 7 == 7; printf("%d\n", x); x= 7 >= 7; printf("%d\n", x); x= 7 <= 7; printf("%d\n", x); x= 9 > 7; printf("%d\n", x); x= 9 >= 7; printf("%d\n", x); x= 9 != 7; printf("%d\n", x); } ================================================ FILE: 44_Fold_Optimisation/tests/input005.c ================================================ int printf(char *fmt); void main() { int i; int j; i=6; j=12; if (i < j) { printf("%d\n", i); } else { printf("%d\n", j); } } ================================================ FILE: 44_Fold_Optimisation/tests/input006.c ================================================ int printf(char *fmt); void main() { int i; i=1; while (i <= 10) { printf("%d\n", i); i= i + 1; } } ================================================ FILE: 44_Fold_Optimisation/tests/input007.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 
44_Fold_Optimisation/tests/input008.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 44_Fold_Optimisation/tests/input009.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { printf("%d\n", 2 * b - a); } } ================================================ FILE: 44_Fold_Optimisation/tests/input010.c ================================================ int printf(char *fmt); void main() { int i; char j; j= 20; printf("%d\n", j); i= 10; printf("%d\n", i); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 2; j= j + 1) { printf("%d\n", j); } } ================================================ FILE: 44_Fold_Optimisation/tests/input011.c ================================================ int printf(char *fmt); int main() { int i; char j; long k; i= 10; printf("%d\n", i); j= 20; printf("%d\n", j); k= 30; printf("%d\n", k); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 4; j= j + 1) { printf("%d\n", j); } for (k= 1; k <= 5; k= k + 1) { printf("%d\n", k); } return(i); printf("%d\n", 12345); return(3); } ================================================ FILE: 44_Fold_Optimisation/tests/input012.c ================================================ int printf(char *fmt); int fred() { return(5); } void main() { int x; x= fred(2); printf("%d\n", x); } ================================================ FILE: 44_Fold_Optimisation/tests/input013.c ================================================ int printf(char *fmt); int fred() { return(56); } void main() { int dummy; int result; dummy= printf("%d\n", 23); result= fred(10); dummy= printf("%d\n", result); } ================================================ FILE: 44_Fold_Optimisation/tests/input014.c 
================================================ int printf(char *fmt); int fred() { return(20); } int main() { int result; printf("%d\n", 10); result= fred(15); printf("%d\n", result); printf("%d\n", fred(15)+10); return(0); } ================================================ FILE: 44_Fold_Optimisation/tests/input015.c ================================================ int printf(char *fmt); int main() { char a; char *b; char c; int d; int *e; int f; a= 18; printf("%d\n", a); b= &a; c= *b; printf("%d\n", c); d= 12; printf("%d\n", d); e= &d; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 44_Fold_Optimisation/tests/input016.c ================================================ int printf(char *fmt); int c; int d; int *e; int f; int main() { c= 12; d=18; printf("%d\n", c); e= &c + 1; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 44_Fold_Optimisation/tests/input017.c ================================================ int printf(char *fmt); int main() { char a; char *b; int d; int *e; b= &a; *b= 19; printf("%d\n", a); e= &d; *e= 12; printf("%d\n", d); return(0); } ================================================ FILE: 44_Fold_Optimisation/tests/input018.c ================================================ int printf(char *fmt); int main() { int a; int b; a= b= 34; printf("%d\n", a); printf("%d\n", b); return(0); } ================================================ FILE: 44_Fold_Optimisation/tests/input018a.c ================================================ int printf(char *fmt); int a; int *b; char c; char *d; int main() { b= &a; *b= 15; printf("%d\n", a); d= &c; *d= 16; printf("%d\n", c); return(0); } ================================================ FILE: 44_Fold_Optimisation/tests/input019.c ================================================ int printf(char *fmt); int a; int b; int c; int d; int e; int main() { a= 2; b= 4; c= 3; d= 2; e= (a+b) * (c+d); printf("%d\n", e); return(0); } 
================================================ FILE: 44_Fold_Optimisation/tests/input020.c ================================================ int printf(char *fmt); int a; int b[25]; int main() { b[3]= 12; a= b[3]; printf("%d\n", a); return(0); } ================================================ FILE: 44_Fold_Optimisation/tests/input021.c ================================================ int printf(char *fmt); char c; char *str; int main() { c= '\n'; printf("%d\n", c); for (str= "Hello world\n"; *str != 0; str= str + 1) { printf("%c", *str); } return(0); } ================================================ FILE: 44_Fold_Optimisation/tests/input022.c ================================================ int printf(char *fmt); char a; char b; char c; int d; int e; int f; long g; long h; long i; int main() { b= 5; c= 7; a= b + c++; printf("%d\n", a); e= 5; f= 7; d= e + f++; printf("%d\n", d); h= 5; i= 7; g= h + i++; printf("%d\n", g); a= b-- + c; printf("%d\n", a); d= e-- + f; printf("%d\n", d); g= h-- + i; printf("%d\n", g); a= ++b + c; printf("%d\n", a); d= ++e + f; printf("%d\n", d); g= ++h + i; printf("%d\n", g); a= b * --c; printf("%d\n", a); d= e * --f; printf("%d\n", d); g= h * --i; printf("%d\n", g); return(0); } ================================================ FILE: 44_Fold_Optimisation/tests/input023.c ================================================ int printf(char *fmt); char *str; int x; int main() { x= -23; printf("%d\n", x); printf("%d\n", -10 * -10); x= 1; x= ~x; printf("%d\n", x); x= 2 > 5; printf("%d\n", x); x= !x; printf("%d\n", x); x= !x; printf("%d\n", x); x= 13; if (x) { printf("%d\n", 13); } x= 0; if (!x) { printf("%d\n", 14); } for (str= "Hello world\n"; *str; str++) { printf("%c", *str); } return(0); } ================================================ FILE: 44_Fold_Optimisation/tests/input024.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { a= 42; b= 19; printf("%d\n", a & b); printf("%d\n", a | 
b); printf("%d\n", a ^ b); printf("%d\n", 1 << 3); printf("%d\n", 63 >> 3); return(0); } ================================================ FILE: 44_Fold_Optimisation/tests/input025.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { char z; int y; int x; x= 10; y= 20; z= 30; printf("%d\n", x); printf("%d\n", y); printf("%d\n", z); a= 5; b= 15; c= 25; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); return(0); } ================================================ FILE: 44_Fold_Optimisation/tests/input026.c ================================================ int printf(char *fmt); int main(int a, char b, long c, int d, int e, int f, int g, int h) { int i; int j; int k; a= 13; printf("%d\n", a); b= 23; printf("%d\n", b); c= 34; printf("%d\n", c); d= 44; printf("%d\n", d); e= 54; printf("%d\n", e); f= 64; printf("%d\n", f); g= 74; printf("%d\n", g); h= 84; printf("%d\n", h); i= 94; printf("%d\n", i); j= 95; printf("%d\n", j); k= 96; printf("%d\n", k); return(0); } ================================================ FILE: 44_Fold_Optimisation/tests/input027.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int param5(int a, int b, int c, int d, int e) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param2(int a, int b) { int c; int d; int e; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param0() { int a; int b; int c; int d; int e; a= 1; b= 2; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int main() { param8(1,2,3,4,5,6,7,8); 
param5(1,2,3,4,5); param2(1,2); param0(); return(0); } ================================================ FILE: 44_Fold_Optimisation/tests/input028.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 44_Fold_Optimisation/tests/input029.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h); int fred(int a, int b, int c); int main(); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 44_Fold_Optimisation/tests/input030.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input030.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 44_Fold_Optimisation/tests/input031.c ================================================ int printf(char *fmt); int main() { int x; x= 2 + + 3 - * / ; } 
================================================ FILE: 44_Fold_Optimisation/tests/input032.c ================================================ int printf(char *fmt); int main() { pizza cow llama sausage; } ================================================ FILE: 44_Fold_Optimisation/tests/input033.c ================================================ int printf(char *fmt); int main() { char *z; return(z); } ================================================ FILE: 44_Fold_Optimisation/tests/input034.c ================================================ int printf(char *fmt); int main() { int a[12]; return(0); } ================================================ FILE: 44_Fold_Optimisation/tests/input035.c ================================================ int printf(char *fmt); int fred(int a, int b) { int a; return(a); } ================================================ FILE: 44_Fold_Optimisation/tests/input036.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c); ================================================ FILE: 44_Fold_Optimisation/tests/input037.c ================================================ int printf(char *fmt); int fred(int a, char b +, int z); ================================================ FILE: 44_Fold_Optimisation/tests/input038.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c, int g); ================================================ FILE: 44_Fold_Optimisation/tests/input039.c ================================================ int printf(char *fmt); int main() { int a; } ================================================ FILE: 44_Fold_Optimisation/tests/input040.c ================================================ int printf(char *fmt); int main() { int a; a= 5; } ================================================ FILE: 44_Fold_Optimisation/tests/input041.c 
================================================ int printf(char *fmt); void fred() { return(5); } ================================================ FILE: 44_Fold_Optimisation/tests/input042.c ================================================ int printf(char *fmt); int main() { fred(5); } ================================================ FILE: 44_Fold_Optimisation/tests/input043.c ================================================ int printf(char *fmt); int main() { int a; a= b[4]; } ================================================ FILE: 44_Fold_Optimisation/tests/input044.c ================================================ int printf(char *fmt); int main() { int a; a= z; } ================================================ FILE: 44_Fold_Optimisation/tests/input045.c ================================================ int printf(char *fmt); int main() { int a; a= &5; } ================================================ FILE: 44_Fold_Optimisation/tests/input046.c ================================================ int printf(char *fmt); int main() { int a; a= *5; } ================================================ FILE: 44_Fold_Optimisation/tests/input047.c ================================================ int printf(char *fmt); int main() { int a; a= ++5; } ================================================ FILE: 44_Fold_Optimisation/tests/input048.c ================================================ int printf(char *fmt); int main() { int a; a= --5; } ================================================ FILE: 44_Fold_Optimisation/tests/input049.c ================================================ int printf(char *fmt); int main() { int x; char y; y= x; } ================================================ FILE: 44_Fold_Optimisation/tests/input050.c ================================================ int printf(char *fmt); int main() { char *a; char *b; a= a + b; } ================================================ FILE: 44_Fold_Optimisation/tests/input051.c 
================================================ int printf(char *fmt); int main() { char a; a= 'fred'; } ================================================ FILE: 44_Fold_Optimisation/tests/input052.c ================================================ int printf(char *fmt); int main() { int a; a= $5.00; } ================================================ FILE: 44_Fold_Optimisation/tests/input053.c ================================================ int printf(char *fmt); int main() { printf("Hello world, %d\n", 23); return(0); } ================================================ FILE: 44_Fold_Optimisation/tests/input054.c ================================================ int printf(char *fmt); int main() { int i; for (i=0; i < 20; i++) { printf("Hello world, %d\n", i); } return(0); } ================================================ FILE: 44_Fold_Optimisation/tests/input055.c ================================================ int printf(char *fmt); int main(int argc, char **argv) { int i; char *argument; printf("Hello world\n"); for (i=0; i < argc; i++) { argument= *argv; argv= argv + 1; printf("Argument %d is %s\n", i, argument); } return(0); } ================================================ FILE: 44_Fold_Optimisation/tests/input056.c ================================================ struct foo { int x; }; struct mary var1; ================================================ FILE: 44_Fold_Optimisation/tests/input057.c ================================================ struct fred { int x; } ; struct fred { char y; } ; ================================================ FILE: 44_Fold_Optimisation/tests/input058.c ================================================ int printf(char *fmt); struct fred { int x; char y; long z; }; struct fred var2; struct fred *varptr; int main() { long result; var2.x= 12; printf("%d\n", var2.x); var2.y= 'c'; printf("%d\n", var2.y); var2.z= 4005; printf("%d\n", var2.z); result= var2.x + var2.y + var2.z; printf("%d\n", result); varptr= &var2; result= varptr->x + 
varptr->y + varptr->z; printf("%d\n", result); return(0); } ================================================ FILE: 44_Fold_Optimisation/tests/input059.c ================================================ int main() { int x; x= y.foo; } ================================================ FILE: 44_Fold_Optimisation/tests/input060.c ================================================ int main() { int x; x= x.foo; } ================================================ FILE: 44_Fold_Optimisation/tests/input061.c ================================================ int main() { int x; x= x->foo; } ================================================ FILE: 44_Fold_Optimisation/tests/input062.c ================================================ int printf(char *fmt); union fred { char w; int x; int y; long z; }; union fred var1; union fred *varptr; int main() { var1.x= 65; printf("%d\n", var1.x); var1.x= 66; printf("%d\n", var1.x); printf("%d\n", var1.y); printf("The next two depend on the endian of the platform\n"); printf("%d\n", var1.w); printf("%d\n", var1.z); varptr= &var1; varptr->x= 67; printf("%d\n", varptr->x); printf("%d\n", varptr->y); return(0); } ================================================ FILE: 44_Fold_Optimisation/tests/input063.c ================================================ int printf(char *fmt); enum fred { apple=1, banana, carrot, pear=10, peach, mango, papaya }; enum jane { aple=1, bnana, crrot, par=10, pech, mago, paaya }; enum fred var1; enum jane var2; enum fred var3; int main() { var1= carrot + pear + mango; printf("%d\n", var1); return(0); } ================================================ FILE: 44_Fold_Optimisation/tests/input064.c ================================================ enum fred var3; ================================================ FILE: 44_Fold_Optimisation/tests/input065.c ================================================ enum fred { x, y, z }; enum fred { a, b }; ================================================ FILE: 
44_Fold_Optimisation/tests/input066.c ================================================ enum fred { x, y, z }; enum mary { a, b, z }; ================================================ FILE: 44_Fold_Optimisation/tests/input067.c ================================================ int printf(char *fmt); typedef int FOO; FOO var1; struct bar { int x; int y} ; typedef struct bar BAR; BAR var2; int main() { var1= 5; printf("%d\n", var1); var2.x= 7; var2.y= 10; printf("%d\n", var2.x + var2.y); return(0); } ================================================ FILE: 44_Fold_Optimisation/tests/input068.c ================================================ typedef int FOO; typedef char FOO; ================================================ FILE: 44_Fold_Optimisation/tests/input069.c ================================================ typedef int FOO; FLOO y; ================================================ FILE: 44_Fold_Optimisation/tests/input070.c ================================================ #include typedef int FOO; int main() { FOO x; x= 56; printf("%d\n", x); return(0); } ================================================ FILE: 44_Fold_Optimisation/tests/input071.c ================================================ #include int main() { int x; x = 0; while (x < 100) { if (x == 5) { x = x + 2; continue; } printf("%d\n", x); if (x == 14) { break; } x = x + 1; } printf("Done\n"); return (0); } ================================================ FILE: 44_Fold_Optimisation/tests/input072.c ================================================ int main() { break; } ================================================ FILE: 44_Fold_Optimisation/tests/input073.c ================================================ int main() { continue; } ================================================ FILE: 44_Fold_Optimisation/tests/input074.c ================================================ #include int main() { int x; int y; y= 0; for (x=0; x < 5; x++) { switch(x) { case 1: { y= 5; break; } case 2: { y= 7; break; } case 
3: { y= 9; } default: { y= 100; } } printf("%d\n", y); } return(0); } ================================================ FILE: 44_Fold_Optimisation/tests/input075.c ================================================ int main() { int x; switch(x) { if (x<5); } } ================================================ FILE: 44_Fold_Optimisation/tests/input076.c ================================================ int main() { int x; switch(x) { } } ================================================ FILE: 44_Fold_Optimisation/tests/input077.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } case 4: { x= 2; } } } ================================================ FILE: 44_Fold_Optimisation/tests/input078.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } default: { x= 2; } } } ================================================ FILE: 44_Fold_Optimisation/tests/input079.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } case 2: { x= 2; } case 1: { x= 2; } default: { x= 2; } } } ================================================ FILE: 44_Fold_Optimisation/tests/input080.c ================================================ #include int x; int y; int main() { for (x=0, y=1; x < 6; x++, y=y+2) { printf("%d %d\n", x, y); } return(0); } ================================================ FILE: 44_Fold_Optimisation/tests/input081.c ================================================ #include int x; int y; int main() { x= 0; y=1; for (;x<5;) { printf("%d %d\n", x, y); x=x+1; y=y+2; } return(0); } ================================================ FILE: 44_Fold_Optimisation/tests/input082.c ================================================ #include int main() { int x; x= 10; // Dangling else test if (x > 5) if (x > 15) printf("x > 15\n"); else printf("15 >= x > 5\n"); else printf("5 >= x\n"); return(0); } 
================================================ FILE: 44_Fold_Optimisation/tests/input083.c ================================================ #include int main() { int x; // Dangling else test. // We should not print anything for x<= 5 for (x=0; x < 12; x++) if (x > 5) if (x > 10) printf("10 < %2d\n", x); else printf(" 5 < %2d <= 10\n", x); return(0); } ================================================ FILE: 44_Fold_Optimisation/tests/input084.c ================================================ #include int main() { int x, y; x=2; y=3; printf("%d %d\n", x, y); char a, *b; a= 'f'; b= &a; printf("%c %c\n", a, *b); return(0); } ================================================ FILE: 44_Fold_Optimisation/tests/input085.c ================================================ int main(struct foo { int x; }; , int a) { return(0); } ================================================ FILE: 44_Fold_Optimisation/tests/input086.c ================================================ int main() { int fred() { return(5); } int x; x=2; return(x); } ================================================ FILE: 44_Fold_Optimisation/tests/input087.c ================================================ struct foo { int x; int y; struct blah { int g; }; }; ================================================ FILE: 44_Fold_Optimisation/tests/input088.c ================================================ #include struct foo { int x; int y; } fred, mary; struct foo james; int main() { fred.x= 5; mary.y= 6; printf("%d %d\n", fred.x, mary.y); return(0); } ================================================ FILE: 44_Fold_Optimisation/tests/input089.c ================================================ #include int x= 23; char y= 'H'; char *z= "Hello world"; int main() { printf("%d %c %s\n", x, y, z); return(0); } ================================================ FILE: 44_Fold_Optimisation/tests/input090.c ================================================ #include int a = 23, b = 100; char y = 'H', *z = "Hello world"; int main() { 
printf("%d %d %c %s\n", a, b, y, z); return (0); } ================================================ FILE: 44_Fold_Optimisation/tests/input091.c ================================================ #include int fred[] = { 1, 2, 3, 4, 5 }; int jim[10] = { 1, 2, 3, 4, 5 }; int main() { int i; for (i=0; i < 5; i++) printf("%d\n", fred[i]); for (i=0; i < 10; i++) printf("%d\n", jim[i]); return(0); } ================================================ FILE: 44_Fold_Optimisation/tests/input092.c ================================================ char x= 3000; ================================================ FILE: 44_Fold_Optimisation/tests/input093.c ================================================ char x= fred; ================================================ FILE: 44_Fold_Optimisation/tests/input094.c ================================================ char *s= 54; ================================================ FILE: 44_Fold_Optimisation/tests/input095.c ================================================ int fred(int x=2) { return(x); } ================================================ FILE: 44_Fold_Optimisation/tests/input096.c ================================================ int fred[0]; ================================================ FILE: 44_Fold_Optimisation/tests/input097.c ================================================ int main() { int x[45]; return(0); } ================================================ FILE: 44_Fold_Optimisation/tests/input098.c ================================================ int fred[3]= { 1, 2, 3, 4, 5 }; ================================================ FILE: 44_Fold_Optimisation/tests/input099.c ================================================ #include // List of token strings, for debugging purposes. 
// As yet, we can't store a NULL into the list char *Tstring[] = { "EOF", "=", "||", "&&", "|", "^", "&", "==", "!=", ",", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", "default", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":", "" }; int main() { int i; char *str; i=0; while (1) { str= Tstring[i]; if (*str == 0) break; printf("%s\n", str); i++; } return(0); } ================================================ FILE: 44_Fold_Optimisation/tests/input100.c ================================================ #include int main() { int x= 3, y=14; int z= 2 * x + y; char *str= "Hello world"; printf("%s %d %d\n", str, x+y, z); return(0); } ================================================ FILE: 44_Fold_Optimisation/tests/input101.c ================================================ #include int main() { int x= 65535; char y; char *str; y= (char )x; printf("0x%x\n", y); str= (char *)0; printf("0x%lx\n", (long)str); return(0); } ================================================ FILE: 44_Fold_Optimisation/tests/input102.c ================================================ int main() { struct foo { int p; }; int y= (struct foo) x; } ================================================ FILE: 44_Fold_Optimisation/tests/input103.c ================================================ int main() { union foo { int p; }; int y= (union foo) x; } ================================================ FILE: 44_Fold_Optimisation/tests/input104.c ================================================ int main() { int y= (void) x; } ================================================ FILE: 44_Fold_Optimisation/tests/input105.c ================================================ int main() { int x; char *y; y= (char) x; } ================================================ FILE: 
44_Fold_Optimisation/tests/input106.c ================================================ #include char *y= (char *)0; int main() { printf("0x%lx\n", (long)y); return(0); } ================================================ FILE: 44_Fold_Optimisation/tests/input107.c ================================================ #include char *y[] = { "fish", "cow", NULL }; char *z= NULL; int main() { int i; char *ptr; for (i=0; i < 3; i++) { ptr= y[i]; if (ptr != (char *)0) printf("%s\n", y[i]); else printf("NULL\n"); } return(0); } ================================================ FILE: 44_Fold_Optimisation/tests/input108.c ================================================ int main() { char *str= (void *)0; return(0); } ================================================ FILE: 44_Fold_Optimisation/tests/input109.c ================================================ #include int main() { int x= 17 - 1; printf("%d\n", x); return(0); } ================================================ FILE: 44_Fold_Optimisation/tests/input110.c ================================================ #include int x; int y; int main() { x= 3; y= 15; y += x; printf("%d\n", y); x= 3; y= 15; y -= x; printf("%d\n", y); x= 3; y= 15; y *= x; printf("%d\n", y); x= 3; y= 15; y /= x; printf("%d\n", y); return(0); } ================================================ FILE: 44_Fold_Optimisation/tests/input111.c ================================================ #include int main() { int x= 2000 + 3 + 4 * 5 + 6; printf("%d\n", x); return(0); } ================================================ FILE: 44_Fold_Optimisation/tests/mktests ================================================
#!/bin/sh
# Make the output files for each test.
#
# For every input*c file in the current directory that has neither an
# "out.<file>" nor an "err.<file>" yet:
#   - compile it with ../cwj, saving the compiler's stderr in err.<file>;
#   - if err.<file> ends up empty, delete it, build the same source with
#     cc, run the result and save what it prints in out.<file>.

# Build our compiler if needed
if [ ! -f ../cwj ]
then (cd ..; make install)
fi

for i in input*c
do
  # Two separate tests joined by && instead of the obsolescent "-a" operator
  if [ ! -f "out.$i" ] && [ ! -f "err.$i" ]
  then
    ../cwj -o out "$i" 2> "err.$i"

    # If the err file is empty
    if [ ! -s "err.$i" ]
    then
      rm -f "err.$i"

      # Only record output from a binary that cc really built. If cc
      # fails, "out" may still be the one left behind by ../cwj -o out
      # (or may not exist), and its output must not be saved as the
      # expected result for this test.
      if cc -o out "$i"
      then ./out > "out.$i"
      else echo "mktests: cc could not build $i, no out.$i written" >&2
      fi
    fi
  fi
  rm -f out out.s
done
================================================ FILE: 44_Fold_Optimisation/tests/out.input001.c ================================================ 36 10 25 ================================================ FILE: 44_Fold_Optimisation/tests/out.input002.c ================================================ 17 ================================================ FILE: 44_Fold_Optimisation/tests/out.input003.c ================================================ 1 2 3 4 5 ================================================ FILE: 44_Fold_Optimisation/tests/out.input004.c ================================================ 1 1 1 1 1 1 1 1 1 ================================================ FILE: 44_Fold_Optimisation/tests/out.input005.c ================================================ 6 ================================================ FILE: 44_Fold_Optimisation/tests/out.input006.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 44_Fold_Optimisation/tests/out.input007.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 44_Fold_Optimisation/tests/out.input008.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 44_Fold_Optimisation/tests/out.input009.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 44_Fold_Optimisation/tests/out.input010.c ================================================ 20 10 1 2 3 4 5 253 254 255 0 1 ================================================ FILE: 44_Fold_Optimisation/tests/out.input011.c ================================================ 10 20 30 1 2 3 4 5 253 254 255 0 1 2 3 1 2 3 4 5 ================================================ FILE:
44_Fold_Optimisation/tests/out.input012.c ================================================ 5 ================================================ FILE: 44_Fold_Optimisation/tests/out.input013.c ================================================ 23 56 ================================================ FILE: 44_Fold_Optimisation/tests/out.input014.c ================================================ 10 20 30 ================================================ FILE: 44_Fold_Optimisation/tests/out.input015.c ================================================ 18 18 12 12 ================================================ FILE: 44_Fold_Optimisation/tests/out.input016.c ================================================ 12 18 ================================================ FILE: 44_Fold_Optimisation/tests/out.input017.c ================================================ 19 12 ================================================ FILE: 44_Fold_Optimisation/tests/out.input018.c ================================================ 34 34 ================================================ FILE: 44_Fold_Optimisation/tests/out.input018a.c ================================================ 15 16 ================================================ FILE: 44_Fold_Optimisation/tests/out.input019.c ================================================ 30 ================================================ FILE: 44_Fold_Optimisation/tests/out.input020.c ================================================ 12 ================================================ FILE: 44_Fold_Optimisation/tests/out.input021.c ================================================ 10 Hello world ================================================ FILE: 44_Fold_Optimisation/tests/out.input022.c ================================================ 12 12 12 13 13 13 13 13 13 35 35 35 ================================================ FILE: 44_Fold_Optimisation/tests/out.input023.c ================================================ -23 100 -2 0 1 0 13 14 Hello world 
================================================ FILE: 44_Fold_Optimisation/tests/out.input024.c ================================================ 2 59 57 8 7 ================================================ FILE: 44_Fold_Optimisation/tests/out.input025.c ================================================ 10 20 30 5 15 25 ================================================ FILE: 44_Fold_Optimisation/tests/out.input026.c ================================================ 13 23 34 44 54 64 74 84 94 95 96 ================================================ FILE: 44_Fold_Optimisation/tests/out.input027.c ================================================ 1 2 3 4 5 6 7 8 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 ================================================ FILE: 44_Fold_Optimisation/tests/out.input028.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 44_Fold_Optimisation/tests/out.input029.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 44_Fold_Optimisation/tests/out.input030.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input030.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 44_Fold_Optimisation/tests/out.input053.c ================================================ Hello world, 23 ================================================ FILE: 44_Fold_Optimisation/tests/out.input054.c ================================================ Hello world, 0 Hello world, 1 Hello world, 2 Hello world, 3 Hello world, 4 Hello world, 5 Hello world, 6 Hello world, 7 Hello world, 8 Hello world, 9 Hello 
world, 10 Hello world, 11 Hello world, 12 Hello world, 13 Hello world, 14 Hello world, 15 Hello world, 16 Hello world, 17 Hello world, 18 Hello world, 19 ================================================ FILE: 44_Fold_Optimisation/tests/out.input055.c ================================================ Hello world Argument 0 is ./out ================================================ FILE: 44_Fold_Optimisation/tests/out.input058.c ================================================ 12 99 4005 4116 4116 ================================================ FILE: 44_Fold_Optimisation/tests/out.input062.c ================================================ 65 66 66 The next two depend on the endian of the platform 66 66 67 67 ================================================ FILE: 44_Fold_Optimisation/tests/out.input063.c ================================================ 25 ================================================ FILE: 44_Fold_Optimisation/tests/out.input067.c ================================================ 5 17 ================================================ FILE: 44_Fold_Optimisation/tests/out.input070.c ================================================ 56 ================================================ FILE: 44_Fold_Optimisation/tests/out.input071.c ================================================ 0 1 2 3 4 7 8 9 10 11 12 13 14 Done ================================================ FILE: 44_Fold_Optimisation/tests/out.input074.c ================================================ 100 5 7 100 100 ================================================ FILE: 44_Fold_Optimisation/tests/out.input080.c ================================================ 0 1 1 3 2 5 3 7 4 9 5 11 ================================================ FILE: 44_Fold_Optimisation/tests/out.input081.c ================================================ 0 1 1 3 2 5 3 7 4 9 ================================================ FILE: 44_Fold_Optimisation/tests/out.input082.c ================================================ 15 >= x 
> 5 ================================================ FILE: 44_Fold_Optimisation/tests/out.input083.c ================================================ 5 < 6 <= 10 5 < 7 <= 10 5 < 8 <= 10 5 < 9 <= 10 5 < 10 <= 10 10 < 11 ================================================ FILE: 44_Fold_Optimisation/tests/out.input084.c ================================================ 2 3 f f ================================================ FILE: 44_Fold_Optimisation/tests/out.input088.c ================================================ 5 6 ================================================ FILE: 44_Fold_Optimisation/tests/out.input089.c ================================================ 23 H Hello world ================================================ FILE: 44_Fold_Optimisation/tests/out.input090.c ================================================ 23 100 H Hello world ================================================ FILE: 44_Fold_Optimisation/tests/out.input091.c ================================================ 1 2 3 4 5 1 2 3 4 5 0 0 0 0 0 ================================================ FILE: 44_Fold_Optimisation/tests/out.input099.c ================================================ EOF = || && | ^ & == != , > <= >= << >> + - * / ++ -- ~ ! void char int long if else while for return struct union enum typedef extern break continue switch case default intlit strlit ; identifier { } ( ) [ ] , . 
-> : ================================================ FILE: 44_Fold_Optimisation/tests/out.input100.c ================================================ Hello world 17 20 ================================================ FILE: 44_Fold_Optimisation/tests/out.input101.c ================================================ 0xff 0x0 ================================================ FILE: 44_Fold_Optimisation/tests/out.input106.c ================================================ 0x0 ================================================ FILE: 44_Fold_Optimisation/tests/out.input107.c ================================================ fish cow NULL ================================================ FILE: 44_Fold_Optimisation/tests/out.input108.c ================================================ ================================================ FILE: 44_Fold_Optimisation/tests/out.input109.c ================================================ 16 ================================================ FILE: 44_Fold_Optimisation/tests/out.input110.c ================================================ 18 12 45 5 ================================================ FILE: 44_Fold_Optimisation/tests/out.input111.c ================================================ 2029 ================================================ FILE: 44_Fold_Optimisation/tests/runtests ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! -f ../cwj ] then (cd ..; make install) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" 
# Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../cwj -o out $i ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../cwj $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.s "trial.$i" done ================================================ FILE: 44_Fold_Optimisation/tests/runtestsn ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! -f ../compn ] then (cd ..; make install) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" # Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../compn -o out $i ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. 
Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../compn $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.o out.s "trial.$i" done ================================================ FILE: 44_Fold_Optimisation/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->type = type; n->left = left; n->mid = mid; n->right = right; n->sym = sym; n->a_intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, struct symtable *sym, int intvalue) { return (mkastnode(op, type, NULL, NULL, NULL, sym, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, struct symtable *sym, int intvalue) { return (mkastnode(op, type, left, NULL, NULL, sym, intvalue)); } // Generate and return a new label number // just for AST dumping purposes static int gendumplabel(void) { static int id = 1; return (id++); } // Given an AST tree, print it out and follow the // traversal of the tree that genAST() follows void dumpAST(struct ASTnode *n, int label, int level) { int Lfalse, Lstart, Lend; switch (n->op) { case A_IF: Lfalse = gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_IF"); if (n->right) { Lend = gendumplabel(); fprintf(stdout, ", end L%d", Lend); } fprintf(stdout, "\n"); dumpAST(n->left, Lfalse, level + 2); 
dumpAST(n->mid, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); return; case A_WHILE: Lstart = gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_WHILE, start L%d\n", Lstart); Lend = gendumplabel(); dumpAST(n->left, Lend, level + 2); dumpAST(n->right, NOLABEL, level + 2); return; } // Reset level to -2 for A_GLUE if (n->op == A_GLUE) level = -2; // General AST node handling if (n->left) dumpAST(n->left, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); for (int i = 0; i < level; i++) fprintf(stdout, " "); switch (n->op) { case A_GLUE: fprintf(stdout, "\n\n"); return; case A_FUNCTION: fprintf(stdout, "A_FUNCTION %s\n", n->sym->name); return; case A_ADD: fprintf(stdout, "A_ADD\n"); return; case A_SUBTRACT: fprintf(stdout, "A_SUBTRACT\n"); return; case A_MULTIPLY: fprintf(stdout, "A_MULTIPLY\n"); return; case A_DIVIDE: fprintf(stdout, "A_DIVIDE\n"); return; case A_EQ: fprintf(stdout, "A_EQ\n"); return; case A_NE: fprintf(stdout, "A_NE\n"); return; case A_LT: fprintf(stdout, "A_LE\n"); return; case A_GT: fprintf(stdout, "A_GT\n"); return; case A_LE: fprintf(stdout, "A_LE\n"); return; case A_GE: fprintf(stdout, "A_GE\n"); return; case A_INTLIT: fprintf(stdout, "A_INTLIT %d\n", n->a_intvalue); return; case A_STRLIT: fprintf(stdout, "A_STRLIT rval label L%d\n", n->a_intvalue); return; case A_IDENT: if (n->rvalue) fprintf(stdout, "A_IDENT rval %s\n", n->sym->name); else fprintf(stdout, "A_IDENT %s\n", n->sym->name); return; case A_ASSIGN: fprintf(stdout, "A_ASSIGN\n"); return; case A_WIDEN: fprintf(stdout, "A_WIDEN\n"); return; case A_RETURN: fprintf(stdout, "A_RETURN\n"); return; case A_FUNCCALL: fprintf(stdout, "A_FUNCCALL %s\n", n->sym->name); return; case A_ADDR: fprintf(stdout, "A_ADDR %s\n", n->sym->name); return; case A_DEREF: if (n->rvalue) fprintf(stdout, "A_DEREF rval\n"); else fprintf(stdout, "A_DEREF\n"); return; case A_SCALE: fprintf(stdout, "A_SCALE %d\n", n->a_size); 
return; case A_PREINC: fprintf(stdout, "A_PREINC %s\n", n->sym->name); return; case A_PREDEC: fprintf(stdout, "A_PREDEC %s\n", n->sym->name); return; case A_POSTINC: fprintf(stdout, "A_POSTINC\n"); return; case A_POSTDEC: fprintf(stdout, "A_POSTDEC\n"); return; case A_NEGATE: fprintf(stdout, "A_NEGATE\n"); return; case A_BREAK: fprintf(stdout, "A_BREAK\n"); return; case A_CONTINUE: fprintf(stdout, "A_CONTINUE\n"); return; case A_CASE: fprintf(stdout, "A_CASE %d\n", n->a_intvalue); return; case A_DEFAULT: fprintf(stdout, "A_DEFAULT\n"); return; case A_SWITCH: fprintf(stdout, "A_SWITCH\n"); return; case A_CAST: fprintf(stdout, "A_CAST %d\n", n->type); return; case A_ASPLUS: fprintf(stdout, "A_ASPLUS\n"); return; case A_ASMINUS: fprintf(stdout, "A_ASMINUS\n"); return; case A_ASSTAR: fprintf(stdout, "A_ASSTAR\n"); return; case A_ASSLASH: fprintf(stdout, "A_ASSLASH\n"); return; default: fatald("Unknown dumpAST operator", n->op); } } ================================================ FILE: 44_Fold_Optimisation/types.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Types and type handling // Copyright (c) 2019 Warren Toomey, GPL3 // Return true if a type is an int type // of any size, false otherwise int inttype(int type) { return (((type & 0xf) == 0) && (type >= P_CHAR && type <= P_LONG)); } // Return true if a type is of pointer type int ptrtype(int type) { return ((type & 0xf) != 0); } // Given a primitive type, return // the type which is a pointer to it int pointer_to(int type) { if ((type & 0xf) == 0xf) fatald("Unrecognised in pointer_to: type", type); return (type + 1); } // Given a primitive pointer type, return // the type which it points to int value_at(int type) { if ((type & 0xf) == 0x0) fatald("Unrecognised in value_at: type", type); return (type - 1); } // Given a type and a composite type pointer, return // the size of this type in bytes int typesize(int type, struct symtable *ctype) { if (type == 
P_STRUCT || type == P_UNION) return (ctype->size); return (genprimsize(type)); } // Given an AST tree and a type which we want it to become, // possibly modify the tree by widening or scaling so that // it is compatible with this type. Return the original tree // if no changes occurred, a modified tree, or NULL if the // tree is not compatible with the given type. // If this will be part of a binary operation, the AST op is not zero. struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op) { int ltype; int lsize, rsize; ltype = tree->type; // XXX No idea on these yet if (ltype == P_STRUCT || ltype == P_UNION) fatal("Don't know how to do this yet"); if (rtype == P_STRUCT || rtype == P_UNION) fatal("Don't know how to do this yet"); // Compare scalar int types if (inttype(ltype) && inttype(rtype)) { // Both types same, nothing to do if (ltype == rtype) return (tree); // Get the sizes for each type lsize = typesize(ltype, NULL); rsize = typesize(rtype, NULL); // Tree's size is too big if (lsize > rsize) return (NULL); // Widen to the right if (rsize > lsize) return (mkastunary(A_WIDEN, rtype, tree, NULL, 0)); } // For pointers if (ptrtype(ltype) && ptrtype(rtype)) { // We can compare them if (op >= A_EQ && op <= A_GE) return(tree); // A comparison of the same type for a non-binary operation is OK, // or when the left tree is of `void *` type. 
if (op == 0 && (ltype == rtype || ltype == pointer_to(P_VOID))) return (tree); } // We can scale only on A_ADD or A_SUBTRACT operation if (op == A_ADD || op == A_SUBTRACT) { // Left is int type, right is pointer type and the size // of the original type is >1: scale the left if (inttype(ltype) && ptrtype(rtype)) { rsize = genprimsize(value_at(rtype)); if (rsize > 1) return (mkastunary(A_SCALE, rtype, tree, NULL, rsize)); else return (tree); // Size 1, no need to scale } } // If we get here, the types are not compatible return (NULL); } ================================================ FILE: 45_Globals_Again/Makefile ================================================ # Define the location of the include directory # and the location to install the compiler binary INCDIR=/tmp/include BINDIR=/tmp HSRCS= data.h decl.h defs.h SRCS= cg.c decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c ARMSRCS= cg_arm.c decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c cwj: $(SRCS) $(HSRCS) cc -o cwj -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCS) compn: $(SRCN) $(HSRCS) cc -D__NASM__ -o compn -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCN) cwjarm: $(ARMSRCS) $(HSRCS) cc -o cwjarm -g -Wall $(ARMSRCS) cp cwjarm cwj install: cwj mkdir -p $(INCDIR) rsync -a include/. $(INCDIR) cp cwj $(BINDIR) chmod +x $(BINDIR)/cwj installn: compn mkdir -p $(INCDIR) rsync -a include/. 
$(INCDIR) cp compn $(BINDIR) chmod +x $(BINDIR)/compn clean: rm -f cwj cwjarm compn *.o *.s out a.out test: install tests/runtests (cd tests; chmod +x runtests; ./runtests) armtest: cwjarm tests/runtests (cd tests; chmod +x runtests; ./runtests) testn: installn tests/runtestsn (cd tests; chmod +x runtestsn; ./runtestsn) ================================================ FILE: 45_Globals_Again/Readme.md ================================================ # Part 45: Global Variable Declarations, revisited Two parts ago, I was trying to compile this line: ```c enum { TEXTLEN = 512 }; // Length of identifiers in input extern char Text[TEXTLEN + 1]; ``` and realised that our declaration parsing code could only deal with a single integer literal as the size of an array. But my compiler code, as shown above, uses an expression with two integer literals. In the last part, I added constant folding to the compiler so that an expression of integer literals will be folded down to a single integer literal. Now we need to discard all of that wonderful hand-written parsing of the literal value and associated casting, and call our expression parser to get an AST tree with the literal value in it. ## Keep or Discard `parse_literal()`? In our current compiler in `decl.c`, we have a function called `parse_literal()` which does the manual parsing of strings and integer literals. Should we keep it as a function, or just throw it away and call `binexpr()` manually elsewhere? I've decided to keep the function, toss all the existing code and change the purpose of this function a little bit. It will now also parse any cast which precedes an expression with several literal values. The function header in `decl.c` is now: ```c // Given a type, parse an expression of literals and ensure // that the type of this expression matches the given type. // Parse any type cast that precedes the expression. // If an integer literal, return this value. 
// If a string literal, return the label number of the string. int parse_literal(int type); ``` So, it's a drop-in replacement for the old `parse_literal()` except that any cast parsing code we had before can be discarded. Let's now look at the new code in `parse_literal()`. ```c int parse_literal(int type) { struct ASTnode *tree; // Parse the expression and optimise the resulting AST tree tree= optimise(binexpr(0)); ``` Ahah. We call `binexpr()` to parse whatever expression is at this point in the input file, and then `optimise()` to fold all the literal expressions. Now, for this to be a tree we can use, the root node should be either an A_INTLIT, an A_STRLIT or a A_CAST (if there was a preceding cast). ```c // If there's a cast, get the child and // mark it as having the type from the cast if (tree->op == A_CAST) { tree->left->type= tree->type; tree= tree->left; } ``` It was a cast, so we get rid of the A_CAST node but keep the type that the child was cast to. ```c // The tree must now have an integer or string literal if (tree->op != A_INTLIT && tree->op != A_STRLIT) fatal("Cannot initialise globals with a general expression"); ``` Oops, they gave us something we cannot use, so tell them and stop. ```c // If the type is char * and if (type == pointer_to(P_CHAR)) { // We have a string literal, return the label number if (tree->op == A_STRLIT) return(tree->a_intvalue); // We have a zero int literal, so that's a NULL if (tree->op == A_INTLIT && tree->a_intvalue==0) return(0); } ``` We need to be able to accept both of these as input: ```c char *c= "Hello"; char *c= (char *)0; ``` and the two inner IF statements above match the two input lines shown. If not a string literal, ... ```c // We only get here with an integer literal. The input type // is an integer type and is wide enough to hold the literal value if (inttype(type) && typesize(type, NULL) >= typesize(tree->type, NULL)) return(tree->a_intvalue); fatal("Type mismatch: literal vs. 
variable"); return(0); // Keep -Wall happy } ``` This took me a while to figure out. We have to parse these: ```c long x= 3; // allow this, where 3 is type char char y= 4000; // prevent this, where 4000 is too wide char *z= 4000; // prevent this, as z is not integer type ``` so the IF statement checks the input type and ensures that it is wide enough to accept the integer literal. ## The Other Parse Changes in `decl.c` Now that we have a function that can parse a literal expression possibly preceded by a cast, we can use it. This is where we toss out our old cast parsing code and replace it. The changes are: ```c // Parse a scalar declaration static struct symtable *scalar_declaration(...) { ... // Globals must be assigned a literal value if (class == C_GLOBAL) { // Create one initial value for the variable and // parse this value sym->initlist= (int *)malloc(sizeof(int)); sym->initlist[0]= parse_literal(type); } ... } // Parse an array declaration static struct symtable *array_declaration(...) { ... // See we have an array size if (Token.token != T_RBRACKET) { nelems= parse_literal(P_INT); if (nelems <= 0) fatald("Array size is illegal", nelems); } ... // Get the list of initial values while (1) { ... initlist[i++]= parse_literal(type); ... } ... } ``` By doing this, we lose about 20 to 30 lines of code to parse any possible cast that used to come before the old `parse_literal()`. Mind you, we had to add 100 lines of constant folding to get that 30 line reduction! Luckily, the constant folding is used in general expressions as well as here, so it is still a win. ## One `expr.c` Change There is one further change to our compiler code to support the new `parse_literal()`. 
In our general function to parse expressions, `binexpr()`, we now must inform it that an expression can be ended by a '}' token, such as appears here: ```c int fred[]= { 1, 2, 6 }; ``` The small change to `binexpr()` is: ```c // If we hit a terminating token, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON || tokentype == T_RBRACE) { // T_RBRACE is new left->rvalue = 1; return (left); } ``` ## Code to Test The Changes Our existing tests will test the situation where there is a single literal value to initialise a global variable. This code in `tests/input112.c` tests both a literal expression to initialise a scalar variable, and a literal expression as the size of an array: ```c #include <stdio.h> char* y = NULL; int x= 10 + 6; int fred [ 2 + 3 ]; int main() { fred[2]= x; printf("%d\n", fred[2]); return(0); } ``` ## Conclusion and What's Next In the next part of our compiler writing journey, I will probably feed more of the compiler source to itself and see what we still have to implement. [Next step](../46_Void_Functions/Readme.md) ================================================ FILE: 45_Globals_Again/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section we are outputting into enum { no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\t.text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\t.data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes.
int cgprimsize(int type) {
  int nbytes = 0;

  // Every pointer type occupies eight bytes
  if (ptrtype(type))
    return (8);

  // Otherwise map the primitive type onto its width
  if (type == P_CHAR)
    nbytes = 1;
  else if (type == P_INT)
    nbytes = 4;
  else if (type == P_LONG)
    nbytes = 8;
  else
    fatald("Bad type in cgprimsize:", type);

  return (nbytes);		// Stays 0 for a bad type; keeps -Wall happy
}

// Work out where a scalar of the given type may be placed.
// 'offset' is a candidate memory offset that is still unallocated
// and 'direction' says which way to move it (1 is up, -1 is down).
// The result is either 'offset' itself or the nearest suitably
// aligned offset in that direction.
int cgalign(int type, int offset, int direction) {
  int alignment = 4;
  int mask;

  // x86-64 doesn't require any of this, but we choose to let
  // chars live at any offset ...
  if (type == P_CHAR)
    return (offset);

  // ... and to put ints, longs and pointers on a 4-byte boundary.
  // Anything else is not a type we know how to place.
  if (type != P_INT && type != P_LONG && !ptrtype(type))
    fatald("Bad type in cg_align:", type);

  // Generic rounding code, kept in one place so it can be reused
  mask = alignment - 1;
  return ((offset + direction * mask) & ~mask);
}

// Position of next local variable relative to stack base pointer.
// We store the offset as positive to make aligning the stack pointer easier
static int localOffset;
static int stackOffset;

// Reserve stack space for a new local variable of the given
// type and return its (negative) position.
static int newlocaloffset(int type) {
  int nbytes = cgprimsize(type);

  // Every local takes up at least 4 bytes
  if (nbytes < 4)
    nbytes = 4;

  localOffset += nbytes;
  return (-localOffset);
}

// List of available registers and their names.
// We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "%r10", "%r11", "%r12", "%r13", "%r9", "%r8", "%rcx", "%rdx", "%rsi", "%rdi" }; static char *breglist[] = { "%r10b", "%r11b", "%r12b", "%r13b", "%r9b", "%r8b", "%cl", "%dl", "%sil", "%dil" }; static char *dreglist[] = { "%r10d", "%r11d", "%r12d", "%r13d", "%r9d", "%r8d", "%ecx", "%edx", "%esi", "%edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) {
  // A non-zero freereg[] entry means the register is already
  // available, so freeing it again is reported as an error
  if (freereg[reg] != 0)
    fatald("Error trying to free register", reg);
  freereg[reg] = 1;
}

// Print out the assembly preamble: mark every register as free,
// select the text segment and emit the switch() helper routine.
// NOTE(review): the emitted assembly comment says %rsi holds the
// switch table, but the routine itself loads %rsi from %rdx --
// confirm against the code that jumps to "switch".
void cgpreamble() {
  freeall_registers();
  cgtextseg();
  fprintf(Outfile,
	  "# internal switch(expr) routine\n"
	  "# %%rsi = switch table, %%rax = expr\n"
	  "# from SubC: http://www.t3x.org/subc/\n"
	  "\n"
	  "switch:\n"
	  " pushq %%rsi\n"
	  " movq %%rdx, %%rsi\n"
	  " movq %%rax, %%rbx\n"
	  " cld\n"
	  " lodsq\n"
	  " movq %%rax, %%rcx\n"
	  "next:\n"
	  " lodsq\n"
	  " movq %%rax, %%rdx\n"
	  " lodsq\n"
	  " cmpq %%rdx, %%rbx\n"
	  " jnz no\n"
	  " popq %%rsi\n"
	  " jmp *%%rax\n"
	  "no:\n"
	  " loop next\n"
	  " lodsq\n"
	  " popq %%rsi\n"
	  " jmp *%%rax\n"
	  "\n");
}

// Nothing to do
void cgpostamble() {
}

// Print out a function preamble
void cgfuncpreamble(struct symtable *sym) {
  char *name = sym->name;
  struct symtable *parm, *locvar;
  int cnt;
  int paramOffset = 16;		// Any pushed params start at this stack offset
  int paramReg = FIRSTPARAMREG;	// Index to the first param register in above reg lists

  // Output in the text segment, reset local offset
  cgtextseg();
  localOffset = 0;

  // Output the function start: save the caller's %rbp
  // and copy %rsp into %rbp
  fprintf(Outfile,
	  "\t.globl\t%s\n"
	  "\t.type\t%s, @function\n"
	  "%s:\n"
	  "\tpushq\t%%rbp\n"
	  "\tmovq\t%%rsp, %%rbp\n", name, name, name);

  // Copy any in-register parameters to the stack, up to six of them
  // The remaining parameters are already on the stack
  for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) {
    if (cnt > 6) {
      // Seventh and later parameters: record successive
      // positive offsets, starting at 16 and 8 bytes apart
      parm->st_posn = paramOffset;
      paramOffset += 8;
    } else {
      // First six parameters: give each a new local slot and store
      // the parameter register into it, walking the register
      // lists downwards from FIRSTPARAMREG
      parm->st_posn = newlocaloffset(parm->type);
      cgstorlocal(paramReg--, parm);
    }
  }

  // For the remainder, if they are a parameter then they are
  // already on the stack. If only a local, make a stack position.
for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) { locvar->st_posn = newlocaloffset(locvar->type); } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\taddq\t$%d,%%rsp\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->st_endlabel); fprintf(Outfile, "\taddq\t$%d,%%rsp\n", stackOffset); fputs("\tpopq %rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadglob(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name); } else // Print out the code to initialise it switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovzbq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name); if (op == 
A_PREDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovslq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadlocal(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->st_posn); fprintf(Outfile, "\tmovq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->st_posn); } else switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->st_posn); fprintf(Outfile, "\tmovzbq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->st_posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->st_posn); fprintf(Outfile, "\tmovslq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->st_posn); break; default: fatald("Bad type in cgloadlocal:", sym->type); } return (r); } // Given the label number of a 
global string, // load its address into a new register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tleaq\tL%d(%%rip), %s\n", label, reglist[r]); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tandq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\torq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txorq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshlq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshrq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { 
fprintf(Outfile, "\tnegq\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnotq\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); return (r); } // Convert an integer value to a boolean value. Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s@PLT\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\taddq\t$%d, %%rsp\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpushq\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmovq\t%s, %s\n", reglist[r], reglist[FIRSTPARAMREG - argposn + 1]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsalq\t$%d, %s\n", val, reglist[r]); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %s(%%rip)\n", reglist[r], sym->name); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %s(%%rip)\n", breglist[r], sym->name); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %s(%%rip)\n", dreglist[r], sym->name); break; default: fatald("Bad type in cgstorglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %d(%%rbp)\n", reglist[r], sym->st_posn); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %d(%%rbp)\n", breglist[r], sym->st_posn); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %d(%%rbp)\n", dreglist[r], sym->st_posn); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size, type; int initvalue; int i; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the variable (or its elements if an array) // and the type of the variable if (node->stype == S_ARRAY) { size= typesize(value_at(node->type), node->ctype); type= value_at(node->type); } else { size = node->size; type= node->type; } // Generate the global identity and the label cgdataseg(); fprintf(Outfile, "\t.globl\t%s\n", node->name); 
fprintf(Outfile, "%s:\n", node->name); // Output space for one or more elements for (i=0; i < node->nelems; i++) { // Get any initial value initvalue= 0; if (node->initlist != NULL) initvalue= node->initlist[i]; // Generate the space for this type switch (size) { case 1: fprintf(Outfile, "\t.byte\t%d\n", initvalue); break; case 4: fprintf(Outfile, "\t.long\t%d\n", initvalue); break; case 8: // Generate the pointer to a string literal. Treat a zero value // as actually zero, not the label L0 if (node->initlist != NULL && type== pointer_to(P_CHAR) && initvalue != 0) fprintf(Outfile, "\t.quad\tL%d\n", initvalue); else fprintf(Outfile, "\t.quad\t%d\n", initvalue); break; default: for (int i = 0; i < size; i++) fprintf(Outfile, "\t.byte\t0\n"); } } } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\t.byte\t%d\n", *cptr); } fprintf(Outfile, "\t.byte\t0\n"); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzbl\t%s, %%eax\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %%eax\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } cgjump(sym->st_endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL) fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", sym->name, reglist[r]); else fprintf(Outfile, "\tleaq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzbq\t(%s), %s\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovslq\t(%s), %s\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { // Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmovb\t%s, (%s)\n", breglist[r1], reglist[r2]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } // Generate a switch jump table and the code to // load the registers and call the switch() code void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; // Get a label for the switch table label = genlabel(); cglabel(label); // Heuristic. If we have no cases, create one case // which points to the default case if (casecount == 0) { caseval[0] = 0; caselabel[0] = defaultlabel; casecount = 1; } // Generate the switch jump table. 
fprintf(Outfile, "\t.quad\t%d\n", casecount); for (i = 0; i < casecount; i++) fprintf(Outfile, "\t.quad\t%d, L%d\n", caseval[i], caselabel[i]); fprintf(Outfile, "\t.quad\tL%d\n", defaultlabel); // Load the specific registers cglabel(toplabel); fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); fprintf(Outfile, "\tleaq\tL%d(%%rip), %%rdx\n", label); fprintf(Outfile, "\tjmp\tswitch\n"); } ================================================ FILE: 45_Globals_Again/cg_arm.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for ARMv6 on Raspberry Pi // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. static int freereg[4]; static char *reglist[4] = { "r4", "r5", "r6", "r7" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // We have to store large integer literal values in memory. // Keep a list of them which will be output in the postamble #define MAXINTS 1024 int Intlist[MAXINTS]; static int Intslot = 0; // Determine the offset of a large integer // literal from the .L3 label. If the integer // isn't in the list, add it. 
static void set_int_offset(int val) { int offset = -1; // See if it is already there for (int i = 0; i < Intslot; i++) { if (Intlist[i] == val) { offset = 4 * i; break; } } // Not in the list, so add it if (offset == -1) { offset = 4 * Intslot; if (Intslot == MAXINTS) fatal("Out of int slots in set_int_offset()"); Intlist[Intslot++] = val; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L3+%d\n", offset); } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Print out the assembly postamble void cgpostamble() { // Print out the global variables fprintf(Outfile, ".L2:\n"); for (int i = 0; i < Globs; i++) { if (Symtable[i].stype == S_VARIABLE) fprintf(Outfile, "\t.word %s\n", Symtable[i].name); } // Print out the integer literals fprintf(Outfile, ".L3:\n"); for (int i = 0; i < Intslot; i++) { fprintf(Outfile, "\t.word %d\n", Intlist[i]); } } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, \%%function\n" "%s:\n" "\tpush\t{fp, lr}\n" "\tadd\tfp, sp, #4\n" "\tsub\tsp, sp, #8\n" "\tstr\tr0, [fp, #-8]\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Symtable[id].endlabel); fputs("\tsub\tsp, fp, #4\n" "\tpop\t{fp, pc}\n" "\t.align\t2\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); // If the literal value is small, do it with one instruction if (value <= 1000) fprintf(Outfile, "\tmov\t%s, #%d\n", reglist[r], value); else { set_int_offset(value); fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); } return (r); } // Determine the offset of a variable from the .L2 // label. Yes, this is inefficient code. static void set_var_offset(int id) { int offset = 0; // Walk the symbol table up to id. 
// Find S_VARIABLEs and add on 4 until // we get to our variable for (int i = 0; i < id; i++) { if (Symtable[i].stype == S_VARIABLE) offset += 4; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L2+%d\n", offset); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tldrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s, %s\n", reglist[r1], reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\tmul\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { // To do a divide: r0 holds the dividend, r1 holds the divisor. // The quotient is in r0. 
fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r1]); fprintf(Outfile, "\tmov\tr1, %s\n", reglist[r2]); fprintf(Outfile, "\tbl\t__aeabi_idiv\n"); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r1]); free_register(r2); return (r1); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]); fprintf(Outfile, "\tbl\t%s\n", Symtable[id].name); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r]); return (r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tlsl\t%s, %s, #%d\n", reglist[r], reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tstr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgstorglob:", Symtable[id].type); } return (r); } // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { if (ptrtype(type)) return (4); switch (type) { case P_CHAR: return (1); case P_INT: case P_LONG: return (4); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Symtable[id].type); fprintf(Outfile, "\t.data\n" "\t.globl\t%s\n", Symtable[id].name); switch (typesize) { case 1: fprintf(Outfile, "%s:\t.byte\t0\n", Symtable[id].name); break; case 4: fprintf(Outfile, "%s:\t.long\t0\n", Symtable[id].name); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "moveq", "movne", "movlt", "movgt", "movle", "movge" }; // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "movne", "moveq", "movge", "movle", "movgt", "movlt" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #1\n", cmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #0\n", invcmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\tuxtb\t%s, %s\n", reglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tb\tL%d\n", l); } // List of inverted branch instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *brlist[] = { "bne", "beq", "bge", "ble", "bgt", "blt" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", brlist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[reg]); cgjump(Symtable[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. Return a new register int cgaddress(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); fprintf(Outfile, "\tmov\t%s, r3\n", reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tldrb\t%s, [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [%s]\n", reglist[r1], reglist[r2]); break; case P_INT: case P_LONG: fprintf(Outfile, "\tstr\t%s, [%s]\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 45_Globals_Again/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { 
no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\tsection .text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\tsection .data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch (type) { case P_CHAR: return (offset); case P_INT: case P_LONG: break; default: if (!ptrtype(type)) fatald("Bad type in cg_align:", type); } // Here we have an int or a long. Align it on a 4-byte offset // I put the generic code here so it can be reused elsewhere. alignment = 4; offset = (offset + direction * (alignment - 1)) & ~(alignment - 1); return (offset); } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. 
// We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "r10", "r11", "r12", "r13", "r9", "r8", "rcx", "rdx", "rsi", "rdi" }; static char *breglist[] = { "r10b", "r11b", "r12b", "r13b", "r9b", "r8b", "cl", "dl", "sil", "dil" }; static char *dreglist[] = { "r10d", "r11d", "r12d", "r13d", "r9d", "r8d", "ecx", "edx", "esi", "edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\textern\tprintint\n", Outfile); fputs("\textern\tprintchar\n", Outfile); fputs("\textern\topen\n", Outfile); fputs("\textern\tclose\n", Outfile); fputs("\textern\tread\n", Outfile); fputs("\textern\twrite\n", Outfile); fputs("\textern\tprintf\n", Outfile); cgtextseg(); fprintf(Outfile, "; internal switch(expr) routine\n" "; rsi = switch table, rax = expr\n" "; from SubC: http://www.t3x.org/subc/\n" "\n" "switch:\n" " push rsi\n" " mov rsi, rdx\n" " mov rbx, rax\n" " cld\n" " lodsq\n" " mov rcx, rax\n" "next:\n" " lodsq\n" " mov rdx, rax\n" " lodsq\n" " cmp rbx, rdx\n" " jnz no\n" " pop rsi\n" " jmp rax\n" "no:\n" " loop next\n" " lodsq\n" " pop rsi\n" " jmp rax\n" "\n"); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(struct symtable *sym) { char *name = sym->name; struct symtable *parm, *locvar; int cnt; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the rsp and rbp fprintf(Outfile, "\tglobal\t%s\n" "%s:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n", name, name); // Copy any in-register parameters to the stack, up to six of them // The remaining parameters are already on the stack for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) { if (cnt > 6) { parm->st_posn = paramOffset; paramOffset += 8; } else { parm->st_posn = newlocaloffset(parm->type); cgstorlocal(paramReg--, parm); } } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. 
for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) { locvar->st_posn = newlocaloffset(locvar->type); } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\tadd\trsp, %d\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->st_endlabel); fprintf(Outfile, "\tadd\trsp, %d\n", stackOffset); fputs("\tpop rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadglob(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); } else // Print out the code to initialise it switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_PREDEC) 
fprintf(Outfile, "\tdec\tdword [%s]\n", sym->name); fprintf(Outfile, "\tmovsx\t%s, word [%s]\n", dreglist[r], sym->name); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword [%s]\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadlocal(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->st_posn); fprintf(Outfile, "\tmov\t%s, [rbp+%d]\n", reglist[r], sym->st_posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->st_posn); } else switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->st_posn); fprintf(Outfile, "\tmovzx\t%s, byte [rbp+%d]\n", reglist[r], sym->st_posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->st_posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", sym->st_posn); fprintf(Outfile, "\tmovsx\t%s, dword [rbp+%d]\n", reglist[r], sym->st_posn); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", 
sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", sym->st_posn); break; default: fatald("Bad type in cgloadlocal:", sym->type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, L%d\n", reglist[r], label); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tand\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\tor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshl\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, 
"\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshr\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tneg\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnot\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r], breglist[r]); return (r); } // Convert an integer value to a boolean value. Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, byte %s\n", reglist[r], breglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\tadd\trsp, %d\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmov\t%s, rax\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpush\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmov\t%s, %s\n", reglist[FIRSTPARAMREG - argposn + 1], reglist[r]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsal\t%s, %d\n", reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, dreglist[r]); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\tqword\t[rbp+%d], %s\n", sym->st_posn, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\tbyte\t[rbp+%d], %s\n", sym->st_posn, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\tdword\t[rbp+%d], %s\n", sym->st_posn, dreglist[r]); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size, type; int initvalue; int i; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the variable (or its elements if an array) // and the type of the variable if (node->stype == S_ARRAY) { size= typesize(value_at(node->type), node->ctype); type= value_at(node->type); } else { size = node->size; type= node->type; } // Generate the global identity and the label cgdataseg(); fprintf(Outfile, "\tsection\t.data\n" "\tglobal\t%s\n", node->name); 
fprintf(Outfile, "%s:\n", node->name); // Output space for one or more elements for (i = 0; i < node->nelems; i++) { // Get any initial value initvalue = 0; if (node->initlist != NULL) initvalue = node->initlist[i]; // Generate the space for this type // original version switch(size) { case 1: fprintf(Outfile, "\tdb\t%d\n", initvalue); break; case 4: fprintf(Outfile, "\tdd\t%d\n", initvalue); break; case 8: // Generate the pointer to a string literal. Treat a zero value // as actually zero, not the label L0 if (node->initlist != NULL && type == pointer_to(P_CHAR) && initvalue != 0) fprintf(Outfile, "\tdq\tL%d\n", initvalue); else fprintf(Outfile, "\tdq\t%d\n", initvalue); break; default: for (int i = 0; i < size; i++) fprintf(Outfile, "\tdb\t0\n"); /* compact version using times instead of loop fprintf(Outfile, "\ttimes\t%d\tdb\t0\n", size); */ } } } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\tdb\t%d\n", *cptr); } fprintf(Outfile, "\tdb\t0\n"); /* put string in readable format on a single line // probably went overboard with error checking int comma = 0, quote = 0, start = 1; fprintf(Outfile, "\tdb\t"); for (cptr=strvalue; *cptr; cptr++) { if ( ! isprint(*cptr) ) if (comma || start) { fprintf(Outfile, "%d, ", *cptr); start = 0; comma = 1; } else if (quote) { fprintf(Outfile, "\', %d, ", *cptr); comma = 1; quote = 0; } else { fprintf(Outfile, "%d, ", *cptr); comma = 1; quote = 0; } else if (start || comma) { fprintf(Outfile, "\'%c", *cptr); start = comma = 0; quote = 1; } else { fprintf(Outfile, "%c", *cptr); comma = 0; quote = 1; } } if (comma || start) fprintf(Outfile, "0\n"); else fprintf(Outfile, "\', 0\n"); */ } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzx\teax, %s\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmov\teax, %s\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } cgjump(sym->st_endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL) fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r], sym->name); else fprintf(Outfile, "\tlea\t%s, [rbp+%d]\n", reglist[r], sym->st_posn); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovsx\t%s, dword [%s]\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { //Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmov\t[%s], byte %s\n", reglist[r2], breglist[r1]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } // Generate a switch jump table and the code to // load the registers and call the switch() code void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; // Get a label for the switch table label = genlabel(); cglabel(label); // Heuristic. If we have no cases, create one case // which points to the default case if (casecount == 0) { caseval[0] = 0; caselabel[0] = defaultlabel; casecount = 1; } // Generate the switch jump table. 
fprintf(Outfile, "\tdq\t%d\n", casecount); for (i = 0; i < casecount; i++) fprintf(Outfile, "\tdq\t%d, L%d\n", caseval[i], caselabel[i]); fprintf(Outfile, "\tdq\tL%d\n", defaultlabel); // Load the specific registers cglabel(toplabel); fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); fprintf(Outfile, "\tmov\trdx, L%d\n", label); fprintf(Outfile, "\tjmp\tswitch\n"); } ================================================ FILE: 45_Globals_Again/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Putback; // Character put back by scanner extern_ struct symtable *Functionid; // Symbol ptr of the current function extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ char *Infilename; // Name of file we are parsing extern_ char *Outfilename; // Name of file we opened as Outfile extern_ struct token Token; // Last token scanned extern_ char Text[TEXTLEN + 1]; // Last identifier scanned extern_ int Looplevel; // Depth of nested loops extern_ int Switchlevel; // Depth of nested switches // Symbol table lists extern_ struct symtable *Globhead, *Globtail; // Global variables and functions extern_ struct symtable *Loclhead, *Locltail; // Local variables extern_ struct symtable *Parmhead, *Parmtail; // Local parameters extern_ struct symtable *Membhead, *Membtail; // Temp list of struct/union members extern_ struct symtable *Structhead, *Structtail; // List of struct types extern_ struct symtable *Unionhead, *Uniontail; // List of union types extern_ struct symtable *Enumhead, *Enumtail; // List of enum types and values extern_ struct symtable *Typehead, *Typetail; // List of typedefs // Command-line flags extern_ int O_dumpAST; // If true, dump the AST trees extern_ int O_keepasm; // If true, keep any assembly files extern_ int O_assemble; // If true, assemble the assembly files extern_ int 
O_dolink; // If true, link the object files extern_ int O_verbose; // If true, print info on compilation stages ================================================ FILE: 45_Globals_Again/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 static struct symtable *composite_declaration(int type); static int typedef_declaration(struct symtable **ctype); static int type_of_typedef(char *name, struct symtable **ctype); static void enum_declaration(void); // Parse the current token and return a primitive type enum value, // a pointer to any composite type and possibly modify // the class of the type. int parse_type(struct symtable **ctype, int *class) { int type, exstatic = 1; // See if the class has been changed to extern (later, static) while (exstatic) { switch (Token.token) { case T_EXTERN: *class = C_EXTERN; scan(&Token); break; default: exstatic = 0; } } // Now work on the actual type keyword switch (Token.token) { case T_VOID: type = P_VOID; scan(&Token); break; case T_CHAR: type = P_CHAR; scan(&Token); break; case T_INT: type = P_INT; scan(&Token); break; case T_LONG: type = P_LONG; scan(&Token); break; // For the following, if we have a ';' after the // parsing then there is no type, so return -1. 
// Example: struct x {int y; int z}; case T_STRUCT: type = P_STRUCT; *ctype = composite_declaration(P_STRUCT); if (Token.token == T_SEMI) type = -1; break; case T_UNION: type = P_UNION; *ctype = composite_declaration(P_UNION); if (Token.token == T_SEMI) type = -1; break; case T_ENUM: type = P_INT; // Enums are really ints enum_declaration(); if (Token.token == T_SEMI) type = -1; break; case T_TYPEDEF: type = typedef_declaration(ctype); if (Token.token == T_SEMI) type = -1; break; case T_IDENT: type = type_of_typedef(Text, ctype); break; default: fatals("Illegal type, token", Token.tokstr); } return (type); } // Given a type parsed by parse_type(), scan in any following // '*' tokens and return the new type int parse_stars(int type) { while (1) { if (Token.token != T_STAR) break; type = pointer_to(type); scan(&Token); } return (type); } // Parse a type which appears inside a cast int parse_cast(void) { int type, class; struct symtable *ctype; // Get the type inside the parentheses type= parse_stars(parse_type(&ctype, &class)); // Do some error checking. I'm sure more can be done if (type == P_STRUCT || type == P_UNION || type == P_VOID) fatal("Cannot cast to a struct, union or void type"); return(type); } // Given a type, parse an expression of literals and ensure // that the type of this expression matches the given type. // Parse any type cast that precedes the expression. // If an integer literal, return this value. // If a string literal, return the label number of the string. 
int parse_literal(int type) { struct ASTnode *tree; // Parse the expression and optimise the resulting AST tree tree= optimise(binexpr(0)); // If there's a cast, get the child and // mark it as having the type from the cast if (tree->op == A_CAST) { tree->left->type= tree->type; tree= tree->left; } // The tree must now have an integer or string literal if (tree->op != A_INTLIT && tree->op != A_STRLIT) fatal("Cannot initialise globals with a general expression"); // If the type is char * and if (type == pointer_to(P_CHAR)) { // We have a string literal, return the label number if (tree->op == A_STRLIT) return(tree->a_intvalue); // We have a zero int literal, so that's a NULL if (tree->op == A_INTLIT && tree->a_intvalue==0) return(0); } // We only get here with an integer literal. The input type // is an integer type and is wide enough to hold the literal value if (inttype(type) && typesize(type, NULL) >= typesize(tree->type, NULL)) return(tree->a_intvalue); fatal("Type mismatch: literal vs. variable"); return(0); // Keep -Wall happy } // Given the type, name and class of a scalar variable, // parse any initialisation value and allocate storage for it. // Return the variable's symbol table entry. 
static struct symtable *scalar_declaration(char *varname, int type, struct symtable *ctype, int class, struct ASTnode **tree) { struct symtable *sym=NULL; struct ASTnode *varnode, *exprnode; *tree= NULL; // Add this as a known scalar switch (class) { case C_EXTERN: case C_GLOBAL: sym= addglob(varname, type, ctype, S_VARIABLE, class, 1, 0); break; case C_LOCAL: sym= addlocl(varname, type, ctype, S_VARIABLE, 1); break; case C_PARAM: sym= addparm(varname, type, ctype, S_VARIABLE); break; case C_MEMBER: sym= addmemb(varname, type, ctype, S_VARIABLE, 1); break; } // The variable is being initialised if (Token.token == T_ASSIGN) { // Only possible for a global or local if (class != C_GLOBAL && class != C_LOCAL) fatals("Variable can not be initialised", varname); scan(&Token); // Globals must be assigned a literal value if (class == C_GLOBAL) { // Create one initial value for the variable and // parse this value sym->initlist= (int *)malloc(sizeof(int)); sym->initlist[0]= parse_literal(type); } if (class == C_LOCAL) { // Make an A_IDENT AST node with the variable varnode = mkastleaf(A_IDENT, sym->type, sym, 0); // Get the expression for the assignment, make into a rvalue exprnode = binexpr(0); exprnode->rvalue = 1; // Ensure the expression's type matches the variable exprnode = modify_type(exprnode, varnode->type, 0); if (exprnode == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree *tree = mkastnode(A_ASSIGN, exprnode->type, exprnode, NULL, varnode, NULL, 0); } } // Generate any global space if (class == C_GLOBAL) genglobsym(sym); return (sym); } // Given the type, name and class of an variable, parse // the size of the array, if any. Then parse any initialisation // value and allocate storage for it. // Return the variable's symbol table entry. 
static struct symtable *array_declaration(char *varname, int type, struct symtable *ctype, int class) { struct symtable *sym; // New symbol table entry int nelems= -1; // Assume the number of elements won't be given int maxelems; // The maximum number of elements in the init list int *initlist; // The list of initial elements int i=0, j; // Skip past the '[' scan(&Token); // See we have an array size if (Token.token != T_RBRACKET) { nelems= parse_literal(P_INT); if (nelems <= 0) fatald("Array size is illegal", nelems); } // Ensure we have a following ']' match(T_RBRACKET, "]"); // Add this as a known array. We treat the // array as a pointer to its elements' type switch (class) { case C_EXTERN: case C_GLOBAL: sym = addglob(varname, pointer_to(type), ctype, S_ARRAY, class, 0, 0); break; default: fatal("For now, declaration of non-global arrays is not implemented"); } // Array initialisation if (Token.token == T_ASSIGN) { if (class != C_GLOBAL) fatals("Variable can not be initialised", varname); scan(&Token); // Get the following left curly bracket match(T_LBRACE, "{"); #define TABLE_INCREMENT 10 // If the array already has nelems, allocate that many elements // in the list. Otherwise, start with TABLE_INCREMENT. 
if (nelems != -1) maxelems= nelems; else maxelems= TABLE_INCREMENT; initlist= (int *)malloc(maxelems *sizeof(int)); // Loop getting a new literal value from the list while (1) { // Check we can add the next value, then parse and add it if (nelems != -1 && i == maxelems) fatal("Too many values in initialisation list"); initlist[i++]= parse_literal(type); // Increase the list size if the original size was // not set and we have hit the end of the current list if (nelems == -1 && i == maxelems) { maxelems += TABLE_INCREMENT; initlist= (int *)realloc(initlist, maxelems *sizeof(int)); } // Leave when we hit the right curly bracket if (Token.token == T_RBRACE) { scan(&Token); break; } // Next token must be a comma, then comma(); } // Zero any unused elements in the initlist. // Attach the list to the symbol table entry for (j=i; j < sym->nelems; j++) initlist[j]=0; if (i > nelems) nelems = i; sym->initlist= initlist; } // Set the size of the array and the number of elements sym->nelems= nelems; sym->size= sym->nelems * typesize(type, ctype); // Generate any global space if (class == C_GLOBAL) genglobsym(sym); return (sym); } // Given a pointer to the new function being declared and // a possibly NULL pointer to the function's previous declaration, // parse a list of parameters and cross-check them against the // previous declaration. 
Return the count of parameters static int param_declaration_list(struct symtable *oldfuncsym, struct symtable *newfuncsym) { int type, paramcnt = 0; struct symtable *ctype; struct symtable *protoptr = NULL; struct ASTnode *unused; // Get the pointer to the first prototype parameter if (oldfuncsym != NULL) protoptr = oldfuncsym->member; // Loop getting any parameters while (Token.token != T_RPAREN) { // Get the type of the next parameter type = declaration_list(&ctype, C_PARAM, T_COMMA, T_RPAREN, &unused); if (type == -1) fatal("Bad type in parameter list"); // Ensure the type of this parameter matches the prototype if (protoptr != NULL) { if (type != protoptr->type) fatald("Type doesn't match prototype for parameter", paramcnt + 1); protoptr = protoptr->next; } paramcnt++; // Stop when we hit the right parenthesis if (Token.token == T_RPAREN) break; // We need a comma as separator comma(); } if (oldfuncsym != NULL && paramcnt != oldfuncsym->nelems) fatals("Parameter count mismatch for function", oldfuncsym->name); // Return the count of parameters return (paramcnt); } // // function_declaration: type identifier '(' parameter_list ')' ; // | type identifier '(' parameter_list ')' compound_statement ; // // Parse the declaration of function. static struct symtable *function_declaration(char *funcname, int type, struct symtable *ctype, int class) { struct ASTnode *tree, *finalstmt; struct symtable *oldfuncsym, *newfuncsym = NULL; int endlabel, paramcnt; // Text has the identifier's name. If this exists and is a // function, get the id. Otherwise, set oldfuncsym to NULL. 
if ((oldfuncsym = findsymbol(funcname)) != NULL) if (oldfuncsym->stype != S_FUNCTION) oldfuncsym = NULL; // If this is a new function declaration, get a // label-id for the end label, and add the function // to the symbol table, if (oldfuncsym == NULL) { endlabel = genlabel(); // Assumtion: functions only return scalar types, so NULL below newfuncsym = addglob(funcname, type, NULL, S_FUNCTION, C_GLOBAL, 0, endlabel); } // Scan in the '(', any parameters and the ')'. // Pass in any existing function prototype pointer lparen(); paramcnt = param_declaration_list(oldfuncsym, newfuncsym); rparen(); // If this is a new function declaration, update the // function symbol entry with the number of parameters. // Also copy the parameter list into the function's node. if (newfuncsym) { newfuncsym->nelems = paramcnt; newfuncsym->member = Parmhead; oldfuncsym = newfuncsym; } // Clear out the parameter list Parmhead = Parmtail = NULL; // Declaration ends in a semicolon, only a prototype. if (Token.token == T_SEMI) return (oldfuncsym); // This is not just a prototype. // Set the Functionid global to the function's symbol pointer Functionid = oldfuncsym; // Get the AST tree for the compound statement and mark // that we have parsed no loops or switches yet Looplevel = 0; Switchlevel = 0; lbrace(); tree = compound_statement(0); rbrace(); // If the function type isn't P_VOID .. if (type != P_VOID) { // Error if no statements in the function if (tree == NULL) fatal("No statements in function with non-void type"); // Check that the last AST operation in the // compound statement was a return statement finalstmt = (tree->op == A_GLUE) ? 
tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); } // Build the A_FUNCTION node which has the function's symbol pointer // and the compound statement sub-tree tree = mkastunary(A_FUNCTION, type, tree, oldfuncsym, endlabel); // Do optimisations on the AST tree tree= optimise(tree); // Dump the AST tree if requested if (O_dumpAST) { dumpAST(tree, NOLABEL, 0); fprintf(stdout, "\n\n"); } // Generate the assembly code for it genAST(tree, NOLABEL, NOLABEL, NOLABEL, 0); // Now free the symbols associated // with this function freeloclsyms(); return (oldfuncsym); } // Parse composite type declarations: structs or unions. // Either find an existing struct/union declaration, or build // a struct/union symbol table entry and return its pointer. static struct symtable *composite_declaration(int type) { struct symtable *ctype = NULL; struct symtable *m; struct ASTnode *unused; int offset; int t; // Skip the struct/union keyword scan(&Token); // See if there is a following struct/union name if (Token.token == T_IDENT) { // Find any matching composite type if (type == P_STRUCT) ctype = findstruct(Text); else ctype = findunion(Text); scan(&Token); } // If the next token isn't an LBRACE , this is // the usage of an existing struct/union type. // Return the pointer to the type. if (Token.token != T_LBRACE) { if (ctype == NULL) fatals("unknown struct/union type", Text); return (ctype); } // Ensure this struct/union type hasn't been // previously defined if (ctype) fatals("previously defined struct/union", Text); // Build the composite type and skip the left brace if (type == P_STRUCT) ctype = addstruct(Text); else ctype = addunion(Text); scan(&Token); // Scan in the list of members while (1) { // Get the next member. 
m is used as a dummy t= declaration_list(&m, C_MEMBER, T_SEMI, T_RBRACE, &unused); if (t== -1) fatal("Bad type in member list"); if (Token.token == T_SEMI) scan(&Token); if (Token.token == T_RBRACE) break; } // Attach to the struct type's node rbrace(); if (Membhead==NULL) fatals("No members in struct", ctype->name); ctype->member = Membhead; Membhead = Membtail = NULL; // Set the offset of the initial member // and find the first free byte after it m = ctype->member; m->st_posn = 0; offset = typesize(m->type, m->ctype); // Set the position of each successive member in the composite type // Unions are easy. For structs, align the member and find the next free byte for (m = m->next; m != NULL; m = m->next) { // Set the offset for this member if (type == P_STRUCT) m->st_posn = genalign(m->type, offset, 1); else m->st_posn = 0; // Get the offset of the next free byte after this member offset += typesize(m->type, m->ctype); } // Set the overall size of the composite type ctype->size = offset; return (ctype); } // Parse an enum declaration static void enum_declaration(void) { struct symtable *etype = NULL; char *name= NULL; int intval = 0; // Skip the enum keyword. scan(&Token); // If there's a following enum type name, get a // pointer to any existing enum type node. if (Token.token == T_IDENT) { etype = findenumtype(Text); name = strdup(Text); // As it gets tromped soon scan(&Token); } // If the next token isn't a LBRACE, check // that we have an enum type name, then return if (Token.token != T_LBRACE) { if (etype == NULL) fatals("undeclared enum type:", name); return; } // We do have an LBRACE. Skip it scan(&Token); // If we have an enum type name, ensure that it // hasn't been declared before. 
if (etype != NULL) fatals("enum type redeclared:", etype->name); else // Build an enum type node for this identifier etype = addenum(name, C_ENUMTYPE, 0); // Loop to get all the enum values while (1) { // Ensure we have an identifier // Copy it in case there's an int literal coming up ident(); name = strdup(Text); // Ensure this enum value hasn't been declared before etype = findenumval(name); if (etype != NULL) fatals("enum value redeclared:", Text); // If the next token is an '=', skip it and // get the following int literal if (Token.token == T_ASSIGN) { scan(&Token); if (Token.token != T_INTLIT) fatal("Expected int literal after '='"); intval = Token.intvalue; scan(&Token); } // Build an enum value node for this identifier. // Increment the value for the next enum identifier. etype = addenum(name, C_ENUMVAL, intval++); // Bail out on a right curly bracket, else get a comma if (Token.token == T_RBRACE) break; comma(); } scan(&Token); // Skip over the right curly bracket } // Parse a typedef declaration and return the type // and ctype that it represents static int typedef_declaration(struct symtable **ctype) { int type, class = 0; // Skip the typedef keyword. scan(&Token); // Get the actual type following the keyword type = parse_type(ctype, &class); if (class != 0) fatal("Can't have extern in a typedef declaration"); // See if the typedef identifier already exists if (findtypedef(Text) != NULL) fatals("redefinition of typedef", Text); // Get any following '*' tokens type = parse_stars(type); // It doesn't exist so add it to the typedef list addtypedef(Text, type, *ctype); scan(&Token); return (type); } // Given a typedef name, return the type it represents static int type_of_typedef(char *name, struct symtable **ctype) { struct symtable *t; // Look up the typedef in the list t = findtypedef(name); if (t == NULL) fatals("unknown type", name); scan(&Token); *ctype = t->ctype; return (t->type); } // Parse the declaration of a variable or function. 
// The type and any following '*'s have been scanned, and we // have the identifier in the Token variable. // The class argument is the variable's class. // Return a pointer to the symbol's entry in the symbol table static struct symtable *symbol_declaration(int type, struct symtable *ctype, int class, struct ASTnode **tree) { struct symtable *sym = NULL; char *varname = strdup(Text); // Ensure that we have an identifier. // We copied it above so we can scan more tokens in, e.g. // an assignment expression for a local variable. ident(); // Deal with function declarations if (Token.token == T_LPAREN) { return (function_declaration(varname, type, ctype, class)); } // See if this array or scalar variable has already been declared switch (class) { case C_EXTERN: case C_GLOBAL: if (findglob(varname) != NULL) fatals("Duplicate global variable declaration", varname); case C_LOCAL: case C_PARAM: if (findlocl(varname) != NULL) fatals("Duplicate local variable declaration", varname); case C_MEMBER: if (findmember(varname) != NULL) fatals("Duplicate struct/union member declaration", varname); } // Add the array or scalar variable to the symbol table if (Token.token == T_LBRACKET) sym = array_declaration(varname, type, ctype, class); else sym = scalar_declaration(varname, type, ctype, class, tree); return (sym); } // Parse a list of symbols where there is an initial type. // Return the type of the symbols. et1 and et2 are end tokens. int declaration_list(struct symtable **ctype, int class, int et1, int et2, struct ASTnode **gluetree) { int inittype, type; struct symtable *sym; struct ASTnode *tree; *gluetree= NULL; // Get the initial type. 
If -1, it was // a composite type definition, return this if ((inittype = parse_type(ctype, &class)) == -1) return (inittype); // Now parse the list of symbols while (1) { // See if this symbol is a pointer type = parse_stars(inittype); // Parse this symbol sym = symbol_declaration(type, *ctype, class, &tree); // We parsed a function, there is no list so leave if (sym->stype == S_FUNCTION) { if (class != C_GLOBAL) fatal("Function definition not at global level"); return (type); } // Glue any AST tree from a local declaration // to build a sequence of assignments to perform if (*gluetree== NULL) *gluetree= tree; else *gluetree = mkastnode(A_GLUE, P_NONE, *gluetree, NULL, tree, NULL, 0); // We are at the end of the list, leave if (Token.token == et1 || Token.token == et2) return (type); // Otherwise, we need a comma as separator comma(); } } // Parse one or more global declarations, either // variables, functions or structs void global_declarations(void) { struct symtable *ctype; struct ASTnode *unused; while (Token.token != T_EOF) { declaration_list(&ctype, C_GLOBAL, T_SEMI, T_EOF, &unused); // Skip any semicolons and right curly brackets if (Token.token == T_SEMI) scan(&Token); } } ================================================ FILE: 45_Globals_Again/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c void reject_token(struct token *t); int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue); struct ASTnode *mkastleaf(int op, int type, struct symtable *sym, int intvalue); struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, struct symtable *sym, int intvalue); void dumpAST(struct ASTnode *n, int label, int parentASTop); // gen.c int genlabel(void); int genAST(struct ASTnode *n, int iflabel, int looptoplabel, int 
loopendlabel, int parentASTop); void genpreamble(); void genpostamble(); void genfreeregs(); void genglobsym(struct symtable *node); int genglobstr(char *strvalue); int genprimsize(int type); int genalign(int type, int offset, int direction); void genreturn(int reg, int id); // cg.c int cgprimsize(int type); int cgalign(int type, int offset, int direction); void cgtextseg(); void cgdataseg(); void freeall_registers(void); void cgpreamble(); void cgpostamble(); void cgfuncpreamble(struct symtable *sym); void cgfuncpostamble(struct symtable *sym); int cgloadint(int value, int type); int cgloadglob(struct symtable *sym, int op); int cgloadlocal(struct symtable *sym, int op); int cgloadglobstr(int label); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdiv(int r1, int r2); int cgshlconst(int r, int val); int cgcall(struct symtable *sym, int numargs); void cgcopyarg(int r, int argposn); int cgstorglob(int r, struct symtable *sym); int cgstorlocal(int r, struct symtable *sym); void cgglobsym(struct symtable *node); void cgglobstr(int l, char *strvalue); int cgcompare_and_set(int ASTop, int r1, int r2); int cgcompare_and_jump(int ASTop, int r1, int r2, int label); void cglabel(int l); void cgjump(int l); int cgwiden(int r, int oldtype, int newtype); void cgreturn(int reg, struct symtable *sym); int cgaddress(struct symtable *sym); int cgderef(int r, int type); int cgstorderef(int r1, int r2, int type); int cgnegate(int r); int cginvert(int r); int cglognot(int r); int cgboolean(int r, int op, int label); int cgand(int r1, int r2); int cgor(int r1, int r2); int cgxor(int r1, int r2); int cgshl(int r1, int r2); int cgshr(int r1, int r2); void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel); // expr.c struct ASTnode *expression_list(int endtoken); struct ASTnode *binexpr(int ptp); // stmt.c struct ASTnode *compound_statement(int inswitch); // misc.c void match(int t, char *what); void 
semi(void); void lbrace(void); void rbrace(void); void lparen(void); void rparen(void); void ident(void); void comma(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node); struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn); struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn); struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int nelems); struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype); struct symtable *addstruct(char *name); struct symtable *addunion(char *name); struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int nelems); struct symtable *addenum(char *name, int class, int value); struct symtable *addtypedef(char *name, int type, struct symtable *ctype); struct symtable *findglob(char *s); struct symtable *findlocl(char *s); struct symtable *findsymbol(char *s); struct symtable *findmember(char *s); struct symtable *findstruct(char *s); struct symtable *findunion(char *s); struct symtable *findenumtype(char *s); struct symtable *findenumval(char *s); struct symtable *findtypedef(char *s); void clear_symtable(void); void freeloclsyms(void); // decl.c int parse_type(struct symtable **ctype, int *class); int parse_stars(int type); int parse_cast(void); int declaration_list(struct symtable **ctype, int class, int et1, int et2, struct ASTnode **gluetree); void global_declarations(void); // types.c int inttype(int type); int ptrtype(int type); int pointer_to(int type); int value_at(int type); int typesize(int type, struct symtable *ctype); struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op); // opt.c struct ASTnode *optimise(struct ASTnode *n); 
================================================ FILE: 45_Globals_Again/defs.h ================================================ #include #include #include #include // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 enum { TEXTLEN = 512 // Length of identifiers in input }; // Commands and default filenames #define AOUT "a.out" #ifdef __NASM__ #define ASCMD "nasm -f elf64 -o " #define LDCMD "cc -no-pie -fno-plt -Wall -o " #else #define ASCMD "as -o " #define LDCMD "cc -o " #endif #define CPPCMD "cpp -nostdinc -isystem " // Token types enum { T_EOF, // Binary operators T_ASSIGN, T_ASPLUS, T_ASMINUS, T_ASSTAR, T_ASSLASH, T_LOGOR, T_LOGAND, T_OR, T_XOR, T_AMPER, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, T_LSHIFT, T_RSHIFT, T_PLUS, T_MINUS, T_STAR, T_SLASH, // Other operators T_INC, T_DEC, T_INVERT, T_LOGNOT, // Type keywords T_VOID, T_CHAR, T_INT, T_LONG, // Other keywords T_IF, T_ELSE, T_WHILE, T_FOR, T_RETURN, T_STRUCT, T_UNION, T_ENUM, T_TYPEDEF, T_EXTERN, T_BREAK, T_CONTINUE, T_SWITCH, T_CASE, T_DEFAULT, // Structural tokens T_INTLIT, T_STRLIT, T_SEMI, T_IDENT, T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, T_LBRACKET, T_RBRACKET, T_COMMA, T_DOT, T_ARROW, T_COLON }; // Token structure struct token { int token; // Token type, from the enum list above char *tokstr; // String version of the token int intvalue; // For T_INTLIT, the integer value }; // AST node types. The first few line up // with the related tokens enum { A_ASSIGN = 1, A_ASPLUS, A_ASMINUS, A_ASSTAR, A_ASSLASH, A_LOGOR, A_LOGAND, A_OR, A_XOR, A_AND, A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_LSHIFT, A_RSHIFT, A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_INTLIT, A_STRLIT, A_IDENT, A_GLUE, A_IF, A_WHILE, A_FUNCTION, A_WIDEN, A_RETURN, A_FUNCCALL, A_DEREF, A_ADDR, A_SCALE, A_PREINC, A_PREDEC, A_POSTINC, A_POSTDEC, A_NEGATE, A_INVERT, A_LOGNOT, A_TOBOOL, A_BREAK, A_CONTINUE, A_SWITCH, A_CASE, A_DEFAULT, A_CAST }; // Primitive types. 
// The bottom 4 bits is an integer
// value that represents the level of indirection,
// e.g. 0= no pointer, 1= pointer, 2= pointer pointer etc.
// Each base type is therefore a multiple of 16.
enum {
  P_NONE, P_VOID = 16, P_CHAR = 32, P_INT = 48, P_LONG = 64,
  P_STRUCT=80, P_UNION=96
};

// Structural types
enum {
  S_VARIABLE, S_FUNCTION, S_ARRAY
};

// Storage classes
enum {
  C_GLOBAL = 1,			// Globally visible symbol
  C_LOCAL,			// Locally visible symbol
  C_PARAM,			// Locally visible function parameter
  C_EXTERN,			// External globally visible symbol
  C_STRUCT,			// A struct
  C_UNION,			// A union
  C_MEMBER,			// Member of a struct or union
  C_ENUMTYPE,			// A named enumeration type
  C_ENUMVAL,			// A named enumeration value
  C_TYPEDEF			// A named typedef
};

// Symbol table structure. One node describes one named entity;
// nodes are chained into lists through the next pointer.
struct symtable {
  char *name;			// Name of a symbol
  int type;			// Primitive type for the symbol
  struct symtable *ctype;	// If struct/union, ptr to that type
  int stype;			// Structural type for the symbol
  int class;			// Storage class for the symbol
  int size;			// Total size in bytes of this symbol
  int nelems;			// Functions: # params. Arrays: # elements
#define st_endlabel st_posn	// For functions, the end label
  int st_posn;			// For locals, the negative offset
				// from the stack base pointer.
				// expr.c also reads it as the offset of a
				// struct/union member and as the integer
				// value of an enum value symbol.
  int *initlist;		// List of initial values
  struct symtable *next;	// Next symbol in one list
  struct symtable *member;	// First member of a function, struct,
				// union or enum
};

// Abstract Syntax Tree structure
struct ASTnode {
  int op;			// "Operation" to be performed on this tree
  int type;			// Type of any expression this tree generates
  int rvalue;			// True if the node is an rvalue
  struct ASTnode *left;		// Left, middle and right child trees
  struct ASTnode *mid;
  struct ASTnode *right;
  struct symtable *sym;		// For many AST nodes, the pointer to
				// the symbol in the symbol table
#define a_intvalue a_size	// For A_INTLIT, the integer value
  int a_size;			// For A_SCALE, the size to scale by
};

enum {
  NOREG = -1,			// Use NOREG when the AST generation
				// functions have no register to return
  NOLABEL = 0			// Use NOLABEL when we have no label to
				// pass to genAST()
};

================================================
FILE: 45_Globals_Again/expr.c
================================================
#include "defs.h"
#include "data.h"
#include "decl.h"

// Parsing of expressions
// Copyright (c) 2019 Warren Toomey, GPL3

// expression_list: (empty)
//        | expression
//        | expression ',' expression_list
//        ;

// Parse a list of zero or more comma-separated expressions and
// return an AST composed of A_GLUE nodes with the left-hand child
// being the sub-tree of previous expressions (or NULL) and the right-hand
// child being the next expression. Each A_GLUE node will have size field
// set to the number of expressions in the tree at this point.
If no // expressions are parsed, NULL is returned struct ASTnode *expression_list(int endtoken) { struct ASTnode *tree = NULL; struct ASTnode *child = NULL; int exprcount = 0; // Loop until the end token while (Token.token != endtoken) { // Parse the next expression and increment the expression count child = binexpr(0); exprcount++; // Build an A_GLUE AST node with the previous tree as the left child // and the new expression as the right child. Store the expression count. tree = mkastnode(A_GLUE, P_NONE, tree, NULL, child, NULL, exprcount); // Stop when we reach the end token if (Token.token == endtoken) break; // Must have a ',' at this point match(T_COMMA, ","); } // Return the tree of expressions return (tree); } // Parse a function call and return its AST static struct ASTnode *funccall(void) { struct ASTnode *tree; struct symtable *funcptr; // Check that the identifier has been defined as a function, // then make a leaf node for it. if ((funcptr = findsymbol(Text)) == NULL || funcptr->stype != S_FUNCTION) { fatals("Undeclared function", Text); } // Get the '(' lparen(); // Parse the argument expression list tree = expression_list(T_RPAREN); // XXX Check type of each argument against the function's prototype // Build the function call AST node. Store the // function's return type as this node's type. 
// Also record the function's symbol-id tree = mkastunary(A_FUNCCALL, funcptr->type, tree, funcptr, 0); // Get the ')' rparen(); return (tree); } // Parse the index into an array and return an AST tree for it static struct ASTnode *array_access(void) { struct ASTnode *left, *right; struct symtable *aryptr; // Check that the identifier has been defined as an array // then make a leaf node for it that points at the base if ((aryptr = findsymbol(Text)) == NULL || aryptr->stype != S_ARRAY) { fatals("Undeclared array", Text); } left = mkastleaf(A_ADDR, aryptr->type, aryptr, 0); // Get the '[' scan(&Token); // Parse the following expression right = binexpr(0); // Get the ']' match(T_RBRACKET, "]"); // Ensure that this is of int type if (!inttype(right->type)) fatal("Array index is not of integer type"); // Scale the index by the size of the element's type right = modify_type(right, left->type, A_ADD); // Return an AST tree where the array's base has the offset // added to it, and dereference the element. Still an lvalue // at this point. left = mkastnode(A_ADD, aryptr->type, left, NULL, right, NULL, 0); left = mkastunary(A_DEREF, value_at(left->type), left, NULL, 0); return (left); } // Parse the member reference of a struct or union // and return an AST tree for it. If withpointer is true, // the access is through a pointer to the member. static struct ASTnode *member_access(int withpointer) { struct ASTnode *left, *right; struct symtable *compvar; struct symtable *typeptr; struct symtable *m; // Check that the identifier has been declared as a // struct/union or a struct/union pointer if ((compvar = findsymbol(Text)) == NULL) fatals("Undeclared variable", Text); if (withpointer && compvar->type != pointer_to(P_STRUCT) && compvar->type != pointer_to(P_UNION)) fatals("Undeclared variable", Text); if (!withpointer && compvar->type != P_STRUCT && compvar->type != P_UNION) fatals("Undeclared variable", Text); // If a pointer to a struct/union, get the pointer's value. 
// Otherwise, make a leaf node that points at the base // Either way, it's an rvalue if (withpointer) { left = mkastleaf(A_IDENT, pointer_to(compvar->type), compvar, 0); } else left = mkastleaf(A_ADDR, compvar->type, compvar, 0); left->rvalue = 1; // Get the details of the composite type typeptr = compvar->ctype; // Skip the '.' or '->' token and get the member's name scan(&Token); ident(); // Find the matching member's name in the type // Die if we can't find it for (m = typeptr->member; m != NULL; m = m->next) if (!strcmp(m->name, Text)) break; if (m == NULL) fatals("No member found in struct/union: ", Text); // Build an A_INTLIT node with the offset right = mkastleaf(A_INTLIT, P_INT, NULL, m->st_posn); // Add the member's offset to the base of the struct/union // and dereference it. Still an lvalue at this point left = mkastnode(A_ADD, pointer_to(m->type), left, NULL, right, NULL, 0); left = mkastunary(A_DEREF, m->type, left, NULL, 0); return (left); } // Parse a postfix expression and return // an AST node representing it. The // identifier is already in Text. static struct ASTnode *postfix(void) { struct ASTnode *n; struct symtable *varptr; struct symtable *enumptr; // If the identifier matches an enum value, // return an A_INTLIT node if ((enumptr = findenumval(Text)) != NULL) { scan(&Token); return (mkastleaf(A_INTLIT, P_INT, NULL, enumptr->st_posn)); } // Scan in the next token to see if we have a postfix expression scan(&Token); // Function call if (Token.token == T_LPAREN) return (funccall()); // An array reference if (Token.token == T_LBRACKET) return (array_access()); // Access into a struct or union if (Token.token == T_DOT) return (member_access(0)); if (Token.token == T_ARROW) return (member_access(1)); // A variable. Check that the variable exists. 
if ((varptr = findsymbol(Text)) == NULL || varptr->stype != S_VARIABLE) fatals("Unknown variable", Text); switch (Token.token) { // Post-increment: skip over the token case T_INC: scan(&Token); n = mkastleaf(A_POSTINC, varptr->type, varptr, 0); break; // Post-decrement: skip over the token case T_DEC: scan(&Token); n = mkastleaf(A_POSTDEC, varptr->type, varptr, 0); break; // Just a variable reference default: n = mkastleaf(A_IDENT, varptr->type, varptr, 0); } return (n); } // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; int id; int type=0; switch (Token.token) { case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. // Make it a P_CHAR if it's within the P_CHAR range if (Token.intvalue >= 0 && Token.intvalue < 256) n = mkastleaf(A_INTLIT, P_CHAR, NULL, Token.intvalue); else n = mkastleaf(A_INTLIT, P_INT, NULL, Token.intvalue); break; case T_STRLIT: // For a STRLIT token, generate the assembly for it. // Then make a leaf AST node for it. id is the string's label. id = genglobstr(Text); n = mkastleaf(A_STRLIT, pointer_to(P_CHAR), NULL, id); break; case T_IDENT: return (postfix()); case T_LPAREN: // Beginning of a parenthesised expression, skip the '('. scan(&Token); // If the token after is a type identifier, this is a cast expression switch (Token.token) { case T_IDENT: // We have to see if the identifier matches a typedef. // If not, treat it as an expression. if (findtypedef(Text) == NULL) { n = binexpr(0); break; } case T_VOID: case T_CHAR: case T_INT: case T_LONG: case T_STRUCT: case T_UNION: case T_ENUM: // Get the type inside the parentheses type= parse_cast(); // Skip the closing ')' and then parse the following expression rparen(); default: n = binexpr(0); // Scan in the expression } // We now have at least an expression in n, and possibly a non-zero type in type // if there was a cast. Skip the closing ')' if there was no cast. 
if (type == 0) rparen(); else // Otherwise, make a unary AST node for the cast n= mkastunary(A_CAST, type, n, NULL, 0); return (n); default: fatals("Expecting a primary expression, got token", Token.tokstr); } // Scan in the next token and return the leaf node scan(&Token); return (n); } // Convert a binary operator token into a binary AST operation. // We rely on a 1:1 mapping from token to AST operation static int binastop(int tokentype) { if (tokentype > T_EOF && tokentype <= T_SLASH) return (tokentype); fatald("Syntax error, token", tokentype); return (0); // Keep -Wall happy } // Return true if a token is right-associative, // false otherwise. static int rightassoc(int tokentype) { if (tokentype >= T_ASSIGN && tokentype <= T_ASSLASH) return (1); return (0); } // Operator precedence for each token. Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 10, // T_EOF, T_ASSIGN, T_ASPLUS, 10, 10, 10, // T_ASMINUS, T_ASSTAR, T_ASSLASH, 20, 30, // T_LOGOR, T_LOGAND 40, 50, 60, // T_OR, T_XOR, T_AMPER 70, 70, // T_EQ, T_NE 80, 80, 80, 80, // T_LT, T_GT, T_LE, T_GE 90, 90, // T_LSHIFT, T_RSHIFT 100, 100, // T_PLUS, T_MINUS 110, 110 // T_STAR, T_SLASH }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec; if (tokentype > T_SLASH) fatald("Token with no precedence in op_precedence:", tokentype); prec = OpPrec[tokentype]; if (prec == 0) fatald("Syntax error, token", tokentype); return (prec); } // prefix_expression: primary // | '*' prefix_expression // | '&' prefix_expression // | '-' prefix_expression // | '++' prefix_expression // | '--' prefix_expression // ; // Parse a prefix expression and return // a sub-tree representing it. 
struct ASTnode *prefix(void) { struct ASTnode *tree; switch (Token.token) { case T_AMPER: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Ensure that it's an identifier if (tree->op != A_IDENT) fatal("& operator must be followed by an identifier"); // Now change the operator to A_ADDR and the type to // a pointer to the original type tree->op = A_ADDR; tree->type = pointer_to(tree->type); break; case T_STAR: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's either another deref or an // identifier if (tree->op != A_IDENT && tree->op != A_DEREF) fatal("* operator must be followed by an identifier or *"); // Prepend an A_DEREF operation to the tree tree = mkastunary(A_DEREF, value_at(tree->type), tree, NULL, 0); break; case T_MINUS: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_NEGATE operation to the tree and // make the child an rvalue. Because chars are unsigned, // also widen this to int so that it's signed tree->rvalue = 1; tree = modify_type(tree, P_INT, 0); tree = mkastunary(A_NEGATE, tree->type, tree, NULL, 0); break; case T_INVERT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_INVERT operation to the tree and // make the child an rvalue. tree->rvalue = 1; tree = mkastunary(A_INVERT, tree->type, tree, NULL, 0); break; case T_LOGNOT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_LOGNOT operation to the tree and // make the child an rvalue. 
tree->rvalue = 1; tree = mkastunary(A_LOGNOT, tree->type, tree, NULL, 0); break; case T_INC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("++ operator must be followed by an identifier"); // Prepend an A_PREINC operation to the tree tree = mkastunary(A_PREINC, tree->type, tree, NULL, 0); break; case T_DEC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("-- operator must be followed by an identifier"); // Prepend an A_PREDEC operation to the tree tree = mkastunary(A_PREDEC, tree->type, tree, NULL, 0); break; default: tree = primary(); } return (tree); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; struct ASTnode *ltemp, *rtemp; int ASTop; int tokentype; // Get the tree on the left. // Fetch the next token at the same time. 
left = prefix(); // If we hit one of several terminating tokens, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON || tokentype == T_RBRACE) { left->rvalue = 1; return (left); } // While the precedence of this token is more than that of the // previous token precedence, or it's right associative and // equal to the previous token's precedence while ((op_precedence(tokentype) > ptp) || (rightassoc(tokentype) && op_precedence(tokentype) == ptp)) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Determine the operation to be performed on the sub-trees ASTop = binastop(tokentype); if (ASTop == A_ASSIGN) { // Assignment // Make the right tree into an rvalue right->rvalue = 1; // Ensure the right's type matches the left right = modify_type(right, left->type, 0); if (right == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree. However, switch // left and right around, so that the right expression's // code will be generated before the left expression ltemp = left; left = right; right = ltemp; } else { // We are not doing an assignment, so both trees should be rvalues // Convert both trees into rvalue if they are lvalue trees left->rvalue = 1; right->rvalue = 1; // Ensure the two types are compatible by trying // to modify each tree to match the other's type. ltemp = modify_type(left, right->type, ASTop); rtemp = modify_type(right, left->type, ASTop); if (ltemp == NULL && rtemp == NULL) fatal("Incompatible types in binary expression"); if (ltemp != NULL) left = ltemp; if (rtemp != NULL) right = rtemp; } // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. 
left = mkastnode(binastop(tokentype), left->type, left, NULL, right, NULL, 0); // Update the details of the current token. // If we hit a terminating token, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON || tokentype == T_RBRACE) { left->rvalue = 1; return (left); } } // Return the tree we have when the precedence // is the same or lower left->rvalue = 1; return (left); } ================================================ FILE: 45_Globals_Again/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number int genlabel(void) { static int id = 1; return (id++); } // Generate the code for an IF statement // and an optional ELSE clause static int genIF(struct ASTnode *n, int looptoplabel, int loopendlabel) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. // When there is no ELSE clause, Lfalse _is_ // the ending label! Lfalse = genlabel(); if (n->right) Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. 
genAST(n->left, Lfalse, NOLABEL, NOLABEL, n->op); genfreeregs(); // Generate the true compound statement genAST(n->mid, NOLABEL, looptoplabel, loopendlabel, n->op); genfreeregs(); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); genfreeregs(); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = genlabel(); Lend = genlabel(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. genAST(n->left, Lend, Lstart, Lend, n->op); genfreeregs(); // Generate the compound statement for the body genAST(n->right, NOLABEL, Lstart, Lend, n->op); genfreeregs(); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Generate the code for a SWITCH statement static int genSWITCH(struct ASTnode *n) { int *caseval, *caselabel; int Ljumptop, Lend; int i, reg, defaultlabel = 0, casecount = 0; struct ASTnode *c; // Create arrays for the case values and associated labels. // Ensure that we have at least one position in each array. caseval = (int *) malloc((n->a_intvalue + 1) * sizeof(int)); caselabel = (int *) malloc((n->a_intvalue + 1) * sizeof(int)); // Generate labels for the top of the jump table, and the // end of the switch statement. Set a default label for // the end of the switch, in case we don't have a default. 
Ljumptop = genlabel(); Lend = genlabel(); defaultlabel = Lend; // Output the code to calculate the switch condition reg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, 0); cgjump(Ljumptop); genfreeregs(); // Walk the right-child linked list to // generate the code for each case for (i = 0, c = n->right; c != NULL; i++, c = c->right) { // Get a label for this case. Store it // and the case value in the arrays. // Record if it is the default case. caselabel[i] = genlabel(); caseval[i] = c->a_intvalue; cglabel(caselabel[i]); if (c->op == A_DEFAULT) defaultlabel = caselabel[i]; else casecount++; // Generate the case code. Pass in the end label for the breaks genAST(c->left, NOLABEL, NOLABEL, Lend, 0); genfreeregs(); } // Ensure the last case jumps past the switch table cgjump(Lend); // Now output the switch table and the end label. cgswitch(reg, casecount, Ljumptop, caselabel, caseval, defaultlabel); cglabel(Lend); return (NOREG); } // Generate the code to copy the arguments of a // function call to its parameters, then call the // function itself. Return the register that holds // the function's return value. static int gen_funccall(struct ASTnode *n) { struct ASTnode *gluetree = n->left; int reg; int numargs = 0; // If there is a list of arguments, walk this list // from the last argument (right-hand child) to the // first while (gluetree) { // Calculate the expression's value reg = genAST(gluetree->right, NOLABEL, NOLABEL, NOLABEL, gluetree->op); // Copy this into the n'th function parameter: size is 1, 2, 3, ... cgcopyarg(reg, gluetree->a_size); // Keep the first (highest) number of arguments if (numargs == 0) numargs = gluetree->a_size; genfreeregs(); gluetree = gluetree->left; } // Call the function, clean up the stack (based on numargs), // and return its result return (cgcall(n->sym, numargs)); } // Given an AST, an optional label, and the AST op // of the parent, generate assembly code recursively. // Return the register id with the tree's final value. 
int genAST(struct ASTnode *n, int iflabel, int looptoplabel, int loopendlabel, int parentASTop) { int leftreg, rightreg; // We have some specific AST node handling at the top // so that we don't evaluate the child sub-trees immediately switch (n->op) { case A_IF: return (genIF(n, looptoplabel, loopendlabel)); case A_WHILE: return (genWHILE(n)); case A_SWITCH: return (genSWITCH(n)); case A_FUNCCALL: return (gen_funccall(n)); case A_GLUE: // Do each child statement, and free the // registers after each child if (n->left != NULL) genAST(n->left, iflabel, looptoplabel, loopendlabel, n->op); genfreeregs(); if (n->right != NULL) genAST(n->right, iflabel, looptoplabel, loopendlabel, n->op); genfreeregs(); return (NOREG); case A_FUNCTION: // Generate the function's preamble before the code // in the child sub-tree cgfuncpreamble(n->sym); genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op); cgfuncpostamble(n->sym); return (NOREG); } // General AST node handling below // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op); if (n->right) rightreg = genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdiv(leftreg, rightreg)); case A_AND: return (cgand(leftreg, rightreg)); case A_OR: return (cgor(leftreg, rightreg)); case A_XOR: return (cgxor(leftreg, rightreg)); case A_LSHIFT: return (cgshl(leftreg, rightreg)); case A_RSHIFT: return (cgshr(leftreg, rightreg)); case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, compare registers // and set one to 1 or 0 based on the comparison. 
if (parentASTop == A_IF || parentASTop == A_WHILE) return (cgcompare_and_jump(n->op, leftreg, rightreg, iflabel)); else return (cgcompare_and_set(n->op, leftreg, rightreg)); case A_INTLIT: return (cgloadint(n->a_intvalue, n->type)); case A_STRLIT: return (cgloadglobstr(n->a_intvalue)); case A_IDENT: // Load our value if we are an rvalue // or we are being dereferenced if (n->rvalue || parentASTop == A_DEREF) { if (n->sym->class == C_GLOBAL) { return (cgloadglob(n->sym, n->op)); } else { return (cgloadlocal(n->sym, n->op)); } } else return (NOREG); case A_ASPLUS: case A_ASMINUS: case A_ASSTAR: case A_ASSLASH: case A_ASSIGN: // For the '+=' and friends operators, generate suitable code // and get the register with the result. Then take the left child, // make it the right child so that we can fall into the assignment code. switch (n->op) { case A_ASPLUS: leftreg= cgadd(leftreg, rightreg); n->right= n->left; break; case A_ASMINUS: leftreg= cgsub(leftreg, rightreg); n->right= n->left; break; case A_ASSTAR: leftreg= cgmul(leftreg, rightreg); n->right= n->left; break; case A_ASSLASH: leftreg= cgdiv(leftreg, rightreg); n->right= n->left; break; } // Now into the assignment code // Are we assigning to an identifier or through a pointer? 
switch (n->right->op) { case A_IDENT: if (n->right->sym->class == C_GLOBAL) return (cgstorglob(leftreg, n->right->sym)); else return (cgstorlocal(leftreg, n->right->sym)); case A_DEREF: return (cgstorderef(leftreg, rightreg, n->right->type)); default: fatald("Can't A_ASSIGN in genAST(), op", n->op); } case A_WIDEN: // Widen the child's type to the parent's type return (cgwiden(leftreg, n->left->type, n->type)); case A_RETURN: cgreturn(leftreg, Functionid); return (NOREG); case A_ADDR: return (cgaddress(n->sym)); case A_DEREF: // If we are an rvalue, dereference to get the value we point at, // otherwise leave it for A_ASSIGN to store through the pointer if (n->rvalue) return (cgderef(leftreg, n->left->type)); else return (leftreg); case A_SCALE: // Small optimisation: use shift if the // scale value is a known power of two switch (n->a_size) { case 2: return (cgshlconst(leftreg, 1)); case 4: return (cgshlconst(leftreg, 2)); case 8: return (cgshlconst(leftreg, 3)); default: // Load a register with the size and // multiply the leftreg by this size rightreg = cgloadint(n->a_size, P_INT); return (cgmul(leftreg, rightreg)); } case A_POSTINC: case A_POSTDEC: // Load and decrement the variable's value into a register // and post increment/decrement it if (n->sym->class == C_GLOBAL) return (cgloadglob(n->sym, n->op)); else return (cgloadlocal(n->sym, n->op)); case A_PREINC: case A_PREDEC: // Load and decrement the variable's value into a register // and pre increment/decrement it if (n->left->sym->class == C_GLOBAL) return (cgloadglob(n->left->sym, n->op)); else return (cgloadlocal(n->left->sym, n->op)); case A_NEGATE: return (cgnegate(leftreg)); case A_INVERT: return (cginvert(leftreg)); case A_LOGNOT: return (cglognot(leftreg)); case A_TOBOOL: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. 
Otherwise, set the register // to 0 or 1 based on it's zeroeness or non-zeroeness return (cgboolean(leftreg, parentASTop, iflabel)); case A_BREAK: cgjump(loopendlabel); return (NOREG); case A_CONTINUE: cgjump(looptoplabel); return (NOREG); case A_CAST: return (leftreg); // Not much to do default: fatald("Unknown AST operator", n->op); } return (NOREG); // Keep -Wall happy } void genpreamble() { cgpreamble(); } void genpostamble() { cgpostamble(); } void genfreeregs() { freeall_registers(); } void genglobsym(struct symtable *node) { cgglobsym(node); } int genglobstr(char *strvalue) { int l = genlabel(); cgglobstr(l, strvalue); return (l); } int genprimsize(int type) { return (cgprimsize(type)); } int genalign(int type, int offset, int direction) { return (cgalign(type, offset, direction)); } ================================================ FILE: 45_Globals_Again/include/ctype.h ================================================ #ifndef _CTYPE_H_ # define _CTYPE_H_ #endif // _CTYPE_H_ ================================================ FILE: 45_Globals_Again/include/fcntl.h ================================================ #ifndef _FCNTL_H_ # define _FCNTL_H_ #define O_RDONLY 00 #define O_WRONLY 01 #define O_RDWR 02 int open(char *pathname, int flags); #endif // _FCNTL_H_ ================================================ FILE: 45_Globals_Again/include/stddef.h ================================================ #ifndef _STDDEF_H_ # define _STDDEF_H_ #ifndef NULL # define NULL (void *)0 #endif typedef long size_t; #endif //_STDDEF_H_ ================================================ FILE: 45_Globals_Again/include/stdio.h ================================================ #ifndef _STDIO_H_ # define _STDIO_H_ #include #ifndef NULL # define NULL (void *)0 #endif // This FILE definition will do for now typedef char * FILE; FILE *fopen(char *pathname, char *mode); size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream); size_t fwrite(void *ptr, size_t size, size_t nmemb, FILE 
*stream); int fclose(FILE *stream); int printf(char *format); int fprintf(FILE *stream, char *format); #endif // _STDIO_H_ ================================================ FILE: 45_Globals_Again/include/stdlib.h ================================================ #ifndef _STDLIB_H_ # define _STDLIB_H_ #endif // _STDLIB_H_ ================================================ FILE: 45_Globals_Again/include/string.h ================================================ #ifndef _STRING_H_ # define _STRING_H_ #endif // _STRING_H_ ================================================ FILE: 45_Globals_Again/include/unistd.h ================================================ #ifndef _UNISTD_H_ # define _UNISTD_H_ #endif // _UNISTD_H_ ================================================ FILE: 45_Globals_Again/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Given a string with a '.' and at least a 1-character suffix // after the '.', change the suffix to be the given character. // Return the new string or NULL if the original string could // not be modified char *alter_suffix(char *str, char suffix) { char *posn; char *newstr; // Clone the string if ((newstr = strdup(str)) == NULL) return (NULL); // Find the '.' if ((posn = strrchr(newstr, '.')) == NULL) return (NULL); // Ensure there is a suffix posn++; if (*posn == '\0') return (NULL); // Change the suffix and NUL-terminate the string *posn++ = suffix; *posn = '\0'; return (newstr); } // Given an input filename, compile that file // down to assembly code. 
Return the new file's name static char *do_compile(char *filename) { char cmd[TEXTLEN]; // Change the input file's suffix to .s Outfilename = alter_suffix(filename, 's'); if (Outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .c on the end\n", filename); exit(1); } // Generate the pre-processor command snprintf(cmd, TEXTLEN, "%s %s %s", CPPCMD, INCDIR, filename); // Open up the pre-processor pipe if ((Infile = popen(cmd, "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", filename, strerror(errno)); exit(1); } Infilename = filename; // Create the output file if ((Outfile = fopen(Outfilename, "w")) == NULL) { fprintf(stderr, "Unable to create %s: %s\n", Outfilename, strerror(errno)); exit(1); } Line = 1; // Reset the scanner Putback = '\n'; clear_symtable(); // Clear the symbol table if (O_verbose) printf("compiling %s\n", filename); scan(&Token); // Get the first token from the input genpreamble(); // Output the preamble global_declarations(); // Parse the global declarations genpostamble(); // Output the postamble fclose(Outfile); // Close the output file return (Outfilename); } // Given an input filename, assemble that file // down to object code. Return the object filename char *do_assemble(char *filename) { char cmd[TEXTLEN]; int err; char *outfilename = alter_suffix(filename, 'o'); if (outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .s on the end\n", filename); exit(1); } // Build the assembly command and run it snprintf(cmd, TEXTLEN, "%s %s %s", ASCMD, outfilename, filename); if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Assembly of %s failed\n", filename); exit(1); } return (outfilename); } // Given a list of object files and an output filename, // link all of the object filenames together. 
void do_link(char *outfilename, char *objlist[]) { int cnt, size = TEXTLEN; char cmd[TEXTLEN], *cptr; int err; // Start with the linker command and the output file cptr = cmd; cnt = snprintf(cptr, size, "%s %s ", LDCMD, outfilename); cptr += cnt; size -= cnt; // Now append each object file while (*objlist != NULL) { cnt = snprintf(cptr, size, "%s ", *objlist); cptr += cnt; size -= cnt; objlist++; } if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Linking failed\n"); exit(1); } } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s [-vcST] [-o outfile] file [file ...]\n", prog); fprintf(stderr, " -v give verbose output of the compilation stages\n"); fprintf(stderr, " -c generate object files but don't link them\n"); fprintf(stderr, " -S generate assembly files but don't link them\n"); fprintf(stderr, " -T dump the AST trees for each input file\n"); fprintf(stderr, " -o outfile, produce the outfile executable file\n"); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. 
enum { MAXOBJ = 100 }; int main(int argc, char *argv[]) { char *outfilename = AOUT; char *asmfile, *objfile; char *objlist[MAXOBJ]; int i, objcnt = 0; // Initialise our variables O_dumpAST = 0; O_keepasm = 0; O_assemble = 0; O_verbose = 0; O_dolink = 1; // Scan for command-line options for (i = 1; i < argc; i++) { // No leading '-', stop scanning for options if (*argv[i] != '-') break; // For each option in this argument for (int j = 1; (*argv[i] == '-') && argv[i][j]; j++) { switch (argv[i][j]) { case 'o': outfilename = argv[++i]; // Save & skip to next argument break; case 'T': O_dumpAST = 1; break; case 'c': O_assemble = 1; O_keepasm = 0; O_dolink = 0; break; case 'S': O_keepasm = 1; O_assemble = 0; O_dolink = 0; break; case 'v': O_verbose = 1; break; default: usage(argv[0]); } } } // Ensure we have at lease one input file argument if (i >= argc) usage(argv[0]); // Work on each input file in turn while (i < argc) { asmfile = do_compile(argv[i]); // Compile the source file if (O_dolink || O_assemble) { objfile = do_assemble(asmfile); // Assemble it to object forma if (objcnt == (MAXOBJ - 2)) { fprintf(stderr, "Too many object files for the compiler to handle\n"); exit(1); } objlist[objcnt++] = objfile; // Add the object file's name objlist[objcnt] = NULL; // to the list of object files } if (!O_keepasm) // Remove the assembly file if unlink(asmfile); // we don't need to keep it i++; } // Now link all the object files together if (O_dolink) { do_link(outfilename, objlist); // If we don't need to keep the object // files, then remove them if (!O_assemble) { for (i = 0; objlist[i] != NULL; i++) unlink(objlist[i]); } } return (0); } ================================================ FILE: 45_Globals_Again/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" #include // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current token is t, // and fetch the next token. 
Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Match a comma and fetch the next token void comma(void) { match(T_COMMA, "comma"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d of %s\n", s, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d of %s\n", s1, s2, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d of %s\n", s, d, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d of %s\n", s, c, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } ================================================ FILE: 45_Globals_Again/opt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST Tree Optimisation Code // Copyright (c) 2019 Warren Toomey, GPL3 // Fold an AST tree with a binary operator // and two A_INTLIT children. Return either // the original tree or a new leaf node. static struct ASTnode *fold2(struct ASTnode *n) { int val, leftval, rightval; // Get the values from each child leftval = n->left->a_intvalue; rightval = n->right->a_intvalue; // Perform some of the binary operations. 
// For any AST op we can't do, return // the original tree. switch (n->op) { case A_ADD: val = leftval + rightval; break; case A_SUBTRACT: val = leftval - rightval; break; case A_MULTIPLY: val = leftval * rightval; break; case A_DIVIDE: // Don't try to divide by zero. if (rightval == 0) return (n); val = leftval / rightval; break; default: return (n); } // Return a leaf node with the new value return (mkastleaf(A_INTLIT, n->type, NULL, val)); } // Fold an AST tree with a unary operator // and one INTLIT children. Return either // the original tree or a new leaf node. static struct ASTnode *fold1(struct ASTnode *n) { int val; // Get the child value. Do the // operation if recognised. // Return the new leaf node. val = n->left->a_intvalue; switch (n->op) { case A_WIDEN: break; case A_INVERT: val = ~val; break; case A_LOGNOT: val = !val; break; default: return (n); } // Return a leaf node with the new value return (mkastleaf(A_INTLIT, n->type, NULL, val)); } // Attempt to do constant folding on // the AST tree with the root node n static struct ASTnode *fold(struct ASTnode *n) { if (n == NULL) return (NULL); // Fold on the left child, then // do the same on the right child n->left = fold(n->left); n->right = fold(n->right); // If both children are A_INTLITs, do a fold2() if (n->left && n->left->op == A_INTLIT) { if (n->right && n->right->op == A_INTLIT) n = fold2(n); else // If only the left is A_INTLIT, do a fold1() n = fold1(n); } // Return the possibly modified tree return (n); } // Optimise an AST tree by // constant folding in all sub-trees struct ASTnode *optimise(struct ASTnode *n) { n = fold(n); return (n); } ================================================ FILE: 45_Globals_Again/scan.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { 
char *p; p = strchr(s, c); return (p ? p - s : -1); } // Get the next character from the input file. static int next(void) { int c, l; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return (c); } c = fgetc(Infile); // Read from input file while (c == '#') { // We've hit a pre-processor statement scan(&Token); // Get the line number into l if (Token.token != T_INTLIT) fatals("Expecting pre-processor line number, got:", Text); l = Token.intvalue; scan(&Token); // Get the filename in Text if (Token.token != T_STRLIT) fatals("Expecting pre-processor file name, got:", Text); if (Text[0] != '<') { // If this is a real filename if (strcmp(Text, Infilename)) // and not the one we have now Infilename = strdup(Text); // save it. Then update the line num Line = l; } while ((c = fgetc(Infile)) != '\n'); // Skip to the end of the line c = fgetc(Infile); // and get the next character } if ('\n' == c) Line++; // Increment line count return (c); } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. static int skip(void) { int c; c = next(); while (' ' == c || '\t' == c || '\n' == c || '\r' == c || '\f' == c) { c = next(); } return (c); } // Return the next character from a character // or string literal static int scanch(void) { int c; // Get the next input character and interpret // metacharacters that start with a backslash c = next(); if (c == '\\') { switch (c = next()) { case 'a': return '\a'; case 'b': return '\b'; case 'f': return '\f'; case 'n': return '\n'; case 'r': return '\r'; case 't': return '\t'; case 'v': return '\v'; case '\\': return '\\'; case '"': return '"'; case '\'': return '\''; default: fatalc("unknown escape sequence", c); } } return (c); // Just an ordinary old character! } // Scan and return an integer literal // value from the input file. 
static int scanint(int c) { int k, val = 0; // Convert each character into an int value while ((k = chrpos("0123456789", c)) >= 0) { val = val * 10 + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan in a string literal from the input file, // and store it in buf[]. Return the length of // the string. static int scanstr(char *buf) { int i, c; // Loop while we have enough buffer space for (i = 0; i < TEXTLEN - 1; i++) { // Get the next char and append to buf // Return when we hit the ending double quote if ((c = scanch()) == '"') { buf[i] = 0; return (i); } buf[i] = c; } // Ran out of buf[] space fatal("String literal too long"); return (0); } // Scan an identifier from the input file and // store it in buf[]. Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { fatal("Identifier too long"); } else if (i < lim - 1) { buf[i++] = c; } c = next(); } // We hit a non-valid character, put it back. // NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. 
static int keyword(char *s) { switch (*s) { case 'b': if (!strcmp(s, "break")) return (T_BREAK); break; case 'c': if (!strcmp(s, "case")) return (T_CASE); if (!strcmp(s, "char")) return (T_CHAR); if (!strcmp(s, "continue")) return (T_CONTINUE); break; case 'd': if (!strcmp(s, "default")) return (T_DEFAULT); break; case 'e': if (!strcmp(s, "else")) return (T_ELSE); if (!strcmp(s, "enum")) return (T_ENUM); if (!strcmp(s, "extern")) return (T_EXTERN); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'l': if (!strcmp(s, "long")) return (T_LONG); break; case 'r': if (!strcmp(s, "return")) return (T_RETURN); break; case 's': if (!strcmp(s, "struct")) return (T_STRUCT); if (!strcmp(s, "switch")) return (T_SWITCH); break; case 't': if (!strcmp(s, "typedef")) return (T_TYPEDEF); break; case 'u': if (!strcmp(s, "union")) return (T_UNION); break; case 'v': if (!strcmp(s, "void")) return (T_VOID); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; } return (0); } // A pointer to a rejected token static struct token *Rejtoken = NULL; // Reject the token that we just scanned void reject_token(struct token *t) { if (Rejtoken != NULL) fatal("Can't reject token twice"); Rejtoken = t; } // List of token strings, for debugging purposes char *Tstring[] = { "EOF", "=", "+=", "-=", "*=", "/=", "||", "&&", "|", "^", "&", "==", "!=", ",", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", "default", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":" }; // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. 
int scan(struct token *t) { int c, tokentype; // If we have any rejected token, return it if (Rejtoken != NULL) { t = Rejtoken; Rejtoken = NULL; return (1); } // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': if ((c = next()) == '+') { t->token = T_INC; } else if (c == '=') { t->token = T_ASPLUS; } else { putback(c); t->token = T_PLUS; } break; case '-': if ((c = next()) == '-') { t->token = T_DEC; } else if (c == '>') { t->token = T_ARROW; } else if (c == '=') { t->token = T_ASMINUS; } else if (isdigit(c)) { // Negative int literal t->intvalue = -scanint(c); t->token = T_INTLIT; } else { putback(c); t->token = T_MINUS; } break; case '*': if ((c = next()) == '=') { t->token = T_ASSTAR; } else { putback(c); t->token = T_STAR; } break; case '/': if ((c = next()) == '=') { t->token = T_ASSLASH; } else { putback(c); t->token = T_SLASH; } break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case '[': t->token = T_LBRACKET; break; case ']': t->token = T_RBRACKET; break; case '~': t->token = T_INVERT; break; case '^': t->token = T_XOR; break; case ',': t->token = T_COMMA; break; case '.': t->token = T_DOT; break; case ':': t->token = T_COLON; break; case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { putback(c); t->token = T_LOGNOT; } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else if (c == '<') { t->token = T_LSHIFT; } else { putback(c); t->token = T_LT; } break; case '>': if ((c = next()) == '=') { t->token = T_GE; } else if (c == '>') { t->token = T_RSHIFT; } else { putback(c); t->token = T_GT; } break; case '&': if ((c = next()) == '&') { t->token = T_LOGAND; } else { putback(c); t->token = 
T_AMPER; } break; case '|': if ((c = next()) == '|') { t->token = T_LOGOR; } else { putback(c); t->token = T_OR; } break; case '\'': // If it's a quote, scan in the // literal character value and // the trailing quote t->intvalue = scanch(); t->token = T_INTLIT; if (next() != '\'') fatal("Expected '\\'' at end of char literal"); break; case '"': // Scan in a literal string scanstr(Text); t->token = T_STRLIT; break; default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if ((tokentype = keyword(Text)) != 0) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token t->tokstr = Tstring[t->token]; return (1); } ================================================ FILE: 45_Globals_Again/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // Prototypes static struct ASTnode *single_statement(void); // compound_statement: // empty, i.e. 
no statement // | statement // | statement statements // ; // // statement: declaration // | expression_statement // | function_call // | if_statement // | while_statement // | for_statement // | return_statement // ; // if_statement: if_head // | if_head 'else' statement // ; // // if_head: 'if' '(' true_false_expression ')' statement ; // // Parse an IF statement including any // optional ELSE clause and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); rparen(); // Get the AST for the statement trueAST = single_statement(); // If we have an 'else', skip it // and get the AST for the statement if (Token.token == T_ELSE) { scan(&Token); falseAST = single_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, P_NONE, condAST, trueAST, falseAST, NULL, 0)); } // while_statement: 'while' '(' true_false_expression ')' statement ; // // Parse a WHILE statement and return its AST static struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); rparen(); // Get the AST for the statement. 
// Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Build and return the AST for this statement return (mkastnode(A_WHILE, P_NONE, condAST, NULL, bodyAST, NULL, 0)); } // for_statement: 'for' '(' expression_list ';' // true_false_expression ';' // expression_list ')' statement ; // // Parse a FOR statement and return its AST static struct ASTnode *for_statement(void) { struct ASTnode *condAST, *bodyAST; struct ASTnode *preopAST, *postopAST; struct ASTnode *tree; // Ensure we have 'for' '(' match(T_FOR, "for"); lparen(); // Get the pre_op expression and the ';' preopAST = expression_list(T_SEMI); semi(); // Get the condition and the ';'. // Force a non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); semi(); // Get the post_op expression and the ')' postopAST = expression_list(T_RPAREN); rparen(); // Get the statement which is the body // Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Glue the statement and the postop tree tree = mkastnode(A_GLUE, P_NONE, bodyAST, NULL, postopAST, NULL, 0); // Make a WHILE loop with the condition and this new body tree = mkastnode(A_WHILE, P_NONE, condAST, NULL, tree, NULL, 0); // And glue the preop tree to the A_WHILE tree return (mkastnode(A_GLUE, P_NONE, preopAST, NULL, tree, NULL, 0)); } // return_statement: 'return' '(' expression ')' ; // // Parse a return statement and return its AST static struct ASTnode *return_statement(void) { struct ASTnode *tree; // Can't return a value if function returns P_VOID if (Functionid->type == P_VOID) fatal("Can't return from a void function"); // Ensure we have 'return' '(' match(T_RETURN, "return"); lparen(); // Parse the following expression tree = binexpr(0); // Ensure this is compatible with the function's type tree = modify_type(tree, 
Functionid->type, 0); if (tree == NULL) fatal("Incompatible type to return"); // Add on the A_RETURN node tree = mkastunary(A_RETURN, P_NONE, tree, NULL, 0); // Get the ')' and ';' rparen(); semi(); return (tree); } // break_statement: 'break' ; // // Parse a break statement and return its AST static struct ASTnode *break_statement(void) { if (Looplevel == 0 && Switchlevel == 0) fatal("no loop or switch to break out from"); scan(&Token); semi(); return (mkastleaf(A_BREAK, 0, NULL, 0)); } // continue_statement: 'continue' ; // // Parse a continue statement and return its AST static struct ASTnode *continue_statement(void) { if (Looplevel == 0) fatal("no loop to continue to"); scan(&Token); semi(); return (mkastleaf(A_CONTINUE, 0, NULL, 0)); } // Parse a switch statement and return its AST static struct ASTnode *switch_statement(void) { struct ASTnode *left, *n, *c, *casetree= NULL, *casetail; int inloop=1, casecount=0; int seendefault=0; int ASTop, casevalue; // Skip the 'switch' and '(' scan(&Token); lparen(); // Get the switch expression, the ')' and the '{' left= binexpr(0); rparen(); lbrace(); // Ensure that this is of int type if (!inttype(left->type)) fatal("Switch expression is not of integer type"); // Build an A_SWITCH subtree with the expression as // the child n= mkastunary(A_SWITCH, 0, left, NULL, 0); // Now parse the cases Switchlevel++; while (inloop) { switch(Token.token) { // Leave the loop when we hit a '}' case T_RBRACE: if (casecount==0) fatal("No cases in switch"); inloop=0; break; case T_CASE: case T_DEFAULT: // Ensure this isn't after a previous 'default' if (seendefault) fatal("case or default after existing default"); // Set the AST operation. 
Scan the case value if required if (Token.token==T_DEFAULT) { ASTop= A_DEFAULT; seendefault= 1; scan(&Token); } else { ASTop= A_CASE; scan(&Token); left= binexpr(0); // Ensure the case value is an integer literal if (left->op != A_INTLIT) fatal("Expecting integer literal for case value"); casevalue= left->a_intvalue; // Walk the list of existing case values to ensure // that there isn't a duplicate case value for (c= casetree; c != NULL; c= c -> right) if (casevalue == c->a_intvalue) fatal("Duplicate case value"); } // Scan the ':' and get the compound expression match(T_COLON, ":"); left= compound_statement(1); casecount++; // Build a sub-tree with the compound statement as the left child // and link it in to the growing A_CASE tree if (casetree==NULL) { casetree= casetail= mkastunary(ASTop, 0, left, NULL, casevalue); } else { casetail->right= mkastunary(ASTop, 0, left, NULL, casevalue); casetail= casetail->right; } break; default: fatals("Unexpected token in switch", Token.tokstr); } } Switchlevel--; // We have a sub-tree with the cases and any default. Put the // case count into the A_SWITCH node and attach the case tree. n->a_intvalue= casecount; n->right= casetree; rbrace(); return(n); } // Parse a single statement and return its AST. static struct ASTnode *single_statement(void) { struct ASTnode *stmt; struct symtable *ctype; switch (Token.token) { case T_LBRACE: // We have a '{', so this is a compound statement lbrace(); stmt = compound_statement(0); rbrace(); return(stmt); case T_IDENT: // We have to see if the identifier matches a typedef. // If not, treat it as an expression. // Otherwise, fall down to the parse_type() call. if (findtypedef(Text) == NULL) { stmt= binexpr(0); semi(); return(stmt); } case T_CHAR: case T_INT: case T_LONG: case T_STRUCT: case T_UNION: case T_ENUM: case T_TYPEDEF: // The beginning of a variable declaration list. 
declaration_list(&ctype, C_LOCAL, T_SEMI, T_EOF, &stmt); semi(); return (stmt); // Any assignments from the declarations case T_IF: return (if_statement()); case T_WHILE: return (while_statement()); case T_FOR: return (for_statement()); case T_RETURN: return (return_statement()); case T_BREAK: return (break_statement()); case T_CONTINUE: return (continue_statement()); case T_SWITCH: return (switch_statement()); default: // For now, see if this is an expression. // This catches assignment statements. stmt= binexpr(0); semi(); return(stmt); } return (NULL); // Keep -Wall happy } // Parse a compound statement // and return its AST. If inswitch is true, // we look for a '}', 'case' or 'default' token // to end the parsing. Otherwise, look for // just a '}' to end the parsing. struct ASTnode *compound_statement(int inswitch) { struct ASTnode *left = NULL; struct ASTnode *tree; while (1) { // Parse a single statement tree = single_statement(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, P_NONE, left, NULL, tree, NULL, 0); } // Leave if we've hit the end token if (Token.token == T_RBRACE) return(left); if (inswitch && (Token.token == T_CASE || Token.token == T_DEFAULT)) return(left); } } ================================================ FILE: 45_Globals_Again/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 // Append a node to the singly-linked list pointed to by head or tail void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node) { // Check for valid pointers if (head == NULL || tail == NULL || node == NULL) fatal("Either head, tail or node is NULL in appendsym"); // Append to the list if (*tail) { (*tail)->next = node; *tail = node; } else *head = *tail = node; 
node->next = NULL; } // Create a symbol node to be added to a symbol table list. // Set up the node's: // + type: char, int etc. // + ctype: composite type pointer for struct/union // + structural type: var, function, array etc. // + size: number of elements, or endlabel: end label for a function // + posn: Position information for local symbols // Return a pointer to the new node struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn) { // Get a new node struct symtable *node = (struct symtable *) malloc(sizeof(struct symtable)); if (node == NULL) fatal("Unable to malloc a symbol table node in newsym"); // Fill in the values if (name == NULL) node->name = NULL; else node->name = strdup(name); node->type = type; node->ctype = ctype; node->stype = stype; node->class = class; node->nelems = nelems; // For pointers and integer types, set the size // of the symbol. structs and union declarations // manually set this up themselves. if (ptrtype(type) || inttype(type)) node->size = nelems * typesize(type, ctype); node->st_posn = posn; node->next = NULL; node->member = NULL; node->initlist = NULL; return (node); } // Add a symbol to the global symbol list struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn) { struct symtable *sym = newsym(name, type, ctype, stype, class, nelems, posn); // For structs and unions, copy the size from the type node if (type== P_STRUCT || type== P_UNION) sym->size = ctype->size; appendsym(&Globhead, &Globtail, sym); return (sym); } // Add a symbol to the local symbol list struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int nelems) { struct symtable *sym = newsym(name, type, ctype, stype, C_LOCAL, nelems, 0); // For structs and unions, copy the size from the type node if (type== P_STRUCT || type== P_UNION) sym->size = ctype->size; appendsym(&Loclhead, &Locltail, sym); return (sym); } // Add a 
symbol to the parameter list struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype) { struct symtable *sym = newsym(name, type, ctype, stype, C_PARAM, 1, 0); appendsym(&Parmhead, &Parmtail, sym); return (sym); } // Add a symbol to the temporary member list struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int nelems) { struct symtable *sym = newsym(name, type, ctype, stype, C_MEMBER, nelems, 0); // For structs and unions, copy the size from the type node if (type== P_STRUCT || type== P_UNION) sym->size = ctype->size; appendsym(&Membhead, &Membtail, sym); return (sym); } // Add a struct to the struct list struct symtable *addstruct(char *name) { struct symtable *sym = newsym(name, P_STRUCT, NULL, 0, C_STRUCT, 0, 0); appendsym(&Structhead, &Structtail, sym); return (sym); } // Add a struct to the union list struct symtable *addunion(char *name) { struct symtable *sym = newsym(name, P_UNION, NULL, 0, C_UNION, 0, 0); appendsym(&Unionhead, &Uniontail, sym); return (sym); } // Add an enum type or value to the enum list. // Class is C_ENUMTYPE or C_ENUMVAL. // Use posn to store the int value. struct symtable *addenum(char *name, int class, int value) { struct symtable *sym = newsym(name, P_INT, NULL, 0, class, 0, value); appendsym(&Enumhead, &Enumtail, sym); return (sym); } // Add a typedef to the typedef list struct symtable *addtypedef(char *name, int type, struct symtable *ctype) { struct symtable *sym = newsym(name, type, ctype, 0, C_TYPEDEF, 0, 0); appendsym(&Typehead, &Typetail, sym); return (sym); } // Search for a symbol in a specific list. // Return a pointer to the found node or NULL if not found. 
// If class is not zero, also match on the given class static struct symtable *findsyminlist(char *s, struct symtable *list, int class) { for (; list != NULL; list = list->next) if ((list->name != NULL) && !strcmp(s, list->name)) if (class==0 || class== list->class) return (list); return (NULL); } // Determine if the symbol s is in the global symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findglob(char *s) { return (findsyminlist(s, Globhead, 0)); } // Determine if the symbol s is in the local symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findlocl(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } return (findsyminlist(s, Loclhead, 0)); } // Determine if the symbol s is in the symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findsymbol(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } // Otherwise, try the local and global symbol lists node = findsyminlist(s, Loclhead, 0); if (node) return (node); return (findsyminlist(s, Globhead, 0)); } // Find a member in the member list // Return a pointer to the found node or NULL if not found. struct symtable *findmember(char *s) { return (findsyminlist(s, Membhead, 0)); } // Find a struct in the struct list // Return a pointer to the found node or NULL if not found. struct symtable *findstruct(char *s) { return (findsyminlist(s, Structhead, 0)); } // Find a struct in the union list // Return a pointer to the found node or NULL if not found. struct symtable *findunion(char *s) { return (findsyminlist(s, Unionhead, 0)); } // Find an enum type in the enum list // Return a pointer to the found node or NULL if not found. 
struct symtable *findenumtype(char *s) { return (findsyminlist(s, Enumhead, C_ENUMTYPE)); } // Find an enum value in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumval(char *s) { return (findsyminlist(s, Enumhead, C_ENUMVAL)); } // Find a type in the tyedef list // Return a pointer to the found node or NULL if not found. struct symtable *findtypedef(char *s) { return (findsyminlist(s, Typehead, 0)); } // Reset the contents of the symbol table void clear_symtable(void) { Globhead = Globtail = NULL; Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Membhead = Membtail = NULL; Structhead = Structtail = NULL; Unionhead = Uniontail = NULL; Enumhead = Enumtail = NULL; Typehead = Typetail = NULL; } // Clear all the entries in the local symbol table void freeloclsyms(void) { Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Functionid = NULL; } ================================================ FILE: 45_Globals_Again/tests/err.input031.c ================================================ Expecting a primary expression, got token:+ on line 5 of input031.c ================================================ FILE: 45_Globals_Again/tests/err.input032.c ================================================ Unknown variable:cow on line 4 of input032.c ================================================ FILE: 45_Globals_Again/tests/err.input033.c ================================================ Incompatible type to return on line 4 of input033.c ================================================ FILE: 45_Globals_Again/tests/err.input034.c ================================================ For now, declaration of non-global arrays is not implemented on line 4 of input034.c ================================================ FILE: 45_Globals_Again/tests/err.input035.c ================================================ Duplicate local variable declaration:a on line 4 of input035.c ================================================ FILE: 
45_Globals_Again/tests/err.input036.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input036.c ================================================ FILE: 45_Globals_Again/tests/err.input037.c ================================================ Expected:comma on line 3 of input037.c ================================================ FILE: 45_Globals_Again/tests/err.input038.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input038.c ================================================ FILE: 45_Globals_Again/tests/err.input039.c ================================================ No statements in function with non-void type on line 4 of input039.c ================================================ FILE: 45_Globals_Again/tests/err.input040.c ================================================ No return for function with non-void type on line 4 of input040.c ================================================ FILE: 45_Globals_Again/tests/err.input041.c ================================================ Can't return from a void function on line 3 of input041.c ================================================ FILE: 45_Globals_Again/tests/err.input042.c ================================================ Undeclared function:fred on line 3 of input042.c ================================================ FILE: 45_Globals_Again/tests/err.input043.c ================================================ Undeclared array:b on line 3 of input043.c ================================================ FILE: 45_Globals_Again/tests/err.input044.c ================================================ Unknown variable:z on line 3 of input044.c ================================================ FILE: 45_Globals_Again/tests/err.input045.c ================================================ & operator must be followed by an identifier on line 3 of input045.c ================================================ FILE: 
45_Globals_Again/tests/err.input046.c ================================================ * operator must be followed by an identifier or * on line 3 of input046.c ================================================ FILE: 45_Globals_Again/tests/err.input047.c ================================================ ++ operator must be followed by an identifier on line 3 of input047.c ================================================ FILE: 45_Globals_Again/tests/err.input048.c ================================================ -- operator must be followed by an identifier on line 3 of input048.c ================================================ FILE: 45_Globals_Again/tests/err.input049.c ================================================ Incompatible expression in assignment on line 6 of input049.c ================================================ FILE: 45_Globals_Again/tests/err.input050.c ================================================ Incompatible types in binary expression on line 6 of input050.c ================================================ FILE: 45_Globals_Again/tests/err.input051.c ================================================ Expected '\'' at end of char literal on line 4 of input051.c ================================================ FILE: 45_Globals_Again/tests/err.input052.c ================================================ Unrecognised character:$ on line 5 of input052.c ================================================ FILE: 45_Globals_Again/tests/err.input056.c ================================================ unknown struct/union type:var1 on line 2 of input056.c ================================================ FILE: 45_Globals_Again/tests/err.input057.c ================================================ previously defined struct/union:fred on line 2 of input057.c ================================================ FILE: 45_Globals_Again/tests/err.input059.c ================================================ Undeclared variable:y on line 3 of input059.c 
================================================ FILE: 45_Globals_Again/tests/err.input060.c ================================================ Undeclared variable:x on line 3 of input060.c ================================================ FILE: 45_Globals_Again/tests/err.input061.c ================================================ Undeclared variable:x on line 3 of input061.c ================================================ FILE: 45_Globals_Again/tests/err.input064.c ================================================ undeclared enum type::fred on line 1 of input064.c ================================================ FILE: 45_Globals_Again/tests/err.input065.c ================================================ enum type redeclared::fred on line 2 of input065.c ================================================ FILE: 45_Globals_Again/tests/err.input066.c ================================================ enum value redeclared::z on line 2 of input066.c ================================================ FILE: 45_Globals_Again/tests/err.input068.c ================================================ redefinition of typedef:FOO on line 2 of input068.c ================================================ FILE: 45_Globals_Again/tests/err.input069.c ================================================ unknown type:FLOO on line 2 of input069.c ================================================ FILE: 45_Globals_Again/tests/err.input072.c ================================================ no loop or switch to break out from on line 1 of input072.c ================================================ FILE: 45_Globals_Again/tests/err.input073.c ================================================ no loop to continue to on line 1 of input073.c ================================================ FILE: 45_Globals_Again/tests/err.input075.c ================================================ Unexpected token in switch:if on line 4 of input075.c ================================================ FILE: 
45_Globals_Again/tests/err.input076.c ================================================ No cases in switch on line 3 of input076.c ================================================ FILE: 45_Globals_Again/tests/err.input077.c ================================================ case or default after existing default on line 6 of input077.c ================================================ FILE: 45_Globals_Again/tests/err.input078.c ================================================ case or default after existing default on line 6 of input078.c ================================================ FILE: 45_Globals_Again/tests/err.input079.c ================================================ Duplicate case value on line 6 of input079.c ================================================ FILE: 45_Globals_Again/tests/err.input085.c ================================================ Bad type in parameter list on line 1 of input085.c ================================================ FILE: 45_Globals_Again/tests/err.input086.c ================================================ Function definition not at global level on line 3 of input086.c ================================================ FILE: 45_Globals_Again/tests/err.input087.c ================================================ Bad type in member list on line 4 of input087.c ================================================ FILE: 45_Globals_Again/tests/err.input092.c ================================================ Type mismatch: literal vs. variable on line 1 of input092.c ================================================ FILE: 45_Globals_Again/tests/err.input093.c ================================================ Unknown variable:fred on line 1 of input093.c ================================================ FILE: 45_Globals_Again/tests/err.input094.c ================================================ Type mismatch: literal vs. 
variable on line 1 of input094.c ================================================ FILE: 45_Globals_Again/tests/err.input095.c ================================================ Variable can not be initialised:x on line 1 of input095.c ================================================ FILE: 45_Globals_Again/tests/err.input096.c ================================================ Array size is illegal:0 on line 1 of input096.c ================================================ FILE: 45_Globals_Again/tests/err.input097.c ================================================ For now, declaration of non-global arrays is not implemented on line 2 of input097.c ================================================ FILE: 45_Globals_Again/tests/err.input098.c ================================================ Too many values in initialisation list on line 1 of input098.c ================================================ FILE: 45_Globals_Again/tests/err.input102.c ================================================ Cannot cast to a struct, union or void type on line 3 of input102.c ================================================ FILE: 45_Globals_Again/tests/err.input103.c ================================================ Cannot cast to a struct, union or void type on line 3 of input103.c ================================================ FILE: 45_Globals_Again/tests/err.input104.c ================================================ Cannot cast to a struct, union or void type on line 2 of input104.c ================================================ FILE: 45_Globals_Again/tests/err.input105.c ================================================ Incompatible expression in assignment on line 4 of input105.c ================================================ FILE: 45_Globals_Again/tests/input001.c ================================================ int printf(char *fmt); void main() { printf("%d\n", 12 * 3); printf("%d\n", 18 - 2 * 4); printf("%d\n", 1 + 2 + 9 - 5/2 + 3*5); } 
================================================ FILE: 45_Globals_Again/tests/input002.c ================================================ int printf(char *fmt); void main() { int fred; int jim; fred= 5; jim= 12; printf("%d\n", fred + jim); } ================================================ FILE: 45_Globals_Again/tests/input003.c ================================================ int printf(char *fmt); void main() { int x; x= 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); } ================================================ FILE: 45_Globals_Again/tests/input004.c ================================================ int printf(char *fmt); void main() { int x; x= 7 < 9; printf("%d\n", x); x= 7 <= 9; printf("%d\n", x); x= 7 != 9; printf("%d\n", x); x= 7 == 7; printf("%d\n", x); x= 7 >= 7; printf("%d\n", x); x= 7 <= 7; printf("%d\n", x); x= 9 > 7; printf("%d\n", x); x= 9 >= 7; printf("%d\n", x); x= 9 != 7; printf("%d\n", x); } ================================================ FILE: 45_Globals_Again/tests/input005.c ================================================ int printf(char *fmt); void main() { int i; int j; i=6; j=12; if (i < j) { printf("%d\n", i); } else { printf("%d\n", j); } } ================================================ FILE: 45_Globals_Again/tests/input006.c ================================================ int printf(char *fmt); void main() { int i; i=1; while (i <= 10) { printf("%d\n", i); i= i + 1; } } ================================================ FILE: 45_Globals_Again/tests/input007.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 45_Globals_Again/tests/input008.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } 
================================================ FILE: 45_Globals_Again/tests/input009.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { printf("%d\n", 2 * b - a); } } ================================================ FILE: 45_Globals_Again/tests/input010.c ================================================ int printf(char *fmt); void main() { int i; char j; j= 20; printf("%d\n", j); i= 10; printf("%d\n", i); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 2; j= j + 1) { printf("%d\n", j); } } ================================================ FILE: 45_Globals_Again/tests/input011.c ================================================ int printf(char *fmt); int main() { int i; char j; long k; i= 10; printf("%d\n", i); j= 20; printf("%d\n", j); k= 30; printf("%d\n", k); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 4; j= j + 1) { printf("%d\n", j); } for (k= 1; k <= 5; k= k + 1) { printf("%d\n", k); } return(i); printf("%d\n", 12345); return(3); } ================================================ FILE: 45_Globals_Again/tests/input012.c ================================================ int printf(char *fmt); int fred() { return(5); } void main() { int x; x= fred(2); printf("%d\n", x); } ================================================ FILE: 45_Globals_Again/tests/input013.c ================================================ int printf(char *fmt); int fred() { return(56); } void main() { int dummy; int result; dummy= printf("%d\n", 23); result= fred(10); dummy= printf("%d\n", result); } ================================================ FILE: 45_Globals_Again/tests/input014.c ================================================ int printf(char *fmt); int fred() { return(20); } int main() { int result; printf("%d\n", 10); result= fred(15); printf("%d\n", result); printf("%d\n", 
fred(15)+10); return(0); } ================================================ FILE: 45_Globals_Again/tests/input015.c ================================================ int printf(char *fmt); int main() { char a; char *b; char c; int d; int *e; int f; a= 18; printf("%d\n", a); b= &a; c= *b; printf("%d\n", c); d= 12; printf("%d\n", d); e= &d; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 45_Globals_Again/tests/input016.c ================================================ int printf(char *fmt); int c; int d; int *e; int f; int main() { c= 12; d=18; printf("%d\n", c); e= &c + 1; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 45_Globals_Again/tests/input017.c ================================================ int printf(char *fmt); int main() { char a; char *b; int d; int *e; b= &a; *b= 19; printf("%d\n", a); e= &d; *e= 12; printf("%d\n", d); return(0); } ================================================ FILE: 45_Globals_Again/tests/input018.c ================================================ int printf(char *fmt); int main() { int a; int b; a= b= 34; printf("%d\n", a); printf("%d\n", b); return(0); } ================================================ FILE: 45_Globals_Again/tests/input018a.c ================================================ int printf(char *fmt); int a; int *b; char c; char *d; int main() { b= &a; *b= 15; printf("%d\n", a); d= &c; *d= 16; printf("%d\n", c); return(0); } ================================================ FILE: 45_Globals_Again/tests/input019.c ================================================ int printf(char *fmt); int a; int b; int c; int d; int e; int main() { a= 2; b= 4; c= 3; d= 2; e= (a+b) * (c+d); printf("%d\n", e); return(0); } ================================================ FILE: 45_Globals_Again/tests/input020.c ================================================ int printf(char *fmt); int a; int b[25]; int main() { b[3]= 12; a= b[3]; printf("%d\n", 
a); return(0); } ================================================ FILE: 45_Globals_Again/tests/input021.c ================================================ int printf(char *fmt); char c; char *str; int main() { c= '\n'; printf("%d\n", c); for (str= "Hello world\n"; *str != 0; str= str + 1) { printf("%c", *str); } return(0); } ================================================ FILE: 45_Globals_Again/tests/input022.c ================================================ int printf(char *fmt); char a; char b; char c; int d; int e; int f; long g; long h; long i; int main() { b= 5; c= 7; a= b + c++; printf("%d\n", a); e= 5; f= 7; d= e + f++; printf("%d\n", d); h= 5; i= 7; g= h + i++; printf("%d\n", g); a= b-- + c; printf("%d\n", a); d= e-- + f; printf("%d\n", d); g= h-- + i; printf("%d\n", g); a= ++b + c; printf("%d\n", a); d= ++e + f; printf("%d\n", d); g= ++h + i; printf("%d\n", g); a= b * --c; printf("%d\n", a); d= e * --f; printf("%d\n", d); g= h * --i; printf("%d\n", g); return(0); } ================================================ FILE: 45_Globals_Again/tests/input023.c ================================================ int printf(char *fmt); char *str; int x; int main() { x= -23; printf("%d\n", x); printf("%d\n", -10 * -10); x= 1; x= ~x; printf("%d\n", x); x= 2 > 5; printf("%d\n", x); x= !x; printf("%d\n", x); x= !x; printf("%d\n", x); x= 13; if (x) { printf("%d\n", 13); } x= 0; if (!x) { printf("%d\n", 14); } for (str= "Hello world\n"; *str; str++) { printf("%c", *str); } return(0); } ================================================ FILE: 45_Globals_Again/tests/input024.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { a= 42; b= 19; printf("%d\n", a & b); printf("%d\n", a | b); printf("%d\n", a ^ b); printf("%d\n", 1 << 3); printf("%d\n", 63 >> 3); return(0); } ================================================ FILE: 45_Globals_Again/tests/input025.c ================================================ int printf(char 
*fmt); int a; int b; int c; int main() { char z; int y; int x; x= 10; y= 20; z= 30; printf("%d\n", x); printf("%d\n", y); printf("%d\n", z); a= 5; b= 15; c= 25; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); return(0); } ================================================ FILE: 45_Globals_Again/tests/input026.c ================================================ int printf(char *fmt); int main(int a, char b, long c, int d, int e, int f, int g, int h) { int i; int j; int k; a= 13; printf("%d\n", a); b= 23; printf("%d\n", b); c= 34; printf("%d\n", c); d= 44; printf("%d\n", d); e= 54; printf("%d\n", e); f= 64; printf("%d\n", f); g= 74; printf("%d\n", g); h= 84; printf("%d\n", h); i= 94; printf("%d\n", i); j= 95; printf("%d\n", j); k= 96; printf("%d\n", k); return(0); } ================================================ FILE: 45_Globals_Again/tests/input027.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int param5(int a, int b, int c, int d, int e) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param2(int a, int b) { int c; int d; int e; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param0() { int a; int b; int c; int d; int e; a= 1; b= 2; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int main() { param8(1,2,3,4,5,6,7,8); param5(1,2,3,4,5); param2(1,2); param0(); return(0); } ================================================ FILE: 45_Globals_Again/tests/input028.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int 
f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 45_Globals_Again/tests/input029.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h); int fred(int a, int b, int c); int main(); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 45_Globals_Again/tests/input030.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input030.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 45_Globals_Again/tests/input031.c ================================================ int printf(char *fmt); int main() { int x; x= 2 + + 3 - * / ; } ================================================ FILE: 45_Globals_Again/tests/input032.c ================================================ int printf(char *fmt); int main() { pizza cow llama sausage; } ================================================ FILE: 45_Globals_Again/tests/input033.c 
================================================ int printf(char *fmt); int main() { char *z; return(z); } ================================================ FILE: 45_Globals_Again/tests/input034.c ================================================ int printf(char *fmt); int main() { int a[12]; return(0); } ================================================ FILE: 45_Globals_Again/tests/input035.c ================================================ int printf(char *fmt); int fred(int a, int b) { int a; return(a); } ================================================ FILE: 45_Globals_Again/tests/input036.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c); ================================================ FILE: 45_Globals_Again/tests/input037.c ================================================ int printf(char *fmt); int fred(int a, char b +, int z); ================================================ FILE: 45_Globals_Again/tests/input038.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c, int g); ================================================ FILE: 45_Globals_Again/tests/input039.c ================================================ int printf(char *fmt); int main() { int a; } ================================================ FILE: 45_Globals_Again/tests/input040.c ================================================ int printf(char *fmt); int main() { int a; a= 5; } ================================================ FILE: 45_Globals_Again/tests/input041.c ================================================ int printf(char *fmt); void fred() { return(5); } ================================================ FILE: 45_Globals_Again/tests/input042.c ================================================ int printf(char *fmt); int main() { fred(5); } ================================================ FILE: 45_Globals_Again/tests/input043.c 
================================================ int printf(char *fmt); int main() { int a; a= b[4]; } ================================================ FILE: 45_Globals_Again/tests/input044.c ================================================ int printf(char *fmt); int main() { int a; a= z; } ================================================ FILE: 45_Globals_Again/tests/input045.c ================================================ int printf(char *fmt); int main() { int a; a= &5; } ================================================ FILE: 45_Globals_Again/tests/input046.c ================================================ int printf(char *fmt); int main() { int a; a= *5; } ================================================ FILE: 45_Globals_Again/tests/input047.c ================================================ int printf(char *fmt); int main() { int a; a= ++5; } ================================================ FILE: 45_Globals_Again/tests/input048.c ================================================ int printf(char *fmt); int main() { int a; a= --5; } ================================================ FILE: 45_Globals_Again/tests/input049.c ================================================ int printf(char *fmt); int main() { int x; char y; y= x; } ================================================ FILE: 45_Globals_Again/tests/input050.c ================================================ int printf(char *fmt); int main() { char *a; char *b; a= a + b; } ================================================ FILE: 45_Globals_Again/tests/input051.c ================================================ int printf(char *fmt); int main() { char a; a= 'fred'; } ================================================ FILE: 45_Globals_Again/tests/input052.c ================================================ int printf(char *fmt); int main() { int a; a= $5.00; } ================================================ FILE: 45_Globals_Again/tests/input053.c ================================================ int printf(char 
*fmt); int main() { printf("Hello world, %d\n", 23); return(0); } ================================================ FILE: 45_Globals_Again/tests/input054.c ================================================ int printf(char *fmt); int main() { int i; for (i=0; i < 20; i++) { printf("Hello world, %d\n", i); } return(0); } ================================================ FILE: 45_Globals_Again/tests/input055.c ================================================ int printf(char *fmt); int main(int argc, char **argv) { int i; char *argument; printf("Hello world\n"); for (i=0; i < argc; i++) { argument= *argv; argv= argv + 1; printf("Argument %d is %s\n", i, argument); } return(0); } ================================================ FILE: 45_Globals_Again/tests/input056.c ================================================ struct foo { int x; }; struct mary var1; ================================================ FILE: 45_Globals_Again/tests/input057.c ================================================ struct fred { int x; } ; struct fred { char y; } ; ================================================ FILE: 45_Globals_Again/tests/input058.c ================================================ int printf(char *fmt); struct fred { int x; char y; long z; }; struct fred var2; struct fred *varptr; int main() { long result; var2.x= 12; printf("%d\n", var2.x); var2.y= 'c'; printf("%d\n", var2.y); var2.z= 4005; printf("%d\n", var2.z); result= var2.x + var2.y + var2.z; printf("%d\n", result); varptr= &var2; result= varptr->x + varptr->y + varptr->z; printf("%d\n", result); return(0); } ================================================ FILE: 45_Globals_Again/tests/input059.c ================================================ int main() { int x; x= y.foo; } ================================================ FILE: 45_Globals_Again/tests/input060.c ================================================ int main() { int x; x= x.foo; } ================================================ FILE: 
45_Globals_Again/tests/input061.c ================================================ int main() { int x; x= x->foo; } ================================================ FILE: 45_Globals_Again/tests/input062.c ================================================ int printf(char *fmt); union fred { char w; int x; int y; long z; }; union fred var1; union fred *varptr; int main() { var1.x= 65; printf("%d\n", var1.x); var1.x= 66; printf("%d\n", var1.x); printf("%d\n", var1.y); printf("The next two depend on the endian of the platform\n"); printf("%d\n", var1.w); printf("%d\n", var1.z); varptr= &var1; varptr->x= 67; printf("%d\n", varptr->x); printf("%d\n", varptr->y); return(0); } ================================================ FILE: 45_Globals_Again/tests/input063.c ================================================ int printf(char *fmt); enum fred { apple=1, banana, carrot, pear=10, peach, mango, papaya }; enum jane { aple=1, bnana, crrot, par=10, pech, mago, paaya }; enum fred var1; enum jane var2; enum fred var3; int main() { var1= carrot + pear + mango; printf("%d\n", var1); return(0); } ================================================ FILE: 45_Globals_Again/tests/input064.c ================================================ enum fred var3; ================================================ FILE: 45_Globals_Again/tests/input065.c ================================================ enum fred { x, y, z }; enum fred { a, b }; ================================================ FILE: 45_Globals_Again/tests/input066.c ================================================ enum fred { x, y, z }; enum mary { a, b, z }; ================================================ FILE: 45_Globals_Again/tests/input067.c ================================================ int printf(char *fmt); typedef int FOO; FOO var1; struct bar { int x; int y} ; typedef struct bar BAR; BAR var2; int main() { var1= 5; printf("%d\n", var1); var2.x= 7; var2.y= 10; printf("%d\n", var2.x + var2.y); return(0); } 
================================================ FILE: 45_Globals_Again/tests/input068.c ================================================ typedef int FOO; typedef char FOO; ================================================ FILE: 45_Globals_Again/tests/input069.c ================================================ typedef int FOO; FLOO y; ================================================ FILE: 45_Globals_Again/tests/input070.c ================================================ #include <stdio.h> typedef int FOO; int main() { FOO x; x= 56; printf("%d\n", x); return(0); } ================================================ FILE: 45_Globals_Again/tests/input071.c ================================================ #include <stdio.h> int main() { int x; x = 0; while (x < 100) { if (x == 5) { x = x + 2; continue; } printf("%d\n", x); if (x == 14) { break; } x = x + 1; } printf("Done\n"); return (0); } ================================================ FILE: 45_Globals_Again/tests/input072.c ================================================ int main() { break; } ================================================ FILE: 45_Globals_Again/tests/input073.c ================================================ int main() { continue; } ================================================ FILE: 45_Globals_Again/tests/input074.c ================================================ #include <stdio.h> int main() { int x; int y; y= 0; for (x=0; x < 5; x++) { switch(x) { case 1: { y= 5; break; } case 2: { y= 7; break; } case 3: { y= 9; } default: { y= 100; } } printf("%d\n", y); } return(0); } ================================================ FILE: 45_Globals_Again/tests/input075.c ================================================ int main() { int x; switch(x) { if (x<5); } } ================================================ FILE: 45_Globals_Again/tests/input076.c ================================================ int main() { int x; switch(x) { } } ================================================ FILE: 45_Globals_Again/tests/input077.c
================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } case 4: { x= 2; } } } ================================================ FILE: 45_Globals_Again/tests/input078.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } default: { x= 2; } } } ================================================ FILE: 45_Globals_Again/tests/input079.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } case 2: { x= 2; } case 1: { x= 2; } default: { x= 2; } } } ================================================ FILE: 45_Globals_Again/tests/input080.c ================================================ #include <stdio.h> int x; int y; int main() { for (x=0, y=1; x < 6; x++, y=y+2) { printf("%d %d\n", x, y); } return(0); } ================================================ FILE: 45_Globals_Again/tests/input081.c ================================================ #include <stdio.h> int x; int y; int main() { x= 0; y=1; for (;x<5;) { printf("%d %d\n", x, y); x=x+1; y=y+2; } return(0); } ================================================ FILE: 45_Globals_Again/tests/input082.c ================================================ #include <stdio.h> int main() { int x; x= 10; // Dangling else test if (x > 5) if (x > 15) printf("x > 15\n"); else printf("15 >= x > 5\n"); else printf("5 >= x\n"); return(0); } ================================================ FILE: 45_Globals_Again/tests/input083.c ================================================ #include <stdio.h> int main() { int x; // Dangling else test.
// We should not print anything for x<= 5 for (x=0; x < 12; x++) if (x > 5) if (x > 10) printf("10 < %2d\n", x); else printf(" 5 < %2d <= 10\n", x); return(0); } ================================================ FILE: 45_Globals_Again/tests/input084.c ================================================ #include int main() { int x, y; x=2; y=3; printf("%d %d\n", x, y); char a, *b; a= 'f'; b= &a; printf("%c %c\n", a, *b); return(0); } ================================================ FILE: 45_Globals_Again/tests/input085.c ================================================ int main(struct foo { int x; }; , int a) { return(0); } ================================================ FILE: 45_Globals_Again/tests/input086.c ================================================ int main() { int fred() { return(5); } int x; x=2; return(x); } ================================================ FILE: 45_Globals_Again/tests/input087.c ================================================ struct foo { int x; int y; struct blah { int g; }; }; ================================================ FILE: 45_Globals_Again/tests/input088.c ================================================ #include struct foo { int x; int y; } fred, mary; struct foo james; int main() { fred.x= 5; mary.y= 6; printf("%d %d\n", fred.x, mary.y); return(0); } ================================================ FILE: 45_Globals_Again/tests/input089.c ================================================ #include int x= 23; char y= 'H'; char *z= "Hello world"; int main() { printf("%d %c %s\n", x, y, z); return(0); } ================================================ FILE: 45_Globals_Again/tests/input090.c ================================================ #include int a = 23, b = 100; char y = 'H', *z = "Hello world"; int main() { printf("%d %d %c %s\n", a, b, y, z); return (0); } ================================================ FILE: 45_Globals_Again/tests/input091.c ================================================ #include int fred[] = { 1, 2, 3, 
4, 5 }; int jim[10] = { 1, 2, 3, 4, 5 }; int main() { int i; for (i=0; i < 5; i++) printf("%d\n", fred[i]); for (i=0; i < 10; i++) printf("%d\n", jim[i]); return(0); } ================================================ FILE: 45_Globals_Again/tests/input092.c ================================================ char x= 3000; ================================================ FILE: 45_Globals_Again/tests/input093.c ================================================ char x= fred; ================================================ FILE: 45_Globals_Again/tests/input094.c ================================================ char *s= 54; ================================================ FILE: 45_Globals_Again/tests/input095.c ================================================ int fred(int x=2) { return(x); } ================================================ FILE: 45_Globals_Again/tests/input096.c ================================================ int fred[0]; ================================================ FILE: 45_Globals_Again/tests/input097.c ================================================ int main() { int x[45]; return(0); } ================================================ FILE: 45_Globals_Again/tests/input098.c ================================================ int fred[3]= { 1, 2, 3, 4, 5 }; ================================================ FILE: 45_Globals_Again/tests/input099.c ================================================ #include // List of token strings, for debugging purposes. 
// As yet, we can't store a NULL into the list char *Tstring[] = { "EOF", "=", "||", "&&", "|", "^", "&", "==", "!=", ",", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", "default", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":", "" }; int main() { int i; char *str; i=0; while (1) { str= Tstring[i]; if (*str == 0) break; printf("%s\n", str); i++; } return(0); } ================================================ FILE: 45_Globals_Again/tests/input100.c ================================================ #include int main() { int x= 3, y=14; int z= 2 * x + y; char *str= "Hello world"; printf("%s %d %d\n", str, x+y, z); return(0); } ================================================ FILE: 45_Globals_Again/tests/input101.c ================================================ #include int main() { int x= 65535; char y; char *str; y= (char )x; printf("0x%x\n", y); str= (char *)0; printf("0x%lx\n", (long)str); return(0); } ================================================ FILE: 45_Globals_Again/tests/input102.c ================================================ int main() { struct foo { int p; }; int y= (struct foo) x; } ================================================ FILE: 45_Globals_Again/tests/input103.c ================================================ int main() { union foo { int p; }; int y= (union foo) x; } ================================================ FILE: 45_Globals_Again/tests/input104.c ================================================ int main() { int y= (void) x; } ================================================ FILE: 45_Globals_Again/tests/input105.c ================================================ int main() { int x; char *y; y= (char) x; } ================================================ FILE: 45_Globals_Again/tests/input106.c 
================================================ #include char *y= (char *)0; int main() { printf("0x%lx\n", (long)y); return(0); } ================================================ FILE: 45_Globals_Again/tests/input107.c ================================================ #include char *y[] = { "fish", "cow", NULL }; char *z= NULL; int main() { int i; char *ptr; for (i=0; i < 3; i++) { ptr= y[i]; if (ptr != (char *)0) printf("%s\n", y[i]); else printf("NULL\n"); } return(0); } ================================================ FILE: 45_Globals_Again/tests/input108.c ================================================ int main() { char *str= (void *)0; return(0); } ================================================ FILE: 45_Globals_Again/tests/input109.c ================================================ #include int main() { int x= 17 - 1; printf("%d\n", x); return(0); } ================================================ FILE: 45_Globals_Again/tests/input110.c ================================================ #include int x; int y; int main() { x= 3; y= 15; y += x; printf("%d\n", y); x= 3; y= 15; y -= x; printf("%d\n", y); x= 3; y= 15; y *= x; printf("%d\n", y); x= 3; y= 15; y /= x; printf("%d\n", y); return(0); } ================================================ FILE: 45_Globals_Again/tests/input111.c ================================================ #include int main() { int x= 2000 + 3 + 4 * 5 + 6; printf("%d\n", x); return(0); } ================================================ FILE: 45_Globals_Again/tests/input112.c ================================================ #include char* y = NULL; int x= 10 + 6; int fred [ 2 + 3 ]; int main() { fred[2]= x; printf("%d\n", fred[2]); return(0); } ================================================ FILE: 45_Globals_Again/tests/mktests ================================================ #!/bin/sh # Make the output files for each test # Build our compiler if needed if [ ! -f ../cwj ] then (cd ..; make install) fi for i in input*c do if [ ! 
-f "out.$i" -a ! -f "err.$i" ] then ../cwj -o out $i 2> "err.$i" # If the err file is empty if [ ! -s "err.$i" ] then rm -f "err.$i" cc -o out $i ./out > "out.$i" fi fi rm -f out out.s done ================================================ FILE: 45_Globals_Again/tests/out.input001.c ================================================ 36 10 25 ================================================ FILE: 45_Globals_Again/tests/out.input002.c ================================================ 17 ================================================ FILE: 45_Globals_Again/tests/out.input003.c ================================================ 1 2 3 4 5 ================================================ FILE: 45_Globals_Again/tests/out.input004.c ================================================ 1 1 1 1 1 1 1 1 1 ================================================ FILE: 45_Globals_Again/tests/out.input005.c ================================================ 6 ================================================ FILE: 45_Globals_Again/tests/out.input006.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 45_Globals_Again/tests/out.input007.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 45_Globals_Again/tests/out.input008.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 45_Globals_Again/tests/out.input009.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 45_Globals_Again/tests/out.input010.c ================================================ 20 10 1 2 3 4 5 253 254 255 0 1 ================================================ FILE: 45_Globals_Again/tests/out.input011.c ================================================ 10 20 30 1 2 3 4 5 253 254 255 0 1 2 3 1 2 3 4 5 
================================================ FILE: 45_Globals_Again/tests/out.input012.c ================================================ 5 ================================================ FILE: 45_Globals_Again/tests/out.input013.c ================================================ 23 56 ================================================ FILE: 45_Globals_Again/tests/out.input014.c ================================================ 10 20 30 ================================================ FILE: 45_Globals_Again/tests/out.input015.c ================================================ 18 18 12 12 ================================================ FILE: 45_Globals_Again/tests/out.input016.c ================================================ 12 18 ================================================ FILE: 45_Globals_Again/tests/out.input017.c ================================================ 19 12 ================================================ FILE: 45_Globals_Again/tests/out.input018.c ================================================ 34 34 ================================================ FILE: 45_Globals_Again/tests/out.input018a.c ================================================ 15 16 ================================================ FILE: 45_Globals_Again/tests/out.input019.c ================================================ 30 ================================================ FILE: 45_Globals_Again/tests/out.input020.c ================================================ 12 ================================================ FILE: 45_Globals_Again/tests/out.input021.c ================================================ 10 Hello world ================================================ FILE: 45_Globals_Again/tests/out.input022.c ================================================ 12 12 12 13 13 13 13 13 13 35 35 35 ================================================ FILE: 45_Globals_Again/tests/out.input023.c ================================================ -23 100 -2 0 1 0 13 14 Hello world 
================================================ FILE: 45_Globals_Again/tests/out.input024.c ================================================ 2 59 57 8 7 ================================================ FILE: 45_Globals_Again/tests/out.input025.c ================================================ 10 20 30 5 15 25 ================================================ FILE: 45_Globals_Again/tests/out.input026.c ================================================ 13 23 34 44 54 64 74 84 94 95 96 ================================================ FILE: 45_Globals_Again/tests/out.input027.c ================================================ 1 2 3 4 5 6 7 8 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 ================================================ FILE: 45_Globals_Again/tests/out.input028.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 45_Globals_Again/tests/out.input029.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 45_Globals_Again/tests/out.input030.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input030.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 45_Globals_Again/tests/out.input053.c ================================================ Hello world, 23 ================================================ FILE: 45_Globals_Again/tests/out.input054.c ================================================ Hello world, 0 Hello world, 1 Hello world, 2 Hello world, 3 Hello world, 4 Hello world, 5 Hello world, 6 Hello world, 7 Hello world, 8 Hello world, 9 Hello world, 10 Hello world, 11 Hello world, 
12 Hello world, 13 Hello world, 14 Hello world, 15 Hello world, 16 Hello world, 17 Hello world, 18 Hello world, 19 ================================================ FILE: 45_Globals_Again/tests/out.input055.c ================================================ Hello world Argument 0 is ./out ================================================ FILE: 45_Globals_Again/tests/out.input058.c ================================================ 12 99 4005 4116 4116 ================================================ FILE: 45_Globals_Again/tests/out.input062.c ================================================ 65 66 66 The next two depend on the endian of the platform 66 66 67 67 ================================================ FILE: 45_Globals_Again/tests/out.input063.c ================================================ 25 ================================================ FILE: 45_Globals_Again/tests/out.input067.c ================================================ 5 17 ================================================ FILE: 45_Globals_Again/tests/out.input070.c ================================================ 56 ================================================ FILE: 45_Globals_Again/tests/out.input071.c ================================================ 0 1 2 3 4 7 8 9 10 11 12 13 14 Done ================================================ FILE: 45_Globals_Again/tests/out.input074.c ================================================ 100 5 7 100 100 ================================================ FILE: 45_Globals_Again/tests/out.input080.c ================================================ 0 1 1 3 2 5 3 7 4 9 5 11 ================================================ FILE: 45_Globals_Again/tests/out.input081.c ================================================ 0 1 1 3 2 5 3 7 4 9 ================================================ FILE: 45_Globals_Again/tests/out.input082.c ================================================ 15 >= x > 5 ================================================ FILE: 
45_Globals_Again/tests/out.input083.c ================================================ 5 < 6 <= 10 5 < 7 <= 10 5 < 8 <= 10 5 < 9 <= 10 5 < 10 <= 10 10 < 11 ================================================ FILE: 45_Globals_Again/tests/out.input084.c ================================================ 2 3 f f ================================================ FILE: 45_Globals_Again/tests/out.input088.c ================================================ 5 6 ================================================ FILE: 45_Globals_Again/tests/out.input089.c ================================================ 23 H Hello world ================================================ FILE: 45_Globals_Again/tests/out.input090.c ================================================ 23 100 H Hello world ================================================ FILE: 45_Globals_Again/tests/out.input091.c ================================================ 1 2 3 4 5 1 2 3 4 5 0 0 0 0 0 ================================================ FILE: 45_Globals_Again/tests/out.input099.c ================================================ EOF = || && | ^ & == != , > <= >= << >> + - * / ++ -- ~ ! void char int long if else while for return struct union enum typedef extern break continue switch case default intlit strlit ; identifier { } ( ) [ ] , . 
#!/bin/sh
# Run each test and compare against known good output.
#
# For every input*c file in this directory:
#   - if "out.<file>" exists, compile <file> with ../cwj, run the
#     resulting ./out and compare its stdout with "out.<file>";
#   - else if "err.<file>" exists, compile <file> and compare the
#     compiler's stderr with "err.<file>";
#   - otherwise report that there is nothing to compare against.
# Prints "<file>: OK" or "<file>: failed" followed by a context diff.

# Build our compiler if needed
if [ ! -f ../cwj ]
then (cd ..; make install)
fi

# Try to use each input source file
for i in input*c
do
  # We can't do anything if there's no file to test against
  if [ ! -f "out.$i" ] && [ ! -f "err.$i" ]
  then
    echo "Can't run test on $i, no output file!"

  # Output file: compile the source, run it and
  # capture the output, and compare it against
  # the known-good output
  elif [ -f "out.$i" ]
  then
    # Print the test name (no newline), compile it
    # with our compiler
    printf '%s' "$i"
    ../cwj -o out "$i"
    ./out > "trial.$i"

    # Compare this against the correct output. Any non-zero
    # status from cmp is a failure: 1 means the files differ,
    # larger values mean cmp could not read one of them.
    if cmp -s "out.$i" "trial.$i"
    then
      echo ": OK"
    else
      echo ": failed"
      diff -c "out.$i" "trial.$i"
      echo
    fi

  # Error file: compile the source and
  # capture the error messages. Compare
  # against the known-bad output. Same
  # mechanism as before
  elif [ -f "err.$i" ]
  then
    printf '%s' "$i"
    ../cwj "$i" 2> "trial.$i"
    if cmp -s "err.$i" "trial.$i"
    then
      echo ": OK"
    else
      echo ": failed"
      diff -c "err.$i" "trial.$i"
      echo
    fi
  fi

  # Remove the per-test build products before the next test
  rm -f out out.s "trial.$i"
done
Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../compn $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.o out.s "trial.$i" done ================================================ FILE: 45_Globals_Again/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->type = type; n->left = left; n->mid = mid; n->right = right; n->sym = sym; n->a_intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, struct symtable *sym, int intvalue) { return (mkastnode(op, type, NULL, NULL, NULL, sym, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, struct symtable *sym, int intvalue) { return (mkastnode(op, type, left, NULL, NULL, sym, intvalue)); } // Generate and return a new label number // just for AST dumping purposes static int gendumplabel(void) { static int id = 1; return (id++); } // Given an AST tree, print it out and follow the // traversal of the tree that genAST() follows void dumpAST(struct ASTnode *n, int label, int level) { int Lfalse, Lstart, Lend; switch (n->op) { case A_IF: Lfalse = gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_IF"); if (n->right) { Lend = gendumplabel(); fprintf(stdout, ", end L%d", Lend); } fprintf(stdout, "\n"); dumpAST(n->left, Lfalse, level + 2); 
dumpAST(n->mid, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); return; case A_WHILE: Lstart = gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_WHILE, start L%d\n", Lstart); Lend = gendumplabel(); dumpAST(n->left, Lend, level + 2); dumpAST(n->right, NOLABEL, level + 2); return; } // Reset level to -2 for A_GLUE if (n->op == A_GLUE) level = -2; // General AST node handling if (n->left) dumpAST(n->left, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); for (int i = 0; i < level; i++) fprintf(stdout, " "); switch (n->op) { case A_GLUE: fprintf(stdout, "\n\n"); return; case A_FUNCTION: fprintf(stdout, "A_FUNCTION %s\n", n->sym->name); return; case A_ADD: fprintf(stdout, "A_ADD\n"); return; case A_SUBTRACT: fprintf(stdout, "A_SUBTRACT\n"); return; case A_MULTIPLY: fprintf(stdout, "A_MULTIPLY\n"); return; case A_DIVIDE: fprintf(stdout, "A_DIVIDE\n"); return; case A_EQ: fprintf(stdout, "A_EQ\n"); return; case A_NE: fprintf(stdout, "A_NE\n"); return; case A_LT: fprintf(stdout, "A_LE\n"); return; case A_GT: fprintf(stdout, "A_GT\n"); return; case A_LE: fprintf(stdout, "A_LE\n"); return; case A_GE: fprintf(stdout, "A_GE\n"); return; case A_INTLIT: fprintf(stdout, "A_INTLIT %d\n", n->a_intvalue); return; case A_STRLIT: fprintf(stdout, "A_STRLIT rval label L%d\n", n->a_intvalue); return; case A_IDENT: if (n->rvalue) fprintf(stdout, "A_IDENT rval %s\n", n->sym->name); else fprintf(stdout, "A_IDENT %s\n", n->sym->name); return; case A_ASSIGN: fprintf(stdout, "A_ASSIGN\n"); return; case A_WIDEN: fprintf(stdout, "A_WIDEN\n"); return; case A_RETURN: fprintf(stdout, "A_RETURN\n"); return; case A_FUNCCALL: fprintf(stdout, "A_FUNCCALL %s\n", n->sym->name); return; case A_ADDR: fprintf(stdout, "A_ADDR %s\n", n->sym->name); return; case A_DEREF: if (n->rvalue) fprintf(stdout, "A_DEREF rval\n"); else fprintf(stdout, "A_DEREF\n"); return; case A_SCALE: fprintf(stdout, "A_SCALE %d\n", n->a_size); 
return; case A_PREINC: fprintf(stdout, "A_PREINC %s\n", n->sym->name); return; case A_PREDEC: fprintf(stdout, "A_PREDEC %s\n", n->sym->name); return; case A_POSTINC: fprintf(stdout, "A_POSTINC\n"); return; case A_POSTDEC: fprintf(stdout, "A_POSTDEC\n"); return; case A_NEGATE: fprintf(stdout, "A_NEGATE\n"); return; case A_BREAK: fprintf(stdout, "A_BREAK\n"); return; case A_CONTINUE: fprintf(stdout, "A_CONTINUE\n"); return; case A_CASE: fprintf(stdout, "A_CASE %d\n", n->a_intvalue); return; case A_DEFAULT: fprintf(stdout, "A_DEFAULT\n"); return; case A_SWITCH: fprintf(stdout, "A_SWITCH\n"); return; case A_CAST: fprintf(stdout, "A_CAST %d\n", n->type); return; case A_ASPLUS: fprintf(stdout, "A_ASPLUS\n"); return; case A_ASMINUS: fprintf(stdout, "A_ASMINUS\n"); return; case A_ASSTAR: fprintf(stdout, "A_ASSTAR\n"); return; case A_ASSLASH: fprintf(stdout, "A_ASSLASH\n"); return; default: fatald("Unknown dumpAST operator", n->op); } } ================================================ FILE: 45_Globals_Again/types.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Types and type handling // Copyright (c) 2019 Warren Toomey, GPL3 // Return true if a type is an int type // of any size, false otherwise int inttype(int type) { return (((type & 0xf) == 0) && (type >= P_CHAR && type <= P_LONG)); } // Return true if a type is of pointer type int ptrtype(int type) { return ((type & 0xf) != 0); } // Given a primitive type, return // the type which is a pointer to it int pointer_to(int type) { if ((type & 0xf) == 0xf) fatald("Unrecognised in pointer_to: type", type); return (type + 1); } // Given a primitive pointer type, return // the type which it points to int value_at(int type) { if ((type & 0xf) == 0x0) fatald("Unrecognised in value_at: type", type); return (type - 1); } // Given a type and a composite type pointer, return // the size of this type in bytes int typesize(int type, struct symtable *ctype) { if (type == 
P_STRUCT || type == P_UNION) return (ctype->size); return (genprimsize(type)); } // Given an AST tree and a type which we want it to become, // possibly modify the tree by widening or scaling so that // it is compatible with this type. Return the original tree // if no changes occurred, a modified tree, or NULL if the // tree is not compatible with the given type. // If this will be part of a binary operation, the AST op is not zero. struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op) { int ltype; int lsize, rsize; ltype = tree->type; // XXX No idea on these yet if (ltype == P_STRUCT || ltype == P_UNION) fatal("Don't know how to do this yet"); if (rtype == P_STRUCT || rtype == P_UNION) fatal("Don't know how to do this yet"); // Compare scalar int types if (inttype(ltype) && inttype(rtype)) { // Both types same, nothing to do if (ltype == rtype) return (tree); // Get the sizes for each type lsize = typesize(ltype, NULL); rsize = typesize(rtype, NULL); // Tree's size is too big if (lsize > rsize) return (NULL); // Widen to the right if (rsize > lsize) return (mkastunary(A_WIDEN, rtype, tree, NULL, 0)); } // For pointers if (ptrtype(ltype) && ptrtype(rtype)) { // We can compare them if (op >= A_EQ && op <= A_GE) return(tree); // A comparison of the same type for a non-binary operation is OK, // or when the left tree is of `void *` type. 
if (op == 0 && (ltype == rtype || ltype == pointer_to(P_VOID))) return (tree); } // We can scale only on A_ADD or A_SUBTRACT operation if (op == A_ADD || op == A_SUBTRACT) { // Left is int type, right is pointer type and the size // of the original type is >1: scale the left if (inttype(ltype) && ptrtype(rtype)) { rsize = genprimsize(value_at(rtype)); if (rsize > 1) return (mkastunary(A_SCALE, rtype, tree, NULL, rsize)); else return (tree); // Size 1, no need to scale } } // If we get here, the types are not compatible return (NULL); } ================================================ FILE: 46_Void_Functions/Makefile ================================================ # Define the location of the include directory # and the location to install the compiler binary INCDIR=/tmp/include BINDIR=/tmp HSRCS= data.h decl.h defs.h SRCS= cg.c decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c ARMSRCS= cg_arm.c decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c cwj: $(SRCS) $(HSRCS) cc -o cwj -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCS) compn: $(SRCN) $(HSRCS) cc -D__NASM__ -o compn -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCN) cwjarm: $(ARMSRCS) $(HSRCS) cc -o cwjarm -g -Wall $(ARMSRCS) cp cwjarm cwj install: cwj mkdir -p $(INCDIR) rsync -a include/. $(INCDIR) cp cwj $(BINDIR) chmod +x $(BINDIR)/cwj installn: compn mkdir -p $(INCDIR) rsync -a include/. 
$(INCDIR) cp compn $(BINDIR) chmod +x $(BINDIR)/compn clean: rm -f cwj cwjarm compn *.o *.s out a.out test: install tests/runtests (cd tests; chmod +x runtests; ./runtests) armtest: cwjarm tests/runtests (cd tests; chmod +x runtests; ./runtests) testn: installn tests/runtestsn (cd tests; chmod +x runtestsn; ./runtestsn) ================================================ FILE: 46_Void_Functions/Readme.md ================================================ # Part 46: Void Function Parameters and Scanning Changes In this part of our compiler writing journey, I've made several changes which involve the scanner and the parser. ## Void Function Parameters We start with this common C construct to indicate that a function has no parameters: ```c int fred(void); // Void means no parameters, but int fred(); // No parameters also means no parameters ``` It does seem strange that we already have a way of indicating no parameters but, anyway, it's a common thing so we need to support it. The problem is that, once we hit the left parenthesis, we fall into the `declaration_list()` function in `decl.c`. This has been set up to parse a type with a definite following identifier. It's not going to be easy to alter it to deal with a type and *no* identifier. So we need to go back to the `param_declaration_list()` function and parse the 'void' ')' tokens there. I already have a function in the scanner called `reject_token()` in `scan.c`. We should be able to scan a token, look at it, decide we don't want it, and reject it. Then, the next scanned token will be the one we reject. I've never used this function and, as it turns out, it was broken. Anyway, I took a step back and decided that it would be easier to *peek* at the next token. If we decide we like it, we can scan it in as per normal. If we don't like it, we don't have to do anything: it will get scanned in on the next real token scan. Now, why do we need this? 
It's because our pseudo-code for dealing with 'void' in the parameter list will be: ``` parse the '(' if the next token is 'void' { peek at the one after it if the one after 'void' is ')', then return zero parameters } call declaration_list() to get the real parameters so that 'void' is still the current token ``` We need to do the peek because both of the following are legal: ```c int fred(void); int jane(void *ptr, int x, int y); ``` If we scan and parse the next token after 'void' and see it is the asterisk, then we have lost the 'void' token. When we then call `declaration_list()`, the first token it will see is the asterisk and it will get upset. Thus, we need the ability to peek beyond the current token while keeping the current token intact. ## New Scanner Code In `data.h` we have a new token variable: ```c extern_ struct token Token; // Last token scanned extern_ struct token Peektoken; // A look-ahead token ``` and `Peektoken.token` is initialised to zero by code in `main.c`. We modify the main `scan()` function in `scan.c` as follows: ```c // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. int scan(struct token *t) { int c, tokentype; // If we have a lookahead token, return this token if (Peektoken.token != 0) { t->token = Peektoken.token; t->tokstr = Peektoken.tokstr; t->intvalue = Peektoken.intvalue; Peektoken.token = 0; return (1); } ... } ``` If `Peektoken.token` remains zero, we get the next token. But once something is stored in `Peektoken`, then that will be the next token we return. ## Declaration Modifications Now that we can peek ahead at the next token, let's put it into action. We modify the code in `param_declaration_list()` as follows: ```c // Loop getting any parameters while (Token.token != T_RPAREN) { // If the first token is 'void' if (Token.token == T_VOID) { // Peek at the next token. If a ')', the function // has no parameters, so leave the loop. 
scan(&Peektoken); if (Peektoken.token == T_RPAREN) { // Move the Peektoken into the Token paramcnt= 0; scan(&Token); break; } } ... // Get the type of the next parameter type = declaration_list(&ctype, C_PARAM, T_COMMA, T_RPAREN, &unused); ... } ``` Assume that we have scanned in the 'void'. We now `scan(&Peektoken);` to see what's up next without altering the current `Token`. If that's a right parenthesis, we can leave with `paramcnt` set to zero after skipping the 'void' token. But if the next token wasn't a right parenthesis, we still have `Token` set to 'void' and we can now call `declaration_list()` to get the actual list of parameters. ## Hex and Octal Integer Constants I found the above problem because I've started to feed the compiler's source code to itself. Once I had fixed the 'void' parameter issue, the next thing that I found was that the compiler is unable to parse hex and octal constants like `0x314A` and `0073`. Luckily, the [SubC](http://www.t3x.org/subc/) compiler written by Nils M Holm has code to do this, and I can borrow it wholesale to add to our compiler. We need to modify the `scanint()` function in `scan.c` to do this: ```c // Scan and return an integer literal // value from the input file. static int scanint(int c) { int k, val = 0, radix = 10; // NEW CODE: Assume the radix is 10, but if it starts with 0 if (c == '0') { // and the next character is 'x', it's radix 16 if ((c = next()) == 'x') { radix = 16; c = next(); } else // Otherwise, it's radix 8 radix = 8; } // Convert each character into an int value while ((k = chrpos("0123456789abcdef", tolower(c))) >= 0) { if (k >= radix) fatalc("invalid digit in integer literal", c); val = val * radix + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } ``` We already had the `k= chrpos("0123456789")` code in the function to deal with decimal literal values. The new code above this now scans for a leading '0' digit. 
If it sees this, it checks the following character. If it's an 'x', the radix is 16; if not, the radix is 8. The other change is that we multiply the previous value by the radix instead of the constant 10. It's a very elegant way to solve this problem, and many thanks to Nils for writing the code. ## More Character Constants The next problem I hit was code in our compiler that says: ```c if (*posn == '\0') ``` That's a character literal which our compiler doesn't recognise. We will need to modify `scanch()` in `scan.c` to deal with character literals which are specified as octal values. But character literals which are specified as hexadecimal values are also possible, e.g. '\x41'. Again, the code from SubC comes to our rescue: ```c // Read in a hexadecimal constant from the input static int hexchar(void) { int c, h, n = 0, f = 0; // Loop getting characters while (isxdigit(c = next())) { // Convert from char to int value h = chrpos("0123456789abcdef", tolower(c)); // Add to running hex value n = n * 16 + h; f = 1; } // We hit a non-hex character, put it back putback(c); // Flag tells us we never saw any hex characters if (!f) fatal("missing digits after '\\x'"); if (n > 255) fatal("value out of range after '\\x'"); return n; } // Return the next character from a character // or string literal static int scanch(void) { int i, c, c2; // Get the next input character and interpret // metacharacters that start with a backslash c = next(); if (c == '\\') { switch (c = next()) { ... case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': // Code from SubC for (i = c2 = 0; isdigit(c) && c < '8'; c = next()) { if (++i > 3) break; c2 = c2 * 8 + (c - '0'); } putback(c); // Put back the first non-octal char return (c2); case 'x': return hexchar(); // Code from SubC default: fatalc("unknown escape sequence", c); } } return (c); // Just an ordinary old character! } ``` Again, it's nice and elegant code. 
However, we now have two code fragments to do hex conversion and three code fragments to do radix conversion, so there is still some potential refactoring here. # Conclusion and What's Next We mostly made changes to the scanner in this part of the journey. They were not earth shattering changes, but they are some of the little things that we need to get done to have the compiler be self-compiling. Two big things that we will need to tackle are static functions and variables, and the `sizeof()` operator. In the next part of our compiler writing journey, I will probably work on the `sizeof()` operator because `static` still scares me a bit! [Next step](../47_Sizeof/Readme.md) ================================================ FILE: 46_Void_Functions/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\t.text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\t.data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. 
This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch (type) { case P_CHAR: return (offset); case P_INT: case P_LONG: break; default: if (!ptrtype(type)) fatald("Bad type in cg_align:", type); } // Here we have an int or a long. Align it on a 4-byte offset // I put the generic code here so it can be reused elsewhere. alignment = 4; offset = (offset + direction * (alignment - 1)) & ~(alignment - 1); return (offset); } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. static int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. // We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "%r10", "%r11", "%r12", "%r13", "%r9", "%r8", "%rcx", "%rdx", "%rsi", "%rdi" }; static char *breglist[] = { "%r10b", "%r11b", "%r12b", "%r13b", "%r9b", "%r8b", "%cl", "%dl", "%sil", "%dil" }; static char *dreglist[] = { "%r10d", "%r11d", "%r12d", "%r13d", "%r9d", "%r8d", "%ecx", "%edx", "%esi", "%edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. 
static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); cgtextseg(); fprintf(Outfile, "# internal switch(expr) routine\n" "# %%rsi = switch table, %%rax = expr\n" "# from SubC: http://www.t3x.org/subc/\n" "\n" "switch:\n" " pushq %%rsi\n" " movq %%rdx, %%rsi\n" " movq %%rax, %%rbx\n" " cld\n" " lodsq\n" " movq %%rax, %%rcx\n" "next:\n" " lodsq\n" " movq %%rax, %%rdx\n" " lodsq\n" " cmpq %%rdx, %%rbx\n" " jnz no\n" " popq %%rsi\n" " jmp *%%rax\n" "no:\n" " loop next\n" " lodsq\n" " popq %%rsi\n" " jmp *%%rax\n" "\n"); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(struct symtable *sym) { char *name = sym->name; struct symtable *parm, *locvar; int cnt; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the %rsp and %rsp fprintf(Outfile, "\t.globl\t%s\n" "\t.type\t%s, @function\n" "%s:\n" "\tpushq\t%%rbp\n" "\tmovq\t%%rsp, %%rbp\n", name, name, name); // Copy any in-register parameters to the stack, up to six of them // The remaining parameters are already on the stack for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) { if (cnt > 6) { parm->st_posn = paramOffset; paramOffset += 8; } else { parm->st_posn = newlocaloffset(parm->type); cgstorlocal(paramReg--, parm); } } // For the remainder, if they are a parameter then they are // already on the stack. 
If only a local, make a stack position. for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) { locvar->st_posn = newlocaloffset(locvar->type); } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\taddq\t$%d,%%rsp\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->st_endlabel); fprintf(Outfile, "\taddq\t$%d,%%rsp\n", stackOffset); fputs("\tpopq %rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. 
int cgloadglob(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name); } else // Print out the code to initialise it switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovzbq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovslq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. 
int cgloadlocal(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->st_posn); fprintf(Outfile, "\tmovq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->st_posn); } else switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->st_posn); fprintf(Outfile, "\tmovzbq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->st_posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->st_posn); fprintf(Outfile, "\tmovslq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->st_posn); break; default: fatald("Bad type in cgloadlocal:", sym->type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tleaq\tL%d(%%rip), %s\n", label, reglist[r]); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int 
cgsub(int r1, int r2) { fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tandq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\torq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txorq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshlq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshrq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tnegq\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnotq\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); return (r); } // Convert an integer value to a boolean value. 
Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s@PLT\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\taddq\t$%d, %%rsp\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpushq\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmovq\t%s, %s\n", reglist[r], reglist[FIRSTPARAMREG - argposn + 1]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsalq\t$%d, %s\n", val, reglist[r]); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %s(%%rip)\n", reglist[r], sym->name); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %s(%%rip)\n", breglist[r], sym->name); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %s(%%rip)\n", dreglist[r], sym->name); break; default: fatald("Bad type in cgstorglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %d(%%rbp)\n", reglist[r], sym->st_posn); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %d(%%rbp)\n", breglist[r], sym->st_posn); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %d(%%rbp)\n", dreglist[r], sym->st_posn); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size, type; int initvalue; int i; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the variable (or its elements if an array) // and the type of the variable if (node->stype == S_ARRAY) { size= typesize(value_at(node->type), node->ctype); type= value_at(node->type); } else { size = node->size; type= node->type; } // Generate the global identity and the label cgdataseg(); fprintf(Outfile, "\t.globl\t%s\n", node->name); 
fprintf(Outfile, "%s:\n", node->name); // Output space for one or more elements for (i=0; i < node->nelems; i++) { // Get any initial value initvalue= 0; if (node->initlist != NULL) initvalue= node->initlist[i]; // Generate the space for this type switch (size) { case 1: fprintf(Outfile, "\t.byte\t%d\n", initvalue); break; case 4: fprintf(Outfile, "\t.long\t%d\n", initvalue); break; case 8: // Generate the pointer to a string literal. Treat a zero value // as actually zero, not the label L0 if (node->initlist != NULL && type== pointer_to(P_CHAR) && initvalue != 0) fprintf(Outfile, "\t.quad\tL%d\n", initvalue); else fprintf(Outfile, "\t.quad\t%d\n", initvalue); break; default: for (int i = 0; i < size; i++) fprintf(Outfile, "\t.byte\t0\n"); } } } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\t.byte\t%d\n", *cptr); } fprintf(Outfile, "\t.byte\t0\n"); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzbl\t%s, %%eax\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %%eax\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } cgjump(sym->st_endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL) fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", sym->name, reglist[r]); else fprintf(Outfile, "\tleaq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzbq\t(%s), %s\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovslq\t(%s), %s\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { // Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmovb\t%s, (%s)\n", breglist[r1], reglist[r2]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } // Generate a switch jump table and the code to // load the registers and call the switch() code void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; // Get a label for the switch table label = genlabel(); cglabel(label); // Heuristic. If we have no cases, create one case // which points to the default case if (casecount == 0) { caseval[0] = 0; caselabel[0] = defaultlabel; casecount = 1; } // Generate the switch jump table. 
fprintf(Outfile, "\t.quad\t%d\n", casecount); for (i = 0; i < casecount; i++) fprintf(Outfile, "\t.quad\t%d, L%d\n", caseval[i], caselabel[i]); fprintf(Outfile, "\t.quad\tL%d\n", defaultlabel); // Load the specific registers cglabel(toplabel); fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); fprintf(Outfile, "\tleaq\tL%d(%%rip), %%rdx\n", label); fprintf(Outfile, "\tjmp\tswitch\n"); } ================================================ FILE: 46_Void_Functions/cg_arm.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for ARMv6 on Raspberry Pi // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. static int freereg[4]; static char *reglist[4] = { "r4", "r5", "r6", "r7" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // We have to store large integer literal values in memory. // Keep a list of them which will be output in the postamble #define MAXINTS 1024 int Intlist[MAXINTS]; static int Intslot = 0; // Determine the offset of a large integer // literal from the .L3 label. If the integer // isn't in the list, add it. 
static void set_int_offset(int val) { int offset = -1; // See if it is already there for (int i = 0; i < Intslot; i++) { if (Intlist[i] == val) { offset = 4 * i; break; } } // Not in the list, so add it if (offset == -1) { offset = 4 * Intslot; if (Intslot == MAXINTS) fatal("Out of int slots in set_int_offset()"); Intlist[Intslot++] = val; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L3+%d\n", offset); } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Print out the assembly postamble void cgpostamble() { // Print out the global variables fprintf(Outfile, ".L2:\n"); for (int i = 0; i < Globs; i++) { if (Symtable[i].stype == S_VARIABLE) fprintf(Outfile, "\t.word %s\n", Symtable[i].name); } // Print out the integer literals fprintf(Outfile, ".L3:\n"); for (int i = 0; i < Intslot; i++) { fprintf(Outfile, "\t.word %d\n", Intlist[i]); } } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, \%%function\n" "%s:\n" "\tpush\t{fp, lr}\n" "\tadd\tfp, sp, #4\n" "\tsub\tsp, sp, #8\n" "\tstr\tr0, [fp, #-8]\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Symtable[id].endlabel); fputs("\tsub\tsp, fp, #4\n" "\tpop\t{fp, pc}\n" "\t.align\t2\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); // If the literal value is small, do it with one instruction if (value <= 1000) fprintf(Outfile, "\tmov\t%s, #%d\n", reglist[r], value); else { set_int_offset(value); fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); } return (r); } // Determine the offset of a variable from the .L2 // label. Yes, this is inefficient code. static void set_var_offset(int id) { int offset = 0; // Walk the symbol table up to id. 
// Find S_VARIABLEs and add on 4 until // we get to our variable for (int i = 0; i < id; i++) { if (Symtable[i].stype == S_VARIABLE) offset += 4; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L2+%d\n", offset); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tldrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s, %s\n", reglist[r1], reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\tmul\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { // To do a divide: r0 holds the dividend, r1 holds the divisor. // The quotient is in r0. 
fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r1]); fprintf(Outfile, "\tmov\tr1, %s\n", reglist[r2]); fprintf(Outfile, "\tbl\t__aeabi_idiv\n"); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r1]); free_register(r2); return (r1); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]); fprintf(Outfile, "\tbl\t%s\n", Symtable[id].name); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r]); return (r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tlsl\t%s, %s, #%d\n", reglist[r], reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tstr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgstorglob:", Symtable[id].type); } return (r); } // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { if (ptrtype(type)) return (4); switch (type) { case P_CHAR: return (1); case P_INT: case P_LONG: return (4); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Symtable[id].type); fprintf(Outfile, "\t.data\n" "\t.globl\t%s\n", Symtable[id].name); switch (typesize) { case 1: fprintf(Outfile, "%s:\t.byte\t0\n", Symtable[id].name); break; case 4: fprintf(Outfile, "%s:\t.long\t0\n", Symtable[id].name); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "moveq", "movne", "movlt", "movgt", "movle", "movge" }; // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "movne", "moveq", "movge", "movle", "movgt", "movlt" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #1\n", cmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #0\n", invcmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\tuxtb\t%s, %s\n", reglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tb\tL%d\n", l); } // List of inverted branch instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *brlist[] = { "bne", "beq", "bge", "ble", "bgt", "blt" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", brlist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[reg]); cgjump(Symtable[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. Return a new register int cgaddress(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); fprintf(Outfile, "\tmov\t%s, r3\n", reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tldrb\t%s, [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [%s]\n", reglist[r1], reglist[r2]); break; case P_INT: case P_LONG: fprintf(Outfile, "\tstr\t%s, [%s]\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 46_Void_Functions/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { 
no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\tsection .text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\tsection .data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch (type) { case P_CHAR: return (offset); case P_INT: case P_LONG: break; default: if (!ptrtype(type)) fatald("Bad type in cg_align:", type); } // Here we have an int or a long. Align it on a 4-byte offset // I put the generic code here so it can be reused elsewhere. alignment = 4; offset = (offset + direction * (alignment - 1)) & ~(alignment - 1); return (offset); } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. 
// We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "r10", "r11", "r12", "r13", "r9", "r8", "rcx", "rdx", "rsi", "rdi" }; static char *breglist[] = { "r10b", "r11b", "r12b", "r13b", "r9b", "r8b", "cl", "dl", "sil", "dil" }; static char *dreglist[] = { "r10d", "r11d", "r12d", "r13d", "r9d", "r8d", "ecx", "edx", "esi", "edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\textern\tprintint\n", Outfile); fputs("\textern\tprintchar\n", Outfile); fputs("\textern\topen\n", Outfile); fputs("\textern\tclose\n", Outfile); fputs("\textern\tread\n", Outfile); fputs("\textern\twrite\n", Outfile); fputs("\textern\tprintf\n", Outfile); cgtextseg(); fprintf(Outfile, "; internal switch(expr) routine\n" "; rsi = switch table, rax = expr\n" "; from SubC: http://www.t3x.org/subc/\n" "\n" "switch:\n" " push rsi\n" " mov rsi, rdx\n" " mov rbx, rax\n" " cld\n" " lodsq\n" " mov rcx, rax\n" "next:\n" " lodsq\n" " mov rdx, rax\n" " lodsq\n" " cmp rbx, rdx\n" " jnz no\n" " pop rsi\n" " jmp rax\n" "no:\n" " loop next\n" " lodsq\n" " pop rsi\n" " jmp rax\n" "\n"); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(struct symtable *sym) { char *name = sym->name; struct symtable *parm, *locvar; int cnt; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the rsp and rbp fprintf(Outfile, "\tglobal\t%s\n" "%s:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n", name, name); // Copy any in-register parameters to the stack, up to six of them // The remaining parameters are already on the stack for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) { if (cnt > 6) { parm->st_posn = paramOffset; paramOffset += 8; } else { parm->st_posn = newlocaloffset(parm->type); cgstorlocal(paramReg--, parm); } } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. 
for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) { locvar->st_posn = newlocaloffset(locvar->type); } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\tadd\trsp, %d\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->st_endlabel); fprintf(Outfile, "\tadd\trsp, %d\n", stackOffset); fputs("\tpop rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadglob(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); } else // Print out the code to initialise it switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_PREDEC) 
fprintf(Outfile, "\tdec\tdword [%s]\n", sym->name); fprintf(Outfile, "\tmovsx\t%s, word [%s]\n", dreglist[r], sym->name); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword [%s]\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadlocal(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->st_posn); fprintf(Outfile, "\tmov\t%s, [rbp+%d]\n", reglist[r], sym->st_posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->st_posn); } else switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->st_posn); fprintf(Outfile, "\tmovzx\t%s, byte [rbp+%d]\n", reglist[r], sym->st_posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->st_posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", sym->st_posn); fprintf(Outfile, "\tmovsx\t%s, dword [rbp+%d]\n", reglist[r], sym->st_posn); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", 
sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", sym->st_posn); break; default: fatald("Bad type in cgloadlocal:", sym->type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, L%d\n", reglist[r], label); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tand\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\tor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshl\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, 
"\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshr\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tneg\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnot\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r], breglist[r]); return (r); } // Convert an integer value to a boolean value. Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, byte %s\n", reglist[r], breglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\tadd\trsp, %d\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmov\t%s, rax\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpush\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmov\t%s, %s\n", reglist[FIRSTPARAMREG - argposn + 1], reglist[r]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsal\t%s, %d\n", reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, dreglist[r]); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\tqword\t[rbp+%d], %s\n", sym->st_posn, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\tbyte\t[rbp+%d], %s\n", sym->st_posn, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\tdword\t[rbp+%d], %s\n", sym->st_posn, dreglist[r]); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size, type; int initvalue; int i; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the variable (or its elements if an array) // and the type of the variable if (node->stype == S_ARRAY) { size= typesize(value_at(node->type), node->ctype); type= value_at(node->type); } else { size = node->size; type= node->type; } // Generate the global identity and the label cgdataseg(); fprintf(Outfile, "\tsection\t.data\n" "\tglobal\t%s\n", node->name); 
fprintf(Outfile, "%s:\n", node->name); // Output space for one or more elements for (i = 0; i < node->nelems; i++) { // Get any initial value initvalue = 0; if (node->initlist != NULL) initvalue = node->initlist[i]; // Generate the space for this type // original version switch(size) { case 1: fprintf(Outfile, "\tdb\t%d\n", initvalue); break; case 4: fprintf(Outfile, "\tdd\t%d\n", initvalue); break; case 8: // Generate the pointer to a string literal. Treat a zero value // as actually zero, not the label L0 if (node->initlist != NULL && type == pointer_to(P_CHAR) && initvalue != 0) fprintf(Outfile, "\tdq\tL%d\n", initvalue); else fprintf(Outfile, "\tdq\t%d\n", initvalue); break; default: for (int i = 0; i < size; i++) fprintf(Outfile, "\tdb\t0\n"); /* compact version using times instead of loop fprintf(Outfile, "\ttimes\t%d\tdb\t0\n", size); */ } } } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\tdb\t%d\n", *cptr); } fprintf(Outfile, "\tdb\t0\n"); /* put string in readable format on a single line // probably went overboard with error checking int comma = 0, quote = 0, start = 1; fprintf(Outfile, "\tdb\t"); for (cptr=strvalue; *cptr; cptr++) { if ( ! isprint(*cptr) ) if (comma || start) { fprintf(Outfile, "%d, ", *cptr); start = 0; comma = 1; } else if (quote) { fprintf(Outfile, "\', %d, ", *cptr); comma = 1; quote = 0; } else { fprintf(Outfile, "%d, ", *cptr); comma = 1; quote = 0; } else if (start || comma) { fprintf(Outfile, "\'%c", *cptr); start = comma = 0; quote = 1; } else { fprintf(Outfile, "%c", *cptr); comma = 0; quote = 1; } } if (comma || start) fprintf(Outfile, "0\n"); else fprintf(Outfile, "\', 0\n"); */ } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzx\teax, %s\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmov\teax, %s\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } cgjump(sym->st_endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL) fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r], sym->name); else fprintf(Outfile, "\tlea\t%s, [rbp+%d]\n", reglist[r], sym->st_posn); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovsx\t%s, dword [%s]\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { //Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmov\t[%s], byte %s\n", reglist[r2], breglist[r1]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } // Generate a switch jump table and the code to // load the registers and call the switch() code void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; // Get a label for the switch table label = genlabel(); cglabel(label); // Heuristic. If we have no cases, create one case // which points to the default case if (casecount == 0) { caseval[0] = 0; caselabel[0] = defaultlabel; casecount = 1; } // Generate the switch jump table. 
fprintf(Outfile, "\tdq\t%d\n", casecount); for (i = 0; i < casecount; i++) fprintf(Outfile, "\tdq\t%d, L%d\n", caseval[i], caselabel[i]); fprintf(Outfile, "\tdq\tL%d\n", defaultlabel); // Load the specific registers cglabel(toplabel); fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); fprintf(Outfile, "\tmov\trdx, L%d\n", label); fprintf(Outfile, "\tjmp\tswitch\n"); } ================================================ FILE: 46_Void_Functions/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Putback; // Character put back by scanner extern_ struct symtable *Functionid; // Symbol ptr of the current function extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ char *Infilename; // Name of file we are parsing extern_ char *Outfilename; // Name of file we opened as Outfile extern_ struct token Token; // Last token scanned extern_ struct token Peektoken; // A look-ahead token extern_ char Text[TEXTLEN + 1]; // Last identifier scanned extern_ int Looplevel; // Depth of nested loops extern_ int Switchlevel; // Depth of nested switches // Symbol table lists extern_ struct symtable *Globhead, *Globtail; // Global variables and functions extern_ struct symtable *Loclhead, *Locltail; // Local variables extern_ struct symtable *Parmhead, *Parmtail; // Local parameters extern_ struct symtable *Membhead, *Membtail; // Temp list of struct/union members extern_ struct symtable *Structhead, *Structtail; // List of struct types extern_ struct symtable *Unionhead, *Uniontail; // List of union types extern_ struct symtable *Enumhead, *Enumtail; // List of enum types and values extern_ struct symtable *Typehead, *Typetail; // List of typedefs // Command-line flags extern_ int O_dumpAST; // If true, dump the AST trees extern_ int O_keepasm; // If true, keep any assembly files extern_ int O_assemble; 
// If true, assemble the assembly files extern_ int O_dolink; // If true, link the object files extern_ int O_verbose; // If true, print info on compilation stages ================================================ FILE: 46_Void_Functions/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 static struct symtable *composite_declaration(int type); static int typedef_declaration(struct symtable **ctype); static int type_of_typedef(char *name, struct symtable **ctype); static void enum_declaration(void); // Parse the current token and return a primitive type enum value, // a pointer to any composite type and possibly modify // the class of the type. int parse_type(struct symtable **ctype, int *class) { int type, exstatic = 1; // See if the class has been changed to extern (later, static) while (exstatic) { switch (Token.token) { case T_EXTERN: *class = C_EXTERN; scan(&Token); break; default: exstatic = 0; } } // Now work on the actual type keyword switch (Token.token) { case T_VOID: type = P_VOID; scan(&Token); break; case T_CHAR: type = P_CHAR; scan(&Token); break; case T_INT: type = P_INT; scan(&Token); break; case T_LONG: type = P_LONG; scan(&Token); break; // For the following, if we have a ';' after the // parsing then there is no type, so return -1. 
// Example: struct x {int y; int z}; case T_STRUCT: type = P_STRUCT; *ctype = composite_declaration(P_STRUCT); if (Token.token == T_SEMI) type = -1; break; case T_UNION: type = P_UNION; *ctype = composite_declaration(P_UNION); if (Token.token == T_SEMI) type = -1; break; case T_ENUM: type = P_INT; // Enums are really ints enum_declaration(); if (Token.token == T_SEMI) type = -1; break; case T_TYPEDEF: type = typedef_declaration(ctype); if (Token.token == T_SEMI) type = -1; break; case T_IDENT: type = type_of_typedef(Text, ctype); break; default: fatals("Illegal type, token", Token.tokstr); } return (type); } // Given a type parsed by parse_type(), scan in any following // '*' tokens and return the new type int parse_stars(int type) { while (1) { if (Token.token != T_STAR) break; type = pointer_to(type); scan(&Token); } return (type); } // Parse a type which appears inside a cast int parse_cast(void) { int type, class; struct symtable *ctype; // Get the type inside the parentheses type= parse_stars(parse_type(&ctype, &class)); // Do some error checking. I'm sure more can be done if (type == P_STRUCT || type == P_UNION || type == P_VOID) fatal("Cannot cast to a struct, union or void type"); return(type); } // Given a type, parse an expression of literals and ensure // that the type of this expression matches the given type. // Parse any type cast that precedes the expression. // If an integer literal, return this value. // If a string literal, return the label number of the string. 
int parse_literal(int type) { struct ASTnode *tree; // Parse the expression and optimise the resulting AST tree tree= optimise(binexpr(0)); // If there's a cast, get the child and // mark it as having the type from the cast if (tree->op == A_CAST) { tree->left->type= tree->type; tree= tree->left; } // The tree must now have an integer or string literal if (tree->op != A_INTLIT && tree->op != A_STRLIT) fatal("Cannot initialise globals with a general expression"); // If the type is char * and if (type == pointer_to(P_CHAR)) { // We have a string literal, return the label number if (tree->op == A_STRLIT) return(tree->a_intvalue); // We have a zero int literal, so that's a NULL if (tree->op == A_INTLIT && tree->a_intvalue==0) return(0); } // We only get here with an integer literal. The input type // is an integer type and is wide enough to hold the literal value if (inttype(type) && typesize(type, NULL) >= typesize(tree->type, NULL)) return(tree->a_intvalue); fatal("Type mismatch: literal vs. variable"); return(0); // Keep -Wall happy } // Given the type, name and class of a scalar variable, // parse any initialisation value and allocate storage for it. // Return the variable's symbol table entry. 
static struct symtable *scalar_declaration(char *varname, int type, struct symtable *ctype, int class, struct ASTnode **tree) { struct symtable *sym=NULL; struct ASTnode *varnode, *exprnode; *tree= NULL; // Add this as a known scalar switch (class) { case C_EXTERN: case C_GLOBAL: sym= addglob(varname, type, ctype, S_VARIABLE, class, 1, 0); break; case C_LOCAL: sym= addlocl(varname, type, ctype, S_VARIABLE, 1); break; case C_PARAM: sym= addparm(varname, type, ctype, S_VARIABLE); break; case C_MEMBER: sym= addmemb(varname, type, ctype, S_VARIABLE, 1); break; } // The variable is being initialised if (Token.token == T_ASSIGN) { // Only possible for a global or local if (class != C_GLOBAL && class != C_LOCAL) fatals("Variable can not be initialised", varname); scan(&Token); // Globals must be assigned a literal value if (class == C_GLOBAL) { // Create one initial value for the variable and // parse this value sym->initlist= (int *)malloc(sizeof(int)); sym->initlist[0]= parse_literal(type); } if (class == C_LOCAL) { // Make an A_IDENT AST node with the variable varnode = mkastleaf(A_IDENT, sym->type, sym, 0); // Get the expression for the assignment, make into a rvalue exprnode = binexpr(0); exprnode->rvalue = 1; // Ensure the expression's type matches the variable exprnode = modify_type(exprnode, varnode->type, 0); if (exprnode == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree *tree = mkastnode(A_ASSIGN, exprnode->type, exprnode, NULL, varnode, NULL, 0); } } // Generate any global space if (class == C_GLOBAL) genglobsym(sym); return (sym); } // Given the type, name and class of an variable, parse // the size of the array, if any. Then parse any initialisation // value and allocate storage for it. // Return the variable's symbol table entry. 
static struct symtable *array_declaration(char *varname, int type, struct symtable *ctype, int class) { struct symtable *sym; // New symbol table entry int nelems= -1; // Assume the number of elements won't be given int maxelems; // The maximum number of elements in the init list int *initlist; // The list of initial elements int i=0, j; // Skip past the '[' scan(&Token); // See we have an array size if (Token.token != T_RBRACKET) { nelems= parse_literal(P_INT); if (nelems <= 0) fatald("Array size is illegal", nelems); } // Ensure we have a following ']' match(T_RBRACKET, "]"); // Add this as a known array. We treat the // array as a pointer to its elements' type switch (class) { case C_EXTERN: case C_GLOBAL: sym = addglob(varname, pointer_to(type), ctype, S_ARRAY, class, 0, 0); break; default: fatal("For now, declaration of non-global arrays is not implemented"); } // Array initialisation if (Token.token == T_ASSIGN) { if (class != C_GLOBAL) fatals("Variable can not be initialised", varname); scan(&Token); // Get the following left curly bracket match(T_LBRACE, "{"); #define TABLE_INCREMENT 10 // If the array already has nelems, allocate that many elements // in the list. Otherwise, start with TABLE_INCREMENT. 
if (nelems != -1) maxelems= nelems; else maxelems= TABLE_INCREMENT; initlist= (int *)malloc(maxelems *sizeof(int)); // Loop getting a new literal value from the list while (1) { // Check we can add the next value, then parse and add it if (nelems != -1 && i == maxelems) fatal("Too many values in initialisation list"); initlist[i++]= parse_literal(type); // Increase the list size if the original size was // not set and we have hit the end of the current list if (nelems == -1 && i == maxelems) { maxelems += TABLE_INCREMENT; initlist= (int *)realloc(initlist, maxelems *sizeof(int)); } // Leave when we hit the right curly bracket if (Token.token == T_RBRACE) { scan(&Token); break; } // Next token must be a comma, then comma(); } // Zero any unused elements in the initlist. // Attach the list to the symbol table entry for (j=i; j < sym->nelems; j++) initlist[j]=0; if (i > nelems) nelems = i; sym->initlist= initlist; } // Set the size of the array and the number of elements sym->nelems= nelems; sym->size= sym->nelems * typesize(type, ctype); // Generate any global space if (class == C_GLOBAL) genglobsym(sym); return (sym); } // Given a pointer to the new function being declared and // a possibly NULL pointer to the function's previous declaration, // parse a list of parameters and cross-check them against the // previous declaration. Return the count of parameters static int param_declaration_list(struct symtable *oldfuncsym, struct symtable *newfuncsym) { int type, paramcnt = 0; struct symtable *ctype; struct symtable *protoptr = NULL; struct ASTnode *unused; // Get the pointer to the first prototype parameter if (oldfuncsym != NULL) protoptr = oldfuncsym->member; // Loop getting any parameters while (Token.token != T_RPAREN) { // If the first token is 'void' if (Token.token == T_VOID) { // Peek at the next token. If a ')', the function // has no parameters, so leave the loop. 
scan(&Peektoken); if (Peektoken.token == T_RPAREN) { // Move the Peektoken into the Token paramcnt= 0; scan(&Token); break; } } // Get the type of the next parameter type = declaration_list(&ctype, C_PARAM, T_COMMA, T_RPAREN, &unused); if (type == -1) fatal("Bad type in parameter list"); // Ensure the type of this parameter matches the prototype if (protoptr != NULL) { if (type != protoptr->type) fatald("Type doesn't match prototype for parameter", paramcnt + 1); protoptr = protoptr->next; } paramcnt++; // Stop when we hit the right parenthesis if (Token.token == T_RPAREN) break; // We need a comma as separator comma(); } if (oldfuncsym != NULL && paramcnt != oldfuncsym->nelems) fatals("Parameter count mismatch for function", oldfuncsym->name); // Return the count of parameters return (paramcnt); } // // function_declaration: type identifier '(' parameter_list ')' ; // | type identifier '(' parameter_list ')' compound_statement ; // // Parse the declaration of function. static struct symtable *function_declaration(char *funcname, int type, struct symtable *ctype, int class) { struct ASTnode *tree, *finalstmt; struct symtable *oldfuncsym, *newfuncsym = NULL; int endlabel, paramcnt; // Text has the identifier's name. If this exists and is a // function, get the id. Otherwise, set oldfuncsym to NULL. if ((oldfuncsym = findsymbol(funcname)) != NULL) if (oldfuncsym->stype != S_FUNCTION) oldfuncsym = NULL; // If this is a new function declaration, get a // label-id for the end label, and add the function // to the symbol table, if (oldfuncsym == NULL) { endlabel = genlabel(); // Assumtion: functions only return scalar types, so NULL below newfuncsym = addglob(funcname, type, NULL, S_FUNCTION, C_GLOBAL, 0, endlabel); } // Scan in the '(', any parameters and the ')'. 
// Pass in any existing function prototype pointer lparen(); paramcnt = param_declaration_list(oldfuncsym, newfuncsym); rparen(); // If this is a new function declaration, update the // function symbol entry with the number of parameters. // Also copy the parameter list into the function's node. if (newfuncsym) { newfuncsym->nelems = paramcnt; newfuncsym->member = Parmhead; oldfuncsym = newfuncsym; } // Clear out the parameter list Parmhead = Parmtail = NULL; // Declaration ends in a semicolon, only a prototype. if (Token.token == T_SEMI) return (oldfuncsym); // This is not just a prototype. // Set the Functionid global to the function's symbol pointer Functionid = oldfuncsym; // Get the AST tree for the compound statement and mark // that we have parsed no loops or switches yet Looplevel = 0; Switchlevel = 0; lbrace(); tree = compound_statement(0); rbrace(); // If the function type isn't P_VOID .. if (type != P_VOID) { // Error if no statements in the function if (tree == NULL) fatal("No statements in function with non-void type"); // Check that the last AST operation in the // compound statement was a return statement finalstmt = (tree->op == A_GLUE) ? tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); } // Build the A_FUNCTION node which has the function's symbol pointer // and the compound statement sub-tree tree = mkastunary(A_FUNCTION, type, tree, oldfuncsym, endlabel); // Do optimisations on the AST tree tree= optimise(tree); // Dump the AST tree if requested if (O_dumpAST) { dumpAST(tree, NOLABEL, 0); fprintf(stdout, "\n\n"); } // Generate the assembly code for it genAST(tree, NOLABEL, NOLABEL, NOLABEL, 0); // Now free the symbols associated // with this function freeloclsyms(); return (oldfuncsym); } // Parse composite type declarations: structs or unions. // Either find an existing struct/union declaration, or build // a struct/union symbol table entry and return its pointer. 
static struct symtable *composite_declaration(int type) { struct symtable *ctype = NULL; struct symtable *m; struct ASTnode *unused; int offset; int t; // Skip the struct/union keyword scan(&Token); // See if there is a following struct/union name if (Token.token == T_IDENT) { // Find any matching composite type if (type == P_STRUCT) ctype = findstruct(Text); else ctype = findunion(Text); scan(&Token); } // If the next token isn't an LBRACE , this is // the usage of an existing struct/union type. // Return the pointer to the type. if (Token.token != T_LBRACE) { if (ctype == NULL) fatals("unknown struct/union type", Text); return (ctype); } // Ensure this struct/union type hasn't been // previously defined if (ctype) fatals("previously defined struct/union", Text); // Build the composite type and skip the left brace if (type == P_STRUCT) ctype = addstruct(Text); else ctype = addunion(Text); scan(&Token); // Scan in the list of members while (1) { // Get the next member. m is used as a dummy t= declaration_list(&m, C_MEMBER, T_SEMI, T_RBRACE, &unused); if (t== -1) fatal("Bad type in member list"); if (Token.token == T_SEMI) scan(&Token); if (Token.token == T_RBRACE) break; } // Attach to the struct type's node rbrace(); if (Membhead==NULL) fatals("No members in struct", ctype->name); ctype->member = Membhead; Membhead = Membtail = NULL; // Set the offset of the initial member // and find the first free byte after it m = ctype->member; m->st_posn = 0; offset = typesize(m->type, m->ctype); // Set the position of each successive member in the composite type // Unions are easy. 
For structs, align the member and find the next free byte for (m = m->next; m != NULL; m = m->next) { // Set the offset for this member if (type == P_STRUCT) m->st_posn = genalign(m->type, offset, 1); else m->st_posn = 0; // Get the offset of the next free byte after this member offset += typesize(m->type, m->ctype); } // Set the overall size of the composite type ctype->size = offset; return (ctype); } // Parse an enum declaration static void enum_declaration(void) { struct symtable *etype = NULL; char *name= NULL; int intval = 0; // Skip the enum keyword. scan(&Token); // If there's a following enum type name, get a // pointer to any existing enum type node. if (Token.token == T_IDENT) { etype = findenumtype(Text); name = strdup(Text); // As it gets tromped soon scan(&Token); } // If the next token isn't a LBRACE, check // that we have an enum type name, then return if (Token.token != T_LBRACE) { if (etype == NULL) fatals("undeclared enum type:", name); return; } // We do have an LBRACE. Skip it scan(&Token); // If we have an enum type name, ensure that it // hasn't been declared before. if (etype != NULL) fatals("enum type redeclared:", etype->name); else // Build an enum type node for this identifier etype = addenum(name, C_ENUMTYPE, 0); // Loop to get all the enum values while (1) { // Ensure we have an identifier // Copy it in case there's an int literal coming up ident(); name = strdup(Text); // Ensure this enum value hasn't been declared before etype = findenumval(name); if (etype != NULL) fatals("enum value redeclared:", Text); // If the next token is an '=', skip it and // get the following int literal if (Token.token == T_ASSIGN) { scan(&Token); if (Token.token != T_INTLIT) fatal("Expected int literal after '='"); intval = Token.intvalue; scan(&Token); } // Build an enum value node for this identifier. // Increment the value for the next enum identifier. 
etype = addenum(name, C_ENUMVAL, intval++); // Bail out on a right curly bracket, else get a comma if (Token.token == T_RBRACE) break; comma(); } scan(&Token); // Skip over the right curly bracket } // Parse a typedef declaration and return the type // and ctype that it represents static int typedef_declaration(struct symtable **ctype) { int type, class = 0; // Skip the typedef keyword. scan(&Token); // Get the actual type following the keyword type = parse_type(ctype, &class); if (class != 0) fatal("Can't have extern in a typedef declaration"); // See if the typedef identifier already exists if (findtypedef(Text) != NULL) fatals("redefinition of typedef", Text); // Get any following '*' tokens type = parse_stars(type); // It doesn't exist so add it to the typedef list addtypedef(Text, type, *ctype); scan(&Token); return (type); } // Given a typedef name, return the type it represents static int type_of_typedef(char *name, struct symtable **ctype) { struct symtable *t; // Look up the typedef in the list t = findtypedef(name); if (t == NULL) fatals("unknown type", name); scan(&Token); *ctype = t->ctype; return (t->type); } // Parse the declaration of a variable or function. // The type and any following '*'s have been scanned, and we // have the identifier in the Token variable. // The class argument is the variable's class. // Return a pointer to the symbol's entry in the symbol table static struct symtable *symbol_declaration(int type, struct symtable *ctype, int class, struct ASTnode **tree) { struct symtable *sym = NULL; char *varname = strdup(Text); // Ensure that we have an identifier. // We copied it above so we can scan more tokens in, e.g. // an assignment expression for a local variable. 
ident(); // Deal with function declarations if (Token.token == T_LPAREN) { return (function_declaration(varname, type, ctype, class)); } // See if this array or scalar variable has already been declared switch (class) { case C_EXTERN: case C_GLOBAL: if (findglob(varname) != NULL) fatals("Duplicate global variable declaration", varname); case C_LOCAL: case C_PARAM: if (findlocl(varname) != NULL) fatals("Duplicate local variable declaration", varname); case C_MEMBER: if (findmember(varname) != NULL) fatals("Duplicate struct/union member declaration", varname); } // Add the array or scalar variable to the symbol table if (Token.token == T_LBRACKET) sym = array_declaration(varname, type, ctype, class); else sym = scalar_declaration(varname, type, ctype, class, tree); return (sym); } // Parse a list of symbols where there is an initial type. // Return the type of the symbols. et1 and et2 are end tokens. int declaration_list(struct symtable **ctype, int class, int et1, int et2, struct ASTnode **gluetree) { int inittype, type; struct symtable *sym; struct ASTnode *tree; *gluetree= NULL; // Get the initial type. 
If -1, it was // a composite type definition, return this if ((inittype = parse_type(ctype, &class)) == -1) return (inittype); // Now parse the list of symbols while (1) { // See if this symbol is a pointer type = parse_stars(inittype); // Parse this symbol sym = symbol_declaration(type, *ctype, class, &tree); // We parsed a function, there is no list so leave if (sym->stype == S_FUNCTION) { if (class != C_GLOBAL) fatal("Function definition not at global level"); return (type); } // Glue any AST tree from a local declaration // to build a sequence of assignments to perform if (*gluetree== NULL) *gluetree= tree; else *gluetree = mkastnode(A_GLUE, P_NONE, *gluetree, NULL, tree, NULL, 0); // We are at the end of the list, leave if (Token.token == et1 || Token.token == et2) return (type); // Otherwise, we need a comma as separator comma(); } } // Parse one or more global declarations, either // variables, functions or structs void global_declarations(void) { struct symtable *ctype; struct ASTnode *unused; while (Token.token != T_EOF) { declaration_list(&ctype, C_GLOBAL, T_SEMI, T_EOF, &unused); // Skip any semicolons and right curly brackets if (Token.token == T_SEMI) scan(&Token); } } ================================================ FILE: 46_Void_Functions/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c void reject_token(struct token *t); int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue); struct ASTnode *mkastleaf(int op, int type, struct symtable *sym, int intvalue); struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, struct symtable *sym, int intvalue); void dumpAST(struct ASTnode *n, int label, int parentASTop); // gen.c int genlabel(void); int genAST(struct ASTnode *n, int iflabel, int looptoplabel, int 
loopendlabel, int parentASTop); void genpreamble(); void genpostamble(); void genfreeregs(); void genglobsym(struct symtable *node); int genglobstr(char *strvalue); int genprimsize(int type); int genalign(int type, int offset, int direction); void genreturn(int reg, int id); // cg.c int cgprimsize(int type); int cgalign(int type, int offset, int direction); void cgtextseg(); void cgdataseg(); void freeall_registers(void); void cgpreamble(); void cgpostamble(); void cgfuncpreamble(struct symtable *sym); void cgfuncpostamble(struct symtable *sym); int cgloadint(int value, int type); int cgloadglob(struct symtable *sym, int op); int cgloadlocal(struct symtable *sym, int op); int cgloadglobstr(int label); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdiv(int r1, int r2); int cgshlconst(int r, int val); int cgcall(struct symtable *sym, int numargs); void cgcopyarg(int r, int argposn); int cgstorglob(int r, struct symtable *sym); int cgstorlocal(int r, struct symtable *sym); void cgglobsym(struct symtable *node); void cgglobstr(int l, char *strvalue); int cgcompare_and_set(int ASTop, int r1, int r2); int cgcompare_and_jump(int ASTop, int r1, int r2, int label); void cglabel(int l); void cgjump(int l); int cgwiden(int r, int oldtype, int newtype); void cgreturn(int reg, struct symtable *sym); int cgaddress(struct symtable *sym); int cgderef(int r, int type); int cgstorderef(int r1, int r2, int type); int cgnegate(int r); int cginvert(int r); int cglognot(int r); int cgboolean(int r, int op, int label); int cgand(int r1, int r2); int cgor(int r1, int r2); int cgxor(int r1, int r2); int cgshl(int r1, int r2); int cgshr(int r1, int r2); void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel); // expr.c struct ASTnode *expression_list(int endtoken); struct ASTnode *binexpr(int ptp); // stmt.c struct ASTnode *compound_statement(int inswitch); // misc.c void match(int t, char *what); void 
semi(void); void lbrace(void); void rbrace(void); void lparen(void); void rparen(void); void ident(void); void comma(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node); struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn); struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn); struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int nelems); struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype); struct symtable *addstruct(char *name); struct symtable *addunion(char *name); struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int nelems); struct symtable *addenum(char *name, int class, int value); struct symtable *addtypedef(char *name, int type, struct symtable *ctype); struct symtable *findglob(char *s); struct symtable *findlocl(char *s); struct symtable *findsymbol(char *s); struct symtable *findmember(char *s); struct symtable *findstruct(char *s); struct symtable *findunion(char *s); struct symtable *findenumtype(char *s); struct symtable *findenumval(char *s); struct symtable *findtypedef(char *s); void clear_symtable(void); void freeloclsyms(void); // decl.c int parse_type(struct symtable **ctype, int *class); int parse_stars(int type); int parse_cast(void); int declaration_list(struct symtable **ctype, int class, int et1, int et2, struct ASTnode **gluetree); void global_declarations(void); // types.c int inttype(int type); int ptrtype(int type); int pointer_to(int type); int value_at(int type); int typesize(int type, struct symtable *ctype); struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op); // opt.c struct ASTnode *optimise(struct ASTnode *n); 
================================================ FILE: 46_Void_Functions/defs.h ================================================ #include #include #include #include // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 enum { TEXTLEN = 512 // Length of identifiers in input }; // Commands and default filenames #define AOUT "a.out" #ifdef __NASM__ #define ASCMD "nasm -f elf64 -o " #define LDCMD "cc -no-pie -fno-plt -Wall -o " #else #define ASCMD "as -o " #define LDCMD "cc -o " #endif #define CPPCMD "cpp -nostdinc -isystem " // Token types enum { T_EOF, // Binary operators T_ASSIGN, T_ASPLUS, T_ASMINUS, T_ASSTAR, T_ASSLASH, T_LOGOR, T_LOGAND, T_OR, T_XOR, T_AMPER, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, T_LSHIFT, T_RSHIFT, T_PLUS, T_MINUS, T_STAR, T_SLASH, // Other operators T_INC, T_DEC, T_INVERT, T_LOGNOT, // Type keywords T_VOID, T_CHAR, T_INT, T_LONG, // Other keywords T_IF, T_ELSE, T_WHILE, T_FOR, T_RETURN, T_STRUCT, T_UNION, T_ENUM, T_TYPEDEF, T_EXTERN, T_BREAK, T_CONTINUE, T_SWITCH, T_CASE, T_DEFAULT, // Structural tokens T_INTLIT, T_STRLIT, T_SEMI, T_IDENT, T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, T_LBRACKET, T_RBRACKET, T_COMMA, T_DOT, T_ARROW, T_COLON }; // Token structure struct token { int token; // Token type, from the enum list above char *tokstr; // String version of the token int intvalue; // For T_INTLIT, the integer value }; // AST node types. The first few line up // with the related tokens enum { A_ASSIGN = 1, A_ASPLUS, A_ASMINUS, A_ASSTAR, A_ASSLASH, A_LOGOR, A_LOGAND, A_OR, A_XOR, A_AND, A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_LSHIFT, A_RSHIFT, A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_INTLIT, A_STRLIT, A_IDENT, A_GLUE, A_IF, A_WHILE, A_FUNCTION, A_WIDEN, A_RETURN, A_FUNCCALL, A_DEREF, A_ADDR, A_SCALE, A_PREINC, A_PREDEC, A_POSTINC, A_POSTDEC, A_NEGATE, A_INVERT, A_LOGNOT, A_TOBOOL, A_BREAK, A_CONTINUE, A_SWITCH, A_CASE, A_DEFAULT, A_CAST }; // Primitive types. 
The bottom 4 bits is an integer // value that represents the level of indirection, // e.g. 0= no pointer, 1= pointer, 2= pointer pointer etc. enum { P_NONE, P_VOID = 16, P_CHAR = 32, P_INT = 48, P_LONG = 64, P_STRUCT=80, P_UNION=96 }; // Structural types enum { S_VARIABLE, S_FUNCTION, S_ARRAY }; // Storage classes enum { C_GLOBAL = 1, // Globally visible symbol C_LOCAL, // Locally visible symbol C_PARAM, // Locally visible function parameter C_EXTERN, // External globally visible symbol C_STRUCT, // A struct C_UNION, // A union C_MEMBER, // Member of a struct or union C_ENUMTYPE, // A named enumeration type C_ENUMVAL, // A named enumeration value C_TYPEDEF // A named typedef }; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol struct symtable *ctype; // If struct/union, ptr to that type int stype; // Structural type for the symbol int class; // Storage class for the symbol int size; // Total size in bytes of this symbol int nelems; // Functions: # params. 
Arrays: # elements #define st_endlabel st_posn // For functions, the end label int st_posn; // For locals, the negative offset // from the stack base pointer int *initlist; // List of initial values struct symtable *next; // Next symbol in one list struct symtable *member; // First member of a function, struct, }; // union or enum // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates int rvalue; // True if the node is an rvalue struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; struct symtable *sym; // For many AST nodes, the pointer to // the symbol in the symbol table #define a_intvalue a_size // For A_INTLIT, the integer value int a_size; // For A_SCALE, the size to scale by }; enum { NOREG = -1, // Use NOREG when the AST generation // functions have no register to return NOLABEL = 0 // Use NOLABEL when we have no label to // pass to genAST() }; ================================================ FILE: 46_Void_Functions/expr.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of expressions // Copyright (c) 2019 Warren Toomey, GPL3 // expression_list: // | expression // | expression ',' expression_list // ; // Parse a list of zero or more comma-separated expressions and // return an AST composed of A_GLUE nodes with the left-hand child // being the sub-tree of previous expressions (or NULL) and the right-hand // child being the next expression. Each A_GLUE node will have size field // set to the number of expressions in the tree at this point. 
If no // expressions are parsed, NULL is returned struct ASTnode *expression_list(int endtoken) { struct ASTnode *tree = NULL; struct ASTnode *child = NULL; int exprcount = 0; // Loop until the end token while (Token.token != endtoken) { // Parse the next expression and increment the expression count child = binexpr(0); exprcount++; // Build an A_GLUE AST node with the previous tree as the left child // and the new expression as the right child. Store the expression count. tree = mkastnode(A_GLUE, P_NONE, tree, NULL, child, NULL, exprcount); // Stop when we reach the end token if (Token.token == endtoken) break; // Must have a ',' at this point match(T_COMMA, ","); } // Return the tree of expressions return (tree); } // Parse a function call and return its AST static struct ASTnode *funccall(void) { struct ASTnode *tree; struct symtable *funcptr; // Check that the identifier has been defined as a function, // then make a leaf node for it. if ((funcptr = findsymbol(Text)) == NULL || funcptr->stype != S_FUNCTION) { fatals("Undeclared function", Text); } // Get the '(' lparen(); // Parse the argument expression list tree = expression_list(T_RPAREN); // XXX Check type of each argument against the function's prototype // Build the function call AST node. Store the // function's return type as this node's type. 
// Also record the function's symbol-id tree = mkastunary(A_FUNCCALL, funcptr->type, tree, funcptr, 0); // Get the ')' rparen(); return (tree); } // Parse the index into an array and return an AST tree for it static struct ASTnode *array_access(void) { struct ASTnode *left, *right; struct symtable *aryptr; // Check that the identifier has been defined as an array // then make a leaf node for it that points at the base if ((aryptr = findsymbol(Text)) == NULL || aryptr->stype != S_ARRAY) { fatals("Undeclared array", Text); } left = mkastleaf(A_ADDR, aryptr->type, aryptr, 0); // Get the '[' scan(&Token); // Parse the following expression right = binexpr(0); // Get the ']' match(T_RBRACKET, "]"); // Ensure that this is of int type if (!inttype(right->type)) fatal("Array index is not of integer type"); // Scale the index by the size of the element's type right = modify_type(right, left->type, A_ADD); // Return an AST tree where the array's base has the offset // added to it, and dereference the element. Still an lvalue // at this point. left = mkastnode(A_ADD, aryptr->type, left, NULL, right, NULL, 0); left = mkastunary(A_DEREF, value_at(left->type), left, NULL, 0); return (left); } // Parse the member reference of a struct or union // and return an AST tree for it. If withpointer is true, // the access is through a pointer to the member. static struct ASTnode *member_access(int withpointer) { struct ASTnode *left, *right; struct symtable *compvar; struct symtable *typeptr; struct symtable *m; // Check that the identifier has been declared as a // struct/union or a struct/union pointer if ((compvar = findsymbol(Text)) == NULL) fatals("Undeclared variable", Text); if (withpointer && compvar->type != pointer_to(P_STRUCT) && compvar->type != pointer_to(P_UNION)) fatals("Undeclared variable", Text); if (!withpointer && compvar->type != P_STRUCT && compvar->type != P_UNION) fatals("Undeclared variable", Text); // If a pointer to a struct/union, get the pointer's value. 
// Otherwise, make a leaf node that points at the base // Either way, it's an rvalue if (withpointer) { left = mkastleaf(A_IDENT, pointer_to(compvar->type), compvar, 0); } else left = mkastleaf(A_ADDR, compvar->type, compvar, 0); left->rvalue = 1; // Get the details of the composite type typeptr = compvar->ctype; // Skip the '.' or '->' token and get the member's name scan(&Token); ident(); // Find the matching member's name in the type // Die if we can't find it for (m = typeptr->member; m != NULL; m = m->next) if (!strcmp(m->name, Text)) break; if (m == NULL) fatals("No member found in struct/union: ", Text); // Build an A_INTLIT node with the offset right = mkastleaf(A_INTLIT, P_INT, NULL, m->st_posn); // Add the member's offset to the base of the struct/union // and dereference it. Still an lvalue at this point left = mkastnode(A_ADD, pointer_to(m->type), left, NULL, right, NULL, 0); left = mkastunary(A_DEREF, m->type, left, NULL, 0); return (left); } // Parse a postfix expression and return // an AST node representing it. The // identifier is already in Text. static struct ASTnode *postfix(void) { struct ASTnode *n; struct symtable *varptr; struct symtable *enumptr; // If the identifier matches an enum value, // return an A_INTLIT node if ((enumptr = findenumval(Text)) != NULL) { scan(&Token); return (mkastleaf(A_INTLIT, P_INT, NULL, enumptr->st_posn)); } // Scan in the next token to see if we have a postfix expression scan(&Token); // Function call if (Token.token == T_LPAREN) return (funccall()); // An array reference if (Token.token == T_LBRACKET) return (array_access()); // Access into a struct or union if (Token.token == T_DOT) return (member_access(0)); if (Token.token == T_ARROW) return (member_access(1)); // A variable. Check that the variable exists. 
if ((varptr = findsymbol(Text)) == NULL || varptr->stype != S_VARIABLE) fatals("Unknown variable", Text); switch (Token.token) { // Post-increment: skip over the token case T_INC: scan(&Token); n = mkastleaf(A_POSTINC, varptr->type, varptr, 0); break; // Post-decrement: skip over the token case T_DEC: scan(&Token); n = mkastleaf(A_POSTDEC, varptr->type, varptr, 0); break; // Just a variable reference default: n = mkastleaf(A_IDENT, varptr->type, varptr, 0); } return (n); } // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; int id; int type=0; switch (Token.token) { case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. // Make it a P_CHAR if it's within the P_CHAR range if (Token.intvalue >= 0 && Token.intvalue < 256) n = mkastleaf(A_INTLIT, P_CHAR, NULL, Token.intvalue); else n = mkastleaf(A_INTLIT, P_INT, NULL, Token.intvalue); break; case T_STRLIT: // For a STRLIT token, generate the assembly for it. // Then make a leaf AST node for it. id is the string's label. id = genglobstr(Text); n = mkastleaf(A_STRLIT, pointer_to(P_CHAR), NULL, id); break; case T_IDENT: return (postfix()); case T_LPAREN: // Beginning of a parenthesised expression, skip the '('. scan(&Token); // If the token after is a type identifier, this is a cast expression switch (Token.token) { case T_IDENT: // We have to see if the identifier matches a typedef. // If not, treat it as an expression. if (findtypedef(Text) == NULL) { n = binexpr(0); break; } case T_VOID: case T_CHAR: case T_INT: case T_LONG: case T_STRUCT: case T_UNION: case T_ENUM: // Get the type inside the parentheses type= parse_cast(); // Skip the closing ')' and then parse the following expression rparen(); default: n = binexpr(0); // Scan in the expression } // We now have at least an expression in n, and possibly a non-zero type in type // if there was a cast. Skip the closing ')' if there was no cast. 
if (type == 0) rparen(); else // Otherwise, make a unary AST node for the cast n= mkastunary(A_CAST, type, n, NULL, 0); return (n); default: fatals("Expecting a primary expression, got token", Token.tokstr); } // Scan in the next token and return the leaf node scan(&Token); return (n); } // Convert a binary operator token into a binary AST operation. // We rely on a 1:1 mapping from token to AST operation static int binastop(int tokentype) { if (tokentype > T_EOF && tokentype <= T_SLASH) return (tokentype); fatald("Syntax error, token", tokentype); return (0); // Keep -Wall happy } // Return true if a token is right-associative, // false otherwise. static int rightassoc(int tokentype) { if (tokentype >= T_ASSIGN && tokentype <= T_ASSLASH) return (1); return (0); } // Operator precedence for each token. Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 10, // T_EOF, T_ASSIGN, T_ASPLUS, 10, 10, 10, // T_ASMINUS, T_ASSTAR, T_ASSLASH, 20, 30, // T_LOGOR, T_LOGAND 40, 50, 60, // T_OR, T_XOR, T_AMPER 70, 70, // T_EQ, T_NE 80, 80, 80, 80, // T_LT, T_GT, T_LE, T_GE 90, 90, // T_LSHIFT, T_RSHIFT 100, 100, // T_PLUS, T_MINUS 110, 110 // T_STAR, T_SLASH }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec; if (tokentype > T_SLASH) fatald("Token with no precedence in op_precedence:", tokentype); prec = OpPrec[tokentype]; if (prec == 0) fatald("Syntax error, token", tokentype); return (prec); } // prefix_expression: primary // | '*' prefix_expression // | '&' prefix_expression // | '-' prefix_expression // | '++' prefix_expression // | '--' prefix_expression // ; // Parse a prefix expression and return // a sub-tree representing it. 
struct ASTnode *prefix(void) { struct ASTnode *tree; switch (Token.token) { case T_AMPER: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Ensure that it's an identifier if (tree->op != A_IDENT) fatal("& operator must be followed by an identifier"); // Now change the operator to A_ADDR and the type to // a pointer to the original type tree->op = A_ADDR; tree->type = pointer_to(tree->type); break; case T_STAR: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's either another deref or an // identifier if (tree->op != A_IDENT && tree->op != A_DEREF) fatal("* operator must be followed by an identifier or *"); // Prepend an A_DEREF operation to the tree tree = mkastunary(A_DEREF, value_at(tree->type), tree, NULL, 0); break; case T_MINUS: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_NEGATE operation to the tree and // make the child an rvalue. Because chars are unsigned, // also widen this to int so that it's signed tree->rvalue = 1; tree = modify_type(tree, P_INT, 0); tree = mkastunary(A_NEGATE, tree->type, tree, NULL, 0); break; case T_INVERT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_INVERT operation to the tree and // make the child an rvalue. tree->rvalue = 1; tree = mkastunary(A_INVERT, tree->type, tree, NULL, 0); break; case T_LOGNOT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_LOGNOT operation to the tree and // make the child an rvalue. 
tree->rvalue = 1; tree = mkastunary(A_LOGNOT, tree->type, tree, NULL, 0); break; case T_INC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("++ operator must be followed by an identifier"); // Prepend an A_PREINC operation to the tree tree = mkastunary(A_PREINC, tree->type, tree, NULL, 0); break; case T_DEC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("-- operator must be followed by an identifier"); // Prepend an A_PREDEC operation to the tree tree = mkastunary(A_PREDEC, tree->type, tree, NULL, 0); break; default: tree = primary(); } return (tree); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; struct ASTnode *ltemp, *rtemp; int ASTop; int tokentype; // Get the tree on the left. // Fetch the next token at the same time. 
left = prefix(); // If we hit one of several terminating tokens, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON || tokentype == T_RBRACE) { left->rvalue = 1; return (left); } // While the precedence of this token is more than that of the // previous token precedence, or it's right associative and // equal to the previous token's precedence while ((op_precedence(tokentype) > ptp) || (rightassoc(tokentype) && op_precedence(tokentype) == ptp)) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Determine the operation to be performed on the sub-trees ASTop = binastop(tokentype); if (ASTop == A_ASSIGN) { // Assignment // Make the right tree into an rvalue right->rvalue = 1; // Ensure the right's type matches the left right = modify_type(right, left->type, 0); if (right == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree. However, switch // left and right around, so that the right expression's // code will be generated before the left expression ltemp = left; left = right; right = ltemp; } else { // We are not doing an assignment, so both trees should be rvalues // Convert both trees into rvalue if they are lvalue trees left->rvalue = 1; right->rvalue = 1; // Ensure the two types are compatible by trying // to modify each tree to match the other's type. ltemp = modify_type(left, right->type, ASTop); rtemp = modify_type(right, left->type, ASTop); if (ltemp == NULL && rtemp == NULL) fatal("Incompatible types in binary expression"); if (ltemp != NULL) left = ltemp; if (rtemp != NULL) right = rtemp; } // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. 
left = mkastnode(binastop(tokentype), left->type, left, NULL, right, NULL, 0); // Update the details of the current token. // If we hit a terminating token, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON || tokentype == T_RBRACE) { left->rvalue = 1; return (left); } } // Return the tree we have when the precedence // is the same or lower left->rvalue = 1; return (left); } ================================================ FILE: 46_Void_Functions/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number int genlabel(void) { static int id = 1; return (id++); } // Generate the code for an IF statement // and an optional ELSE clause static int genIF(struct ASTnode *n, int looptoplabel, int loopendlabel) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. // When there is no ELSE clause, Lfalse _is_ // the ending label! Lfalse = genlabel(); if (n->right) Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. 
genAST(n->left, Lfalse, NOLABEL, NOLABEL, n->op); genfreeregs(); // Generate the true compound statement genAST(n->mid, NOLABEL, looptoplabel, loopendlabel, n->op); genfreeregs(); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); genfreeregs(); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = genlabel(); Lend = genlabel(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. genAST(n->left, Lend, Lstart, Lend, n->op); genfreeregs(); // Generate the compound statement for the body genAST(n->right, NOLABEL, Lstart, Lend, n->op); genfreeregs(); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Generate the code for a SWITCH statement static int genSWITCH(struct ASTnode *n) { int *caseval, *caselabel; int Ljumptop, Lend; int i, reg, defaultlabel = 0, casecount = 0; struct ASTnode *c; // Create arrays for the case values and associated labels. // Ensure that we have at least one position in each array. caseval = (int *) malloc((n->a_intvalue + 1) * sizeof(int)); caselabel = (int *) malloc((n->a_intvalue + 1) * sizeof(int)); // Generate labels for the top of the jump table, and the // end of the switch statement. Set a default label for // the end of the switch, in case we don't have a default. 
Ljumptop = genlabel(); Lend = genlabel(); defaultlabel = Lend; // Output the code to calculate the switch condition reg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, 0); cgjump(Ljumptop); genfreeregs(); // Walk the right-child linked list to // generate the code for each case for (i = 0, c = n->right; c != NULL; i++, c = c->right) { // Get a label for this case. Store it // and the case value in the arrays. // Record if it is the default case. caselabel[i] = genlabel(); caseval[i] = c->a_intvalue; cglabel(caselabel[i]); if (c->op == A_DEFAULT) defaultlabel = caselabel[i]; else casecount++; // Generate the case code. Pass in the end label for the breaks genAST(c->left, NOLABEL, NOLABEL, Lend, 0); genfreeregs(); } // Ensure the last case jumps past the switch table cgjump(Lend); // Now output the switch table and the end label. cgswitch(reg, casecount, Ljumptop, caselabel, caseval, defaultlabel); cglabel(Lend); return (NOREG); } // Generate the code to copy the arguments of a // function call to its parameters, then call the // function itself. Return the register that holds // the function's return value. static int gen_funccall(struct ASTnode *n) { struct ASTnode *gluetree = n->left; int reg; int numargs = 0; // If there is a list of arguments, walk this list // from the last argument (right-hand child) to the // first while (gluetree) { // Calculate the expression's value reg = genAST(gluetree->right, NOLABEL, NOLABEL, NOLABEL, gluetree->op); // Copy this into the n'th function parameter: size is 1, 2, 3, ... cgcopyarg(reg, gluetree->a_size); // Keep the first (highest) number of arguments if (numargs == 0) numargs = gluetree->a_size; genfreeregs(); gluetree = gluetree->left; } // Call the function, clean up the stack (based on numargs), // and return its result return (cgcall(n->sym, numargs)); } // Given an AST, an optional label, and the AST op // of the parent, generate assembly code recursively. // Return the register id with the tree's final value. 
int genAST(struct ASTnode *n, int iflabel, int looptoplabel, int loopendlabel, int parentASTop) { int leftreg, rightreg; // We have some specific AST node handling at the top // so that we don't evaluate the child sub-trees immediately switch (n->op) { case A_IF: return (genIF(n, looptoplabel, loopendlabel)); case A_WHILE: return (genWHILE(n)); case A_SWITCH: return (genSWITCH(n)); case A_FUNCCALL: return (gen_funccall(n)); case A_GLUE: // Do each child statement, and free the // registers after each child if (n->left != NULL) genAST(n->left, iflabel, looptoplabel, loopendlabel, n->op); genfreeregs(); if (n->right != NULL) genAST(n->right, iflabel, looptoplabel, loopendlabel, n->op); genfreeregs(); return (NOREG); case A_FUNCTION: // Generate the function's preamble before the code // in the child sub-tree cgfuncpreamble(n->sym); genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op); cgfuncpostamble(n->sym); return (NOREG); } // General AST node handling below // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op); if (n->right) rightreg = genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdiv(leftreg, rightreg)); case A_AND: return (cgand(leftreg, rightreg)); case A_OR: return (cgor(leftreg, rightreg)); case A_XOR: return (cgxor(leftreg, rightreg)); case A_LSHIFT: return (cgshl(leftreg, rightreg)); case A_RSHIFT: return (cgshr(leftreg, rightreg)); case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, compare registers // and set one to 1 or 0 based on the comparison. 
if (parentASTop == A_IF || parentASTop == A_WHILE) return (cgcompare_and_jump(n->op, leftreg, rightreg, iflabel)); else return (cgcompare_and_set(n->op, leftreg, rightreg)); case A_INTLIT: return (cgloadint(n->a_intvalue, n->type)); case A_STRLIT: return (cgloadglobstr(n->a_intvalue)); case A_IDENT: // Load our value if we are an rvalue // or we are being dereferenced if (n->rvalue || parentASTop == A_DEREF) { if (n->sym->class == C_GLOBAL) { return (cgloadglob(n->sym, n->op)); } else { return (cgloadlocal(n->sym, n->op)); } } else return (NOREG); case A_ASPLUS: case A_ASMINUS: case A_ASSTAR: case A_ASSLASH: case A_ASSIGN: // For the '+=' and friends operators, generate suitable code // and get the register with the result. Then take the left child, // make it the right child so that we can fall into the assignment code. switch (n->op) { case A_ASPLUS: leftreg= cgadd(leftreg, rightreg); n->right= n->left; break; case A_ASMINUS: leftreg= cgsub(leftreg, rightreg); n->right= n->left; break; case A_ASSTAR: leftreg= cgmul(leftreg, rightreg); n->right= n->left; break; case A_ASSLASH: leftreg= cgdiv(leftreg, rightreg); n->right= n->left; break; } // Now into the assignment code // Are we assigning to an identifier or through a pointer? 
switch (n->right->op) { case A_IDENT: if (n->right->sym->class == C_GLOBAL) return (cgstorglob(leftreg, n->right->sym)); else return (cgstorlocal(leftreg, n->right->sym)); case A_DEREF: return (cgstorderef(leftreg, rightreg, n->right->type)); default: fatald("Can't A_ASSIGN in genAST(), op", n->op); } case A_WIDEN: // Widen the child's type to the parent's type return (cgwiden(leftreg, n->left->type, n->type)); case A_RETURN: cgreturn(leftreg, Functionid); return (NOREG); case A_ADDR: return (cgaddress(n->sym)); case A_DEREF: // If we are an rvalue, dereference to get the value we point at, // otherwise leave it for A_ASSIGN to store through the pointer if (n->rvalue) return (cgderef(leftreg, n->left->type)); else return (leftreg); case A_SCALE: // Small optimisation: use shift if the // scale value is a known power of two switch (n->a_size) { case 2: return (cgshlconst(leftreg, 1)); case 4: return (cgshlconst(leftreg, 2)); case 8: return (cgshlconst(leftreg, 3)); default: // Load a register with the size and // multiply the leftreg by this size rightreg = cgloadint(n->a_size, P_INT); return (cgmul(leftreg, rightreg)); } case A_POSTINC: case A_POSTDEC: // Load and decrement the variable's value into a register // and post increment/decrement it if (n->sym->class == C_GLOBAL) return (cgloadglob(n->sym, n->op)); else return (cgloadlocal(n->sym, n->op)); case A_PREINC: case A_PREDEC: // Load and decrement the variable's value into a register // and pre increment/decrement it if (n->left->sym->class == C_GLOBAL) return (cgloadglob(n->left->sym, n->op)); else return (cgloadlocal(n->left->sym, n->op)); case A_NEGATE: return (cgnegate(leftreg)); case A_INVERT: return (cginvert(leftreg)); case A_LOGNOT: return (cglognot(leftreg)); case A_TOBOOL: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. 
Otherwise, set the register // to 0 or 1 based on it's zeroeness or non-zeroeness return (cgboolean(leftreg, parentASTop, iflabel)); case A_BREAK: cgjump(loopendlabel); return (NOREG); case A_CONTINUE: cgjump(looptoplabel); return (NOREG); case A_CAST: return (leftreg); // Not much to do default: fatald("Unknown AST operator", n->op); } return (NOREG); // Keep -Wall happy } void genpreamble() { cgpreamble(); } void genpostamble() { cgpostamble(); } void genfreeregs() { freeall_registers(); } void genglobsym(struct symtable *node) { cgglobsym(node); } int genglobstr(char *strvalue) { int l = genlabel(); cgglobstr(l, strvalue); return (l); } int genprimsize(int type) { return (cgprimsize(type)); } int genalign(int type, int offset, int direction) { return (cgalign(type, offset, direction)); } ================================================ FILE: 46_Void_Functions/include/ctype.h ================================================ #ifndef _CTYPE_H_ # define _CTYPE_H_ int isalnum(int c); int isalpha(int c); int iscntrl(int c); int isdigit(int c); int isgraph(int c); int islower(int c); int isprint(int c); int ispunct(int c); int isspace(int c); int isupper(int c); int isxdigit(int c); int isascii(int c); int isblank(int c); #endif // _CTYPE_H_ ================================================ FILE: 46_Void_Functions/include/errno.h ================================================ #ifndef _ERRNO_H_ # define _ERRNO_H_ #endif // _ERRNO_H_ ================================================ FILE: 46_Void_Functions/include/fcntl.h ================================================ #ifndef _FCNTL_H_ # define _FCNTL_H_ #define O_RDONLY 00 #define O_WRONLY 01 #define O_RDWR 02 int open(char *pathname, int flags); #endif // _FCNTL_H_ ================================================ FILE: 46_Void_Functions/include/stddef.h ================================================ #ifndef _STDDEF_H_ # define _STDDEF_H_ #ifndef NULL # define NULL (void *)0 #endif typedef long size_t; #endif 
//_STDDEF_H_ ================================================ FILE: 46_Void_Functions/include/stdio.h ================================================ #ifndef _STDIO_H_ # define _STDIO_H_ #include #ifndef NULL # define NULL (void *)0 #endif // This FILE definition will do for now typedef char * FILE; FILE *fopen(char *pathname, char *mode); size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream); size_t fwrite(void *ptr, size_t size, size_t nmemb, FILE *stream); int fclose(FILE *stream); int printf(char *format); int fprintf(FILE *stream, char *format); int fputc(int c, FILE *stream); int fputs(char *s, FILE *stream); int putc(int c, FILE *stream); int putchar(int c); int puts(char *s); extern FILE *stdin; extern FILE *stdout; extern FILE *stderr; #endif // _STDIO_H_ ================================================ FILE: 46_Void_Functions/include/stdlib.h ================================================ #ifndef _STDLIB_H_ # define _STDLIB_H_ void exit(int status); void _Exit(int status); void *malloc(int size); void free(void *ptr); void *calloc(int nmemb, int size); void *realloc(void *ptr, int size); #endif // _STDLIB_H_ ================================================ FILE: 46_Void_Functions/include/string.h ================================================ #ifndef _STRING_H_ # define _STRING_H_ char *strdup(char *s); char *strchr(char *s, int c); char *strrchr(char *s, int c); #endif // _STRING_H_ ================================================ FILE: 46_Void_Functions/include/unistd.h ================================================ #ifndef _UNISTD_H_ # define _UNISTD_H_ void _exit(int status); int unlink(char *pathname); #endif // _UNISTD_H_ ================================================ FILE: 46_Void_Functions/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 
// Given a string with a '.' and at least a 1-character suffix // after the '.', change the suffix to be the given character. // Return the new string or NULL if the original string could // not be modified char *alter_suffix(char *str, char suffix) { char *posn; char *newstr; // Clone the string if ((newstr = strdup(str)) == NULL) return (NULL); // Find the '.' if ((posn = strrchr(newstr, '.')) == NULL) return (NULL); // Ensure there is a suffix posn++; if (*posn == '\0') return (NULL); // Change the suffix and NUL-terminate the string *posn = suffix; posn++; *posn = '\0'; return (newstr); } // Given an input filename, compile that file // down to assembly code. Return the new file's name static char *do_compile(char *filename) { char cmd[TEXTLEN]; // Change the input file's suffix to .s Outfilename = alter_suffix(filename, 's'); if (Outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .c on the end\n", filename); exit(1); } // Generate the pre-processor command snprintf(cmd, TEXTLEN, "%s %s %s", CPPCMD, INCDIR, filename); // Open up the pre-processor pipe if ((Infile = popen(cmd, "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", filename, strerror(errno)); exit(1); } Infilename = filename; // Create the output file if ((Outfile = fopen(Outfilename, "w")) == NULL) { fprintf(stderr, "Unable to create %s: %s\n", Outfilename, strerror(errno)); exit(1); } Line = 1; // Reset the scanner Putback = '\n'; clear_symtable(); // Clear the symbol table if (O_verbose) printf("compiling %s\n", filename); scan(&Token); // Get the first token from the input Peektoken.token= 0; // and set there is no lookahead token genpreamble(); // Output the preamble global_declarations(); // Parse the global declarations genpostamble(); // Output the postamble fclose(Outfile); // Close the output file return (Outfilename); } // Given an input filename, assemble that file // down to object code. 
Return the object filename char *do_assemble(char *filename) { char cmd[TEXTLEN]; int err; char *outfilename = alter_suffix(filename, 'o'); if (outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .s on the end\n", filename); exit(1); } // Build the assembly command and run it snprintf(cmd, TEXTLEN, "%s %s %s", ASCMD, outfilename, filename); if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Assembly of %s failed\n", filename); exit(1); } return (outfilename); } // Given a list of object files and an output filename, // link all of the object filenames together. void do_link(char *outfilename, char *objlist[]) { int cnt, size = TEXTLEN; char cmd[TEXTLEN], *cptr; int err; // Start with the linker command and the output file cptr = cmd; cnt = snprintf(cptr, size, "%s %s ", LDCMD, outfilename); cptr += cnt; size -= cnt; // Now append each object file while (*objlist != NULL) { cnt = snprintf(cptr, size, "%s ", *objlist); cptr += cnt; size -= cnt; objlist++; } if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Linking failed\n"); exit(1); } } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s [-vcST] [-o outfile] file [file ...]\n", prog); fprintf(stderr, " -v give verbose output of the compilation stages\n"); fprintf(stderr, " -c generate object files but don't link them\n"); fprintf(stderr, " -S generate assembly files but don't link them\n"); fprintf(stderr, " -T dump the AST trees for each input file\n"); fprintf(stderr, " -o outfile, produce the outfile executable file\n"); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. 
enum { MAXOBJ = 100 }; int main(int argc, char *argv[]) { char *outfilename = AOUT; char *asmfile, *objfile; char *objlist[MAXOBJ]; int i, objcnt = 0; // Initialise our variables O_dumpAST = 0; O_keepasm = 0; O_assemble = 0; O_verbose = 0; O_dolink = 1; // Scan for command-line options for (i = 1; i < argc; i++) { // No leading '-', stop scanning for options if (*argv[i] != '-') break; // For each option in this argument for (int j = 1; (*argv[i] == '-') && argv[i][j]; j++) { switch (argv[i][j]) { case 'o': outfilename = argv[++i]; // Save & skip to next argument break; case 'T': O_dumpAST = 1; break; case 'c': O_assemble = 1; O_keepasm = 0; O_dolink = 0; break; case 'S': O_keepasm = 1; O_assemble = 0; O_dolink = 0; break; case 'v': O_verbose = 1; break; default: usage(argv[0]); } } } // Ensure we have at lease one input file argument if (i >= argc) usage(argv[0]); // Work on each input file in turn while (i < argc) { asmfile = do_compile(argv[i]); // Compile the source file if (O_dolink || O_assemble) { objfile = do_assemble(asmfile); // Assemble it to object forma if (objcnt == (MAXOBJ - 2)) { fprintf(stderr, "Too many object files for the compiler to handle\n"); exit(1); } objlist[objcnt++] = objfile; // Add the object file's name objlist[objcnt] = NULL; // to the list of object files } if (!O_keepasm) // Remove the assembly file if unlink(asmfile); // we don't need to keep it i++; } // Now link all the object files together if (O_dolink) { do_link(outfilename, objlist); // If we don't need to keep the object // files, then remove them if (!O_assemble) { for (i = 0; objlist[i] != NULL; i++) unlink(objlist[i]); } } return (0); } ================================================ FILE: 46_Void_Functions/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" #include #include // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current token is t, // and fetch the next token. 
Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Match a comma and fetch the next token void comma(void) { match(T_COMMA, "comma"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d of %s\n", s, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d of %s\n", s1, s2, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d of %s\n", s, d, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d of %s\n", s, c, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } ================================================ FILE: 46_Void_Functions/opt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST Tree Optimisation Code // Copyright (c) 2019 Warren Toomey, GPL3 // Fold an AST tree with a binary operator // and two A_INTLIT children. Return either // the original tree or a new leaf node. static struct ASTnode *fold2(struct ASTnode *n) { int val, leftval, rightval; // Get the values from each child leftval = n->left->a_intvalue; rightval = n->right->a_intvalue; // Perform some of the binary operations. 
// For any AST op we can't do, return // the original tree. switch (n->op) { case A_ADD: val = leftval + rightval; break; case A_SUBTRACT: val = leftval - rightval; break; case A_MULTIPLY: val = leftval * rightval; break; case A_DIVIDE: // Don't try to divide by zero. if (rightval == 0) return (n); val = leftval / rightval; break; default: return (n); } // Return a leaf node with the new value return (mkastleaf(A_INTLIT, n->type, NULL, val)); } // Fold an AST tree with a unary operator // and one INTLIT children. Return either // the original tree or a new leaf node. static struct ASTnode *fold1(struct ASTnode *n) { int val; // Get the child value. Do the // operation if recognised. // Return the new leaf node. val = n->left->a_intvalue; switch (n->op) { case A_WIDEN: break; case A_INVERT: val = ~val; break; case A_LOGNOT: val = !val; break; default: return (n); } // Return a leaf node with the new value return (mkastleaf(A_INTLIT, n->type, NULL, val)); } // Attempt to do constant folding on // the AST tree with the root node n static struct ASTnode *fold(struct ASTnode *n) { if (n == NULL) return (NULL); // Fold on the left child, then // do the same on the right child n->left = fold(n->left); n->right = fold(n->right); // If both children are A_INTLITs, do a fold2() if (n->left && n->left->op == A_INTLIT) { if (n->right && n->right->op == A_INTLIT) n = fold2(n); else // If only the left is A_INTLIT, do a fold1() n = fold1(n); } // Return the possibly modified tree return (n); } // Optimise an AST tree by // constant folding in all sub-trees struct ASTnode *optimise(struct ASTnode *n) { n = fold(n); return (n); } ================================================ FILE: 46_Void_Functions/scan.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { 
char *p; p = strchr(s, c); return (p ? p - s : -1); } // Get the next character from the input file. static int next(void) { int c, l; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return (c); } c = fgetc(Infile); // Read from input file while (c == '#') { // We've hit a pre-processor statement scan(&Token); // Get the line number into l if (Token.token != T_INTLIT) fatals("Expecting pre-processor line number, got:", Text); l = Token.intvalue; scan(&Token); // Get the filename in Text if (Token.token != T_STRLIT) fatals("Expecting pre-processor file name, got:", Text); if (Text[0] != '<') { // If this is a real filename if (strcmp(Text, Infilename)) // and not the one we have now Infilename = strdup(Text); // save it. Then update the line num Line = l; } while ((c = fgetc(Infile)) != '\n'); // Skip to the end of the line c = fgetc(Infile); // and get the next character } if ('\n' == c) Line++; // Increment line count return (c); } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. 
static int skip(void) {
  int ch;

  // Keep reading until the character is not a
  // space, tab, newline, carriage return or form feed
  do {
    ch = next();
  } while (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' || ch == '\f');

  return (ch);
}

// Read in a hexadecimal constant from the input.
// Digits are consumed until a non-hex character is seen, and that
// character is put back. It is a fatal error if there are no digits
// or if the value is above 255.
static int hexchar(void) {
  int ch;
  int value = 0;
  int ndigits = 0;

  // Fold each hex digit into the running value
  for (ch = next(); isxdigit(ch); ch = next()) {
    value = value * 16 + chrpos("0123456789abcdef", tolower(ch));
    ndigits++;
  }

  // The character that ended the loop is not ours
  putback(ch);

  if (ndigits == 0)
    fatal("missing digits after '\\x'");
  if (value > 255)
    fatal("value out of range after '\\x'");
  return value;
}

// Return the next character from a character
// or string literal. A backslash escape is returned
// as the single character value that it stands for.
static int scanch(void) {
  int ch, octval, ndigits;

  // Anything that isn't a backslash is just an ordinary old character
  ch = next();
  if (ch != '\\')
    return (ch);

  // Interpret the character that follows the backslash
  ch = next();
  switch (ch) {
    case 'a':
      return ('\a');
    case 'b':
      return ('\b');
    case 'f':
      return ('\f');
    case 'n':
      return ('\n');
    case 'r':
      return ('\r');
    case 't':
      return ('\t');
    case 'v':
      return ('\v');
    case '\\':
      return ('\\');
    case '"':
      return ('"');
    case '\'':
      return ('\'');

    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
      // An octal constant: take at most three octal digits
      // and build up their value in octval
      octval = 0;
      ndigits = 0;
      while (isdigit(ch) && ch < '8' && ndigits < 3) {
	octval = octval * 8 + (ch - '0');
	ndigits++;
	ch = next();
      }
      putback(ch);		// Put back the first character not used
      return (octval);

    case 'x':
      return hexchar();

    default:
      fatalc("unknown escape sequence", ch);
  }
  return (ch);
}

// Scan and return an integer literal
// value from the input file.
static int scanint(int c) { int k, val = 0, radix = 10; // Assume the radix is 10, but if it starts with 0 if (c == '0') { // and the next character is 'x', it's radix 16 if ((c = next()) == 'x') { radix = 16; c = next(); } else // Otherwise, it's radix 8 radix = 8; } // Convert each character into an int value while ((k = chrpos("0123456789abcdef", tolower(c))) >= 0) { if (k >= radix) fatalc("invalid digit in integer literal", c); val = val * radix + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan in a string literal from the input file, // and store it in buf[]. Return the length of // the string. static int scanstr(char *buf) { int i, c; // Loop while we have enough buffer space for (i = 0; i < TEXTLEN - 1; i++) { // Get the next char and append to buf // Return when we hit the ending double quote if ((c = scanch()) == '"') { buf[i] = 0; return (i); } buf[i] = c; } // Ran out of buf[] space fatal("String literal too long"); return (0); } // Scan an identifier from the input file and // store it in buf[]. Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { fatal("Identifier too long"); } else if (i < lim - 1) { buf[i++] = c; } c = next(); } // We hit a non-valid character, put it back. // NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. 
static int keyword(char *s) { switch (*s) { case 'b': if (!strcmp(s, "break")) return (T_BREAK); break; case 'c': if (!strcmp(s, "case")) return (T_CASE); if (!strcmp(s, "char")) return (T_CHAR); if (!strcmp(s, "continue")) return (T_CONTINUE); break; case 'd': if (!strcmp(s, "default")) return (T_DEFAULT); break; case 'e': if (!strcmp(s, "else")) return (T_ELSE); if (!strcmp(s, "enum")) return (T_ENUM); if (!strcmp(s, "extern")) return (T_EXTERN); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'l': if (!strcmp(s, "long")) return (T_LONG); break; case 'r': if (!strcmp(s, "return")) return (T_RETURN); break; case 's': if (!strcmp(s, "struct")) return (T_STRUCT); if (!strcmp(s, "switch")) return (T_SWITCH); break; case 't': if (!strcmp(s, "typedef")) return (T_TYPEDEF); break; case 'u': if (!strcmp(s, "union")) return (T_UNION); break; case 'v': if (!strcmp(s, "void")) return (T_VOID); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; } return (0); } // List of token strings, for debugging purposes char *Tstring[] = { "EOF", "=", "+=", "-=", "*=", "/=", "||", "&&", "|", "^", "&", "==", "!=", ",", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", "default", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":" }; // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. 
int scan(struct token *t) { int c, tokentype; // If we have a lookahead token, return this token if (Peektoken.token != 0) { t->token = Peektoken.token; t->tokstr = Peektoken.tokstr; t->intvalue = Peektoken.intvalue; Peektoken.token = 0; return (1); } // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': if ((c = next()) == '+') { t->token = T_INC; } else if (c == '=') { t->token = T_ASPLUS; } else { putback(c); t->token = T_PLUS; } break; case '-': if ((c = next()) == '-') { t->token = T_DEC; } else if (c == '>') { t->token = T_ARROW; } else if (c == '=') { t->token = T_ASMINUS; } else if (isdigit(c)) { // Negative int literal t->intvalue = -scanint(c); t->token = T_INTLIT; } else { putback(c); t->token = T_MINUS; } break; case '*': if ((c = next()) == '=') { t->token = T_ASSTAR; } else { putback(c); t->token = T_STAR; } break; case '/': if ((c = next()) == '=') { t->token = T_ASSLASH; } else { putback(c); t->token = T_SLASH; } break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case '[': t->token = T_LBRACKET; break; case ']': t->token = T_RBRACKET; break; case '~': t->token = T_INVERT; break; case '^': t->token = T_XOR; break; case ',': t->token = T_COMMA; break; case '.': t->token = T_DOT; break; case ':': t->token = T_COLON; break; case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { putback(c); t->token = T_LOGNOT; } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else if (c == '<') { t->token = T_LSHIFT; } else { putback(c); t->token = T_LT; } break; case '>': if ((c = next()) == '=') { t->token = T_GE; } else if (c == '>') { t->token = T_RSHIFT; } else { putback(c); t->token = T_GT; } break; case 
'&': if ((c = next()) == '&') { t->token = T_LOGAND; } else { putback(c); t->token = T_AMPER; } break; case '|': if ((c = next()) == '|') { t->token = T_LOGOR; } else { putback(c); t->token = T_OR; } break; case '\'': // If it's a quote, scan in the // literal character value and // the trailing quote t->intvalue = scanch(); t->token = T_INTLIT; if (next() != '\'') fatal("Expected '\\'' at end of char literal"); break; case '"': // Scan in a literal string scanstr(Text); t->token = T_STRLIT; break; default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if ((tokentype = keyword(Text)) != 0) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token t->tokstr = Tstring[t->token]; return (1); } ================================================ FILE: 46_Void_Functions/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // Prototypes static struct ASTnode *single_statement(void); // compound_statement: // empty, i.e. 
no statement // | statement // | statement statements // ; // // statement: declaration // | expression_statement // | function_call // | if_statement // | while_statement // | for_statement // | return_statement // ; // if_statement: if_head // | if_head 'else' statement // ; // // if_head: 'if' '(' true_false_expression ')' statement ; // // Parse an IF statement including any // optional ELSE clause and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); rparen(); // Get the AST for the statement trueAST = single_statement(); // If we have an 'else', skip it // and get the AST for the statement if (Token.token == T_ELSE) { scan(&Token); falseAST = single_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, P_NONE, condAST, trueAST, falseAST, NULL, 0)); } // while_statement: 'while' '(' true_false_expression ')' statement ; // // Parse a WHILE statement and return its AST static struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); rparen(); // Get the AST for the statement. 
// Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Build and return the AST for this statement return (mkastnode(A_WHILE, P_NONE, condAST, NULL, bodyAST, NULL, 0)); } // for_statement: 'for' '(' expression_list ';' // true_false_expression ';' // expression_list ')' statement ; // // Parse a FOR statement and return its AST static struct ASTnode *for_statement(void) { struct ASTnode *condAST, *bodyAST; struct ASTnode *preopAST, *postopAST; struct ASTnode *tree; // Ensure we have 'for' '(' match(T_FOR, "for"); lparen(); // Get the pre_op expression and the ';' preopAST = expression_list(T_SEMI); semi(); // Get the condition and the ';'. // Force a non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); semi(); // Get the post_op expression and the ')' postopAST = expression_list(T_RPAREN); rparen(); // Get the statement which is the body // Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Glue the statement and the postop tree tree = mkastnode(A_GLUE, P_NONE, bodyAST, NULL, postopAST, NULL, 0); // Make a WHILE loop with the condition and this new body tree = mkastnode(A_WHILE, P_NONE, condAST, NULL, tree, NULL, 0); // And glue the preop tree to the A_WHILE tree return (mkastnode(A_GLUE, P_NONE, preopAST, NULL, tree, NULL, 0)); } // return_statement: 'return' '(' expression ')' ; // // Parse a return statement and return its AST static struct ASTnode *return_statement(void) { struct ASTnode *tree; // Can't return a value if function returns P_VOID if (Functionid->type == P_VOID) fatal("Can't return from a void function"); // Ensure we have 'return' '(' match(T_RETURN, "return"); lparen(); // Parse the following expression tree = binexpr(0); // Ensure this is compatible with the function's type tree = modify_type(tree, 
Functionid->type, 0); if (tree == NULL) fatal("Incompatible type to return"); // Add on the A_RETURN node tree = mkastunary(A_RETURN, P_NONE, tree, NULL, 0); // Get the ')' and ';' rparen(); semi(); return (tree); } // break_statement: 'break' ; // // Parse a break statement and return its AST static struct ASTnode *break_statement(void) { if (Looplevel == 0 && Switchlevel == 0) fatal("no loop or switch to break out from"); scan(&Token); semi(); return (mkastleaf(A_BREAK, 0, NULL, 0)); } // continue_statement: 'continue' ; // // Parse a continue statement and return its AST static struct ASTnode *continue_statement(void) { if (Looplevel == 0) fatal("no loop to continue to"); scan(&Token); semi(); return (mkastleaf(A_CONTINUE, 0, NULL, 0)); } // Parse a switch statement and return its AST static struct ASTnode *switch_statement(void) { struct ASTnode *left, *n, *c, *casetree= NULL, *casetail; int inloop=1, casecount=0; int seendefault=0; int ASTop, casevalue; // Skip the 'switch' and '(' scan(&Token); lparen(); // Get the switch expression, the ')' and the '{' left= binexpr(0); rparen(); lbrace(); // Ensure that this is of int type if (!inttype(left->type)) fatal("Switch expression is not of integer type"); // Build an A_SWITCH subtree with the expression as // the child n= mkastunary(A_SWITCH, 0, left, NULL, 0); // Now parse the cases Switchlevel++; while (inloop) { switch(Token.token) { // Leave the loop when we hit a '}' case T_RBRACE: if (casecount==0) fatal("No cases in switch"); inloop=0; break; case T_CASE: case T_DEFAULT: // Ensure this isn't after a previous 'default' if (seendefault) fatal("case or default after existing default"); // Set the AST operation. 
Scan the case value if required if (Token.token==T_DEFAULT) { ASTop= A_DEFAULT; seendefault= 1; scan(&Token); } else { ASTop= A_CASE; scan(&Token); left= binexpr(0); // Ensure the case value is an integer literal if (left->op != A_INTLIT) fatal("Expecting integer literal for case value"); casevalue= left->a_intvalue; // Walk the list of existing case values to ensure // that there isn't a duplicate case value for (c= casetree; c != NULL; c= c -> right) if (casevalue == c->a_intvalue) fatal("Duplicate case value"); } // Scan the ':' and get the compound expression match(T_COLON, ":"); left= compound_statement(1); casecount++; // Build a sub-tree with the compound statement as the left child // and link it in to the growing A_CASE tree if (casetree==NULL) { casetree= casetail= mkastunary(ASTop, 0, left, NULL, casevalue); } else { casetail->right= mkastunary(ASTop, 0, left, NULL, casevalue); casetail= casetail->right; } break; default: fatals("Unexpected token in switch", Token.tokstr); } } Switchlevel--; // We have a sub-tree with the cases and any default. Put the // case count into the A_SWITCH node and attach the case tree. n->a_intvalue= casecount; n->right= casetree; rbrace(); return(n); } // Parse a single statement and return its AST. static struct ASTnode *single_statement(void) { struct ASTnode *stmt; struct symtable *ctype; switch (Token.token) { case T_LBRACE: // We have a '{', so this is a compound statement lbrace(); stmt = compound_statement(0); rbrace(); return(stmt); case T_IDENT: // We have to see if the identifier matches a typedef. // If not, treat it as an expression. // Otherwise, fall down to the parse_type() call. if (findtypedef(Text) == NULL) { stmt= binexpr(0); semi(); return(stmt); } case T_CHAR: case T_INT: case T_LONG: case T_STRUCT: case T_UNION: case T_ENUM: case T_TYPEDEF: // The beginning of a variable declaration list. 
declaration_list(&ctype, C_LOCAL, T_SEMI, T_EOF, &stmt); semi(); return (stmt); // Any assignments from the declarations case T_IF: return (if_statement()); case T_WHILE: return (while_statement()); case T_FOR: return (for_statement()); case T_RETURN: return (return_statement()); case T_BREAK: return (break_statement()); case T_CONTINUE: return (continue_statement()); case T_SWITCH: return (switch_statement()); default: // For now, see if this is an expression. // This catches assignment statements. stmt= binexpr(0); semi(); return(stmt); } return (NULL); // Keep -Wall happy } // Parse a compound statement // and return its AST. If inswitch is true, // we look for a '}', 'case' or 'default' token // to end the parsing. Otherwise, look for // just a '}' to end the parsing. struct ASTnode *compound_statement(int inswitch) { struct ASTnode *left = NULL; struct ASTnode *tree; while (1) { // Parse a single statement tree = single_statement(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, P_NONE, left, NULL, tree, NULL, 0); } // Leave if we've hit the end token if (Token.token == T_RBRACE) return(left); if (inswitch && (Token.token == T_CASE || Token.token == T_DEFAULT)) return(left); } } ================================================ FILE: 46_Void_Functions/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 // Append a node to the singly-linked list pointed to by head or tail void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node) { // Check for valid pointers if (head == NULL || tail == NULL || node == NULL) fatal("Either head, tail or node is NULL in appendsym"); // Append to the list if (*tail) { (*tail)->next = node; *tail = node; } else *head = *tail = node; 
node->next = NULL; } // Create a symbol node to be added to a symbol table list. // Set up the node's: // + type: char, int etc. // + ctype: composite type pointer for struct/union // + structural type: var, function, array etc. // + size: number of elements, or endlabel: end label for a function // + posn: Position information for local symbols // Return a pointer to the new node struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn) { // Get a new node struct symtable *node = (struct symtable *) malloc(sizeof(struct symtable)); if (node == NULL) fatal("Unable to malloc a symbol table node in newsym"); // Fill in the values if (name == NULL) node->name = NULL; else node->name = strdup(name); node->type = type; node->ctype = ctype; node->stype = stype; node->class = class; node->nelems = nelems; // For pointers and integer types, set the size // of the symbol. structs and union declarations // manually set this up themselves. if (ptrtype(type) || inttype(type)) node->size = nelems * typesize(type, ctype); node->st_posn = posn; node->next = NULL; node->member = NULL; node->initlist = NULL; return (node); } // Add a symbol to the global symbol list struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn) { struct symtable *sym = newsym(name, type, ctype, stype, class, nelems, posn); // For structs and unions, copy the size from the type node if (type== P_STRUCT || type== P_UNION) sym->size = ctype->size; appendsym(&Globhead, &Globtail, sym); return (sym); } // Add a symbol to the local symbol list struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int nelems) { struct symtable *sym = newsym(name, type, ctype, stype, C_LOCAL, nelems, 0); // For structs and unions, copy the size from the type node if (type== P_STRUCT || type== P_UNION) sym->size = ctype->size; appendsym(&Loclhead, &Locltail, sym); return (sym); } // Add a 
symbol to the parameter list struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype) { struct symtable *sym = newsym(name, type, ctype, stype, C_PARAM, 1, 0); appendsym(&Parmhead, &Parmtail, sym); return (sym); } // Add a symbol to the temporary member list struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int nelems) { struct symtable *sym = newsym(name, type, ctype, stype, C_MEMBER, nelems, 0); // For structs and unions, copy the size from the type node if (type== P_STRUCT || type== P_UNION) sym->size = ctype->size; appendsym(&Membhead, &Membtail, sym); return (sym); } // Add a struct to the struct list struct symtable *addstruct(char *name) { struct symtable *sym = newsym(name, P_STRUCT, NULL, 0, C_STRUCT, 0, 0); appendsym(&Structhead, &Structtail, sym); return (sym); } // Add a struct to the union list struct symtable *addunion(char *name) { struct symtable *sym = newsym(name, P_UNION, NULL, 0, C_UNION, 0, 0); appendsym(&Unionhead, &Uniontail, sym); return (sym); } // Add an enum type or value to the enum list. // Class is C_ENUMTYPE or C_ENUMVAL. // Use posn to store the int value. struct symtable *addenum(char *name, int class, int value) { struct symtable *sym = newsym(name, P_INT, NULL, 0, class, 0, value); appendsym(&Enumhead, &Enumtail, sym); return (sym); } // Add a typedef to the typedef list struct symtable *addtypedef(char *name, int type, struct symtable *ctype) { struct symtable *sym = newsym(name, type, ctype, 0, C_TYPEDEF, 0, 0); appendsym(&Typehead, &Typetail, sym); return (sym); } // Search for a symbol in a specific list. // Return a pointer to the found node or NULL if not found. 
// If class is not zero, also match on the given class static struct symtable *findsyminlist(char *s, struct symtable *list, int class) { for (; list != NULL; list = list->next) if ((list->name != NULL) && !strcmp(s, list->name)) if (class==0 || class== list->class) return (list); return (NULL); } // Determine if the symbol s is in the global symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findglob(char *s) { return (findsyminlist(s, Globhead, 0)); } // Determine if the symbol s is in the local symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findlocl(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } return (findsyminlist(s, Loclhead, 0)); } // Determine if the symbol s is in the symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findsymbol(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } // Otherwise, try the local and global symbol lists node = findsyminlist(s, Loclhead, 0); if (node) return (node); return (findsyminlist(s, Globhead, 0)); } // Find a member in the member list // Return a pointer to the found node or NULL if not found. struct symtable *findmember(char *s) { return (findsyminlist(s, Membhead, 0)); } // Find a struct in the struct list // Return a pointer to the found node or NULL if not found. struct symtable *findstruct(char *s) { return (findsyminlist(s, Structhead, 0)); } // Find a struct in the union list // Return a pointer to the found node or NULL if not found. struct symtable *findunion(char *s) { return (findsyminlist(s, Unionhead, 0)); } // Find an enum type in the enum list // Return a pointer to the found node or NULL if not found. 
struct symtable *findenumtype(char *s) { return (findsyminlist(s, Enumhead, C_ENUMTYPE)); } // Find an enum value in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumval(char *s) { return (findsyminlist(s, Enumhead, C_ENUMVAL)); } // Find a type in the tyedef list // Return a pointer to the found node or NULL if not found. struct symtable *findtypedef(char *s) { return (findsyminlist(s, Typehead, 0)); } // Reset the contents of the symbol table void clear_symtable(void) { Globhead = Globtail = NULL; Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Membhead = Membtail = NULL; Structhead = Structtail = NULL; Unionhead = Uniontail = NULL; Enumhead = Enumtail = NULL; Typehead = Typetail = NULL; } // Clear all the entries in the local symbol table void freeloclsyms(void) { Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Functionid = NULL; } ================================================ FILE: 46_Void_Functions/tests/err.input031.c ================================================ Expecting a primary expression, got token:+ on line 5 of input031.c ================================================ FILE: 46_Void_Functions/tests/err.input032.c ================================================ Unknown variable:cow on line 4 of input032.c ================================================ FILE: 46_Void_Functions/tests/err.input033.c ================================================ Incompatible type to return on line 4 of input033.c ================================================ FILE: 46_Void_Functions/tests/err.input034.c ================================================ For now, declaration of non-global arrays is not implemented on line 4 of input034.c ================================================ FILE: 46_Void_Functions/tests/err.input035.c ================================================ Duplicate local variable declaration:a on line 4 of input035.c ================================================ FILE: 
46_Void_Functions/tests/err.input036.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input036.c ================================================ FILE: 46_Void_Functions/tests/err.input037.c ================================================ Expected:comma on line 3 of input037.c ================================================ FILE: 46_Void_Functions/tests/err.input038.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input038.c ================================================ FILE: 46_Void_Functions/tests/err.input039.c ================================================ No statements in function with non-void type on line 4 of input039.c ================================================ FILE: 46_Void_Functions/tests/err.input040.c ================================================ No return for function with non-void type on line 4 of input040.c ================================================ FILE: 46_Void_Functions/tests/err.input041.c ================================================ Can't return from a void function on line 3 of input041.c ================================================ FILE: 46_Void_Functions/tests/err.input042.c ================================================ Undeclared function:fred on line 3 of input042.c ================================================ FILE: 46_Void_Functions/tests/err.input043.c ================================================ Undeclared array:b on line 3 of input043.c ================================================ FILE: 46_Void_Functions/tests/err.input044.c ================================================ Unknown variable:z on line 3 of input044.c ================================================ FILE: 46_Void_Functions/tests/err.input045.c ================================================ & operator must be followed by an identifier on line 3 of input045.c ================================================ 
FILE: 46_Void_Functions/tests/err.input046.c ================================================ * operator must be followed by an identifier or * on line 3 of input046.c ================================================ FILE: 46_Void_Functions/tests/err.input047.c ================================================ ++ operator must be followed by an identifier on line 3 of input047.c ================================================ FILE: 46_Void_Functions/tests/err.input048.c ================================================ -- operator must be followed by an identifier on line 3 of input048.c ================================================ FILE: 46_Void_Functions/tests/err.input049.c ================================================ Incompatible expression in assignment on line 6 of input049.c ================================================ FILE: 46_Void_Functions/tests/err.input050.c ================================================ Incompatible types in binary expression on line 6 of input050.c ================================================ FILE: 46_Void_Functions/tests/err.input051.c ================================================ Expected '\'' at end of char literal on line 4 of input051.c ================================================ FILE: 46_Void_Functions/tests/err.input052.c ================================================ Unrecognised character:$ on line 5 of input052.c ================================================ FILE: 46_Void_Functions/tests/err.input056.c ================================================ unknown struct/union type:var1 on line 2 of input056.c ================================================ FILE: 46_Void_Functions/tests/err.input057.c ================================================ previously defined struct/union:fred on line 2 of input057.c ================================================ FILE: 46_Void_Functions/tests/err.input059.c ================================================ Undeclared variable:y on line 3 of input059.c 
================================================ FILE: 46_Void_Functions/tests/err.input060.c ================================================ Undeclared variable:x on line 3 of input060.c ================================================ FILE: 46_Void_Functions/tests/err.input061.c ================================================ Undeclared variable:x on line 3 of input061.c ================================================ FILE: 46_Void_Functions/tests/err.input064.c ================================================ undeclared enum type::fred on line 1 of input064.c ================================================ FILE: 46_Void_Functions/tests/err.input065.c ================================================ enum type redeclared::fred on line 2 of input065.c ================================================ FILE: 46_Void_Functions/tests/err.input066.c ================================================ enum value redeclared::z on line 2 of input066.c ================================================ FILE: 46_Void_Functions/tests/err.input068.c ================================================ redefinition of typedef:FOO on line 2 of input068.c ================================================ FILE: 46_Void_Functions/tests/err.input069.c ================================================ unknown type:FLOO on line 2 of input069.c ================================================ FILE: 46_Void_Functions/tests/err.input072.c ================================================ no loop or switch to break out from on line 1 of input072.c ================================================ FILE: 46_Void_Functions/tests/err.input073.c ================================================ no loop to continue to on line 1 of input073.c ================================================ FILE: 46_Void_Functions/tests/err.input075.c ================================================ Unexpected token in switch:if on line 4 of input075.c ================================================ FILE: 
46_Void_Functions/tests/err.input076.c ================================================ No cases in switch on line 3 of input076.c ================================================ FILE: 46_Void_Functions/tests/err.input077.c ================================================ case or default after existing default on line 6 of input077.c ================================================ FILE: 46_Void_Functions/tests/err.input078.c ================================================ case or default after existing default on line 6 of input078.c ================================================ FILE: 46_Void_Functions/tests/err.input079.c ================================================ Duplicate case value on line 6 of input079.c ================================================ FILE: 46_Void_Functions/tests/err.input085.c ================================================ Bad type in parameter list on line 1 of input085.c ================================================ FILE: 46_Void_Functions/tests/err.input086.c ================================================ Function definition not at global level on line 3 of input086.c ================================================ FILE: 46_Void_Functions/tests/err.input087.c ================================================ Bad type in member list on line 4 of input087.c ================================================ FILE: 46_Void_Functions/tests/err.input092.c ================================================ Type mismatch: literal vs. variable on line 1 of input092.c ================================================ FILE: 46_Void_Functions/tests/err.input093.c ================================================ Unknown variable:fred on line 1 of input093.c ================================================ FILE: 46_Void_Functions/tests/err.input094.c ================================================ Type mismatch: literal vs. 
variable on line 1 of input094.c ================================================ FILE: 46_Void_Functions/tests/err.input095.c ================================================ Variable can not be initialised:x on line 1 of input095.c ================================================ FILE: 46_Void_Functions/tests/err.input096.c ================================================ Array size is illegal:0 on line 1 of input096.c ================================================ FILE: 46_Void_Functions/tests/err.input097.c ================================================ For now, declaration of non-global arrays is not implemented on line 2 of input097.c ================================================ FILE: 46_Void_Functions/tests/err.input098.c ================================================ Too many values in initialisation list on line 1 of input098.c ================================================ FILE: 46_Void_Functions/tests/err.input102.c ================================================ Cannot cast to a struct, union or void type on line 3 of input102.c ================================================ FILE: 46_Void_Functions/tests/err.input103.c ================================================ Cannot cast to a struct, union or void type on line 3 of input103.c ================================================ FILE: 46_Void_Functions/tests/err.input104.c ================================================ Cannot cast to a struct, union or void type on line 2 of input104.c ================================================ FILE: 46_Void_Functions/tests/err.input105.c ================================================ Incompatible expression in assignment on line 4 of input105.c ================================================ FILE: 46_Void_Functions/tests/input001.c ================================================ int printf(char *fmt); void main() { printf("%d\n", 12 * 3); printf("%d\n", 18 - 2 * 4); printf("%d\n", 1 + 2 + 9 - 5/2 + 3*5); } 
================================================ FILE: 46_Void_Functions/tests/input002.c ================================================ int printf(char *fmt); void main() { int fred; int jim; fred= 5; jim= 12; printf("%d\n", fred + jim); } ================================================ FILE: 46_Void_Functions/tests/input003.c ================================================ int printf(char *fmt); void main() { int x; x= 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); } ================================================ FILE: 46_Void_Functions/tests/input004.c ================================================ int printf(char *fmt); void main() { int x; x= 7 < 9; printf("%d\n", x); x= 7 <= 9; printf("%d\n", x); x= 7 != 9; printf("%d\n", x); x= 7 == 7; printf("%d\n", x); x= 7 >= 7; printf("%d\n", x); x= 7 <= 7; printf("%d\n", x); x= 9 > 7; printf("%d\n", x); x= 9 >= 7; printf("%d\n", x); x= 9 != 7; printf("%d\n", x); } ================================================ FILE: 46_Void_Functions/tests/input005.c ================================================ int printf(char *fmt); void main() { int i; int j; i=6; j=12; if (i < j) { printf("%d\n", i); } else { printf("%d\n", j); } } ================================================ FILE: 46_Void_Functions/tests/input006.c ================================================ int printf(char *fmt); void main() { int i; i=1; while (i <= 10) { printf("%d\n", i); i= i + 1; } } ================================================ FILE: 46_Void_Functions/tests/input007.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 46_Void_Functions/tests/input008.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } 
} ================================================ FILE: 46_Void_Functions/tests/input009.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { printf("%d\n", 2 * b - a); } } ================================================ FILE: 46_Void_Functions/tests/input010.c ================================================ int printf(char *fmt); void main() { int i; char j; j= 20; printf("%d\n", j); i= 10; printf("%d\n", i); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 2; j= j + 1) { printf("%d\n", j); } } ================================================ FILE: 46_Void_Functions/tests/input011.c ================================================ int printf(char *fmt); int main() { int i; char j; long k; i= 10; printf("%d\n", i); j= 20; printf("%d\n", j); k= 30; printf("%d\n", k); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 4; j= j + 1) { printf("%d\n", j); } for (k= 1; k <= 5; k= k + 1) { printf("%d\n", k); } return(i); printf("%d\n", 12345); return(3); } ================================================ FILE: 46_Void_Functions/tests/input012.c ================================================ int printf(char *fmt); int fred() { return(5); } void main() { int x; x= fred(2); printf("%d\n", x); } ================================================ FILE: 46_Void_Functions/tests/input013.c ================================================ int printf(char *fmt); int fred() { return(56); } void main() { int dummy; int result; dummy= printf("%d\n", 23); result= fred(10); dummy= printf("%d\n", result); } ================================================ FILE: 46_Void_Functions/tests/input014.c ================================================ int printf(char *fmt); int fred() { return(20); } int main() { int result; printf("%d\n", 10); result= fred(15); printf("%d\n", result); printf("%d\n", 
fred(15)+10); return(0); } ================================================ FILE: 46_Void_Functions/tests/input015.c ================================================ int printf(char *fmt); int main() { char a; char *b; char c; int d; int *e; int f; a= 18; printf("%d\n", a); b= &a; c= *b; printf("%d\n", c); d= 12; printf("%d\n", d); e= &d; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 46_Void_Functions/tests/input016.c ================================================ int printf(char *fmt); int c; int d; int *e; int f; int main() { c= 12; d=18; printf("%d\n", c); e= &c + 1; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 46_Void_Functions/tests/input017.c ================================================ int printf(char *fmt); int main() { char a; char *b; int d; int *e; b= &a; *b= 19; printf("%d\n", a); e= &d; *e= 12; printf("%d\n", d); return(0); } ================================================ FILE: 46_Void_Functions/tests/input018.c ================================================ int printf(char *fmt); int main() { int a; int b; a= b= 34; printf("%d\n", a); printf("%d\n", b); return(0); } ================================================ FILE: 46_Void_Functions/tests/input018a.c ================================================ int printf(char *fmt); int a; int *b; char c; char *d; int main() { b= &a; *b= 15; printf("%d\n", a); d= &c; *d= 16; printf("%d\n", c); return(0); } ================================================ FILE: 46_Void_Functions/tests/input019.c ================================================ int printf(char *fmt); int a; int b; int c; int d; int e; int main() { a= 2; b= 4; c= 3; d= 2; e= (a+b) * (c+d); printf("%d\n", e); return(0); } ================================================ FILE: 46_Void_Functions/tests/input020.c ================================================ int printf(char *fmt); int a; int b[25]; int main() { b[3]= 12; a= b[3]; 
printf("%d\n", a); return(0); } ================================================ FILE: 46_Void_Functions/tests/input021.c ================================================ int printf(char *fmt); char c; char *str; int main() { c= '\n'; printf("%d\n", c); for (str= "Hello world\n"; *str != 0; str= str + 1) { printf("%c", *str); } return(0); } ================================================ FILE: 46_Void_Functions/tests/input022.c ================================================ int printf(char *fmt); char a; char b; char c; int d; int e; int f; long g; long h; long i; int main() { b= 5; c= 7; a= b + c++; printf("%d\n", a); e= 5; f= 7; d= e + f++; printf("%d\n", d); h= 5; i= 7; g= h + i++; printf("%d\n", g); a= b-- + c; printf("%d\n", a); d= e-- + f; printf("%d\n", d); g= h-- + i; printf("%d\n", g); a= ++b + c; printf("%d\n", a); d= ++e + f; printf("%d\n", d); g= ++h + i; printf("%d\n", g); a= b * --c; printf("%d\n", a); d= e * --f; printf("%d\n", d); g= h * --i; printf("%d\n", g); return(0); } ================================================ FILE: 46_Void_Functions/tests/input023.c ================================================ int printf(char *fmt); char *str; int x; int main() { x= -23; printf("%d\n", x); printf("%d\n", -10 * -10); x= 1; x= ~x; printf("%d\n", x); x= 2 > 5; printf("%d\n", x); x= !x; printf("%d\n", x); x= !x; printf("%d\n", x); x= 13; if (x) { printf("%d\n", 13); } x= 0; if (!x) { printf("%d\n", 14); } for (str= "Hello world\n"; *str; str++) { printf("%c", *str); } return(0); } ================================================ FILE: 46_Void_Functions/tests/input024.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { a= 42; b= 19; printf("%d\n", a & b); printf("%d\n", a | b); printf("%d\n", a ^ b); printf("%d\n", 1 << 3); printf("%d\n", 63 >> 3); return(0); } ================================================ FILE: 46_Void_Functions/tests/input025.c 
================================================ int printf(char *fmt); int a; int b; int c; int main() { char z; int y; int x; x= 10; y= 20; z= 30; printf("%d\n", x); printf("%d\n", y); printf("%d\n", z); a= 5; b= 15; c= 25; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); return(0); } ================================================ FILE: 46_Void_Functions/tests/input026.c ================================================ int printf(char *fmt); int main(int a, char b, long c, int d, int e, int f, int g, int h) { int i; int j; int k; a= 13; printf("%d\n", a); b= 23; printf("%d\n", b); c= 34; printf("%d\n", c); d= 44; printf("%d\n", d); e= 54; printf("%d\n", e); f= 64; printf("%d\n", f); g= 74; printf("%d\n", g); h= 84; printf("%d\n", h); i= 94; printf("%d\n", i); j= 95; printf("%d\n", j); k= 96; printf("%d\n", k); return(0); } ================================================ FILE: 46_Void_Functions/tests/input027.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int param5(int a, int b, int c, int d, int e) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param2(int a, int b) { int c; int d; int e; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param0() { int a; int b; int c; int d; int e; a= 1; b= 2; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int main() { param8(1,2,3,4,5,6,7,8); param5(1,2,3,4,5); param2(1,2); param0(); return(0); } ================================================ FILE: 46_Void_Functions/tests/input028.c ================================================ int 
printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 46_Void_Functions/tests/input029.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h); int fred(int a, int b, int c); int main(); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 46_Void_Functions/tests/input030.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input030.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 46_Void_Functions/tests/input031.c ================================================ int printf(char *fmt); int main() { int x; x= 2 + + 3 - * / ; } ================================================ FILE: 46_Void_Functions/tests/input032.c ================================================ int printf(char *fmt); int main() { pizza cow llama sausage; } 
================================================ FILE: 46_Void_Functions/tests/input033.c ================================================ int printf(char *fmt); int main() { char *z; return(z); } ================================================ FILE: 46_Void_Functions/tests/input034.c ================================================ int printf(char *fmt); int main() { int a[12]; return(0); } ================================================ FILE: 46_Void_Functions/tests/input035.c ================================================ int printf(char *fmt); int fred(int a, int b) { int a; return(a); } ================================================ FILE: 46_Void_Functions/tests/input036.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c); ================================================ FILE: 46_Void_Functions/tests/input037.c ================================================ int printf(char *fmt); int fred(int a, char b +, int z); ================================================ FILE: 46_Void_Functions/tests/input038.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c, int g); ================================================ FILE: 46_Void_Functions/tests/input039.c ================================================ int printf(char *fmt); int main() { int a; } ================================================ FILE: 46_Void_Functions/tests/input040.c ================================================ int printf(char *fmt); int main() { int a; a= 5; } ================================================ FILE: 46_Void_Functions/tests/input041.c ================================================ int printf(char *fmt); void fred() { return(5); } ================================================ FILE: 46_Void_Functions/tests/input042.c ================================================ int printf(char *fmt); int main() { 
fred(5); } ================================================ FILE: 46_Void_Functions/tests/input043.c ================================================ int printf(char *fmt); int main() { int a; a= b[4]; } ================================================ FILE: 46_Void_Functions/tests/input044.c ================================================ int printf(char *fmt); int main() { int a; a= z; } ================================================ FILE: 46_Void_Functions/tests/input045.c ================================================ int printf(char *fmt); int main() { int a; a= &5; } ================================================ FILE: 46_Void_Functions/tests/input046.c ================================================ int printf(char *fmt); int main() { int a; a= *5; } ================================================ FILE: 46_Void_Functions/tests/input047.c ================================================ int printf(char *fmt); int main() { int a; a= ++5; } ================================================ FILE: 46_Void_Functions/tests/input048.c ================================================ int printf(char *fmt); int main() { int a; a= --5; } ================================================ FILE: 46_Void_Functions/tests/input049.c ================================================ int printf(char *fmt); int main() { int x; char y; y= x; } ================================================ FILE: 46_Void_Functions/tests/input050.c ================================================ int printf(char *fmt); int main() { char *a; char *b; a= a + b; } ================================================ FILE: 46_Void_Functions/tests/input051.c ================================================ int printf(char *fmt); int main() { char a; a= 'fred'; } ================================================ FILE: 46_Void_Functions/tests/input052.c ================================================ int printf(char *fmt); int main() { int a; a= $5.00; } 
================================================ FILE: 46_Void_Functions/tests/input053.c ================================================ int printf(char *fmt); int main() { printf("Hello world, %d\n", 23); return(0); } ================================================ FILE: 46_Void_Functions/tests/input054.c ================================================ int printf(char *fmt); int main() { int i; for (i=0; i < 20; i++) { printf("Hello world, %d\n", i); } return(0); } ================================================ FILE: 46_Void_Functions/tests/input055.c ================================================ int printf(char *fmt); int main(int argc, char **argv) { int i; char *argument; printf("Hello world\n"); for (i=0; i < argc; i++) { argument= *argv; argv= argv + 1; printf("Argument %d is %s\n", i, argument); } return(0); } ================================================ FILE: 46_Void_Functions/tests/input056.c ================================================ struct foo { int x; }; struct mary var1; ================================================ FILE: 46_Void_Functions/tests/input057.c ================================================ struct fred { int x; } ; struct fred { char y; } ; ================================================ FILE: 46_Void_Functions/tests/input058.c ================================================ int printf(char *fmt); struct fred { int x; char y; long z; }; struct fred var2; struct fred *varptr; int main() { long result; var2.x= 12; printf("%d\n", var2.x); var2.y= 'c'; printf("%d\n", var2.y); var2.z= 4005; printf("%d\n", var2.z); result= var2.x + var2.y + var2.z; printf("%d\n", result); varptr= &var2; result= varptr->x + varptr->y + varptr->z; printf("%d\n", result); return(0); } ================================================ FILE: 46_Void_Functions/tests/input059.c ================================================ int main() { int x; x= y.foo; } ================================================ FILE: 46_Void_Functions/tests/input060.c 
================================================ int main() { int x; x= x.foo; } ================================================ FILE: 46_Void_Functions/tests/input061.c ================================================ int main() { int x; x= x->foo; } ================================================ FILE: 46_Void_Functions/tests/input062.c ================================================ int printf(char *fmt); union fred { char w; int x; int y; long z; }; union fred var1; union fred *varptr; int main() { var1.x= 65; printf("%d\n", var1.x); var1.x= 66; printf("%d\n", var1.x); printf("%d\n", var1.y); printf("The next two depend on the endian of the platform\n"); printf("%d\n", var1.w); printf("%d\n", var1.z); varptr= &var1; varptr->x= 67; printf("%d\n", varptr->x); printf("%d\n", varptr->y); return(0); } ================================================ FILE: 46_Void_Functions/tests/input063.c ================================================ int printf(char *fmt); enum fred { apple=1, banana, carrot, pear=10, peach, mango, papaya }; enum jane { aple=1, bnana, crrot, par=10, pech, mago, paaya }; enum fred var1; enum jane var2; enum fred var3; int main() { var1= carrot + pear + mango; printf("%d\n", var1); return(0); } ================================================ FILE: 46_Void_Functions/tests/input064.c ================================================ enum fred var3; ================================================ FILE: 46_Void_Functions/tests/input065.c ================================================ enum fred { x, y, z }; enum fred { a, b }; ================================================ FILE: 46_Void_Functions/tests/input066.c ================================================ enum fred { x, y, z }; enum mary { a, b, z }; ================================================ FILE: 46_Void_Functions/tests/input067.c ================================================ int printf(char *fmt); typedef int FOO; FOO var1; struct bar { int x; int y} ; typedef struct bar BAR; 
BAR var2; int main() { var1= 5; printf("%d\n", var1); var2.x= 7; var2.y= 10; printf("%d\n", var2.x + var2.y); return(0); } ================================================ FILE: 46_Void_Functions/tests/input068.c ================================================ typedef int FOO; typedef char FOO; ================================================ FILE: 46_Void_Functions/tests/input069.c ================================================ typedef int FOO; FLOO y; ================================================ FILE: 46_Void_Functions/tests/input070.c ================================================ #include typedef int FOO; int main() { FOO x; x= 56; printf("%d\n", x); return(0); } ================================================ FILE: 46_Void_Functions/tests/input071.c ================================================ #include int main() { int x; x = 0; while (x < 100) { if (x == 5) { x = x + 2; continue; } printf("%d\n", x); if (x == 14) { break; } x = x + 1; } printf("Done\n"); return (0); } ================================================ FILE: 46_Void_Functions/tests/input072.c ================================================ int main() { break; } ================================================ FILE: 46_Void_Functions/tests/input073.c ================================================ int main() { continue; } ================================================ FILE: 46_Void_Functions/tests/input074.c ================================================ #include int main() { int x; int y; y= 0; for (x=0; x < 5; x++) { switch(x) { case 1: { y= 5; break; } case 2: { y= 7; break; } case 3: { y= 9; } default: { y= 100; } } printf("%d\n", y); } return(0); } ================================================ FILE: 46_Void_Functions/tests/input075.c ================================================ int main() { int x; switch(x) { if (x<5); } } ================================================ FILE: 46_Void_Functions/tests/input076.c ================================================ int 
main() { int x; switch(x) { } } ================================================ FILE: 46_Void_Functions/tests/input077.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } case 4: { x= 2; } } } ================================================ FILE: 46_Void_Functions/tests/input078.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } default: { x= 2; } } } ================================================ FILE: 46_Void_Functions/tests/input079.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } case 2: { x= 2; } case 1: { x= 2; } default: { x= 2; } } } ================================================ FILE: 46_Void_Functions/tests/input080.c ================================================ #include int x; int y; int main() { for (x=0, y=1; x < 6; x++, y=y+2) { printf("%d %d\n", x, y); } return(0); } ================================================ FILE: 46_Void_Functions/tests/input081.c ================================================ #include int x; int y; int main() { x= 0; y=1; for (;x<5;) { printf("%d %d\n", x, y); x=x+1; y=y+2; } return(0); } ================================================ FILE: 46_Void_Functions/tests/input082.c ================================================ #include int main() { int x; x= 10; // Dangling else test if (x > 5) if (x > 15) printf("x > 15\n"); else printf("15 >= x > 5\n"); else printf("5 >= x\n"); return(0); } ================================================ FILE: 46_Void_Functions/tests/input083.c ================================================ #include int main() { int x; // Dangling else test. 
// We should not print anything for x<= 5 for (x=0; x < 12; x++) if (x > 5) if (x > 10) printf("10 < %2d\n", x); else printf(" 5 < %2d <= 10\n", x); return(0); } ================================================ FILE: 46_Void_Functions/tests/input084.c ================================================ #include int main() { int x, y; x=2; y=3; printf("%d %d\n", x, y); char a, *b; a= 'f'; b= &a; printf("%c %c\n", a, *b); return(0); } ================================================ FILE: 46_Void_Functions/tests/input085.c ================================================ int main(struct foo { int x; }; , int a) { return(0); } ================================================ FILE: 46_Void_Functions/tests/input086.c ================================================ int main() { int fred() { return(5); } int x; x=2; return(x); } ================================================ FILE: 46_Void_Functions/tests/input087.c ================================================ struct foo { int x; int y; struct blah { int g; }; }; ================================================ FILE: 46_Void_Functions/tests/input088.c ================================================ #include struct foo { int x; int y; } fred, mary; struct foo james; int main() { fred.x= 5; mary.y= 6; printf("%d %d\n", fred.x, mary.y); return(0); } ================================================ FILE: 46_Void_Functions/tests/input089.c ================================================ #include int x= 23; char y= 'H'; char *z= "Hello world"; int main() { printf("%d %c %s\n", x, y, z); return(0); } ================================================ FILE: 46_Void_Functions/tests/input090.c ================================================ #include int a = 23, b = 100; char y = 'H', *z = "Hello world"; int main() { printf("%d %d %c %s\n", a, b, y, z); return (0); } ================================================ FILE: 46_Void_Functions/tests/input091.c ================================================ #include int fred[] = { 
1, 2, 3, 4, 5 }; int jim[10] = { 1, 2, 3, 4, 5 }; int main() { int i; for (i=0; i < 5; i++) printf("%d\n", fred[i]); for (i=0; i < 10; i++) printf("%d\n", jim[i]); return(0); } ================================================ FILE: 46_Void_Functions/tests/input092.c ================================================ char x= 3000; ================================================ FILE: 46_Void_Functions/tests/input093.c ================================================ char x= fred; ================================================ FILE: 46_Void_Functions/tests/input094.c ================================================ char *s= 54; ================================================ FILE: 46_Void_Functions/tests/input095.c ================================================ int fred(int x=2) { return(x); } ================================================ FILE: 46_Void_Functions/tests/input096.c ================================================ int fred[0]; ================================================ FILE: 46_Void_Functions/tests/input097.c ================================================ int main() { int x[45]; return(0); } ================================================ FILE: 46_Void_Functions/tests/input098.c ================================================ int fred[3]= { 1, 2, 3, 4, 5 }; ================================================ FILE: 46_Void_Functions/tests/input099.c ================================================ #include // List of token strings, for debugging purposes. 
// As yet, we can't store a NULL into the list char *Tstring[] = { "EOF", "=", "||", "&&", "|", "^", "&", "==", "!=", ",", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", "default", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":", "" }; int main() { int i; char *str; i=0; while (1) { str= Tstring[i]; if (*str == 0) break; printf("%s\n", str); i++; } return(0); } ================================================ FILE: 46_Void_Functions/tests/input100.c ================================================ #include int main() { int x= 3, y=14; int z= 2 * x + y; char *str= "Hello world"; printf("%s %d %d\n", str, x+y, z); return(0); } ================================================ FILE: 46_Void_Functions/tests/input101.c ================================================ #include int main() { int x= 65535; char y; char *str; y= (char )x; printf("0x%x\n", y); str= (char *)0; printf("0x%lx\n", (long)str); return(0); } ================================================ FILE: 46_Void_Functions/tests/input102.c ================================================ int main() { struct foo { int p; }; int y= (struct foo) x; } ================================================ FILE: 46_Void_Functions/tests/input103.c ================================================ int main() { union foo { int p; }; int y= (union foo) x; } ================================================ FILE: 46_Void_Functions/tests/input104.c ================================================ int main() { int y= (void) x; } ================================================ FILE: 46_Void_Functions/tests/input105.c ================================================ int main() { int x; char *y; y= (char) x; } ================================================ FILE: 46_Void_Functions/tests/input106.c 
================================================ #include char *y= (char *)0; int main() { printf("0x%lx\n", (long)y); return(0); } ================================================ FILE: 46_Void_Functions/tests/input107.c ================================================ #include char *y[] = { "fish", "cow", NULL }; char *z= NULL; int main() { int i; char *ptr; for (i=0; i < 3; i++) { ptr= y[i]; if (ptr != (char *)0) printf("%s\n", y[i]); else printf("NULL\n"); } return(0); } ================================================ FILE: 46_Void_Functions/tests/input108.c ================================================ int main() { char *str= (void *)0; return(0); } ================================================ FILE: 46_Void_Functions/tests/input109.c ================================================ #include int main() { int x= 17 - 1; printf("%d\n", x); return(0); } ================================================ FILE: 46_Void_Functions/tests/input110.c ================================================ #include int x; int y; int main() { x= 3; y= 15; y += x; printf("%d\n", y); x= 3; y= 15; y -= x; printf("%d\n", y); x= 3; y= 15; y *= x; printf("%d\n", y); x= 3; y= 15; y /= x; printf("%d\n", y); return(0); } ================================================ FILE: 46_Void_Functions/tests/input111.c ================================================ #include int main() { int x= 2000 + 3 + 4 * 5 + 6; printf("%d\n", x); return(0); } ================================================ FILE: 46_Void_Functions/tests/input112.c ================================================ #include char* y = NULL; int x= 10 + 6; int fred [ 2 + 3 ]; int main() { fred[2]= x; printf("%d\n", fred[2]); return(0); } ================================================ FILE: 46_Void_Functions/tests/input113.c ================================================ #include void fred(void); void fred(void) { printf("fred says hello\n"); } int main(void) { fred(); return(0); } 
================================================ FILE: 46_Void_Functions/tests/input114.c ================================================ #include int main() { int x= 0x4a; printf("%c\n", x); return(0); } ================================================ FILE: 46_Void_Functions/tests/mktests ================================================ #!/bin/sh # Make the output files for each test # Build our compiler if needed if [ ! -f ../cwj ] then (cd ..; make install) fi for i in input*c do if [ ! -f "out.$i" -a ! -f "err.$i" ] then ../cwj -o out $i 2> "err.$i" # If the err file is empty if [ ! -s "err.$i" ] then rm -f "err.$i" cc -o out $i ./out > "out.$i" fi fi rm -f out out.s done ================================================ FILE: 46_Void_Functions/tests/out.input001.c ================================================ 36 10 25 ================================================ FILE: 46_Void_Functions/tests/out.input002.c ================================================ 17 ================================================ FILE: 46_Void_Functions/tests/out.input003.c ================================================ 1 2 3 4 5 ================================================ FILE: 46_Void_Functions/tests/out.input004.c ================================================ 1 1 1 1 1 1 1 1 1 ================================================ FILE: 46_Void_Functions/tests/out.input005.c ================================================ 6 ================================================ FILE: 46_Void_Functions/tests/out.input006.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 46_Void_Functions/tests/out.input007.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 46_Void_Functions/tests/out.input008.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 
46_Void_Functions/tests/out.input009.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 46_Void_Functions/tests/out.input010.c ================================================ 20 10 1 2 3 4 5 253 254 255 0 1 ================================================ FILE: 46_Void_Functions/tests/out.input011.c ================================================ 10 20 30 1 2 3 4 5 253 254 255 0 1 2 3 1 2 3 4 5 ================================================ FILE: 46_Void_Functions/tests/out.input012.c ================================================ 5 ================================================ FILE: 46_Void_Functions/tests/out.input013.c ================================================ 23 56 ================================================ FILE: 46_Void_Functions/tests/out.input014.c ================================================ 10 20 30 ================================================ FILE: 46_Void_Functions/tests/out.input015.c ================================================ 18 18 12 12 ================================================ FILE: 46_Void_Functions/tests/out.input016.c ================================================ 12 18 ================================================ FILE: 46_Void_Functions/tests/out.input017.c ================================================ 19 12 ================================================ FILE: 46_Void_Functions/tests/out.input018.c ================================================ 34 34 ================================================ FILE: 46_Void_Functions/tests/out.input018a.c ================================================ 15 16 ================================================ FILE: 46_Void_Functions/tests/out.input019.c ================================================ 30 ================================================ FILE: 46_Void_Functions/tests/out.input020.c ================================================ 12 
================================================ FILE: 46_Void_Functions/tests/out.input021.c ================================================ 10 Hello world ================================================ FILE: 46_Void_Functions/tests/out.input022.c ================================================ 12 12 12 13 13 13 13 13 13 35 35 35 ================================================ FILE: 46_Void_Functions/tests/out.input023.c ================================================ -23 100 -2 0 1 0 13 14 Hello world ================================================ FILE: 46_Void_Functions/tests/out.input024.c ================================================ 2 59 57 8 7 ================================================ FILE: 46_Void_Functions/tests/out.input025.c ================================================ 10 20 30 5 15 25 ================================================ FILE: 46_Void_Functions/tests/out.input026.c ================================================ 13 23 34 44 54 64 74 84 94 95 96 ================================================ FILE: 46_Void_Functions/tests/out.input027.c ================================================ 1 2 3 4 5 6 7 8 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 ================================================ FILE: 46_Void_Functions/tests/out.input028.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 46_Void_Functions/tests/out.input029.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 46_Void_Functions/tests/out.input030.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input030.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); 
} close(zin); return (0); } ================================================ FILE: 46_Void_Functions/tests/out.input053.c ================================================ Hello world, 23 ================================================ FILE: 46_Void_Functions/tests/out.input054.c ================================================ Hello world, 0 Hello world, 1 Hello world, 2 Hello world, 3 Hello world, 4 Hello world, 5 Hello world, 6 Hello world, 7 Hello world, 8 Hello world, 9 Hello world, 10 Hello world, 11 Hello world, 12 Hello world, 13 Hello world, 14 Hello world, 15 Hello world, 16 Hello world, 17 Hello world, 18 Hello world, 19 ================================================ FILE: 46_Void_Functions/tests/out.input055.c ================================================ Hello world Argument 0 is ./out ================================================ FILE: 46_Void_Functions/tests/out.input058.c ================================================ 12 99 4005 4116 4116 ================================================ FILE: 46_Void_Functions/tests/out.input062.c ================================================ 65 66 66 The next two depend on the endian of the platform 66 66 67 67 ================================================ FILE: 46_Void_Functions/tests/out.input063.c ================================================ 25 ================================================ FILE: 46_Void_Functions/tests/out.input067.c ================================================ 5 17 ================================================ FILE: 46_Void_Functions/tests/out.input070.c ================================================ 56 ================================================ FILE: 46_Void_Functions/tests/out.input071.c ================================================ 0 1 2 3 4 7 8 9 10 11 12 13 14 Done ================================================ FILE: 46_Void_Functions/tests/out.input074.c ================================================ 100 5 7 100 100 
================================================ FILE: 46_Void_Functions/tests/out.input080.c ================================================ 0 1 1 3 2 5 3 7 4 9 5 11 ================================================ FILE: 46_Void_Functions/tests/out.input081.c ================================================ 0 1 1 3 2 5 3 7 4 9 ================================================ FILE: 46_Void_Functions/tests/out.input082.c ================================================ 15 >= x > 5 ================================================ FILE: 46_Void_Functions/tests/out.input083.c ================================================ 5 < 6 <= 10 5 < 7 <= 10 5 < 8 <= 10 5 < 9 <= 10 5 < 10 <= 10 10 < 11 ================================================ FILE: 46_Void_Functions/tests/out.input084.c ================================================ 2 3 f f ================================================ FILE: 46_Void_Functions/tests/out.input088.c ================================================ 5 6 ================================================ FILE: 46_Void_Functions/tests/out.input089.c ================================================ 23 H Hello world ================================================ FILE: 46_Void_Functions/tests/out.input090.c ================================================ 23 100 H Hello world ================================================ FILE: 46_Void_Functions/tests/out.input091.c ================================================ 1 2 3 4 5 1 2 3 4 5 0 0 0 0 0 ================================================ FILE: 46_Void_Functions/tests/out.input099.c ================================================ EOF = || && | ^ & == != , > <= >= << >> + - * / ++ -- ~ ! void char int long if else while for return struct union enum typedef extern break continue switch case default intlit strlit ; identifier { } ( ) [ ] , . 
-> : ================================================ FILE: 46_Void_Functions/tests/out.input100.c ================================================ Hello world 17 20 ================================================ FILE: 46_Void_Functions/tests/out.input101.c ================================================ 0xff 0x0 ================================================ FILE: 46_Void_Functions/tests/out.input106.c ================================================ 0x0 ================================================ FILE: 46_Void_Functions/tests/out.input107.c ================================================ fish cow NULL ================================================ FILE: 46_Void_Functions/tests/out.input108.c ================================================ ================================================ FILE: 46_Void_Functions/tests/out.input109.c ================================================ 16 ================================================ FILE: 46_Void_Functions/tests/out.input110.c ================================================ 18 12 45 5 ================================================ FILE: 46_Void_Functions/tests/out.input111.c ================================================ 2029 ================================================ FILE: 46_Void_Functions/tests/out.input112.c ================================================ 16 ================================================ FILE: 46_Void_Functions/tests/out.input113.c ================================================ fred says hello ================================================ FILE: 46_Void_Functions/tests/out.input114.c ================================================ J ================================================ FILE: 46_Void_Functions/tests/runtests ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! 
-f ../cwj ] then (cd ..; make install) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" # Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../cwj -o out $i ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../cwj $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.s "trial.$i" done ================================================ FILE: 46_Void_Functions/tests/runtestsn ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! -f ../compn ] then (cd ..; make install) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" # Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../compn -o out $i ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" 
-eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../compn $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.o out.s "trial.$i" done ================================================ FILE: 46_Void_Functions/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->type = type; n->left = left; n->mid = mid; n->right = right; n->sym = sym; n->a_intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, struct symtable *sym, int intvalue) { return (mkastnode(op, type, NULL, NULL, NULL, sym, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, struct symtable *sym, int intvalue) { return (mkastnode(op, type, left, NULL, NULL, sym, intvalue)); } // Generate and return a new label number // just for AST dumping purposes static int gendumplabel(void) { static int id = 1; return (id++); } // Given an AST tree, print it out and follow the // traversal of the tree that genAST() follows void dumpAST(struct ASTnode *n, int label, int level) { int Lfalse, Lstart, Lend; switch (n->op) { case A_IF: Lfalse = 
gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_IF"); if (n->right) { Lend = gendumplabel(); fprintf(stdout, ", end L%d", Lend); } fprintf(stdout, "\n"); dumpAST(n->left, Lfalse, level + 2); dumpAST(n->mid, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); return; case A_WHILE: Lstart = gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_WHILE, start L%d\n", Lstart); Lend = gendumplabel(); dumpAST(n->left, Lend, level + 2); dumpAST(n->right, NOLABEL, level + 2); return; } // Reset level to -2 for A_GLUE if (n->op == A_GLUE) level = -2; // General AST node handling if (n->left) dumpAST(n->left, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); for (int i = 0; i < level; i++) fprintf(stdout, " "); switch (n->op) { case A_GLUE: fprintf(stdout, "\n\n"); return; case A_FUNCTION: fprintf(stdout, "A_FUNCTION %s\n", n->sym->name); return; case A_ADD: fprintf(stdout, "A_ADD\n"); return; case A_SUBTRACT: fprintf(stdout, "A_SUBTRACT\n"); return; case A_MULTIPLY: fprintf(stdout, "A_MULTIPLY\n"); return; case A_DIVIDE: fprintf(stdout, "A_DIVIDE\n"); return; case A_EQ: fprintf(stdout, "A_EQ\n"); return; case A_NE: fprintf(stdout, "A_NE\n"); return; case A_LT: fprintf(stdout, "A_LE\n"); return; case A_GT: fprintf(stdout, "A_GT\n"); return; case A_LE: fprintf(stdout, "A_LE\n"); return; case A_GE: fprintf(stdout, "A_GE\n"); return; case A_INTLIT: fprintf(stdout, "A_INTLIT %d\n", n->a_intvalue); return; case A_STRLIT: fprintf(stdout, "A_STRLIT rval label L%d\n", n->a_intvalue); return; case A_IDENT: if (n->rvalue) fprintf(stdout, "A_IDENT rval %s\n", n->sym->name); else fprintf(stdout, "A_IDENT %s\n", n->sym->name); return; case A_ASSIGN: fprintf(stdout, "A_ASSIGN\n"); return; case A_WIDEN: fprintf(stdout, "A_WIDEN\n"); return; case A_RETURN: fprintf(stdout, "A_RETURN\n"); return; case A_FUNCCALL: fprintf(stdout, "A_FUNCCALL %s\n", n->sym->name); return; case 
A_ADDR: fprintf(stdout, "A_ADDR %s\n", n->sym->name); return; case A_DEREF: if (n->rvalue) fprintf(stdout, "A_DEREF rval\n"); else fprintf(stdout, "A_DEREF\n"); return; case A_SCALE: fprintf(stdout, "A_SCALE %d\n", n->a_size); return; case A_PREINC: fprintf(stdout, "A_PREINC %s\n", n->sym->name); return; case A_PREDEC: fprintf(stdout, "A_PREDEC %s\n", n->sym->name); return; case A_POSTINC: fprintf(stdout, "A_POSTINC\n"); return; case A_POSTDEC: fprintf(stdout, "A_POSTDEC\n"); return; case A_NEGATE: fprintf(stdout, "A_NEGATE\n"); return; case A_BREAK: fprintf(stdout, "A_BREAK\n"); return; case A_CONTINUE: fprintf(stdout, "A_CONTINUE\n"); return; case A_CASE: fprintf(stdout, "A_CASE %d\n", n->a_intvalue); return; case A_DEFAULT: fprintf(stdout, "A_DEFAULT\n"); return; case A_SWITCH: fprintf(stdout, "A_SWITCH\n"); return; case A_CAST: fprintf(stdout, "A_CAST %d\n", n->type); return; case A_ASPLUS: fprintf(stdout, "A_ASPLUS\n"); return; case A_ASMINUS: fprintf(stdout, "A_ASMINUS\n"); return; case A_ASSTAR: fprintf(stdout, "A_ASSTAR\n"); return; case A_ASSLASH: fprintf(stdout, "A_ASSLASH\n"); return; default: fatald("Unknown dumpAST operator", n->op); } } ================================================ FILE: 46_Void_Functions/types.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Types and type handling // Copyright (c) 2019 Warren Toomey, GPL3 // Return true if a type is an int type // of any size, false otherwise int inttype(int type) { return (((type & 0xf) == 0) && (type >= P_CHAR && type <= P_LONG)); } // Return true if a type is of pointer type int ptrtype(int type) { return ((type & 0xf) != 0); } // Given a primitive type, return // the type which is a pointer to it int pointer_to(int type) { if ((type & 0xf) == 0xf) fatald("Unrecognised in pointer_to: type", type); return (type + 1); } // Given a primitive pointer type, return // the type which it points to int value_at(int type) { if ((type & 0xf) == 
0x0) fatald("Unrecognised in value_at: type", type); return (type - 1); } // Given a type and a composite type pointer, return // the size of this type in bytes int typesize(int type, struct symtable *ctype) { if (type == P_STRUCT || type == P_UNION) return (ctype->size); return (genprimsize(type)); } // Given an AST tree and a type which we want it to become, // possibly modify the tree by widening or scaling so that // it is compatible with this type. Return the original tree // if no changes occurred, a modified tree, or NULL if the // tree is not compatible with the given type. // If this will be part of a binary operation, the AST op is not zero. struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op) { int ltype; int lsize, rsize; ltype = tree->type; // XXX No idea on these yet if (ltype == P_STRUCT || ltype == P_UNION) fatal("Don't know how to do this yet"); if (rtype == P_STRUCT || rtype == P_UNION) fatal("Don't know how to do this yet"); // Compare scalar int types if (inttype(ltype) && inttype(rtype)) { // Both types same, nothing to do if (ltype == rtype) return (tree); // Get the sizes for each type lsize = typesize(ltype, NULL); rsize = typesize(rtype, NULL); // Tree's size is too big if (lsize > rsize) return (NULL); // Widen to the right if (rsize > lsize) return (mkastunary(A_WIDEN, rtype, tree, NULL, 0)); } // For pointers if (ptrtype(ltype) && ptrtype(rtype)) { // We can compare them if (op >= A_EQ && op <= A_GE) return(tree); // A comparison of the same type for a non-binary operation is OK, // or when the left tree is of `void *` type. 
if (op == 0 && (ltype == rtype || ltype == pointer_to(P_VOID))) return (tree); } // We can scale only on A_ADD or A_SUBTRACT operation if (op == A_ADD || op == A_SUBTRACT) { // Left is int type, right is pointer type and the size // of the original type is >1: scale the left if (inttype(ltype) && ptrtype(rtype)) { rsize = genprimsize(value_at(rtype)); if (rsize > 1) return (mkastunary(A_SCALE, rtype, tree, NULL, rsize)); else return (tree); // Size 1, no need to scale } } // If we get here, the types are not compatible return (NULL); } ================================================ FILE: 47_Sizeof/Makefile ================================================ # Define the location of the include directory # and the location to install the compiler binary INCDIR=/tmp/include BINDIR=/tmp HSRCS= data.h decl.h defs.h SRCS= cg.c decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c ARMSRCS= cg_arm.c decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c cwj: $(SRCS) $(HSRCS) cc -o cwj -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCS) compn: $(SRCN) $(HSRCS) cc -D__NASM__ -o compn -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCN) cwjarm: $(ARMSRCS) $(HSRCS) cc -o cwjarm -g -Wall $(ARMSRCS) cp cwjarm cwj install: cwj mkdir -p $(INCDIR) rsync -a include/. $(INCDIR) cp cwj $(BINDIR) chmod +x $(BINDIR)/cwj installn: compn mkdir -p $(INCDIR) rsync -a include/. 
$(INCDIR) cp compn $(BINDIR) chmod +x $(BINDIR)/compn clean: rm -f cwj cwjarm compn *.o *.s out a.out test: install tests/runtests (cd tests; chmod +x runtests; ./runtests) armtest: cwjarm tests/runtests (cd tests; chmod +x runtests; ./runtests) testn: installn tests/runtestsn (cd tests; chmod +x runtestsn; ./runtestsn) ================================================ FILE: 47_Sizeof/Readme.md ================================================ # Part 47: A Subset of `sizeof` In a real C compiler, the `sizeof()` operator gives the size in bytes of: + a type definition, and + the type of an expression I looked at the code in our compiler and I'm only using `sizeof()` for the first of the two options above, so I'm only going to implement the first one. This makes things a bit easier as we can assume that the tokens inside the `sizeof()` are a type definition. ## New Token and Keyword We need a "sizeof" keyword and a new token, T_SIZEOF. As per usual, I'll let you look at the changes to `scan.c`. Now, when adding new tokens, we also have to update the list of token strings: ```c // List of token strings, for debugging purposes char *Tstring[] = { "EOF", "=", "+=", "-=", "*=", "/=", "||", "&&", "|", "^", "&", "==", "!=", ",", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", "default", "sizeof", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":" }; ``` I initially forgot to do this, and when debugging I was seeing the "wrong" token description for the tokens after "default". Oops! ## Changes to the Parser The `sizeof()` operator is part of expression parsing, as it produces a value that can be used inside a larger expression. We can do things like: ```c int x= 43 + sizeof(char); ``` Thus, we are going to modify `expr.c` to add `sizeof()`.
It isn't a binary operator, and it's not a prefix or postfix operator, so the best place to add `sizeof()` is as part of parsing primary expressions. In fact, once I found my silly bugs, the amount of new code to do `sizeof()` was small. Here it is: ```c // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; int id; int type=0; int size, class; struct symtable *ctype; switch (Token.token) { case T_SIZEOF: // Skip the T_SIZEOF and ensure we have a left parenthesis scan(&Token); if (Token.token != T_LPAREN) fatal("Left parenthesis expected after sizeof"); scan(&Token); // Get the type inside the parentheses type= parse_stars(parse_type(&ctype, &class)); // Get the type's size size= typesize(type, ctype); rparen(); // Return a leaf node int literal with the size return (mkastleaf(A_INTLIT, P_INT, NULL, size)); ... } ... } ``` We already have a `parse_type()` function to parse a type definition, and we already have a `parse_stars()` function to parse any following asterisks. Finally, we already have a `typesize()` function which returns the number of bytes in a type. All we have to do is scan the tokens in, call these three functions, build a leaf AST node with an integer literal in it, and return it. Yes, I know there are a bunch of subtleties that go with `sizeof()`, but I'm following the "KISS principle" and doing enough to make our compiler self-compiling. 
## Testing the New Code The file `tests/input115.c` has a set of tests for the primitive types, a pointer and for the structures in our compiler: ```c struct foo { int x; char y; long z; }; typedef struct foo blah; int main() { printf("%ld\n", sizeof(char)); printf("%ld\n", sizeof(int)); printf("%ld\n", sizeof(long)); printf("%ld\n", sizeof(char *)); printf("%ld\n", sizeof(blah)); printf("%ld\n", sizeof(struct symtable)); printf("%ld\n", sizeof(struct ASTnode)); return(0); } ``` At present, the output from our compiler is: ``` 1 4 8 8 13 64 48 ``` I'm wondering if we need to pad the `struct foo` struct to be 16 bytes instead of 13. We'll cross that bridge when we get to it. ## Conclusion and What's Next Well, `sizeof()` turned out to be simple, at least for the functionality that we need for our compiler. In reality, `sizeof()` is quite complicated for a full-blown production C compiler. In the next part of our compiler writing journey, I will tackle `static`. [Next step](../48_Static/Readme.md)

================================================
FILE: 47_Sizeof/cg.c
================================================
#include "defs.h"
#include "data.h"
#include "decl.h"

// Code generator for x86-64
// Copyright (c) 2019 Warren Toomey, GPL3

// Records which section we are currently outputting into,
// so that a section directive is only emitted on a change
enum { no_seg, text_seg, data_seg } currSeg = no_seg;

// Switch the output to the text section,
// unless we are already in it
void cgtextseg() {
  if (currSeg == text_seg)
    return;
  fputs("\t.text\n", Outfile);
  currSeg = text_seg;
}

// Switch the output to the data section,
// unless we are already in it
void cgdataseg() {
  if (currSeg == data_seg)
    return;
  fputs("\t.data\n", Outfile);
  currSeg = data_seg;
}

// Given a scalar type value, return the
// size of the type in bytes.
int cgprimsize(int type) {
  // All pointers are 8 bytes
  if (ptrtype(type))
    return (8);
  switch (type) {
    case P_CHAR:
      return (1);
    case P_INT:
      return (4);
    case P_LONG:
      return (8);
    default:
      fatald("Bad type in cgprimsize:", type);
  }
  return (0);                   // Keep -Wall happy
}

// Given a scalar type, an existing memory offset
// (which hasn't been allocated to anything yet)
// and a direction (1 is up, -1 is down), calculate
// and return a suitably aligned memory offset
// for this scalar type. This could be the original
// offset, or it could be above/below the original
int cgalign(int type, int offset, int direction) {
  int alignment;

  // We don't need to do this on x86-64, but let's
  // align chars on any offset and align ints/pointers
  // on a 4-byte alignment
  switch (type) {
    case P_CHAR:
      return (offset);
    case P_INT:
    case P_LONG:
      break;
    default:
      if (!ptrtype(type))
        fatald("Bad type in cg_align:", type);
  }

  // Here we have an int, a long or a pointer. Align it on a 4-byte
  // offset. I put the generic code here so it can be reused elsewhere.
  alignment = 4;
  if (direction > 0) {
    // Going up: round up to the next multiple of the alignment
    offset = (offset + (alignment - 1)) & ~(alignment - 1);
  } else {
    // Going down: masking the low bits already rounds down, so an
    // offset that is already aligned is returned unchanged. The old
    // code subtracted (alignment - 1) first, which moved an aligned
    // offset down a whole slot (8 became 4) and overshot unaligned
    // ones (5 became 0 instead of 4).
    offset = offset & ~(alignment - 1);
  }
  return (offset);
}

// Position of next local variable relative to stack base pointer.
// We store the offset as positive to make aligning the stack pointer easier
static int localOffset;
static int stackOffset;

// Create the position of a new local variable.
// Returns the (negative) offset of the variable
// relative to the stack base pointer.
static int newlocaloffset(int type) {
  int size = cgprimsize(type);

  // Grow the local area by the size of the type, with a minimum
  // of 4 bytes. localOffset is kept positive; the caller gets
  // the negated value back.
  localOffset += (size > 4) ? size : 4;
  return (-localOffset);
}

// List of available registers and their names.
// We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "%r10", "%r11", "%r12", "%r13", "%r9", "%r8", "%rcx", "%rdx", "%rsi", "%rdi" }; static char *breglist[] = { "%r10b", "%r11b", "%r12b", "%r13b", "%r9b", "%r8b", "%cl", "%dl", "%sil", "%dil" }; static char *dreglist[] = { "%r10d", "%r11d", "%r12d", "%r13d", "%r9d", "%r8d", "%ecx", "%edx", "%esi", "%edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); cgtextseg(); fprintf(Outfile, "# internal switch(expr) routine\n" "# %%rsi = switch table, %%rax = expr\n" "# from SubC: http://www.t3x.org/subc/\n" "\n" "switch:\n" " pushq %%rsi\n" " movq %%rdx, %%rsi\n" " movq %%rax, %%rbx\n" " cld\n" " lodsq\n" " movq %%rax, %%rcx\n" "next:\n" " lodsq\n" " movq %%rax, %%rdx\n" " lodsq\n" " cmpq %%rdx, %%rbx\n" " jnz no\n" " popq %%rsi\n" " jmp *%%rax\n" "no:\n" " loop next\n" " lodsq\n" " popq %%rsi\n" " jmp *%%rax\n" "\n"); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(struct symtable *sym) { char *name = sym->name; struct symtable *parm, *locvar; int cnt; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the %rsp and %rsp fprintf(Outfile, "\t.globl\t%s\n" "\t.type\t%s, @function\n" "%s:\n" "\tpushq\t%%rbp\n" "\tmovq\t%%rsp, %%rbp\n", name, name, name); // Copy any in-register parameters to the stack, up to six of them // The remaining parameters are already on the stack for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) { if (cnt > 6) { parm->st_posn = paramOffset; paramOffset += 8; } else { parm->st_posn = newlocaloffset(parm->type); cgstorlocal(paramReg--, parm); } } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. 
for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) { locvar->st_posn = newlocaloffset(locvar->type); } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\taddq\t$%d,%%rsp\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->st_endlabel); fprintf(Outfile, "\taddq\t$%d,%%rsp\n", stackOffset); fputs("\tpopq %rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadglob(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name); } else // Print out the code to initialise it switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovzbq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name); if (op == 
A_PREDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovslq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadlocal(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->st_posn); fprintf(Outfile, "\tmovq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->st_posn); } else switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->st_posn); fprintf(Outfile, "\tmovzbq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->st_posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->st_posn); fprintf(Outfile, "\tmovslq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->st_posn); break; default: fatald("Bad type in cgloadlocal:", sym->type); } return (r); } // Given the label number of a 
global string, // load its address into a new register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tleaq\tL%d(%%rip), %s\n", label, reglist[r]); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tandq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\torq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txorq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshlq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshrq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { 
fprintf(Outfile, "\tnegq\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnotq\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); return (r); } // Convert an integer value to a boolean value. Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s@PLT\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\taddq\t$%d, %%rsp\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpushq\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmovq\t%s, %s\n", reglist[r], reglist[FIRSTPARAMREG - argposn + 1]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsalq\t$%d, %s\n", val, reglist[r]); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %s(%%rip)\n", reglist[r], sym->name); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %s(%%rip)\n", breglist[r], sym->name); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %s(%%rip)\n", dreglist[r], sym->name); break; default: fatald("Bad type in cgstorglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %d(%%rbp)\n", reglist[r], sym->st_posn); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %d(%%rbp)\n", breglist[r], sym->st_posn); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %d(%%rbp)\n", dreglist[r], sym->st_posn); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size, type; int initvalue; int i; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the variable (or its elements if an array) // and the type of the variable if (node->stype == S_ARRAY) { size= typesize(value_at(node->type), node->ctype); type= value_at(node->type); } else { size = node->size; type= node->type; } // Generate the global identity and the label cgdataseg(); fprintf(Outfile, "\t.globl\t%s\n", node->name); 
fprintf(Outfile, "%s:\n", node->name); // Output space for one or more elements for (i=0; i < node->nelems; i++) { // Get any initial value initvalue= 0; if (node->initlist != NULL) initvalue= node->initlist[i]; // Generate the space for this type switch (size) { case 1: fprintf(Outfile, "\t.byte\t%d\n", initvalue); break; case 4: fprintf(Outfile, "\t.long\t%d\n", initvalue); break; case 8: // Generate the pointer to a string literal. Treat a zero value // as actually zero, not the label L0 if (node->initlist != NULL && type== pointer_to(P_CHAR) && initvalue != 0) fprintf(Outfile, "\t.quad\tL%d\n", initvalue); else fprintf(Outfile, "\t.quad\t%d\n", initvalue); break; default: for (int i = 0; i < size; i++) fprintf(Outfile, "\t.byte\t0\n"); } } } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\t.byte\t%d\n", *cptr); } fprintf(Outfile, "\t.byte\t0\n"); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzbl\t%s, %%eax\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %%eax\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } cgjump(sym->st_endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL) fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", sym->name, reglist[r]); else fprintf(Outfile, "\tleaq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzbq\t(%s), %s\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovslq\t(%s), %s\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { // Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmovb\t%s, (%s)\n", breglist[r1], reglist[r2]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } // Generate a switch jump table and the code to // load the registers and call the switch() code void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; // Get a label for the switch table label = genlabel(); cglabel(label); // Heuristic. If we have no cases, create one case // which points to the default case if (casecount == 0) { caseval[0] = 0; caselabel[0] = defaultlabel; casecount = 1; } // Generate the switch jump table. 
fprintf(Outfile, "\t.quad\t%d\n", casecount); for (i = 0; i < casecount; i++) fprintf(Outfile, "\t.quad\t%d, L%d\n", caseval[i], caselabel[i]); fprintf(Outfile, "\t.quad\tL%d\n", defaultlabel); // Load the specific registers cglabel(toplabel); fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); fprintf(Outfile, "\tleaq\tL%d(%%rip), %%rdx\n", label); fprintf(Outfile, "\tjmp\tswitch\n"); } ================================================ FILE: 47_Sizeof/cg_arm.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for ARMv6 on Raspberry Pi // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. static int freereg[4]; static char *reglist[4] = { "r4", "r5", "r6", "r7" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // We have to store large integer literal values in memory. // Keep a list of them which will be output in the postamble #define MAXINTS 1024 int Intlist[MAXINTS]; static int Intslot = 0; // Determine the offset of a large integer // literal from the .L3 label. If the integer // isn't in the list, add it. 
static void set_int_offset(int val) { int offset = -1; // See if it is already there for (int i = 0; i < Intslot; i++) { if (Intlist[i] == val) { offset = 4 * i; break; } } // Not in the list, so add it if (offset == -1) { offset = 4 * Intslot; if (Intslot == MAXINTS) fatal("Out of int slots in set_int_offset()"); Intlist[Intslot++] = val; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L3+%d\n", offset); } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Print out the assembly postamble void cgpostamble() { // Print out the global variables fprintf(Outfile, ".L2:\n"); for (int i = 0; i < Globs; i++) { if (Symtable[i].stype == S_VARIABLE) fprintf(Outfile, "\t.word %s\n", Symtable[i].name); } // Print out the integer literals fprintf(Outfile, ".L3:\n"); for (int i = 0; i < Intslot; i++) { fprintf(Outfile, "\t.word %d\n", Intlist[i]); } } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, \%%function\n" "%s:\n" "\tpush\t{fp, lr}\n" "\tadd\tfp, sp, #4\n" "\tsub\tsp, sp, #8\n" "\tstr\tr0, [fp, #-8]\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Symtable[id].endlabel); fputs("\tsub\tsp, fp, #4\n" "\tpop\t{fp, pc}\n" "\t.align\t2\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); // If the literal value is small, do it with one instruction if (value <= 1000) fprintf(Outfile, "\tmov\t%s, #%d\n", reglist[r], value); else { set_int_offset(value); fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); } return (r); } // Determine the offset of a variable from the .L2 // label. Yes, this is inefficient code. static void set_var_offset(int id) { int offset = 0; // Walk the symbol table up to id. 
// Find S_VARIABLEs and add on 4 until // we get to our variable for (int i = 0; i < id; i++) { if (Symtable[i].stype == S_VARIABLE) offset += 4; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L2+%d\n", offset); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tldrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s, %s\n", reglist[r1], reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\tmul\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { // To do a divide: r0 holds the dividend, r1 holds the divisor. // The quotient is in r0. 
fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r1]); fprintf(Outfile, "\tmov\tr1, %s\n", reglist[r2]); fprintf(Outfile, "\tbl\t__aeabi_idiv\n"); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r1]); free_register(r2); return (r1); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]); fprintf(Outfile, "\tbl\t%s\n", Symtable[id].name); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r]); return (r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tlsl\t%s, %s, #%d\n", reglist[r], reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tstr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgstorglob:", Symtable[id].type); } return (r); } // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { if (ptrtype(type)) return (4); switch (type) { case P_CHAR: return (1); case P_INT: case P_LONG: return (4); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Symtable[id].type); fprintf(Outfile, "\t.data\n" "\t.globl\t%s\n", Symtable[id].name); switch (typesize) { case 1: fprintf(Outfile, "%s:\t.byte\t0\n", Symtable[id].name); break; case 4: fprintf(Outfile, "%s:\t.long\t0\n", Symtable[id].name); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "moveq", "movne", "movlt", "movgt", "movle", "movge" }; // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "movne", "moveq", "movge", "movle", "movgt", "movlt" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #1\n", cmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #0\n", invcmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\tuxtb\t%s, %s\n", reglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tb\tL%d\n", l); } // List of inverted branch instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *brlist[] = { "bne", "beq", "bge", "ble", "bgt", "blt" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", brlist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[reg]); cgjump(Symtable[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. Return a new register int cgaddress(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); fprintf(Outfile, "\tmov\t%s, r3\n", reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tldrb\t%s, [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [%s]\n", reglist[r1], reglist[r2]); break; case P_INT: case P_LONG: fprintf(Outfile, "\tstr\t%s, [%s]\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 47_Sizeof/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { no_seg, 
text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\tsection .text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\tsection .data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch (type) { case P_CHAR: return (offset); case P_INT: case P_LONG: break; default: if (!ptrtype(type)) fatald("Bad type in cg_align:", type); } // Here we have an int or a long. Align it on a 4-byte offset // I put the generic code here so it can be reused elsewhere. alignment = 4; offset = (offset + direction * (alignment - 1)) & ~(alignment - 1); return (offset); } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. 
// We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "r10", "r11", "r12", "r13", "r9", "r8", "rcx", "rdx", "rsi", "rdi" }; static char *breglist[] = { "r10b", "r11b", "r12b", "r13b", "r9b", "r8b", "cl", "dl", "sil", "dil" }; static char *dreglist[] = { "r10d", "r11d", "r12d", "r13d", "r9d", "r8d", "ecx", "edx", "esi", "edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\textern\tprintint\n", Outfile); fputs("\textern\tprintchar\n", Outfile); fputs("\textern\topen\n", Outfile); fputs("\textern\tclose\n", Outfile); fputs("\textern\tread\n", Outfile); fputs("\textern\twrite\n", Outfile); fputs("\textern\tprintf\n", Outfile); cgtextseg(); fprintf(Outfile, "; internal switch(expr) routine\n" "; rsi = switch table, rax = expr\n" "; from SubC: http://www.t3x.org/subc/\n" "\n" "switch:\n" " push rsi\n" " mov rsi, rdx\n" " mov rbx, rax\n" " cld\n" " lodsq\n" " mov rcx, rax\n" "next:\n" " lodsq\n" " mov rdx, rax\n" " lodsq\n" " cmp rbx, rdx\n" " jnz no\n" " pop rsi\n" " jmp rax\n" "no:\n" " loop next\n" " lodsq\n" " pop rsi\n" " jmp rax\n" "\n"); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(struct symtable *sym) { char *name = sym->name; struct symtable *parm, *locvar; int cnt; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the rsp and rbp fprintf(Outfile, "\tglobal\t%s\n" "%s:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n", name, name); // Copy any in-register parameters to the stack, up to six of them // The remaining parameters are already on the stack for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) { if (cnt > 6) { parm->st_posn = paramOffset; paramOffset += 8; } else { parm->st_posn = newlocaloffset(parm->type); cgstorlocal(paramReg--, parm); } } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. 
for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) { locvar->st_posn = newlocaloffset(locvar->type); } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\tadd\trsp, %d\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->st_endlabel); fprintf(Outfile, "\tadd\trsp, %d\n", stackOffset); fputs("\tpop rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadglob(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); } else // Print out the code to initialise it switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_PREDEC) 
fprintf(Outfile, "\tdec\tdword [%s]\n", sym->name); fprintf(Outfile, "\tmovsx\t%s, word [%s]\n", dreglist[r], sym->name); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword [%s]\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadlocal(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->st_posn); fprintf(Outfile, "\tmov\t%s, [rbp+%d]\n", reglist[r], sym->st_posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->st_posn); } else switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->st_posn); fprintf(Outfile, "\tmovzx\t%s, byte [rbp+%d]\n", reglist[r], sym->st_posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->st_posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", sym->st_posn); fprintf(Outfile, "\tmovsx\t%s, dword [rbp+%d]\n", reglist[r], sym->st_posn); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", 
sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", sym->st_posn); break; default: fatald("Bad type in cgloadlocal:", sym->type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, L%d\n", reglist[r], label); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tand\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\tor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshl\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, 
"\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshr\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tneg\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnot\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r], breglist[r]); return (r); } // Convert an integer value to a boolean value. Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, byte %s\n", reglist[r], breglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\tadd\trsp, %d\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmov\t%s, rax\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpush\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmov\t%s, %s\n", reglist[FIRSTPARAMREG - argposn + 1], reglist[r]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsal\t%s, %d\n", reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, dreglist[r]); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\tqword\t[rbp+%d], %s\n", sym->st_posn, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\tbyte\t[rbp+%d], %s\n", sym->st_posn, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\tdword\t[rbp+%d], %s\n", sym->st_posn, dreglist[r]); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size, type; int initvalue; int i; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the variable (or its elements if an array) // and the type of the variable if (node->stype == S_ARRAY) { size= typesize(value_at(node->type), node->ctype); type= value_at(node->type); } else { size = node->size; type= node->type; } // Generate the global identity and the label cgdataseg(); fprintf(Outfile, "\tsection\t.data\n" "\tglobal\t%s\n", node->name); 
fprintf(Outfile, "%s:\n", node->name); // Output space for one or more elements for (i = 0; i < node->nelems; i++) { // Get any initial value initvalue = 0; if (node->initlist != NULL) initvalue = node->initlist[i]; // Generate the space for this type // original version switch(size) { case 1: fprintf(Outfile, "\tdb\t%d\n", initvalue); break; case 4: fprintf(Outfile, "\tdd\t%d\n", initvalue); break; case 8: // Generate the pointer to a string literal. Treat a zero value // as actually zero, not the label L0 if (node->initlist != NULL && type == pointer_to(P_CHAR) && initvalue != 0) fprintf(Outfile, "\tdq\tL%d\n", initvalue); else fprintf(Outfile, "\tdq\t%d\n", initvalue); break; default: for (int i = 0; i < size; i++) fprintf(Outfile, "\tdb\t0\n"); /* compact version using times instead of loop fprintf(Outfile, "\ttimes\t%d\tdb\t0\n", size); */ } } } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\tdb\t%d\n", *cptr); } fprintf(Outfile, "\tdb\t0\n"); /* put string in readable format on a single line // probably went overboard with error checking int comma = 0, quote = 0, start = 1; fprintf(Outfile, "\tdb\t"); for (cptr=strvalue; *cptr; cptr++) { if ( ! isprint(*cptr) ) if (comma || start) { fprintf(Outfile, "%d, ", *cptr); start = 0; comma = 1; } else if (quote) { fprintf(Outfile, "\', %d, ", *cptr); comma = 1; quote = 0; } else { fprintf(Outfile, "%d, ", *cptr); comma = 1; quote = 0; } else if (start || comma) { fprintf(Outfile, "\'%c", *cptr); start = comma = 0; quote = 1; } else { fprintf(Outfile, "%c", *cptr); comma = 0; quote = 1; } } if (comma || start) fprintf(Outfile, "0\n"); else fprintf(Outfile, "\', 0\n"); */ } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzx\teax, %s\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmov\teax, %s\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } cgjump(sym->st_endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL) fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r], sym->name); else fprintf(Outfile, "\tlea\t%s, [rbp+%d]\n", reglist[r], sym->st_posn); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovsx\t%s, dword [%s]\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { //Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmov\t[%s], byte %s\n", reglist[r2], breglist[r1]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } // Generate a switch jump table and the code to // load the registers and call the switch() code void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; // Get a label for the switch table label = genlabel(); cglabel(label); // Heuristic. If we have no cases, create one case // which points to the default case if (casecount == 0) { caseval[0] = 0; caselabel[0] = defaultlabel; casecount = 1; } // Generate the switch jump table. 
fprintf(Outfile, "\tdq\t%d\n", casecount); for (i = 0; i < casecount; i++) fprintf(Outfile, "\tdq\t%d, L%d\n", caseval[i], caselabel[i]); fprintf(Outfile, "\tdq\tL%d\n", defaultlabel); // Load the specific registers cglabel(toplabel); fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); fprintf(Outfile, "\tmov\trdx, L%d\n", label); fprintf(Outfile, "\tjmp\tswitch\n"); } ================================================ FILE: 47_Sizeof/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Putback; // Character put back by scanner extern_ struct symtable *Functionid; // Symbol ptr of the current function extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ char *Infilename; // Name of file we are parsing extern_ char *Outfilename; // Name of file we opened as Outfile extern_ struct token Token; // Last token scanned extern_ struct token Peektoken; // A look-ahead token extern_ char Text[TEXTLEN + 1]; // Last identifier scanned extern_ int Looplevel; // Depth of nested loops extern_ int Switchlevel; // Depth of nested switches // Symbol table lists extern_ struct symtable *Globhead, *Globtail; // Global variables and functions extern_ struct symtable *Loclhead, *Locltail; // Local variables extern_ struct symtable *Parmhead, *Parmtail; // Local parameters extern_ struct symtable *Membhead, *Membtail; // Temp list of struct/union members extern_ struct symtable *Structhead, *Structtail; // List of struct types extern_ struct symtable *Unionhead, *Uniontail; // List of union types extern_ struct symtable *Enumhead, *Enumtail; // List of enum types and values extern_ struct symtable *Typehead, *Typetail; // List of typedefs // Command-line flags extern_ int O_dumpAST; // If true, dump the AST trees extern_ int O_keepasm; // If true, keep any assembly files extern_ int O_assemble; // If 
true, assemble the assembly files extern_ int O_dolink; // If true, link the object files extern_ int O_verbose; // If true, print info on compilation stages ================================================ FILE: 47_Sizeof/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 static struct symtable *composite_declaration(int type); static int typedef_declaration(struct symtable **ctype); static int type_of_typedef(char *name, struct symtable **ctype); static void enum_declaration(void); // Parse the current token and return a primitive type enum value, // a pointer to any composite type and possibly modify // the class of the type. int parse_type(struct symtable **ctype, int *class) { int type, exstatic = 1; // See if the class has been changed to extern (later, static) while (exstatic) { switch (Token.token) { case T_EXTERN: *class = C_EXTERN; scan(&Token); break; default: exstatic = 0; } } // Now work on the actual type keyword switch (Token.token) { case T_VOID: type = P_VOID; scan(&Token); break; case T_CHAR: type = P_CHAR; scan(&Token); break; case T_INT: type = P_INT; scan(&Token); break; case T_LONG: type = P_LONG; scan(&Token); break; // For the following, if we have a ';' after the // parsing then there is no type, so return -1. 
// Example: struct x {int y; int z}; case T_STRUCT: type = P_STRUCT; *ctype = composite_declaration(P_STRUCT); if (Token.token == T_SEMI) type = -1; break; case T_UNION: type = P_UNION; *ctype = composite_declaration(P_UNION); if (Token.token == T_SEMI) type = -1; break; case T_ENUM: type = P_INT; // Enums are really ints enum_declaration(); if (Token.token == T_SEMI) type = -1; break; case T_TYPEDEF: type = typedef_declaration(ctype); if (Token.token == T_SEMI) type = -1; break; case T_IDENT: type = type_of_typedef(Text, ctype); break; default: fatals("Illegal type, token", Token.tokstr); } return (type); } // Given a type parsed by parse_type(), scan in any following // '*' tokens and return the new type int parse_stars(int type) { while (1) { if (Token.token != T_STAR) break; type = pointer_to(type); scan(&Token); } return (type); } // Parse a type which appears inside a cast int parse_cast(void) { int type, class; struct symtable *ctype; // Get the type inside the parentheses type= parse_stars(parse_type(&ctype, &class)); // Do some error checking. I'm sure more can be done if (type == P_STRUCT || type == P_UNION || type == P_VOID) fatal("Cannot cast to a struct, union or void type"); return(type); } // Given a type, parse an expression of literals and ensure // that the type of this expression matches the given type. // Parse any type cast that precedes the expression. // If an integer literal, return this value. // If a string literal, return the label number of the string. 
int parse_literal(int type) { struct ASTnode *tree; // Parse the expression and optimise the resulting AST tree tree= optimise(binexpr(0)); // If there's a cast, get the child and // mark it as having the type from the cast if (tree->op == A_CAST) { tree->left->type= tree->type; tree= tree->left; } // The tree must now have an integer or string literal if (tree->op != A_INTLIT && tree->op != A_STRLIT) fatal("Cannot initialise globals with a general expression"); // If the type is char * and if (type == pointer_to(P_CHAR)) { // We have a string literal, return the label number if (tree->op == A_STRLIT) return(tree->a_intvalue); // We have a zero int literal, so that's a NULL if (tree->op == A_INTLIT && tree->a_intvalue==0) return(0); } // We only get here with an integer literal. The input type // is an integer type and is wide enough to hold the literal value if (inttype(type) && typesize(type, NULL) >= typesize(tree->type, NULL)) return(tree->a_intvalue); fatal("Type mismatch: literal vs. variable"); return(0); // Keep -Wall happy } // Given the type, name and class of a scalar variable, // parse any initialisation value and allocate storage for it. // Return the variable's symbol table entry. 
static struct symtable *scalar_declaration(char *varname, int type, struct symtable *ctype, int class, struct ASTnode **tree) { struct symtable *sym=NULL; struct ASTnode *varnode, *exprnode; *tree= NULL; // Add this as a known scalar switch (class) { case C_EXTERN: case C_GLOBAL: sym= addglob(varname, type, ctype, S_VARIABLE, class, 1, 0); break; case C_LOCAL: sym= addlocl(varname, type, ctype, S_VARIABLE, 1); break; case C_PARAM: sym= addparm(varname, type, ctype, S_VARIABLE); break; case C_MEMBER: sym= addmemb(varname, type, ctype, S_VARIABLE, 1); break; } // The variable is being initialised if (Token.token == T_ASSIGN) { // Only possible for a global or local if (class != C_GLOBAL && class != C_LOCAL) fatals("Variable can not be initialised", varname); scan(&Token); // Globals must be assigned a literal value if (class == C_GLOBAL) { // Create one initial value for the variable and // parse this value sym->initlist= (int *)malloc(sizeof(int)); sym->initlist[0]= parse_literal(type); } if (class == C_LOCAL) { // Make an A_IDENT AST node with the variable varnode = mkastleaf(A_IDENT, sym->type, sym, 0); // Get the expression for the assignment, make into a rvalue exprnode = binexpr(0); exprnode->rvalue = 1; // Ensure the expression's type matches the variable exprnode = modify_type(exprnode, varnode->type, 0); if (exprnode == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree *tree = mkastnode(A_ASSIGN, exprnode->type, exprnode, NULL, varnode, NULL, 0); } } // Generate any global space if (class == C_GLOBAL) genglobsym(sym); return (sym); } // Given the type, name and class of an variable, parse // the size of the array, if any. Then parse any initialisation // value and allocate storage for it. // Return the variable's symbol table entry. 
static struct symtable *array_declaration(char *varname, int type, struct symtable *ctype, int class) { struct symtable *sym; // New symbol table entry int nelems= -1; // Assume the number of elements won't be given int maxelems; // The maximum number of elements in the init list int *initlist; // The list of initial elements int i=0, j; // Skip past the '[' scan(&Token); // See we have an array size if (Token.token != T_RBRACKET) { nelems= parse_literal(P_INT); if (nelems <= 0) fatald("Array size is illegal", nelems); } // Ensure we have a following ']' match(T_RBRACKET, "]"); // Add this as a known array. We treat the // array as a pointer to its elements' type switch (class) { case C_EXTERN: case C_GLOBAL: sym = addglob(varname, pointer_to(type), ctype, S_ARRAY, class, 0, 0); break; default: fatal("For now, declaration of non-global arrays is not implemented"); } // Array initialisation if (Token.token == T_ASSIGN) { if (class != C_GLOBAL) fatals("Variable can not be initialised", varname); scan(&Token); // Get the following left curly bracket match(T_LBRACE, "{"); #define TABLE_INCREMENT 10 // If the array already has nelems, allocate that many elements // in the list. Otherwise, start with TABLE_INCREMENT. 
if (nelems != -1) maxelems= nelems; else maxelems= TABLE_INCREMENT; initlist= (int *)malloc(maxelems *sizeof(int)); // Loop getting a new literal value from the list while (1) { // Check we can add the next value, then parse and add it if (nelems != -1 && i == maxelems) fatal("Too many values in initialisation list"); initlist[i++]= parse_literal(type); // Increase the list size if the original size was // not set and we have hit the end of the current list if (nelems == -1 && i == maxelems) { maxelems += TABLE_INCREMENT; initlist= (int *)realloc(initlist, maxelems *sizeof(int)); } // Leave when we hit the right curly bracket if (Token.token == T_RBRACE) { scan(&Token); break; } // Next token must be a comma, then comma(); } // Zero any unused elements in the initlist. // Attach the list to the symbol table entry for (j=i; j < sym->nelems; j++) initlist[j]=0; if (i > nelems) nelems = i; sym->initlist= initlist; } // Set the size of the array and the number of elements sym->nelems= nelems; sym->size= sym->nelems * typesize(type, ctype); // Generate any global space if (class == C_GLOBAL) genglobsym(sym); return (sym); } // Given a pointer to the new function being declared and // a possibly NULL pointer to the function's previous declaration, // parse a list of parameters and cross-check them against the // previous declaration. Return the count of parameters static int param_declaration_list(struct symtable *oldfuncsym, struct symtable *newfuncsym) { int type, paramcnt = 0; struct symtable *ctype; struct symtable *protoptr = NULL; struct ASTnode *unused; // Get the pointer to the first prototype parameter if (oldfuncsym != NULL) protoptr = oldfuncsym->member; // Loop getting any parameters while (Token.token != T_RPAREN) { // If the first token is 'void' if (Token.token == T_VOID) { // Peek at the next token. If a ')', the function // has no parameters, so leave the loop. 
scan(&Peektoken); if (Peektoken.token == T_RPAREN) { // Move the Peektoken into the Token paramcnt= 0; scan(&Token); break; } } // Get the type of the next parameter type = declaration_list(&ctype, C_PARAM, T_COMMA, T_RPAREN, &unused); if (type == -1) fatal("Bad type in parameter list"); // Ensure the type of this parameter matches the prototype if (protoptr != NULL) { if (type != protoptr->type) fatald("Type doesn't match prototype for parameter", paramcnt + 1); protoptr = protoptr->next; } paramcnt++; // Stop when we hit the right parenthesis if (Token.token == T_RPAREN) break; // We need a comma as separator comma(); } if (oldfuncsym != NULL && paramcnt != oldfuncsym->nelems) fatals("Parameter count mismatch for function", oldfuncsym->name); // Return the count of parameters return (paramcnt); } // // function_declaration: type identifier '(' parameter_list ')' ; // | type identifier '(' parameter_list ')' compound_statement ; // // Parse the declaration of function. static struct symtable *function_declaration(char *funcname, int type, struct symtable *ctype, int class) { struct ASTnode *tree, *finalstmt; struct symtable *oldfuncsym, *newfuncsym = NULL; int endlabel, paramcnt; // Text has the identifier's name. If this exists and is a // function, get the id. Otherwise, set oldfuncsym to NULL. if ((oldfuncsym = findsymbol(funcname)) != NULL) if (oldfuncsym->stype != S_FUNCTION) oldfuncsym = NULL; // If this is a new function declaration, get a // label-id for the end label, and add the function // to the symbol table, if (oldfuncsym == NULL) { endlabel = genlabel(); // Assumtion: functions only return scalar types, so NULL below newfuncsym = addglob(funcname, type, NULL, S_FUNCTION, C_GLOBAL, 0, endlabel); } // Scan in the '(', any parameters and the ')'. 
// Pass in any existing function prototype pointer lparen(); paramcnt = param_declaration_list(oldfuncsym, newfuncsym); rparen(); // If this is a new function declaration, update the // function symbol entry with the number of parameters. // Also copy the parameter list into the function's node. if (newfuncsym) { newfuncsym->nelems = paramcnt; newfuncsym->member = Parmhead; oldfuncsym = newfuncsym; } // Clear out the parameter list Parmhead = Parmtail = NULL; // Declaration ends in a semicolon, only a prototype. if (Token.token == T_SEMI) return (oldfuncsym); // This is not just a prototype. // Set the Functionid global to the function's symbol pointer Functionid = oldfuncsym; // Get the AST tree for the compound statement and mark // that we have parsed no loops or switches yet Looplevel = 0; Switchlevel = 0; lbrace(); tree = compound_statement(0); rbrace(); // If the function type isn't P_VOID .. if (type != P_VOID) { // Error if no statements in the function if (tree == NULL) fatal("No statements in function with non-void type"); // Check that the last AST operation in the // compound statement was a return statement finalstmt = (tree->op == A_GLUE) ? tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); } // Build the A_FUNCTION node which has the function's symbol pointer // and the compound statement sub-tree tree = mkastunary(A_FUNCTION, type, tree, oldfuncsym, endlabel); // Do optimisations on the AST tree tree= optimise(tree); // Dump the AST tree if requested if (O_dumpAST) { dumpAST(tree, NOLABEL, 0); fprintf(stdout, "\n\n"); } // Generate the assembly code for it genAST(tree, NOLABEL, NOLABEL, NOLABEL, 0); // Now free the symbols associated // with this function freeloclsyms(); return (oldfuncsym); } // Parse composite type declarations: structs or unions. // Either find an existing struct/union declaration, or build // a struct/union symbol table entry and return its pointer. 
static struct symtable *composite_declaration(int type) { struct symtable *ctype = NULL; struct symtable *m; struct ASTnode *unused; int offset; int t; // Skip the struct/union keyword scan(&Token); // See if there is a following struct/union name if (Token.token == T_IDENT) { // Find any matching composite type if (type == P_STRUCT) ctype = findstruct(Text); else ctype = findunion(Text); scan(&Token); } // If the next token isn't an LBRACE , this is // the usage of an existing struct/union type. // Return the pointer to the type. if (Token.token != T_LBRACE) { if (ctype == NULL) fatals("unknown struct/union type", Text); return (ctype); } // Ensure this struct/union type hasn't been // previously defined if (ctype) fatals("previously defined struct/union", Text); // Build the composite type and skip the left brace if (type == P_STRUCT) ctype = addstruct(Text); else ctype = addunion(Text); scan(&Token); // Scan in the list of members while (1) { // Get the next member. m is used as a dummy t= declaration_list(&m, C_MEMBER, T_SEMI, T_RBRACE, &unused); if (t== -1) fatal("Bad type in member list"); if (Token.token == T_SEMI) scan(&Token); if (Token.token == T_RBRACE) break; } // Attach to the struct type's node rbrace(); if (Membhead==NULL) fatals("No members in struct", ctype->name); ctype->member = Membhead; Membhead = Membtail = NULL; // Set the offset of the initial member // and find the first free byte after it m = ctype->member; m->st_posn = 0; offset = typesize(m->type, m->ctype); // Set the position of each successive member in the composite type // Unions are easy. 
For structs, align the member and find the next free byte for (m = m->next; m != NULL; m = m->next) { // Set the offset for this member if (type == P_STRUCT) m->st_posn = genalign(m->type, offset, 1); else m->st_posn = 0; // Get the offset of the next free byte after this member offset += typesize(m->type, m->ctype); } // Set the overall size of the composite type ctype->size = offset; return (ctype); } // Parse an enum declaration static void enum_declaration(void) { struct symtable *etype = NULL; char *name= NULL; int intval = 0; // Skip the enum keyword. scan(&Token); // If there's a following enum type name, get a // pointer to any existing enum type node. if (Token.token == T_IDENT) { etype = findenumtype(Text); name = strdup(Text); // As it gets tromped soon scan(&Token); } // If the next token isn't a LBRACE, check // that we have an enum type name, then return if (Token.token != T_LBRACE) { if (etype == NULL) fatals("undeclared enum type:", name); return; } // We do have an LBRACE. Skip it scan(&Token); // If we have an enum type name, ensure that it // hasn't been declared before. if (etype != NULL) fatals("enum type redeclared:", etype->name); else // Build an enum type node for this identifier etype = addenum(name, C_ENUMTYPE, 0); // Loop to get all the enum values while (1) { // Ensure we have an identifier // Copy it in case there's an int literal coming up ident(); name = strdup(Text); // Ensure this enum value hasn't been declared before etype = findenumval(name); if (etype != NULL) fatals("enum value redeclared:", Text); // If the next token is an '=', skip it and // get the following int literal if (Token.token == T_ASSIGN) { scan(&Token); if (Token.token != T_INTLIT) fatal("Expected int literal after '='"); intval = Token.intvalue; scan(&Token); } // Build an enum value node for this identifier. // Increment the value for the next enum identifier. 
etype = addenum(name, C_ENUMVAL, intval++); // Bail out on a right curly bracket, else get a comma if (Token.token == T_RBRACE) break; comma(); } scan(&Token); // Skip over the right curly bracket } // Parse a typedef declaration and return the type // and ctype that it represents static int typedef_declaration(struct symtable **ctype) { int type, class = 0; // Skip the typedef keyword. scan(&Token); // Get the actual type following the keyword type = parse_type(ctype, &class); if (class != 0) fatal("Can't have extern in a typedef declaration"); // See if the typedef identifier already exists if (findtypedef(Text) != NULL) fatals("redefinition of typedef", Text); // Get any following '*' tokens type = parse_stars(type); // It doesn't exist so add it to the typedef list addtypedef(Text, type, *ctype); scan(&Token); return (type); } // Given a typedef name, return the type it represents static int type_of_typedef(char *name, struct symtable **ctype) { struct symtable *t; // Look up the typedef in the list t = findtypedef(name); if (t == NULL) fatals("unknown type", name); scan(&Token); *ctype = t->ctype; return (t->type); } // Parse the declaration of a variable or function. // The type and any following '*'s have been scanned, and we // have the identifier in the Token variable. // The class argument is the variable's class. // Return a pointer to the symbol's entry in the symbol table static struct symtable *symbol_declaration(int type, struct symtable *ctype, int class, struct ASTnode **tree) { struct symtable *sym = NULL; char *varname = strdup(Text); // Ensure that we have an identifier. // We copied it above so we can scan more tokens in, e.g. // an assignment expression for a local variable. 
ident(); // Deal with function declarations if (Token.token == T_LPAREN) { return (function_declaration(varname, type, ctype, class)); } // See if this array or scalar variable has already been declared switch (class) { case C_EXTERN: case C_GLOBAL: if (findglob(varname) != NULL) fatals("Duplicate global variable declaration", varname); case C_LOCAL: case C_PARAM: if (findlocl(varname) != NULL) fatals("Duplicate local variable declaration", varname); case C_MEMBER: if (findmember(varname) != NULL) fatals("Duplicate struct/union member declaration", varname); } // Add the array or scalar variable to the symbol table if (Token.token == T_LBRACKET) sym = array_declaration(varname, type, ctype, class); else sym = scalar_declaration(varname, type, ctype, class, tree); return (sym); } // Parse a list of symbols where there is an initial type. // Return the type of the symbols. et1 and et2 are end tokens. int declaration_list(struct symtable **ctype, int class, int et1, int et2, struct ASTnode **gluetree) { int inittype, type; struct symtable *sym; struct ASTnode *tree; *gluetree= NULL; // Get the initial type. 
If -1, it was // a composite type definition, return this if ((inittype = parse_type(ctype, &class)) == -1) return (inittype); // Now parse the list of symbols while (1) { // See if this symbol is a pointer type = parse_stars(inittype); // Parse this symbol sym = symbol_declaration(type, *ctype, class, &tree); // We parsed a function, there is no list so leave if (sym->stype == S_FUNCTION) { if (class != C_GLOBAL) fatal("Function definition not at global level"); return (type); } // Glue any AST tree from a local declaration // to build a sequence of assignments to perform if (*gluetree== NULL) *gluetree= tree; else *gluetree = mkastnode(A_GLUE, P_NONE, *gluetree, NULL, tree, NULL, 0); // We are at the end of the list, leave if (Token.token == et1 || Token.token == et2) return (type); // Otherwise, we need a comma as separator comma(); } } // Parse one or more global declarations, either // variables, functions or structs void global_declarations(void) { struct symtable *ctype; struct ASTnode *unused; while (Token.token != T_EOF) { declaration_list(&ctype, C_GLOBAL, T_SEMI, T_EOF, &unused); // Skip any semicolons and right curly brackets if (Token.token == T_SEMI) scan(&Token); } } ================================================ FILE: 47_Sizeof/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c void reject_token(struct token *t); int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue); struct ASTnode *mkastleaf(int op, int type, struct symtable *sym, int intvalue); struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, struct symtable *sym, int intvalue); void dumpAST(struct ASTnode *n, int label, int parentASTop); // gen.c int genlabel(void); int genAST(struct ASTnode *n, int iflabel, int looptoplabel, int loopendlabel, int 
parentASTop); void genpreamble(); void genpostamble(); void genfreeregs(); void genglobsym(struct symtable *node); int genglobstr(char *strvalue); int genprimsize(int type); int genalign(int type, int offset, int direction); void genreturn(int reg, int id); // cg.c int cgprimsize(int type); int cgalign(int type, int offset, int direction); void cgtextseg(); void cgdataseg(); void freeall_registers(void); void cgpreamble(); void cgpostamble(); void cgfuncpreamble(struct symtable *sym); void cgfuncpostamble(struct symtable *sym); int cgloadint(int value, int type); int cgloadglob(struct symtable *sym, int op); int cgloadlocal(struct symtable *sym, int op); int cgloadglobstr(int label); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdiv(int r1, int r2); int cgshlconst(int r, int val); int cgcall(struct symtable *sym, int numargs); void cgcopyarg(int r, int argposn); int cgstorglob(int r, struct symtable *sym); int cgstorlocal(int r, struct symtable *sym); void cgglobsym(struct symtable *node); void cgglobstr(int l, char *strvalue); int cgcompare_and_set(int ASTop, int r1, int r2); int cgcompare_and_jump(int ASTop, int r1, int r2, int label); void cglabel(int l); void cgjump(int l); int cgwiden(int r, int oldtype, int newtype); void cgreturn(int reg, struct symtable *sym); int cgaddress(struct symtable *sym); int cgderef(int r, int type); int cgstorderef(int r1, int r2, int type); int cgnegate(int r); int cginvert(int r); int cglognot(int r); int cgboolean(int r, int op, int label); int cgand(int r1, int r2); int cgor(int r1, int r2); int cgxor(int r1, int r2); int cgshl(int r1, int r2); int cgshr(int r1, int r2); void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel); // expr.c struct ASTnode *expression_list(int endtoken); struct ASTnode *binexpr(int ptp); // stmt.c struct ASTnode *compound_statement(int inswitch); // misc.c void match(int t, char *what); void semi(void); void 
lbrace(void); void rbrace(void); void lparen(void); void rparen(void); void ident(void); void comma(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node); struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn); struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn); struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int nelems); struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype); struct symtable *addstruct(char *name); struct symtable *addunion(char *name); struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int nelems); struct symtable *addenum(char *name, int class, int value); struct symtable *addtypedef(char *name, int type, struct symtable *ctype); struct symtable *findglob(char *s); struct symtable *findlocl(char *s); struct symtable *findsymbol(char *s); struct symtable *findmember(char *s); struct symtable *findstruct(char *s); struct symtable *findunion(char *s); struct symtable *findenumtype(char *s); struct symtable *findenumval(char *s); struct symtable *findtypedef(char *s); void clear_symtable(void); void freeloclsyms(void); // decl.c int parse_type(struct symtable **ctype, int *class); int parse_stars(int type); int parse_cast(void); int declaration_list(struct symtable **ctype, int class, int et1, int et2, struct ASTnode **gluetree); void global_declarations(void); // types.c int inttype(int type); int ptrtype(int type); int pointer_to(int type); int value_at(int type); int typesize(int type, struct symtable *ctype); struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op); // opt.c struct ASTnode *optimise(struct ASTnode *n); 
================================================ FILE: 47_Sizeof/defs.h ================================================ #include #include #include #include // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 enum { TEXTLEN = 512 // Length of identifiers in input }; // Commands and default filenames #define AOUT "a.out" #ifdef __NASM__ #define ASCMD "nasm -f elf64 -o " #define LDCMD "cc -no-pie -fno-plt -Wall -o " #else #define ASCMD "as -o " #define LDCMD "cc -o " #endif #define CPPCMD "cpp -nostdinc -isystem " // Token types enum { T_EOF, // Binary operators T_ASSIGN, T_ASPLUS, T_ASMINUS, T_ASSTAR, T_ASSLASH, T_LOGOR, T_LOGAND, T_OR, T_XOR, T_AMPER, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, T_LSHIFT, T_RSHIFT, T_PLUS, T_MINUS, T_STAR, T_SLASH, // Other operators T_INC, T_DEC, T_INVERT, T_LOGNOT, // Type keywords T_VOID, T_CHAR, T_INT, T_LONG, // Other keywords T_IF, T_ELSE, T_WHILE, T_FOR, T_RETURN, T_STRUCT, T_UNION, T_ENUM, T_TYPEDEF, T_EXTERN, T_BREAK, T_CONTINUE, T_SWITCH, T_CASE, T_DEFAULT, T_SIZEOF, // Structural tokens T_INTLIT, T_STRLIT, T_SEMI, T_IDENT, T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, T_LBRACKET, T_RBRACKET, T_COMMA, T_DOT, T_ARROW, T_COLON }; // Token structure struct token { int token; // Token type, from the enum list above char *tokstr; // String version of the token int intvalue; // For T_INTLIT, the integer value }; // AST node types. The first few line up // with the related tokens enum { A_ASSIGN = 1, A_ASPLUS, A_ASMINUS, A_ASSTAR, A_ASSLASH, A_LOGOR, A_LOGAND, A_OR, A_XOR, A_AND, A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_LSHIFT, A_RSHIFT, A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_INTLIT, A_STRLIT, A_IDENT, A_GLUE, A_IF, A_WHILE, A_FUNCTION, A_WIDEN, A_RETURN, A_FUNCCALL, A_DEREF, A_ADDR, A_SCALE, A_PREINC, A_PREDEC, A_POSTINC, A_POSTDEC, A_NEGATE, A_INVERT, A_LOGNOT, A_TOBOOL, A_BREAK, A_CONTINUE, A_SWITCH, A_CASE, A_DEFAULT, A_CAST, }; // Primitive types. 
The bottom 4 bits is an integer // value that represents the level of indirection, // e.g. 0= no pointer, 1= pointer, 2= pointer pointer etc. enum { P_NONE, P_VOID = 16, P_CHAR = 32, P_INT = 48, P_LONG = 64, P_STRUCT=80, P_UNION=96 }; // Structural types enum { S_VARIABLE, S_FUNCTION, S_ARRAY }; // Storage classes enum { C_GLOBAL = 1, // Globally visible symbol C_LOCAL, // Locally visible symbol C_PARAM, // Locally visible function parameter C_EXTERN, // External globally visible symbol C_STRUCT, // A struct C_UNION, // A union C_MEMBER, // Member of a struct or union C_ENUMTYPE, // A named enumeration type C_ENUMVAL, // A named enumeration value C_TYPEDEF // A named typedef }; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol struct symtable *ctype; // If struct/union, ptr to that type int stype; // Structural type for the symbol int class; // Storage class for the symbol int size; // Total size in bytes of this symbol int nelems; // Functions: # params. 
Arrays: # elements #define st_endlabel st_posn // For functions, the end label int st_posn; // For locals, the negative offset // from the stack base pointer int *initlist; // List of initial values struct symtable *next; // Next symbol in one list struct symtable *member; // First member of a function, struct, }; // union or enum // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates int rvalue; // True if the node is an rvalue struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; struct symtable *sym; // For many AST nodes, the pointer to // the symbol in the symbol table #define a_intvalue a_size // For A_INTLIT, the integer value int a_size; // For A_SCALE, the size to scale by }; enum { NOREG = -1, // Use NOREG when the AST generation // functions have no register to return NOLABEL = 0 // Use NOLABEL when we have no label to // pass to genAST() }; ================================================ FILE: 47_Sizeof/expr.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of expressions // Copyright (c) 2019 Warren Toomey, GPL3 // expression_list: // | expression // | expression ',' expression_list // ; // Parse a list of zero or more comma-separated expressions and // return an AST composed of A_GLUE nodes with the left-hand child // being the sub-tree of previous expressions (or NULL) and the right-hand // child being the next expression. Each A_GLUE node will have size field // set to the number of expressions in the tree at this point. 
If no // expressions are parsed, NULL is returned struct ASTnode *expression_list(int endtoken) { struct ASTnode *tree = NULL; struct ASTnode *child = NULL; int exprcount = 0; // Loop until the end token while (Token.token != endtoken) { // Parse the next expression and increment the expression count child = binexpr(0); exprcount++; // Build an A_GLUE AST node with the previous tree as the left child // and the new expression as the right child. Store the expression count. tree = mkastnode(A_GLUE, P_NONE, tree, NULL, child, NULL, exprcount); // Stop when we reach the end token if (Token.token == endtoken) break; // Must have a ',' at this point match(T_COMMA, ","); } // Return the tree of expressions return (tree); } // Parse a function call and return its AST static struct ASTnode *funccall(void) { struct ASTnode *tree; struct symtable *funcptr; // Check that the identifier has been defined as a function, // then make a leaf node for it. if ((funcptr = findsymbol(Text)) == NULL || funcptr->stype != S_FUNCTION) { fatals("Undeclared function", Text); } // Get the '(' lparen(); // Parse the argument expression list tree = expression_list(T_RPAREN); // XXX Check type of each argument against the function's prototype // Build the function call AST node. Store the // function's return type as this node's type. 
// Also record the function's symbol-id tree = mkastunary(A_FUNCCALL, funcptr->type, tree, funcptr, 0); // Get the ')' rparen(); return (tree); } // Parse the index into an array and return an AST tree for it static struct ASTnode *array_access(void) { struct ASTnode *left, *right; struct symtable *aryptr; // Check that the identifier has been defined as an array // then make a leaf node for it that points at the base if ((aryptr = findsymbol(Text)) == NULL || aryptr->stype != S_ARRAY) { fatals("Undeclared array", Text); } left = mkastleaf(A_ADDR, aryptr->type, aryptr, 0); // Get the '[' scan(&Token); // Parse the following expression right = binexpr(0); // Get the ']' match(T_RBRACKET, "]"); // Ensure that this is of int type if (!inttype(right->type)) fatal("Array index is not of integer type"); // Scale the index by the size of the element's type right = modify_type(right, left->type, A_ADD); // Return an AST tree where the array's base has the offset // added to it, and dereference the element. Still an lvalue // at this point. left = mkastnode(A_ADD, aryptr->type, left, NULL, right, NULL, 0); left = mkastunary(A_DEREF, value_at(left->type), left, NULL, 0); return (left); } // Parse the member reference of a struct or union // and return an AST tree for it. If withpointer is true, // the access is through a pointer to the member. static struct ASTnode *member_access(int withpointer) { struct ASTnode *left, *right; struct symtable *compvar; struct symtable *typeptr; struct symtable *m; // Check that the identifier has been declared as a // struct/union or a struct/union pointer if ((compvar = findsymbol(Text)) == NULL) fatals("Undeclared variable", Text); if (withpointer && compvar->type != pointer_to(P_STRUCT) && compvar->type != pointer_to(P_UNION)) fatals("Undeclared variable", Text); if (!withpointer && compvar->type != P_STRUCT && compvar->type != P_UNION) fatals("Undeclared variable", Text); // If a pointer to a struct/union, get the pointer's value. 
// Otherwise, make a leaf node that points at the base // Either way, it's an rvalue if (withpointer) { left = mkastleaf(A_IDENT, pointer_to(compvar->type), compvar, 0); } else left = mkastleaf(A_ADDR, compvar->type, compvar, 0); left->rvalue = 1; // Get the details of the composite type typeptr = compvar->ctype; // Skip the '.' or '->' token and get the member's name scan(&Token); ident(); // Find the matching member's name in the type // Die if we can't find it for (m = typeptr->member; m != NULL; m = m->next) if (!strcmp(m->name, Text)) break; if (m == NULL) fatals("No member found in struct/union: ", Text); // Build an A_INTLIT node with the offset right = mkastleaf(A_INTLIT, P_INT, NULL, m->st_posn); // Add the member's offset to the base of the struct/union // and dereference it. Still an lvalue at this point left = mkastnode(A_ADD, pointer_to(m->type), left, NULL, right, NULL, 0); left = mkastunary(A_DEREF, m->type, left, NULL, 0); return (left); } // Parse a postfix expression and return // an AST node representing it. The // identifier is already in Text. static struct ASTnode *postfix(void) { struct ASTnode *n; struct symtable *varptr; struct symtable *enumptr; // If the identifier matches an enum value, // return an A_INTLIT node if ((enumptr = findenumval(Text)) != NULL) { scan(&Token); return (mkastleaf(A_INTLIT, P_INT, NULL, enumptr->st_posn)); } // Scan in the next token to see if we have a postfix expression scan(&Token); // Function call if (Token.token == T_LPAREN) return (funccall()); // An array reference if (Token.token == T_LBRACKET) return (array_access()); // Access into a struct or union if (Token.token == T_DOT) return (member_access(0)); if (Token.token == T_ARROW) return (member_access(1)); // A variable. Check that the variable exists. 
if ((varptr = findsymbol(Text)) == NULL || varptr->stype != S_VARIABLE) fatals("Unknown variable", Text); switch (Token.token) { // Post-increment: skip over the token case T_INC: scan(&Token); n = mkastleaf(A_POSTINC, varptr->type, varptr, 0); break; // Post-decrement: skip over the token case T_DEC: scan(&Token); n = mkastleaf(A_POSTDEC, varptr->type, varptr, 0); break; // Just a variable reference default: n = mkastleaf(A_IDENT, varptr->type, varptr, 0); } return (n); } // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; int id; int type=0; int size, class; struct symtable *ctype; switch (Token.token) { case T_SIZEOF: // Skip the T_SIZEOF and ensure we have a left parenthesis scan(&Token); if (Token.token != T_LPAREN) fatal("Left parenthesis expected after sizeof"); scan(&Token); // Get the type inside the parentheses type= parse_stars(parse_type(&ctype, &class)); // Get the type's size size= typesize(type, ctype); rparen(); // Return a leaf node int literal with the size return (mkastleaf(A_INTLIT, P_INT, NULL, size)); case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. // Make it a P_CHAR if it's within the P_CHAR range if (Token.intvalue >= 0 && Token.intvalue < 256) n = mkastleaf(A_INTLIT, P_CHAR, NULL, Token.intvalue); else n = mkastleaf(A_INTLIT, P_INT, NULL, Token.intvalue); break; case T_STRLIT: // For a STRLIT token, generate the assembly for it. // Then make a leaf AST node for it. id is the string's label. id = genglobstr(Text); n = mkastleaf(A_STRLIT, pointer_to(P_CHAR), NULL, id); break; case T_IDENT: return (postfix()); case T_LPAREN: // Beginning of a parenthesised expression, skip the '('. scan(&Token); // If the token after is a type identifier, this is a cast expression switch (Token.token) { case T_IDENT: // We have to see if the identifier matches a typedef. // If not, treat it as an expression. 
if (findtypedef(Text) == NULL) { n = binexpr(0); break; } case T_VOID: case T_CHAR: case T_INT: case T_LONG: case T_STRUCT: case T_UNION: case T_ENUM: // Get the type inside the parentheses type= parse_cast(); // Skip the closing ')' and then parse the following expression rparen(); default: n = binexpr(0); // Scan in the expression } // We now have at least an expression in n, and possibly a non-zero type in type // if there was a cast. Skip the closing ')' if there was no cast. if (type == 0) rparen(); else // Otherwise, make a unary AST node for the cast n= mkastunary(A_CAST, type, n, NULL, 0); return (n); default: fatals("Expecting a primary expression, got token", Token.tokstr); } // Scan in the next token and return the leaf node scan(&Token); return (n); } // Convert a binary operator token into a binary AST operation. // We rely on a 1:1 mapping from token to AST operation static int binastop(int tokentype) { if (tokentype > T_EOF && tokentype <= T_SLASH) return (tokentype); fatald("Syntax error, token", tokentype); return (0); // Keep -Wall happy } // Return true if a token is right-associative, // false otherwise. static int rightassoc(int tokentype) { if (tokentype >= T_ASSIGN && tokentype <= T_ASSLASH) return (1); return (0); } // Operator precedence for each token. Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 10, // T_EOF, T_ASSIGN, T_ASPLUS, 10, 10, 10, // T_ASMINUS, T_ASSTAR, T_ASSLASH, 20, 30, // T_LOGOR, T_LOGAND 40, 50, 60, // T_OR, T_XOR, T_AMPER 70, 70, // T_EQ, T_NE 80, 80, 80, 80, // T_LT, T_GT, T_LE, T_GE 90, 90, // T_LSHIFT, T_RSHIFT 100, 100, // T_PLUS, T_MINUS 110, 110 // T_STAR, T_SLASH }; // Check that we have a binary operator and // return its precedence. 
static int op_precedence(int tokentype) { int prec; if (tokentype > T_SLASH) fatald("Token with no precedence in op_precedence:", tokentype); prec = OpPrec[tokentype]; if (prec == 0) fatald("Syntax error, token", tokentype); return (prec); } // prefix_expression: primary // | '*' prefix_expression // | '&' prefix_expression // | '-' prefix_expression // | '++' prefix_expression // | '--' prefix_expression // ; // Parse a prefix expression and return // a sub-tree representing it. struct ASTnode *prefix(void) { struct ASTnode *tree; switch (Token.token) { case T_AMPER: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Ensure that it's an identifier if (tree->op != A_IDENT) fatal("& operator must be followed by an identifier"); // Now change the operator to A_ADDR and the type to // a pointer to the original type tree->op = A_ADDR; tree->type = pointer_to(tree->type); break; case T_STAR: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's either another deref or an // identifier if (tree->op != A_IDENT && tree->op != A_DEREF) fatal("* operator must be followed by an identifier or *"); // Prepend an A_DEREF operation to the tree tree = mkastunary(A_DEREF, value_at(tree->type), tree, NULL, 0); break; case T_MINUS: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_NEGATE operation to the tree and // make the child an rvalue. Because chars are unsigned, // also widen this to int so that it's signed tree->rvalue = 1; tree = modify_type(tree, P_INT, 0); tree = mkastunary(A_NEGATE, tree->type, tree, NULL, 0); break; case T_INVERT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_INVERT operation to the tree and // make the child an rvalue. 
tree->rvalue = 1; tree = mkastunary(A_INVERT, tree->type, tree, NULL, 0); break; case T_LOGNOT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_LOGNOT operation to the tree and // make the child an rvalue. tree->rvalue = 1; tree = mkastunary(A_LOGNOT, tree->type, tree, NULL, 0); break; case T_INC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("++ operator must be followed by an identifier"); // Prepend an A_PREINC operation to the tree tree = mkastunary(A_PREINC, tree->type, tree, NULL, 0); break; case T_DEC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("-- operator must be followed by an identifier"); // Prepend an A_PREDEC operation to the tree tree = mkastunary(A_PREDEC, tree->type, tree, NULL, 0); break; default: tree = primary(); } return (tree); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; struct ASTnode *ltemp, *rtemp; int ASTop; int tokentype; // Get the tree on the left. // Fetch the next token at the same time. 
left = prefix(); // If we hit one of several terminating tokens, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON || tokentype == T_RBRACE) { left->rvalue = 1; return (left); } // While the precedence of this token is more than that of the // previous token precedence, or it's right associative and // equal to the previous token's precedence while ((op_precedence(tokentype) > ptp) || (rightassoc(tokentype) && op_precedence(tokentype) == ptp)) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Determine the operation to be performed on the sub-trees ASTop = binastop(tokentype); if (ASTop == A_ASSIGN) { // Assignment // Make the right tree into an rvalue right->rvalue = 1; // Ensure the right's type matches the left right = modify_type(right, left->type, 0); if (right == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree. However, switch // left and right around, so that the right expression's // code will be generated before the left expression ltemp = left; left = right; right = ltemp; } else { // We are not doing an assignment, so both trees should be rvalues // Convert both trees into rvalue if they are lvalue trees left->rvalue = 1; right->rvalue = 1; // Ensure the two types are compatible by trying // to modify each tree to match the other's type. ltemp = modify_type(left, right->type, ASTop); rtemp = modify_type(right, left->type, ASTop); if (ltemp == NULL && rtemp == NULL) fatal("Incompatible types in binary expression"); if (ltemp != NULL) left = ltemp; if (rtemp != NULL) right = rtemp; } // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. 
left = mkastnode(binastop(tokentype), left->type, left, NULL, right, NULL, 0); // Update the details of the current token. // If we hit a terminating token, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON || tokentype == T_RBRACE) { left->rvalue = 1; return (left); } } // Return the tree we have when the precedence // is the same or lower left->rvalue = 1; return (left); } ================================================ FILE: 47_Sizeof/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number int genlabel(void) { static int id = 1; return (id++); } // Generate the code for an IF statement // and an optional ELSE clause static int genIF(struct ASTnode *n, int looptoplabel, int loopendlabel) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. // When there is no ELSE clause, Lfalse _is_ // the ending label! Lfalse = genlabel(); if (n->right) Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. 
genAST(n->left, Lfalse, NOLABEL, NOLABEL, n->op); genfreeregs(); // Generate the true compound statement genAST(n->mid, NOLABEL, looptoplabel, loopendlabel, n->op); genfreeregs(); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); genfreeregs(); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = genlabel(); Lend = genlabel(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. genAST(n->left, Lend, Lstart, Lend, n->op); genfreeregs(); // Generate the compound statement for the body genAST(n->right, NOLABEL, Lstart, Lend, n->op); genfreeregs(); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Generate the code for a SWITCH statement static int genSWITCH(struct ASTnode *n) { int *caseval, *caselabel; int Ljumptop, Lend; int i, reg, defaultlabel = 0, casecount = 0; struct ASTnode *c; // Create arrays for the case values and associated labels. // Ensure that we have at least one position in each array. caseval = (int *) malloc((n->a_intvalue + 1) * sizeof(int)); caselabel = (int *) malloc((n->a_intvalue + 1) * sizeof(int)); // Generate labels for the top of the jump table, and the // end of the switch statement. Set a default label for // the end of the switch, in case we don't have a default. 
Ljumptop = genlabel(); Lend = genlabel(); defaultlabel = Lend; // Output the code to calculate the switch condition reg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, 0); cgjump(Ljumptop); genfreeregs(); // Walk the right-child linked list to // generate the code for each case for (i = 0, c = n->right; c != NULL; i++, c = c->right) { // Get a label for this case. Store it // and the case value in the arrays. // Record if it is the default case. caselabel[i] = genlabel(); caseval[i] = c->a_intvalue; cglabel(caselabel[i]); if (c->op == A_DEFAULT) defaultlabel = caselabel[i]; else casecount++; // Generate the case code. Pass in the end label for the breaks genAST(c->left, NOLABEL, NOLABEL, Lend, 0); genfreeregs(); } // Ensure the last case jumps past the switch table cgjump(Lend); // Now output the switch table and the end label. cgswitch(reg, casecount, Ljumptop, caselabel, caseval, defaultlabel); cglabel(Lend); return (NOREG); } // Generate the code to copy the arguments of a // function call to its parameters, then call the // function itself. Return the register that holds // the function's return value. static int gen_funccall(struct ASTnode *n) { struct ASTnode *gluetree = n->left; int reg; int numargs = 0; // If there is a list of arguments, walk this list // from the last argument (right-hand child) to the // first while (gluetree) { // Calculate the expression's value reg = genAST(gluetree->right, NOLABEL, NOLABEL, NOLABEL, gluetree->op); // Copy this into the n'th function parameter: size is 1, 2, 3, ... cgcopyarg(reg, gluetree->a_size); // Keep the first (highest) number of arguments if (numargs == 0) numargs = gluetree->a_size; genfreeregs(); gluetree = gluetree->left; } // Call the function, clean up the stack (based on numargs), // and return its result return (cgcall(n->sym, numargs)); } // Given an AST, an optional label, and the AST op // of the parent, generate assembly code recursively. // Return the register id with the tree's final value. 
int genAST(struct ASTnode *n, int iflabel, int looptoplabel, int loopendlabel, int parentASTop) { int leftreg, rightreg; // We have some specific AST node handling at the top // so that we don't evaluate the child sub-trees immediately switch (n->op) { case A_IF: return (genIF(n, looptoplabel, loopendlabel)); case A_WHILE: return (genWHILE(n)); case A_SWITCH: return (genSWITCH(n)); case A_FUNCCALL: return (gen_funccall(n)); case A_GLUE: // Do each child statement, and free the // registers after each child if (n->left != NULL) genAST(n->left, iflabel, looptoplabel, loopendlabel, n->op); genfreeregs(); if (n->right != NULL) genAST(n->right, iflabel, looptoplabel, loopendlabel, n->op); genfreeregs(); return (NOREG); case A_FUNCTION: // Generate the function's preamble before the code // in the child sub-tree cgfuncpreamble(n->sym); genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op); cgfuncpostamble(n->sym); return (NOREG); } // General AST node handling below // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op); if (n->right) rightreg = genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdiv(leftreg, rightreg)); case A_AND: return (cgand(leftreg, rightreg)); case A_OR: return (cgor(leftreg, rightreg)); case A_XOR: return (cgxor(leftreg, rightreg)); case A_LSHIFT: return (cgshl(leftreg, rightreg)); case A_RSHIFT: return (cgshr(leftreg, rightreg)); case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, compare registers // and set one to 1 or 0 based on the comparison. 
if (parentASTop == A_IF || parentASTop == A_WHILE) return (cgcompare_and_jump(n->op, leftreg, rightreg, iflabel)); else return (cgcompare_and_set(n->op, leftreg, rightreg)); case A_INTLIT: return (cgloadint(n->a_intvalue, n->type)); case A_STRLIT: return (cgloadglobstr(n->a_intvalue)); case A_IDENT: // Load our value if we are an rvalue // or we are being dereferenced if (n->rvalue || parentASTop == A_DEREF) { if (n->sym->class == C_GLOBAL) { return (cgloadglob(n->sym, n->op)); } else { return (cgloadlocal(n->sym, n->op)); } } else return (NOREG); case A_ASPLUS: case A_ASMINUS: case A_ASSTAR: case A_ASSLASH: case A_ASSIGN: // For the '+=' and friends operators, generate suitable code // and get the register with the result. Then take the left child, // make it the right child so that we can fall into the assignment code. switch (n->op) { case A_ASPLUS: leftreg= cgadd(leftreg, rightreg); n->right= n->left; break; case A_ASMINUS: leftreg= cgsub(leftreg, rightreg); n->right= n->left; break; case A_ASSTAR: leftreg= cgmul(leftreg, rightreg); n->right= n->left; break; case A_ASSLASH: leftreg= cgdiv(leftreg, rightreg); n->right= n->left; break; } // Now into the assignment code // Are we assigning to an identifier or through a pointer? 
switch (n->right->op) { case A_IDENT: if (n->right->sym->class == C_GLOBAL) return (cgstorglob(leftreg, n->right->sym)); else return (cgstorlocal(leftreg, n->right->sym)); case A_DEREF: return (cgstorderef(leftreg, rightreg, n->right->type)); default: fatald("Can't A_ASSIGN in genAST(), op", n->op); } case A_WIDEN: // Widen the child's type to the parent's type return (cgwiden(leftreg, n->left->type, n->type)); case A_RETURN: cgreturn(leftreg, Functionid); return (NOREG); case A_ADDR: return (cgaddress(n->sym)); case A_DEREF: // If we are an rvalue, dereference to get the value we point at, // otherwise leave it for A_ASSIGN to store through the pointer if (n->rvalue) return (cgderef(leftreg, n->left->type)); else return (leftreg); case A_SCALE: // Small optimisation: use shift if the // scale value is a known power of two switch (n->a_size) { case 2: return (cgshlconst(leftreg, 1)); case 4: return (cgshlconst(leftreg, 2)); case 8: return (cgshlconst(leftreg, 3)); default: // Load a register with the size and // multiply the leftreg by this size rightreg = cgloadint(n->a_size, P_INT); return (cgmul(leftreg, rightreg)); } case A_POSTINC: case A_POSTDEC: // Load and decrement the variable's value into a register // and post increment/decrement it if (n->sym->class == C_GLOBAL) return (cgloadglob(n->sym, n->op)); else return (cgloadlocal(n->sym, n->op)); case A_PREINC: case A_PREDEC: // Load and decrement the variable's value into a register // and pre increment/decrement it if (n->left->sym->class == C_GLOBAL) return (cgloadglob(n->left->sym, n->op)); else return (cgloadlocal(n->left->sym, n->op)); case A_NEGATE: return (cgnegate(leftreg)); case A_INVERT: return (cginvert(leftreg)); case A_LOGNOT: return (cglognot(leftreg)); case A_TOBOOL: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. 
Otherwise, set the register // to 0 or 1 based on it's zeroeness or non-zeroeness return (cgboolean(leftreg, parentASTop, iflabel)); case A_BREAK: cgjump(loopendlabel); return (NOREG); case A_CONTINUE: cgjump(looptoplabel); return (NOREG); case A_CAST: return (leftreg); // Not much to do default: fatald("Unknown AST operator", n->op); } return (NOREG); // Keep -Wall happy } void genpreamble() { cgpreamble(); } void genpostamble() { cgpostamble(); } void genfreeregs() { freeall_registers(); } void genglobsym(struct symtable *node) { cgglobsym(node); } int genglobstr(char *strvalue) { int l = genlabel(); cgglobstr(l, strvalue); return (l); } int genprimsize(int type) { return (cgprimsize(type)); } int genalign(int type, int offset, int direction) { return (cgalign(type, offset, direction)); } ================================================ FILE: 47_Sizeof/include/ctype.h ================================================ #ifndef _CTYPE_H_ # define _CTYPE_H_ int isalnum(int c); int isalpha(int c); int iscntrl(int c); int isdigit(int c); int isgraph(int c); int islower(int c); int isprint(int c); int ispunct(int c); int isspace(int c); int isupper(int c); int isxdigit(int c); int isascii(int c); int isblank(int c); #endif // _CTYPE_H_ ================================================ FILE: 47_Sizeof/include/errno.h ================================================ #ifndef _ERRNO_H_ # define _ERRNO_H_ #endif // _ERRNO_H_ ================================================ FILE: 47_Sizeof/include/fcntl.h ================================================ #ifndef _FCNTL_H_ # define _FCNTL_H_ #define O_RDONLY 00 #define O_WRONLY 01 #define O_RDWR 02 int open(char *pathname, int flags); #endif // _FCNTL_H_ ================================================ FILE: 47_Sizeof/include/stddef.h ================================================ #ifndef _STDDEF_H_ # define _STDDEF_H_ #ifndef NULL # define NULL (void *)0 #endif typedef long size_t; #endif //_STDDEF_H_ 
================================================ FILE: 47_Sizeof/include/stdio.h ================================================ #ifndef _STDIO_H_ # define _STDIO_H_ #include #ifndef NULL # define NULL (void *)0 #endif // This FILE definition will do for now typedef char * FILE; FILE *fopen(char *pathname, char *mode); size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream); size_t fwrite(void *ptr, size_t size, size_t nmemb, FILE *stream); int fclose(FILE *stream); int printf(char *format); int fprintf(FILE *stream, char *format); int fputc(int c, FILE *stream); int fputs(char *s, FILE *stream); int putc(int c, FILE *stream); int putchar(int c); int puts(char *s); extern FILE *stdin; extern FILE *stdout; extern FILE *stderr; #endif // _STDIO_H_ ================================================ FILE: 47_Sizeof/include/stdlib.h ================================================ #ifndef _STDLIB_H_ # define _STDLIB_H_ void exit(int status); void _Exit(int status); void *malloc(int size); void free(void *ptr); void *calloc(int nmemb, int size); void *realloc(void *ptr, int size); #endif // _STDLIB_H_ ================================================ FILE: 47_Sizeof/include/string.h ================================================ #ifndef _STRING_H_ # define _STRING_H_ char *strdup(char *s); char *strchr(char *s, int c); char *strrchr(char *s, int c); #endif // _STRING_H_ ================================================ FILE: 47_Sizeof/include/unistd.h ================================================ #ifndef _UNISTD_H_ # define _UNISTD_H_ void _exit(int status); int unlink(char *pathname); #endif // _UNISTD_H_ ================================================ FILE: 47_Sizeof/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Given a string with a '.' 
and at least a 1-character suffix // after the '.', change the suffix to be the given character. // Return the new string or NULL if the original string could // not be modified char *alter_suffix(char *str, char suffix) { char *posn; char *newstr; // Clone the string if ((newstr = strdup(str)) == NULL) return (NULL); // Find the '.' if ((posn = strrchr(newstr, '.')) == NULL) return (NULL); // Ensure there is a suffix posn++; if (*posn == '\0') return (NULL); // Change the suffix and NUL-terminate the string *posn = suffix; posn++; *posn = '\0'; return (newstr); } // Given an input filename, compile that file // down to assembly code. Return the new file's name static char *do_compile(char *filename) { char cmd[TEXTLEN]; // Change the input file's suffix to .s Outfilename = alter_suffix(filename, 's'); if (Outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .c on the end\n", filename); exit(1); } // Generate the pre-processor command snprintf(cmd, TEXTLEN, "%s %s %s", CPPCMD, INCDIR, filename); // Open up the pre-processor pipe if ((Infile = popen(cmd, "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", filename, strerror(errno)); exit(1); } Infilename = filename; // Create the output file if ((Outfile = fopen(Outfilename, "w")) == NULL) { fprintf(stderr, "Unable to create %s: %s\n", Outfilename, strerror(errno)); exit(1); } Line = 1; // Reset the scanner Putback = '\n'; clear_symtable(); // Clear the symbol table if (O_verbose) printf("compiling %s\n", filename); scan(&Token); // Get the first token from the input Peektoken.token= 0; // and set there is no lookahead token genpreamble(); // Output the preamble global_declarations(); // Parse the global declarations genpostamble(); // Output the postamble fclose(Outfile); // Close the output file return (Outfilename); } // Given an input filename, assemble that file // down to object code. 
Return the object filename char *do_assemble(char *filename) { char cmd[TEXTLEN]; int err; char *outfilename = alter_suffix(filename, 'o'); if (outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .s on the end\n", filename); exit(1); } // Build the assembly command and run it snprintf(cmd, TEXTLEN, "%s %s %s", ASCMD, outfilename, filename); if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Assembly of %s failed\n", filename); exit(1); } return (outfilename); } // Given a list of object files and an output filename, // link all of the object filenames together. void do_link(char *outfilename, char *objlist[]) { int cnt, size = TEXTLEN; char cmd[TEXTLEN], *cptr; int err; // Start with the linker command and the output file cptr = cmd; cnt = snprintf(cptr, size, "%s %s ", LDCMD, outfilename); cptr += cnt; size -= cnt; // Now append each object file while (*objlist != NULL) { cnt = snprintf(cptr, size, "%s ", *objlist); cptr += cnt; size -= cnt; objlist++; } if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Linking failed\n"); exit(1); } } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s [-vcST] [-o outfile] file [file ...]\n", prog); fprintf(stderr, " -v give verbose output of the compilation stages\n"); fprintf(stderr, " -c generate object files but don't link them\n"); fprintf(stderr, " -S generate assembly files but don't link them\n"); fprintf(stderr, " -T dump the AST trees for each input file\n"); fprintf(stderr, " -o outfile, produce the outfile executable file\n"); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. 
enum { MAXOBJ = 100 }; int main(int argc, char *argv[]) { char *outfilename = AOUT; char *asmfile, *objfile; char *objlist[MAXOBJ]; int i, objcnt = 0; // Initialise our variables O_dumpAST = 0; O_keepasm = 0; O_assemble = 0; O_verbose = 0; O_dolink = 1; // Scan for command-line options for (i = 1; i < argc; i++) { // No leading '-', stop scanning for options if (*argv[i] != '-') break; // For each option in this argument for (int j = 1; (*argv[i] == '-') && argv[i][j]; j++) { switch (argv[i][j]) { case 'o': outfilename = argv[++i]; // Save & skip to next argument break; case 'T': O_dumpAST = 1; break; case 'c': O_assemble = 1; O_keepasm = 0; O_dolink = 0; break; case 'S': O_keepasm = 1; O_assemble = 0; O_dolink = 0; break; case 'v': O_verbose = 1; break; default: usage(argv[0]); } } } // Ensure we have at lease one input file argument if (i >= argc) usage(argv[0]); // Work on each input file in turn while (i < argc) { asmfile = do_compile(argv[i]); // Compile the source file if (O_dolink || O_assemble) { objfile = do_assemble(asmfile); // Assemble it to object forma if (objcnt == (MAXOBJ - 2)) { fprintf(stderr, "Too many object files for the compiler to handle\n"); exit(1); } objlist[objcnt++] = objfile; // Add the object file's name objlist[objcnt] = NULL; // to the list of object files } if (!O_keepasm) // Remove the assembly file if unlink(asmfile); // we don't need to keep it i++; } // Now link all the object files together if (O_dolink) { do_link(outfilename, objlist); // If we don't need to keep the object // files, then remove them if (!O_assemble) { for (i = 0; objlist[i] != NULL; i++) unlink(objlist[i]); } } return (0); } ================================================ FILE: 47_Sizeof/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" #include #include // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current token is t, // and fetch the next token. 
Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Match a comma and fetch the next token void comma(void) { match(T_COMMA, "comma"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d of %s\n", s, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d of %s\n", s1, s2, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d of %s\n", s, d, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d of %s\n", s, c, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } ================================================ FILE: 47_Sizeof/opt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST Tree Optimisation Code // Copyright (c) 2019 Warren Toomey, GPL3 // Fold an AST tree with a binary operator // and two A_INTLIT children. Return either // the original tree or a new leaf node. static struct ASTnode *fold2(struct ASTnode *n) { int val, leftval, rightval; // Get the values from each child leftval = n->left->a_intvalue; rightval = n->right->a_intvalue; // Perform some of the binary operations. 
// For any AST op we can't do, return // the original tree. switch (n->op) { case A_ADD: val = leftval + rightval; break; case A_SUBTRACT: val = leftval - rightval; break; case A_MULTIPLY: val = leftval * rightval; break; case A_DIVIDE: // Don't try to divide by zero. if (rightval == 0) return (n); val = leftval / rightval; break; default: return (n); } // Return a leaf node with the new value return (mkastleaf(A_INTLIT, n->type, NULL, val)); } // Fold an AST tree with a unary operator // and one INTLIT children. Return either // the original tree or a new leaf node. static struct ASTnode *fold1(struct ASTnode *n) { int val; // Get the child value. Do the // operation if recognised. // Return the new leaf node. val = n->left->a_intvalue; switch (n->op) { case A_WIDEN: break; case A_INVERT: val = ~val; break; case A_LOGNOT: val = !val; break; default: return (n); } // Return a leaf node with the new value return (mkastleaf(A_INTLIT, n->type, NULL, val)); } // Attempt to do constant folding on // the AST tree with the root node n static struct ASTnode *fold(struct ASTnode *n) { if (n == NULL) return (NULL); // Fold on the left child, then // do the same on the right child n->left = fold(n->left); n->right = fold(n->right); // If both children are A_INTLITs, do a fold2() if (n->left && n->left->op == A_INTLIT) { if (n->right && n->right->op == A_INTLIT) n = fold2(n); else // If only the left is A_INTLIT, do a fold1() n = fold1(n); } // Return the possibly modified tree return (n); } // Optimise an AST tree by // constant folding in all sub-trees struct ASTnode *optimise(struct ASTnode *n) { n = fold(n); return (n); } ================================================ FILE: 47_Sizeof/scan.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { char *p; 
p = strchr(s, c); return (p ? p - s : -1); } // Get the next character from the input file. static int next(void) { int c, l; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return (c); } c = fgetc(Infile); // Read from input file while (c == '#') { // We've hit a pre-processor statement scan(&Token); // Get the line number into l if (Token.token != T_INTLIT) fatals("Expecting pre-processor line number, got:", Text); l = Token.intvalue; scan(&Token); // Get the filename in Text if (Token.token != T_STRLIT) fatals("Expecting pre-processor file name, got:", Text); if (Text[0] != '<') { // If this is a real filename if (strcmp(Text, Infilename)) // and not the one we have now Infilename = strdup(Text); // save it. Then update the line num Line = l; } while ((c = fgetc(Infile)) != '\n'); // Skip to the end of the line c = fgetc(Infile); // and get the next character } if ('\n' == c) Line++; // Increment line count return (c); } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. 
static int skip(void) {
  int ch;

  // Keep reading until the character is not
  // a space, tab, newline, return or form feed
  do {
    ch = next();
  } while (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r'
           || ch == '\f');

  return (ch);
}

// Read the hexadecimal digits of a '\x' escape from the
// input and return their value. It is a fatal error if
// there are no digits or if the value is above 255.
static int hexchar(void) {
  int ch, value = 0, seen = 0;

  // Accumulate hex digits, most significant first
  for (ch = next(); isxdigit(ch); ch = next()) {
    value = value * 16 + chrpos("0123456789abcdef", tolower(ch));
    seen = 1;
  }

  // The character that stopped the loop isn't ours
  putback(ch);

  // No digits at all after the 'x'
  if (!seen)
    fatal("missing digits after '\\x'");
  if (value > 255)
    fatal("value out of range after '\\x'");
  return value;
}

// Return the next character from a character
// or string literal, translating backslash
// escape sequences into the characters they name.
static int scanch(void) {
  int c, ndigits, octval;

  // Anything that is not a backslash is returned as-is
  c = next();
  if (c != '\\')
    return (c);

  // Interpret the character after the backslash
  c = next();
  switch (c) {
    case 'a':
      return ('\a');
    case 'b':
      return ('\b');
    case 'f':
      return ('\f');
    case 'n':
      return ('\n');
    case 'r':
      return ('\r');
    case 't':
      return ('\t');
    case 'v':
      return ('\v');
    case '\\':
      return ('\\');
    case '"':
      return ('"');
    case '\'':
      return ('\'');

    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
      // Octal constant: consume up to three octal digits,
      // building the value as we go
      ndigits = 0;
      octval = 0;
      while (isdigit(c) && c < '8' && ndigits < 3) {
        octval = octval * 8 + (c - '0');
        ndigits++;
        c = next();
      }

      // Give back the first character we did not use
      putback(c);
      return (octval);

    case 'x':
      return hexchar();

    default:
      fatalc("unknown escape sequence", c);
  }
  return (c);
}

// Scan and return an integer literal
// value from the input file.
static int scanint(int c) { int k, val = 0, radix = 10; // Assume the radix is 10, but if it starts with 0 if (c == '0') { // and the next character is 'x', it's radix 16 if ((c = next()) == 'x') { radix = 16; c = next(); } else // Otherwise, it's radix 8 radix = 8; } // Convert each character into an int value while ((k = chrpos("0123456789abcdef", tolower(c))) >= 0) { if (k >= radix) fatalc("invalid digit in integer literal", c); val = val * radix + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan in a string literal from the input file, // and store it in buf[]. Return the length of // the string. static int scanstr(char *buf) { int i, c; // Loop while we have enough buffer space for (i = 0; i < TEXTLEN - 1; i++) { // Get the next char and append to buf // Return when we hit the ending double quote if ((c = scanch()) == '"') { buf[i] = 0; return (i); } buf[i] = c; } // Ran out of buf[] space fatal("String literal too long"); return (0); } // Scan an identifier from the input file and // store it in buf[]. Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { fatal("Identifier too long"); } else if (i < lim - 1) { buf[i++] = c; } c = next(); } // We hit a non-valid character, put it back. // NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. 
static int keyword(char *s) { switch (*s) { case 'b': if (!strcmp(s, "break")) return (T_BREAK); break; case 'c': if (!strcmp(s, "case")) return (T_CASE); if (!strcmp(s, "char")) return (T_CHAR); if (!strcmp(s, "continue")) return (T_CONTINUE); break; case 'd': if (!strcmp(s, "default")) return (T_DEFAULT); break; case 'e': if (!strcmp(s, "else")) return (T_ELSE); if (!strcmp(s, "enum")) return (T_ENUM); if (!strcmp(s, "extern")) return (T_EXTERN); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'l': if (!strcmp(s, "long")) return (T_LONG); break; case 'r': if (!strcmp(s, "return")) return (T_RETURN); break; case 's': if (!strcmp(s, "sizeof")) return (T_SIZEOF); if (!strcmp(s, "struct")) return (T_STRUCT); if (!strcmp(s, "switch")) return (T_SWITCH); break; case 't': if (!strcmp(s, "typedef")) return (T_TYPEDEF); break; case 'u': if (!strcmp(s, "union")) return (T_UNION); break; case 'v': if (!strcmp(s, "void")) return (T_VOID); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; } return (0); } // List of token strings, for debugging purposes char *Tstring[] = { "EOF", "=", "+=", "-=", "*=", "/=", "||", "&&", "|", "^", "&", "==", "!=", ",", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", "default", "sizeof", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":" }; // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. 
int scan(struct token *t) { int c, tokentype; // If we have a lookahead token, return this token if (Peektoken.token != 0) { t->token = Peektoken.token; t->tokstr = Peektoken.tokstr; t->intvalue = Peektoken.intvalue; Peektoken.token = 0; return (1); } // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': if ((c = next()) == '+') { t->token = T_INC; } else if (c == '=') { t->token = T_ASPLUS; } else { putback(c); t->token = T_PLUS; } break; case '-': if ((c = next()) == '-') { t->token = T_DEC; } else if (c == '>') { t->token = T_ARROW; } else if (c == '=') { t->token = T_ASMINUS; } else if (isdigit(c)) { // Negative int literal t->intvalue = -scanint(c); t->token = T_INTLIT; } else { putback(c); t->token = T_MINUS; } break; case '*': if ((c = next()) == '=') { t->token = T_ASSTAR; } else { putback(c); t->token = T_STAR; } break; case '/': if ((c = next()) == '=') { t->token = T_ASSLASH; } else { putback(c); t->token = T_SLASH; } break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case '[': t->token = T_LBRACKET; break; case ']': t->token = T_RBRACKET; break; case '~': t->token = T_INVERT; break; case '^': t->token = T_XOR; break; case ',': t->token = T_COMMA; break; case '.': t->token = T_DOT; break; case ':': t->token = T_COLON; break; case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { putback(c); t->token = T_LOGNOT; } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else if (c == '<') { t->token = T_LSHIFT; } else { putback(c); t->token = T_LT; } break; case '>': if ((c = next()) == '=') { t->token = T_GE; } else if (c == '>') { t->token = T_RSHIFT; } else { putback(c); t->token = T_GT; } break; case 
'&': if ((c = next()) == '&') { t->token = T_LOGAND; } else { putback(c); t->token = T_AMPER; } break; case '|': if ((c = next()) == '|') { t->token = T_LOGOR; } else { putback(c); t->token = T_OR; } break; case '\'': // If it's a quote, scan in the // literal character value and // the trailing quote t->intvalue = scanch(); t->token = T_INTLIT; if (next() != '\'') fatal("Expected '\\'' at end of char literal"); break; case '"': // Scan in a literal string scanstr(Text); t->token = T_STRLIT; break; default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if ((tokentype = keyword(Text)) != 0) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token t->tokstr = Tstring[t->token]; return (1); } ================================================ FILE: 47_Sizeof/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // Prototypes static struct ASTnode *single_statement(void); // compound_statement: // empty, i.e. 
no statement // | statement // | statement statements // ; // // statement: declaration // | expression_statement // | function_call // | if_statement // | while_statement // | for_statement // | return_statement // ; // if_statement: if_head // | if_head 'else' statement // ; // // if_head: 'if' '(' true_false_expression ')' statement ; // // Parse an IF statement including any // optional ELSE clause and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); rparen(); // Get the AST for the statement trueAST = single_statement(); // If we have an 'else', skip it // and get the AST for the statement if (Token.token == T_ELSE) { scan(&Token); falseAST = single_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, P_NONE, condAST, trueAST, falseAST, NULL, 0)); } // while_statement: 'while' '(' true_false_expression ')' statement ; // // Parse a WHILE statement and return its AST static struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); rparen(); // Get the AST for the statement. 
// Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Build and return the AST for this statement return (mkastnode(A_WHILE, P_NONE, condAST, NULL, bodyAST, NULL, 0)); } // for_statement: 'for' '(' expression_list ';' // true_false_expression ';' // expression_list ')' statement ; // // Parse a FOR statement and return its AST static struct ASTnode *for_statement(void) { struct ASTnode *condAST, *bodyAST; struct ASTnode *preopAST, *postopAST; struct ASTnode *tree; // Ensure we have 'for' '(' match(T_FOR, "for"); lparen(); // Get the pre_op expression and the ';' preopAST = expression_list(T_SEMI); semi(); // Get the condition and the ';'. // Force a non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); semi(); // Get the post_op expression and the ')' postopAST = expression_list(T_RPAREN); rparen(); // Get the statement which is the body // Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Glue the statement and the postop tree tree = mkastnode(A_GLUE, P_NONE, bodyAST, NULL, postopAST, NULL, 0); // Make a WHILE loop with the condition and this new body tree = mkastnode(A_WHILE, P_NONE, condAST, NULL, tree, NULL, 0); // And glue the preop tree to the A_WHILE tree return (mkastnode(A_GLUE, P_NONE, preopAST, NULL, tree, NULL, 0)); } // return_statement: 'return' '(' expression ')' ; // // Parse a return statement and return its AST static struct ASTnode *return_statement(void) { struct ASTnode *tree; // Can't return a value if function returns P_VOID if (Functionid->type == P_VOID) fatal("Can't return from a void function"); // Ensure we have 'return' '(' match(T_RETURN, "return"); lparen(); // Parse the following expression tree = binexpr(0); // Ensure this is compatible with the function's type tree = modify_type(tree, 
Functionid->type, 0); if (tree == NULL) fatal("Incompatible type to return"); // Add on the A_RETURN node tree = mkastunary(A_RETURN, P_NONE, tree, NULL, 0); // Get the ')' and ';' rparen(); semi(); return (tree); } // break_statement: 'break' ; // // Parse a break statement and return its AST static struct ASTnode *break_statement(void) { if (Looplevel == 0 && Switchlevel == 0) fatal("no loop or switch to break out from"); scan(&Token); semi(); return (mkastleaf(A_BREAK, 0, NULL, 0)); } // continue_statement: 'continue' ; // // Parse a continue statement and return its AST static struct ASTnode *continue_statement(void) { if (Looplevel == 0) fatal("no loop to continue to"); scan(&Token); semi(); return (mkastleaf(A_CONTINUE, 0, NULL, 0)); } // Parse a switch statement and return its AST static struct ASTnode *switch_statement(void) { struct ASTnode *left, *n, *c, *casetree= NULL, *casetail; int inloop=1, casecount=0; int seendefault=0; int ASTop, casevalue; // Skip the 'switch' and '(' scan(&Token); lparen(); // Get the switch expression, the ')' and the '{' left= binexpr(0); rparen(); lbrace(); // Ensure that this is of int type if (!inttype(left->type)) fatal("Switch expression is not of integer type"); // Build an A_SWITCH subtree with the expression as // the child n= mkastunary(A_SWITCH, 0, left, NULL, 0); // Now parse the cases Switchlevel++; while (inloop) { switch(Token.token) { // Leave the loop when we hit a '}' case T_RBRACE: if (casecount==0) fatal("No cases in switch"); inloop=0; break; case T_CASE: case T_DEFAULT: // Ensure this isn't after a previous 'default' if (seendefault) fatal("case or default after existing default"); // Set the AST operation. 
Scan the case value if required if (Token.token==T_DEFAULT) { ASTop= A_DEFAULT; seendefault= 1; scan(&Token); } else { ASTop= A_CASE; scan(&Token); left= binexpr(0); // Ensure the case value is an integer literal if (left->op != A_INTLIT) fatal("Expecting integer literal for case value"); casevalue= left->a_intvalue; // Walk the list of existing case values to ensure // that there isn't a duplicate case value for (c= casetree; c != NULL; c= c -> right) if (casevalue == c->a_intvalue) fatal("Duplicate case value"); } // Scan the ':' and get the compound expression match(T_COLON, ":"); left= compound_statement(1); casecount++; // Build a sub-tree with the compound statement as the left child // and link it in to the growing A_CASE tree if (casetree==NULL) { casetree= casetail= mkastunary(ASTop, 0, left, NULL, casevalue); } else { casetail->right= mkastunary(ASTop, 0, left, NULL, casevalue); casetail= casetail->right; } break; default: fatals("Unexpected token in switch", Token.tokstr); } } Switchlevel--; // We have a sub-tree with the cases and any default. Put the // case count into the A_SWITCH node and attach the case tree. n->a_intvalue= casecount; n->right= casetree; rbrace(); return(n); } // Parse a single statement and return its AST. static struct ASTnode *single_statement(void) { struct ASTnode *stmt; struct symtable *ctype; switch (Token.token) { case T_LBRACE: // We have a '{', so this is a compound statement lbrace(); stmt = compound_statement(0); rbrace(); return(stmt); case T_IDENT: // We have to see if the identifier matches a typedef. // If not, treat it as an expression. // Otherwise, fall down to the parse_type() call. if (findtypedef(Text) == NULL) { stmt= binexpr(0); semi(); return(stmt); } case T_CHAR: case T_INT: case T_LONG: case T_STRUCT: case T_UNION: case T_ENUM: case T_TYPEDEF: // The beginning of a variable declaration list. 
declaration_list(&ctype, C_LOCAL, T_SEMI, T_EOF, &stmt); semi(); return (stmt); // Any assignments from the declarations case T_IF: return (if_statement()); case T_WHILE: return (while_statement()); case T_FOR: return (for_statement()); case T_RETURN: return (return_statement()); case T_BREAK: return (break_statement()); case T_CONTINUE: return (continue_statement()); case T_SWITCH: return (switch_statement()); default: // For now, see if this is an expression. // This catches assignment statements. stmt= binexpr(0); semi(); return(stmt); } return (NULL); // Keep -Wall happy } // Parse a compound statement // and return its AST. If inswitch is true, // we look for a '}', 'case' or 'default' token // to end the parsing. Otherwise, look for // just a '}' to end the parsing. struct ASTnode *compound_statement(int inswitch) { struct ASTnode *left = NULL; struct ASTnode *tree; while (1) { // Parse a single statement tree = single_statement(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, P_NONE, left, NULL, tree, NULL, 0); } // Leave if we've hit the end token if (Token.token == T_RBRACE) return(left); if (inswitch && (Token.token == T_CASE || Token.token == T_DEFAULT)) return(left); } } ================================================ FILE: 47_Sizeof/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 // Append a node to the singly-linked list pointed to by head or tail void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node) { // Check for valid pointers if (head == NULL || tail == NULL || node == NULL) fatal("Either head, tail or node is NULL in appendsym"); // Append to the list if (*tail) { (*tail)->next = node; *tail = node; } else *head = *tail = node; node->next = 
NULL; } // Create a symbol node to be added to a symbol table list. // Set up the node's: // + type: char, int etc. // + ctype: composite type pointer for struct/union // + structural type: var, function, array etc. // + size: number of elements, or endlabel: end label for a function // + posn: Position information for local symbols // Return a pointer to the new node struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn) { // Get a new node struct symtable *node = (struct symtable *) malloc(sizeof(struct symtable)); if (node == NULL) fatal("Unable to malloc a symbol table node in newsym"); // Fill in the values if (name == NULL) node->name = NULL; else node->name = strdup(name); node->type = type; node->ctype = ctype; node->stype = stype; node->class = class; node->nelems = nelems; // For pointers and integer types, set the size // of the symbol. structs and union declarations // manually set this up themselves. if (ptrtype(type) || inttype(type)) node->size = nelems * typesize(type, ctype); node->st_posn = posn; node->next = NULL; node->member = NULL; node->initlist = NULL; return (node); } // Add a symbol to the global symbol list struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn) { struct symtable *sym = newsym(name, type, ctype, stype, class, nelems, posn); // For structs and unions, copy the size from the type node if (type== P_STRUCT || type== P_UNION) sym->size = ctype->size; appendsym(&Globhead, &Globtail, sym); return (sym); } // Add a symbol to the local symbol list struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int nelems) { struct symtable *sym = newsym(name, type, ctype, stype, C_LOCAL, nelems, 0); // For structs and unions, copy the size from the type node if (type== P_STRUCT || type== P_UNION) sym->size = ctype->size; appendsym(&Loclhead, &Locltail, sym); return (sym); } // Add a symbol to the 
parameter list struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype) { struct symtable *sym = newsym(name, type, ctype, stype, C_PARAM, 1, 0); appendsym(&Parmhead, &Parmtail, sym); return (sym); } // Add a symbol to the temporary member list struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int nelems) { struct symtable *sym = newsym(name, type, ctype, stype, C_MEMBER, nelems, 0); // For structs and unions, copy the size from the type node if (type== P_STRUCT || type== P_UNION) sym->size = ctype->size; appendsym(&Membhead, &Membtail, sym); return (sym); } // Add a struct to the struct list struct symtable *addstruct(char *name) { struct symtable *sym = newsym(name, P_STRUCT, NULL, 0, C_STRUCT, 0, 0); appendsym(&Structhead, &Structtail, sym); return (sym); } // Add a struct to the union list struct symtable *addunion(char *name) { struct symtable *sym = newsym(name, P_UNION, NULL, 0, C_UNION, 0, 0); appendsym(&Unionhead, &Uniontail, sym); return (sym); } // Add an enum type or value to the enum list. // Class is C_ENUMTYPE or C_ENUMVAL. // Use posn to store the int value. struct symtable *addenum(char *name, int class, int value) { struct symtable *sym = newsym(name, P_INT, NULL, 0, class, 0, value); appendsym(&Enumhead, &Enumtail, sym); return (sym); } // Add a typedef to the typedef list struct symtable *addtypedef(char *name, int type, struct symtable *ctype) { struct symtable *sym = newsym(name, type, ctype, 0, C_TYPEDEF, 0, 0); appendsym(&Typehead, &Typetail, sym); return (sym); } // Search for a symbol in a specific list. // Return a pointer to the found node or NULL if not found. 
// If class is not zero, also match on the given class static struct symtable *findsyminlist(char *s, struct symtable *list, int class) { for (; list != NULL; list = list->next) if ((list->name != NULL) && !strcmp(s, list->name)) if (class==0 || class== list->class) return (list); return (NULL); } // Determine if the symbol s is in the global symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findglob(char *s) { return (findsyminlist(s, Globhead, 0)); } // Determine if the symbol s is in the local symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findlocl(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } return (findsyminlist(s, Loclhead, 0)); } // Determine if the symbol s is in the symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findsymbol(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } // Otherwise, try the local and global symbol lists node = findsyminlist(s, Loclhead, 0); if (node) return (node); return (findsyminlist(s, Globhead, 0)); } // Find a member in the member list // Return a pointer to the found node or NULL if not found. struct symtable *findmember(char *s) { return (findsyminlist(s, Membhead, 0)); } // Find a struct in the struct list // Return a pointer to the found node or NULL if not found. struct symtable *findstruct(char *s) { return (findsyminlist(s, Structhead, 0)); } // Find a struct in the union list // Return a pointer to the found node or NULL if not found. struct symtable *findunion(char *s) { return (findsyminlist(s, Unionhead, 0)); } // Find an enum type in the enum list // Return a pointer to the found node or NULL if not found. 
struct symtable *findenumtype(char *s) { return (findsyminlist(s, Enumhead, C_ENUMTYPE)); } // Find an enum value in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumval(char *s) { return (findsyminlist(s, Enumhead, C_ENUMVAL)); } // Find a type in the typedef list // Return a pointer to the found node or NULL if not found. struct symtable *findtypedef(char *s) { return (findsyminlist(s, Typehead, 0)); } // Reset the contents of the symbol table void clear_symtable(void) { Globhead = Globtail = NULL; Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Membhead = Membtail = NULL; Structhead = Structtail = NULL; Unionhead = Uniontail = NULL; Enumhead = Enumtail = NULL; Typehead = Typetail = NULL; } // Clear all the entries in the local symbol table void freeloclsyms(void) { Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Functionid = NULL; } ================================================ FILE: 47_Sizeof/tests/err.input031.c ================================================ Expecting a primary expression, got token:+ on line 5 of input031.c ================================================ FILE: 47_Sizeof/tests/err.input032.c ================================================ Unknown variable:cow on line 4 of input032.c ================================================ FILE: 47_Sizeof/tests/err.input033.c ================================================ Incompatible type to return on line 4 of input033.c ================================================ FILE: 47_Sizeof/tests/err.input034.c ================================================ For now, declaration of non-global arrays is not implemented on line 4 of input034.c ================================================ FILE: 47_Sizeof/tests/err.input035.c ================================================ Duplicate local variable declaration:a on line 4 of input035.c ================================================ FILE: 47_Sizeof/tests/err.input036.c
================================================ Type doesn't match prototype for parameter:2 on line 4 of input036.c ================================================ FILE: 47_Sizeof/tests/err.input037.c ================================================ Expected:comma on line 3 of input037.c ================================================ FILE: 47_Sizeof/tests/err.input038.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input038.c ================================================ FILE: 47_Sizeof/tests/err.input039.c ================================================ No statements in function with non-void type on line 4 of input039.c ================================================ FILE: 47_Sizeof/tests/err.input040.c ================================================ No return for function with non-void type on line 4 of input040.c ================================================ FILE: 47_Sizeof/tests/err.input041.c ================================================ Can't return from a void function on line 3 of input041.c ================================================ FILE: 47_Sizeof/tests/err.input042.c ================================================ Undeclared function:fred on line 3 of input042.c ================================================ FILE: 47_Sizeof/tests/err.input043.c ================================================ Undeclared array:b on line 3 of input043.c ================================================ FILE: 47_Sizeof/tests/err.input044.c ================================================ Unknown variable:z on line 3 of input044.c ================================================ FILE: 47_Sizeof/tests/err.input045.c ================================================ & operator must be followed by an identifier on line 3 of input045.c ================================================ FILE: 47_Sizeof/tests/err.input046.c ================================================ * operator must be 
followed by an identifier or * on line 3 of input046.c ================================================ FILE: 47_Sizeof/tests/err.input047.c ================================================ ++ operator must be followed by an identifier on line 3 of input047.c ================================================ FILE: 47_Sizeof/tests/err.input048.c ================================================ -- operator must be followed by an identifier on line 3 of input048.c ================================================ FILE: 47_Sizeof/tests/err.input049.c ================================================ Incompatible expression in assignment on line 6 of input049.c ================================================ FILE: 47_Sizeof/tests/err.input050.c ================================================ Incompatible types in binary expression on line 6 of input050.c ================================================ FILE: 47_Sizeof/tests/err.input051.c ================================================ Expected '\'' at end of char literal on line 4 of input051.c ================================================ FILE: 47_Sizeof/tests/err.input052.c ================================================ Unrecognised character:$ on line 5 of input052.c ================================================ FILE: 47_Sizeof/tests/err.input056.c ================================================ unknown struct/union type:var1 on line 2 of input056.c ================================================ FILE: 47_Sizeof/tests/err.input057.c ================================================ previously defined struct/union:fred on line 2 of input057.c ================================================ FILE: 47_Sizeof/tests/err.input059.c ================================================ Undeclared variable:y on line 3 of input059.c ================================================ FILE: 47_Sizeof/tests/err.input060.c ================================================ Undeclared variable:x on line 3 of input060.c 
================================================ FILE: 47_Sizeof/tests/err.input061.c ================================================ Undeclared variable:x on line 3 of input061.c ================================================ FILE: 47_Sizeof/tests/err.input064.c ================================================ undeclared enum type::fred on line 1 of input064.c ================================================ FILE: 47_Sizeof/tests/err.input065.c ================================================ enum type redeclared::fred on line 2 of input065.c ================================================ FILE: 47_Sizeof/tests/err.input066.c ================================================ enum value redeclared::z on line 2 of input066.c ================================================ FILE: 47_Sizeof/tests/err.input068.c ================================================ redefinition of typedef:FOO on line 2 of input068.c ================================================ FILE: 47_Sizeof/tests/err.input069.c ================================================ unknown type:FLOO on line 2 of input069.c ================================================ FILE: 47_Sizeof/tests/err.input072.c ================================================ no loop or switch to break out from on line 1 of input072.c ================================================ FILE: 47_Sizeof/tests/err.input073.c ================================================ no loop to continue to on line 1 of input073.c ================================================ FILE: 47_Sizeof/tests/err.input075.c ================================================ Unexpected token in switch:if on line 4 of input075.c ================================================ FILE: 47_Sizeof/tests/err.input076.c ================================================ No cases in switch on line 3 of input076.c ================================================ FILE: 47_Sizeof/tests/err.input077.c ================================================ case or default 
after existing default on line 6 of input077.c ================================================ FILE: 47_Sizeof/tests/err.input078.c ================================================ case or default after existing default on line 6 of input078.c ================================================ FILE: 47_Sizeof/tests/err.input079.c ================================================ Duplicate case value on line 6 of input079.c ================================================ FILE: 47_Sizeof/tests/err.input085.c ================================================ Bad type in parameter list on line 1 of input085.c ================================================ FILE: 47_Sizeof/tests/err.input086.c ================================================ Function definition not at global level on line 3 of input086.c ================================================ FILE: 47_Sizeof/tests/err.input087.c ================================================ Bad type in member list on line 4 of input087.c ================================================ FILE: 47_Sizeof/tests/err.input092.c ================================================ Type mismatch: literal vs. variable on line 1 of input092.c ================================================ FILE: 47_Sizeof/tests/err.input093.c ================================================ Unknown variable:fred on line 1 of input093.c ================================================ FILE: 47_Sizeof/tests/err.input094.c ================================================ Type mismatch: literal vs. 
variable on line 1 of input094.c ================================================ FILE: 47_Sizeof/tests/err.input095.c ================================================ Variable can not be initialised:x on line 1 of input095.c ================================================ FILE: 47_Sizeof/tests/err.input096.c ================================================ Array size is illegal:0 on line 1 of input096.c ================================================ FILE: 47_Sizeof/tests/err.input097.c ================================================ For now, declaration of non-global arrays is not implemented on line 2 of input097.c ================================================ FILE: 47_Sizeof/tests/err.input098.c ================================================ Too many values in initialisation list on line 1 of input098.c ================================================ FILE: 47_Sizeof/tests/err.input102.c ================================================ Cannot cast to a struct, union or void type on line 3 of input102.c ================================================ FILE: 47_Sizeof/tests/err.input103.c ================================================ Cannot cast to a struct, union or void type on line 3 of input103.c ================================================ FILE: 47_Sizeof/tests/err.input104.c ================================================ Cannot cast to a struct, union or void type on line 2 of input104.c ================================================ FILE: 47_Sizeof/tests/err.input105.c ================================================ Incompatible expression in assignment on line 4 of input105.c ================================================ FILE: 47_Sizeof/tests/input001.c ================================================ int printf(char *fmt); void main() { printf("%d\n", 12 * 3); printf("%d\n", 18 - 2 * 4); printf("%d\n", 1 + 2 + 9 - 5/2 + 3*5); } ================================================ FILE: 47_Sizeof/tests/input002.c 
================================================ int printf(char *fmt); void main() { int fred; int jim; fred= 5; jim= 12; printf("%d\n", fred + jim); } ================================================ FILE: 47_Sizeof/tests/input003.c ================================================ int printf(char *fmt); void main() { int x; x= 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); } ================================================ FILE: 47_Sizeof/tests/input004.c ================================================ int printf(char *fmt); void main() { int x; x= 7 < 9; printf("%d\n", x); x= 7 <= 9; printf("%d\n", x); x= 7 != 9; printf("%d\n", x); x= 7 == 7; printf("%d\n", x); x= 7 >= 7; printf("%d\n", x); x= 7 <= 7; printf("%d\n", x); x= 9 > 7; printf("%d\n", x); x= 9 >= 7; printf("%d\n", x); x= 9 != 7; printf("%d\n", x); } ================================================ FILE: 47_Sizeof/tests/input005.c ================================================ int printf(char *fmt); void main() { int i; int j; i=6; j=12; if (i < j) { printf("%d\n", i); } else { printf("%d\n", j); } } ================================================ FILE: 47_Sizeof/tests/input006.c ================================================ int printf(char *fmt); void main() { int i; i=1; while (i <= 10) { printf("%d\n", i); i= i + 1; } } ================================================ FILE: 47_Sizeof/tests/input007.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 47_Sizeof/tests/input008.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 47_Sizeof/tests/input009.c ================================================ int 
printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { printf("%d\n", 2 * b - a); } } ================================================ FILE: 47_Sizeof/tests/input010.c ================================================ int printf(char *fmt); void main() { int i; char j; j= 20; printf("%d\n", j); i= 10; printf("%d\n", i); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 2; j= j + 1) { printf("%d\n", j); } } ================================================ FILE: 47_Sizeof/tests/input011.c ================================================ int printf(char *fmt); int main() { int i; char j; long k; i= 10; printf("%d\n", i); j= 20; printf("%d\n", j); k= 30; printf("%d\n", k); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 4; j= j + 1) { printf("%d\n", j); } for (k= 1; k <= 5; k= k + 1) { printf("%d\n", k); } return(i); printf("%d\n", 12345); return(3); } ================================================ FILE: 47_Sizeof/tests/input012.c ================================================ int printf(char *fmt); int fred() { return(5); } void main() { int x; x= fred(2); printf("%d\n", x); } ================================================ FILE: 47_Sizeof/tests/input013.c ================================================ int printf(char *fmt); int fred() { return(56); } void main() { int dummy; int result; dummy= printf("%d\n", 23); result= fred(10); dummy= printf("%d\n", result); } ================================================ FILE: 47_Sizeof/tests/input014.c ================================================ int printf(char *fmt); int fred() { return(20); } int main() { int result; printf("%d\n", 10); result= fred(15); printf("%d\n", result); printf("%d\n", fred(15)+10); return(0); } ================================================ FILE: 47_Sizeof/tests/input015.c ================================================ int printf(char *fmt); int 
main() { char a; char *b; char c; int d; int *e; int f; a= 18; printf("%d\n", a); b= &a; c= *b; printf("%d\n", c); d= 12; printf("%d\n", d); e= &d; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 47_Sizeof/tests/input016.c ================================================ int printf(char *fmt); int c; int d; int *e; int f; int main() { c= 12; d=18; printf("%d\n", c); e= &c + 1; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 47_Sizeof/tests/input017.c ================================================ int printf(char *fmt); int main() { char a; char *b; int d; int *e; b= &a; *b= 19; printf("%d\n", a); e= &d; *e= 12; printf("%d\n", d); return(0); } ================================================ FILE: 47_Sizeof/tests/input018.c ================================================ int printf(char *fmt); int main() { int a; int b; a= b= 34; printf("%d\n", a); printf("%d\n", b); return(0); } ================================================ FILE: 47_Sizeof/tests/input018a.c ================================================ int printf(char *fmt); int a; int *b; char c; char *d; int main() { b= &a; *b= 15; printf("%d\n", a); d= &c; *d= 16; printf("%d\n", c); return(0); } ================================================ FILE: 47_Sizeof/tests/input019.c ================================================ int printf(char *fmt); int a; int b; int c; int d; int e; int main() { a= 2; b= 4; c= 3; d= 2; e= (a+b) * (c+d); printf("%d\n", e); return(0); } ================================================ FILE: 47_Sizeof/tests/input020.c ================================================ int printf(char *fmt); int a; int b[25]; int main() { b[3]= 12; a= b[3]; printf("%d\n", a); return(0); } ================================================ FILE: 47_Sizeof/tests/input021.c ================================================ int printf(char *fmt); char c; char *str; int main() { c= '\n'; printf("%d\n", c); for 
(str= "Hello world\n"; *str != 0; str= str + 1) { printf("%c", *str); } return(0); } ================================================ FILE: 47_Sizeof/tests/input022.c ================================================ int printf(char *fmt); char a; char b; char c; int d; int e; int f; long g; long h; long i; int main() { b= 5; c= 7; a= b + c++; printf("%d\n", a); e= 5; f= 7; d= e + f++; printf("%d\n", d); h= 5; i= 7; g= h + i++; printf("%d\n", g); a= b-- + c; printf("%d\n", a); d= e-- + f; printf("%d\n", d); g= h-- + i; printf("%d\n", g); a= ++b + c; printf("%d\n", a); d= ++e + f; printf("%d\n", d); g= ++h + i; printf("%d\n", g); a= b * --c; printf("%d\n", a); d= e * --f; printf("%d\n", d); g= h * --i; printf("%d\n", g); return(0); } ================================================ FILE: 47_Sizeof/tests/input023.c ================================================ int printf(char *fmt); char *str; int x; int main() { x= -23; printf("%d\n", x); printf("%d\n", -10 * -10); x= 1; x= ~x; printf("%d\n", x); x= 2 > 5; printf("%d\n", x); x= !x; printf("%d\n", x); x= !x; printf("%d\n", x); x= 13; if (x) { printf("%d\n", 13); } x= 0; if (!x) { printf("%d\n", 14); } for (str= "Hello world\n"; *str; str++) { printf("%c", *str); } return(0); } ================================================ FILE: 47_Sizeof/tests/input024.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { a= 42; b= 19; printf("%d\n", a & b); printf("%d\n", a | b); printf("%d\n", a ^ b); printf("%d\n", 1 << 3); printf("%d\n", 63 >> 3); return(0); } ================================================ FILE: 47_Sizeof/tests/input025.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { char z; int y; int x; x= 10; y= 20; z= 30; printf("%d\n", x); printf("%d\n", y); printf("%d\n", z); a= 5; b= 15; c= 25; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); return(0); } 
================================================ FILE: 47_Sizeof/tests/input026.c ================================================ int printf(char *fmt); int main(int a, char b, long c, int d, int e, int f, int g, int h) { int i; int j; int k; a= 13; printf("%d\n", a); b= 23; printf("%d\n", b); c= 34; printf("%d\n", c); d= 44; printf("%d\n", d); e= 54; printf("%d\n", e); f= 64; printf("%d\n", f); g= 74; printf("%d\n", g); h= 84; printf("%d\n", h); i= 94; printf("%d\n", i); j= 95; printf("%d\n", j); k= 96; printf("%d\n", k); return(0); } ================================================ FILE: 47_Sizeof/tests/input027.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int param5(int a, int b, int c, int d, int e) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param2(int a, int b) { int c; int d; int e; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param0() { int a; int b; int c; int d; int e; a= 1; b= 2; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int main() { param8(1,2,3,4,5,6,7,8); param5(1,2,3,4,5); param2(1,2); param0(); return(0); } ================================================ FILE: 47_Sizeof/tests/input028.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; 
param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 47_Sizeof/tests/input029.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h); int fred(int a, int b, int c); int main(); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 47_Sizeof/tests/input030.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input030.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 47_Sizeof/tests/input031.c ================================================ int printf(char *fmt); int main() { int x; x= 2 + + 3 - * / ; } ================================================ FILE: 47_Sizeof/tests/input032.c ================================================ int printf(char *fmt); int main() { pizza cow llama sausage; } ================================================ FILE: 47_Sizeof/tests/input033.c ================================================ int printf(char *fmt); int main() { char *z; return(z); } ================================================ FILE: 47_Sizeof/tests/input034.c ================================================ int printf(char *fmt); int main() { int a[12]; return(0); } 
================================================ FILE: 47_Sizeof/tests/input035.c ================================================ int printf(char *fmt); int fred(int a, int b) { int a; return(a); } ================================================ FILE: 47_Sizeof/tests/input036.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c); ================================================ FILE: 47_Sizeof/tests/input037.c ================================================ int printf(char *fmt); int fred(int a, char b +, int z); ================================================ FILE: 47_Sizeof/tests/input038.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c, int g); ================================================ FILE: 47_Sizeof/tests/input039.c ================================================ int printf(char *fmt); int main() { int a; } ================================================ FILE: 47_Sizeof/tests/input040.c ================================================ int printf(char *fmt); int main() { int a; a= 5; } ================================================ FILE: 47_Sizeof/tests/input041.c ================================================ int printf(char *fmt); void fred() { return(5); } ================================================ FILE: 47_Sizeof/tests/input042.c ================================================ int printf(char *fmt); int main() { fred(5); } ================================================ FILE: 47_Sizeof/tests/input043.c ================================================ int printf(char *fmt); int main() { int a; a= b[4]; } ================================================ FILE: 47_Sizeof/tests/input044.c ================================================ int printf(char *fmt); int main() { int a; a= z; } ================================================ FILE: 47_Sizeof/tests/input045.c 
================================================ int printf(char *fmt); int main() { int a; a= &5; } ================================================ FILE: 47_Sizeof/tests/input046.c ================================================ int printf(char *fmt); int main() { int a; a= *5; } ================================================ FILE: 47_Sizeof/tests/input047.c ================================================ int printf(char *fmt); int main() { int a; a= ++5; } ================================================ FILE: 47_Sizeof/tests/input048.c ================================================ int printf(char *fmt); int main() { int a; a= --5; } ================================================ FILE: 47_Sizeof/tests/input049.c ================================================ int printf(char *fmt); int main() { int x; char y; y= x; } ================================================ FILE: 47_Sizeof/tests/input050.c ================================================ int printf(char *fmt); int main() { char *a; char *b; a= a + b; } ================================================ FILE: 47_Sizeof/tests/input051.c ================================================ int printf(char *fmt); int main() { char a; a= 'fred'; } ================================================ FILE: 47_Sizeof/tests/input052.c ================================================ int printf(char *fmt); int main() { int a; a= $5.00; } ================================================ FILE: 47_Sizeof/tests/input053.c ================================================ int printf(char *fmt); int main() { printf("Hello world, %d\n", 23); return(0); } ================================================ FILE: 47_Sizeof/tests/input054.c ================================================ int printf(char *fmt); int main() { int i; for (i=0; i < 20; i++) { printf("Hello world, %d\n", i); } return(0); } ================================================ FILE: 47_Sizeof/tests/input055.c 
================================================ int printf(char *fmt); int main(int argc, char **argv) { int i; char *argument; printf("Hello world\n"); for (i=0; i < argc; i++) { argument= *argv; argv= argv + 1; printf("Argument %d is %s\n", i, argument); } return(0); } ================================================ FILE: 47_Sizeof/tests/input056.c ================================================ struct foo { int x; }; struct mary var1; ================================================ FILE: 47_Sizeof/tests/input057.c ================================================ struct fred { int x; } ; struct fred { char y; } ; ================================================ FILE: 47_Sizeof/tests/input058.c ================================================ int printf(char *fmt); struct fred { int x; char y; long z; }; struct fred var2; struct fred *varptr; int main() { long result; var2.x= 12; printf("%d\n", var2.x); var2.y= 'c'; printf("%d\n", var2.y); var2.z= 4005; printf("%d\n", var2.z); result= var2.x + var2.y + var2.z; printf("%d\n", result); varptr= &var2; result= varptr->x + varptr->y + varptr->z; printf("%d\n", result); return(0); } ================================================ FILE: 47_Sizeof/tests/input059.c ================================================ int main() { int x; x= y.foo; } ================================================ FILE: 47_Sizeof/tests/input060.c ================================================ int main() { int x; x= x.foo; } ================================================ FILE: 47_Sizeof/tests/input061.c ================================================ int main() { int x; x= x->foo; } ================================================ FILE: 47_Sizeof/tests/input062.c ================================================ int printf(char *fmt); union fred { char w; int x; int y; long z; }; union fred var1; union fred *varptr; int main() { var1.x= 65; printf("%d\n", var1.x); var1.x= 66; printf("%d\n", var1.x); printf("%d\n", var1.y); printf("The 
next two depend on the endian of the platform\n"); printf("%d\n", var1.w); printf("%d\n", var1.z); varptr= &var1; varptr->x= 67; printf("%d\n", varptr->x); printf("%d\n", varptr->y); return(0); } ================================================ FILE: 47_Sizeof/tests/input063.c ================================================ int printf(char *fmt); enum fred { apple=1, banana, carrot, pear=10, peach, mango, papaya }; enum jane { aple=1, bnana, crrot, par=10, pech, mago, paaya }; enum fred var1; enum jane var2; enum fred var3; int main() { var1= carrot + pear + mango; printf("%d\n", var1); return(0); } ================================================ FILE: 47_Sizeof/tests/input064.c ================================================ enum fred var3; ================================================ FILE: 47_Sizeof/tests/input065.c ================================================ enum fred { x, y, z }; enum fred { a, b }; ================================================ FILE: 47_Sizeof/tests/input066.c ================================================ enum fred { x, y, z }; enum mary { a, b, z }; ================================================ FILE: 47_Sizeof/tests/input067.c ================================================ int printf(char *fmt); typedef int FOO; FOO var1; struct bar { int x; int y} ; typedef struct bar BAR; BAR var2; int main() { var1= 5; printf("%d\n", var1); var2.x= 7; var2.y= 10; printf("%d\n", var2.x + var2.y); return(0); } ================================================ FILE: 47_Sizeof/tests/input068.c ================================================ typedef int FOO; typedef char FOO; ================================================ FILE: 47_Sizeof/tests/input069.c ================================================ typedef int FOO; FLOO y; ================================================ FILE: 47_Sizeof/tests/input070.c ================================================ #include typedef int FOO; int main() { FOO x; x= 56; printf("%d\n", x); return(0); 
} ================================================ FILE: 47_Sizeof/tests/input071.c ================================================ #include int main() { int x; x = 0; while (x < 100) { if (x == 5) { x = x + 2; continue; } printf("%d\n", x); if (x == 14) { break; } x = x + 1; } printf("Done\n"); return (0); } ================================================ FILE: 47_Sizeof/tests/input072.c ================================================ int main() { break; } ================================================ FILE: 47_Sizeof/tests/input073.c ================================================ int main() { continue; } ================================================ FILE: 47_Sizeof/tests/input074.c ================================================ #include int main() { int x; int y; y= 0; for (x=0; x < 5; x++) { switch(x) { case 1: { y= 5; break; } case 2: { y= 7; break; } case 3: { y= 9; } default: { y= 100; } } printf("%d\n", y); } return(0); } ================================================ FILE: 47_Sizeof/tests/input075.c ================================================ int main() { int x; switch(x) { if (x<5); } } ================================================ FILE: 47_Sizeof/tests/input076.c ================================================ int main() { int x; switch(x) { } } ================================================ FILE: 47_Sizeof/tests/input077.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } case 4: { x= 2; } } } ================================================ FILE: 47_Sizeof/tests/input078.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } default: { x= 2; } } } ================================================ FILE: 47_Sizeof/tests/input079.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } case 2: { x= 2; } case 1: { x= 2; } default: { x= 2; } } } 
================================================ FILE: 47_Sizeof/tests/input080.c ================================================ #include int x; int y; int main() { for (x=0, y=1; x < 6; x++, y=y+2) { printf("%d %d\n", x, y); } return(0); } ================================================ FILE: 47_Sizeof/tests/input081.c ================================================ #include int x; int y; int main() { x= 0; y=1; for (;x<5;) { printf("%d %d\n", x, y); x=x+1; y=y+2; } return(0); } ================================================ FILE: 47_Sizeof/tests/input082.c ================================================ #include int main() { int x; x= 10; // Dangling else test if (x > 5) if (x > 15) printf("x > 15\n"); else printf("15 >= x > 5\n"); else printf("5 >= x\n"); return(0); } ================================================ FILE: 47_Sizeof/tests/input083.c ================================================ #include int main() { int x; // Dangling else test. // We should not print anything for x<= 5 for (x=0; x < 12; x++) if (x > 5) if (x > 10) printf("10 < %2d\n", x); else printf(" 5 < %2d <= 10\n", x); return(0); } ================================================ FILE: 47_Sizeof/tests/input084.c ================================================ #include int main() { int x, y; x=2; y=3; printf("%d %d\n", x, y); char a, *b; a= 'f'; b= &a; printf("%c %c\n", a, *b); return(0); } ================================================ FILE: 47_Sizeof/tests/input085.c ================================================ int main(struct foo { int x; }; , int a) { return(0); } ================================================ FILE: 47_Sizeof/tests/input086.c ================================================ int main() { int fred() { return(5); } int x; x=2; return(x); } ================================================ FILE: 47_Sizeof/tests/input087.c ================================================ struct foo { int x; int y; struct blah { int g; }; }; 
================================================ FILE: 47_Sizeof/tests/input088.c ================================================ #include struct foo { int x; int y; } fred, mary; struct foo james; int main() { fred.x= 5; mary.y= 6; printf("%d %d\n", fred.x, mary.y); return(0); } ================================================ FILE: 47_Sizeof/tests/input089.c ================================================ #include int x= 23; char y= 'H'; char *z= "Hello world"; int main() { printf("%d %c %s\n", x, y, z); return(0); } ================================================ FILE: 47_Sizeof/tests/input090.c ================================================ #include int a = 23, b = 100; char y = 'H', *z = "Hello world"; int main() { printf("%d %d %c %s\n", a, b, y, z); return (0); } ================================================ FILE: 47_Sizeof/tests/input091.c ================================================ #include int fred[] = { 1, 2, 3, 4, 5 }; int jim[10] = { 1, 2, 3, 4, 5 }; int main() { int i; for (i=0; i < 5; i++) printf("%d\n", fred[i]); for (i=0; i < 10; i++) printf("%d\n", jim[i]); return(0); } ================================================ FILE: 47_Sizeof/tests/input092.c ================================================ char x= 3000; ================================================ FILE: 47_Sizeof/tests/input093.c ================================================ char x= fred; ================================================ FILE: 47_Sizeof/tests/input094.c ================================================ char *s= 54; ================================================ FILE: 47_Sizeof/tests/input095.c ================================================ int fred(int x=2) { return(x); } ================================================ FILE: 47_Sizeof/tests/input096.c ================================================ int fred[0]; ================================================ FILE: 47_Sizeof/tests/input097.c ================================================ int 
main() { int x[45]; return(0); } ================================================ FILE: 47_Sizeof/tests/input098.c ================================================ int fred[3]= { 1, 2, 3, 4, 5 }; ================================================ FILE: 47_Sizeof/tests/input099.c ================================================ #include // List of token strings, for debugging purposes. // As yet, we can't store a NULL into the list char *Tstring[] = { "EOF", "=", "||", "&&", "|", "^", "&", "==", "!=", ",", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", "default", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":", "" }; int main() { int i; char *str; i=0; while (1) { str= Tstring[i]; if (*str == 0) break; printf("%s\n", str); i++; } return(0); } ================================================ FILE: 47_Sizeof/tests/input100.c ================================================ #include int main() { int x= 3, y=14; int z= 2 * x + y; char *str= "Hello world"; printf("%s %d %d\n", str, x+y, z); return(0); } ================================================ FILE: 47_Sizeof/tests/input101.c ================================================ #include int main() { int x= 65535; char y; char *str; y= (char )x; printf("0x%x\n", y); str= (char *)0; printf("0x%lx\n", (long)str); return(0); } ================================================ FILE: 47_Sizeof/tests/input102.c ================================================ int main() { struct foo { int p; }; int y= (struct foo) x; } ================================================ FILE: 47_Sizeof/tests/input103.c ================================================ int main() { union foo { int p; }; int y= (union foo) x; } ================================================ FILE: 47_Sizeof/tests/input104.c 
================================================ int main() { int y= (void) x; } ================================================ FILE: 47_Sizeof/tests/input105.c ================================================ int main() { int x; char *y; y= (char) x; } ================================================ FILE: 47_Sizeof/tests/input106.c ================================================ #include char *y= (char *)0; int main() { printf("0x%lx\n", (long)y); return(0); } ================================================ FILE: 47_Sizeof/tests/input107.c ================================================ #include char *y[] = { "fish", "cow", NULL }; char *z= NULL; int main() { int i; char *ptr; for (i=0; i < 3; i++) { ptr= y[i]; if (ptr != (char *)0) printf("%s\n", y[i]); else printf("NULL\n"); } return(0); } ================================================ FILE: 47_Sizeof/tests/input108.c ================================================ int main() { char *str= (void *)0; return(0); } ================================================ FILE: 47_Sizeof/tests/input109.c ================================================ #include int main() { int x= 17 - 1; printf("%d\n", x); return(0); } ================================================ FILE: 47_Sizeof/tests/input110.c ================================================ #include int x; int y; int main() { x= 3; y= 15; y += x; printf("%d\n", y); x= 3; y= 15; y -= x; printf("%d\n", y); x= 3; y= 15; y *= x; printf("%d\n", y); x= 3; y= 15; y /= x; printf("%d\n", y); return(0); } ================================================ FILE: 47_Sizeof/tests/input111.c ================================================ #include int main() { int x= 2000 + 3 + 4 * 5 + 6; printf("%d\n", x); return(0); } ================================================ FILE: 47_Sizeof/tests/input112.c ================================================ #include char* y = NULL; int x= 10 + 6; int fred [ 2 + 3 ]; int main() { fred[2]= x; printf("%d\n", fred[2]); return(0); } 
================================================ FILE: 47_Sizeof/tests/input113.c ================================================ #include void fred(void); void fred(void) { printf("fred says hello\n"); } int main(void) { fred(); return(0); } ================================================ FILE: 47_Sizeof/tests/input114.c ================================================ #include int main() { int x= 0x4a; printf("%c\n", x); return(0); } ================================================ FILE: 47_Sizeof/tests/input115.c ================================================ #include struct foo { int x; char y; long z; }; typedef struct foo blah; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol struct symtable *ctype; // If struct/union, ptr to that type int stype; // Structural type for the symbol int class; // Storage class for the symbol int size; // Total size in bytes of this symbol int nelems; // Functions: # params. Arrays: # elements #define st_endlabel st_posn // For functions, the end label int st_posn; // For locals, the negative offset // from the stack base pointer int *initlist; // List of initial values struct symtable *next; // Next symbol in one list struct symtable *member; // First member of a function, struct, }; // union or enum // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates int rvalue; // True if the node is an rvalue struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; struct symtable *sym; // For many AST nodes, the pointer to // the symbol in the symbol table #define a_intvalue a_size // For A_INTLIT, the integer value int a_size; // For A_SCALE, the size to scale by }; int main() { printf("%ld\n", sizeof(char)); printf("%ld\n", sizeof(int)); printf("%ld\n", sizeof(long)); printf("%ld\n", sizeof(char *)); printf("%ld\n", 
sizeof(blah)); printf("%ld\n", sizeof(struct symtable)); printf("%ld\n", sizeof(struct ASTnode)); return(0); } ================================================ FILE: 47_Sizeof/tests/mktests ================================================
#!/bin/sh
# Make the output files for each test.
#
# For every input*c file that has neither an out.<file> nor an err.<file>:
#   - compile it with our compiler (../cwj), capturing stderr in err.<file>;
#   - if our compiler printed nothing, discard the empty err file, build
#     the same source with the system compiler (cc) and save what that
#     program prints as the known-good out.<file>.

# Build our compiler if needed
if [ ! -f ../cwj ]
then (cd ..; make install)
fi

for i in input*c
do
  # The pattern is left unexpanded when no input file exists
  [ -f "$i" ] || continue

  if [ ! -f "out.$i" ] && [ ! -f "err.$i" ]
  then
    ../cwj -o out "$i" 2> "err.$i"

    # If the err file is empty
    if [ ! -s "err.$i" ]
    then
      rm -f "err.$i"

      # Our own compiler has just written "out". Remove it so that a
      # failed cc build cannot leave it behind to be run as if it were
      # the reference program.
      rm -f out
      if cc -o out "$i"
      then ./out > "out.$i"
      else echo "mktests: cc could not build $i, no out.$i written" >&2
      fi
    fi
  fi
  rm -f out out.s
done
================================================ FILE: 47_Sizeof/tests/out.input001.c ================================================ 36 10 25 ================================================ FILE: 47_Sizeof/tests/out.input002.c ================================================ 17 ================================================ FILE: 47_Sizeof/tests/out.input003.c ================================================ 1 2 3 4 5 ================================================ FILE: 47_Sizeof/tests/out.input004.c ================================================ 1 1 1 1 1 1 1 1 1 ================================================ FILE: 47_Sizeof/tests/out.input005.c ================================================ 6 ================================================ FILE: 47_Sizeof/tests/out.input006.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 47_Sizeof/tests/out.input007.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 47_Sizeof/tests/out.input008.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 47_Sizeof/tests/out.input009.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE:
47_Sizeof/tests/out.input010.c ================================================ 20 10 1 2 3 4 5 253 254 255 0 1 ================================================ FILE: 47_Sizeof/tests/out.input011.c ================================================ 10 20 30 1 2 3 4 5 253 254 255 0 1 2 3 1 2 3 4 5 ================================================ FILE: 47_Sizeof/tests/out.input012.c ================================================ 5 ================================================ FILE: 47_Sizeof/tests/out.input013.c ================================================ 23 56 ================================================ FILE: 47_Sizeof/tests/out.input014.c ================================================ 10 20 30 ================================================ FILE: 47_Sizeof/tests/out.input015.c ================================================ 18 18 12 12 ================================================ FILE: 47_Sizeof/tests/out.input016.c ================================================ 12 18 ================================================ FILE: 47_Sizeof/tests/out.input017.c ================================================ 19 12 ================================================ FILE: 47_Sizeof/tests/out.input018.c ================================================ 34 34 ================================================ FILE: 47_Sizeof/tests/out.input018a.c ================================================ 15 16 ================================================ FILE: 47_Sizeof/tests/out.input019.c ================================================ 30 ================================================ FILE: 47_Sizeof/tests/out.input020.c ================================================ 12 ================================================ FILE: 47_Sizeof/tests/out.input021.c ================================================ 10 Hello world ================================================ FILE: 47_Sizeof/tests/out.input022.c ================================================ 12 12 12 
13 13 13 13 13 13 35 35 35 ================================================ FILE: 47_Sizeof/tests/out.input023.c ================================================ -23 100 -2 0 1 0 13 14 Hello world ================================================ FILE: 47_Sizeof/tests/out.input024.c ================================================ 2 59 57 8 7 ================================================ FILE: 47_Sizeof/tests/out.input025.c ================================================ 10 20 30 5 15 25 ================================================ FILE: 47_Sizeof/tests/out.input026.c ================================================ 13 23 34 44 54 64 74 84 94 95 96 ================================================ FILE: 47_Sizeof/tests/out.input027.c ================================================ 1 2 3 4 5 6 7 8 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 ================================================ FILE: 47_Sizeof/tests/out.input028.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 47_Sizeof/tests/out.input029.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 47_Sizeof/tests/out.input030.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input030.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 47_Sizeof/tests/out.input053.c ================================================ Hello world, 23 ================================================ FILE: 47_Sizeof/tests/out.input054.c ================================================ Hello world, 0 Hello world, 1 Hello world, 2 Hello world, 3 
Hello world, 4 Hello world, 5 Hello world, 6 Hello world, 7 Hello world, 8 Hello world, 9 Hello world, 10 Hello world, 11 Hello world, 12 Hello world, 13 Hello world, 14 Hello world, 15 Hello world, 16 Hello world, 17 Hello world, 18 Hello world, 19 ================================================ FILE: 47_Sizeof/tests/out.input055.c ================================================ Hello world Argument 0 is ./out ================================================ FILE: 47_Sizeof/tests/out.input058.c ================================================ 12 99 4005 4116 4116 ================================================ FILE: 47_Sizeof/tests/out.input062.c ================================================ 65 66 66 The next two depend on the endian of the platform 66 66 67 67 ================================================ FILE: 47_Sizeof/tests/out.input063.c ================================================ 25 ================================================ FILE: 47_Sizeof/tests/out.input067.c ================================================ 5 17 ================================================ FILE: 47_Sizeof/tests/out.input070.c ================================================ 56 ================================================ FILE: 47_Sizeof/tests/out.input071.c ================================================ 0 1 2 3 4 7 8 9 10 11 12 13 14 Done ================================================ FILE: 47_Sizeof/tests/out.input074.c ================================================ 100 5 7 100 100 ================================================ FILE: 47_Sizeof/tests/out.input080.c ================================================ 0 1 1 3 2 5 3 7 4 9 5 11 ================================================ FILE: 47_Sizeof/tests/out.input081.c ================================================ 0 1 1 3 2 5 3 7 4 9 ================================================ FILE: 47_Sizeof/tests/out.input082.c ================================================ 15 >= x > 5 
================================================ FILE: 47_Sizeof/tests/out.input083.c ================================================ 5 < 6 <= 10 5 < 7 <= 10 5 < 8 <= 10 5 < 9 <= 10 5 < 10 <= 10 10 < 11 ================================================ FILE: 47_Sizeof/tests/out.input084.c ================================================ 2 3 f f ================================================ FILE: 47_Sizeof/tests/out.input088.c ================================================ 5 6 ================================================ FILE: 47_Sizeof/tests/out.input089.c ================================================ 23 H Hello world ================================================ FILE: 47_Sizeof/tests/out.input090.c ================================================ 23 100 H Hello world ================================================ FILE: 47_Sizeof/tests/out.input091.c ================================================ 1 2 3 4 5 1 2 3 4 5 0 0 0 0 0 ================================================ FILE: 47_Sizeof/tests/out.input099.c ================================================ EOF = || && | ^ & == != , > <= >= << >> + - * / ++ -- ~ ! void char int long if else while for return struct union enum typedef extern break continue switch case default intlit strlit ; identifier { } ( ) [ ] , . 
-> : ================================================ FILE: 47_Sizeof/tests/out.input100.c ================================================ Hello world 17 20 ================================================ FILE: 47_Sizeof/tests/out.input101.c ================================================ 0xff 0x0 ================================================ FILE: 47_Sizeof/tests/out.input106.c ================================================ 0x0 ================================================ FILE: 47_Sizeof/tests/out.input107.c ================================================ fish cow NULL ================================================ FILE: 47_Sizeof/tests/out.input108.c ================================================ ================================================ FILE: 47_Sizeof/tests/out.input109.c ================================================ 16 ================================================ FILE: 47_Sizeof/tests/out.input110.c ================================================ 18 12 45 5 ================================================ FILE: 47_Sizeof/tests/out.input111.c ================================================ 2029 ================================================ FILE: 47_Sizeof/tests/out.input112.c ================================================ 16 ================================================ FILE: 47_Sizeof/tests/out.input113.c ================================================ fred says hello ================================================ FILE: 47_Sizeof/tests/out.input114.c ================================================ J ================================================ FILE: 47_Sizeof/tests/out.input115.c ================================================ 1 4 8 8 13 64 48 ================================================ FILE: 47_Sizeof/tests/runtests ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! 
-f ../cwj ] then (cd ..; make install) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" # Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../cwj -o out $i ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../cwj $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.s "trial.$i" done ================================================ FILE: 47_Sizeof/tests/runtestsn ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! -f ../compn ] then (cd ..; make install) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" # Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../compn -o out $i ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" 
-eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../compn $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.o out.s "trial.$i" done ================================================ FILE: 47_Sizeof/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->type = type; n->left = left; n->mid = mid; n->right = right; n->sym = sym; n->a_intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, struct symtable *sym, int intvalue) { return (mkastnode(op, type, NULL, NULL, NULL, sym, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, struct symtable *sym, int intvalue) { return (mkastnode(op, type, left, NULL, NULL, sym, intvalue)); } // Generate and return a new label number // just for AST dumping purposes static int gendumplabel(void) { static int id = 1; return (id++); } // Given an AST tree, print it out and follow the // traversal of the tree that genAST() follows void dumpAST(struct ASTnode *n, int label, int level) { int Lfalse, Lstart, Lend; switch (n->op) { case A_IF: Lfalse = gendumplabel(); 
for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_IF"); if (n->right) { Lend = gendumplabel(); fprintf(stdout, ", end L%d", Lend); } fprintf(stdout, "\n"); dumpAST(n->left, Lfalse, level + 2); dumpAST(n->mid, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); return; case A_WHILE: Lstart = gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_WHILE, start L%d\n", Lstart); Lend = gendumplabel(); dumpAST(n->left, Lend, level + 2); dumpAST(n->right, NOLABEL, level + 2); return; } // Reset level to -2 for A_GLUE if (n->op == A_GLUE) level = -2; // General AST node handling if (n->left) dumpAST(n->left, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); for (int i = 0; i < level; i++) fprintf(stdout, " "); switch (n->op) { case A_GLUE: fprintf(stdout, "\n\n"); return; case A_FUNCTION: fprintf(stdout, "A_FUNCTION %s\n", n->sym->name); return; case A_ADD: fprintf(stdout, "A_ADD\n"); return; case A_SUBTRACT: fprintf(stdout, "A_SUBTRACT\n"); return; case A_MULTIPLY: fprintf(stdout, "A_MULTIPLY\n"); return; case A_DIVIDE: fprintf(stdout, "A_DIVIDE\n"); return; case A_EQ: fprintf(stdout, "A_EQ\n"); return; case A_NE: fprintf(stdout, "A_NE\n"); return; case A_LT: fprintf(stdout, "A_LT\n"); return; case A_GT: fprintf(stdout, "A_GT\n"); return; case A_LE: fprintf(stdout, "A_LE\n"); return; case A_GE: fprintf(stdout, "A_GE\n"); return; case A_INTLIT: fprintf(stdout, "A_INTLIT %d\n", n->a_intvalue); return; case A_STRLIT: fprintf(stdout, "A_STRLIT rval label L%d\n", n->a_intvalue); return; case A_IDENT: if (n->rvalue) fprintf(stdout, "A_IDENT rval %s\n", n->sym->name); else fprintf(stdout, "A_IDENT %s\n", n->sym->name); return; case A_ASSIGN: fprintf(stdout, "A_ASSIGN\n"); return; case A_WIDEN: fprintf(stdout, "A_WIDEN\n"); return; case A_RETURN: fprintf(stdout, "A_RETURN\n"); return; case A_FUNCCALL: fprintf(stdout, "A_FUNCCALL %s\n", n->sym->name); return; case A_ADDR: 
fprintf(stdout, "A_ADDR %s\n", n->sym->name); return; case A_DEREF: if (n->rvalue) fprintf(stdout, "A_DEREF rval\n"); else fprintf(stdout, "A_DEREF\n"); return; case A_SCALE: fprintf(stdout, "A_SCALE %d\n", n->a_size); return; case A_PREINC: fprintf(stdout, "A_PREINC %s\n", n->sym->name); return; case A_PREDEC: fprintf(stdout, "A_PREDEC %s\n", n->sym->name); return; case A_POSTINC: fprintf(stdout, "A_POSTINC\n"); return; case A_POSTDEC: fprintf(stdout, "A_POSTDEC\n"); return; case A_NEGATE: fprintf(stdout, "A_NEGATE\n"); return; case A_BREAK: fprintf(stdout, "A_BREAK\n"); return; case A_CONTINUE: fprintf(stdout, "A_CONTINUE\n"); return; case A_CASE: fprintf(stdout, "A_CASE %d\n", n->a_intvalue); return; case A_DEFAULT: fprintf(stdout, "A_DEFAULT\n"); return; case A_SWITCH: fprintf(stdout, "A_SWITCH\n"); return; case A_CAST: fprintf(stdout, "A_CAST %d\n", n->type); return; case A_ASPLUS: fprintf(stdout, "A_ASPLUS\n"); return; case A_ASMINUS: fprintf(stdout, "A_ASMINUS\n"); return; case A_ASSTAR: fprintf(stdout, "A_ASSTAR\n"); return; case A_ASSLASH: fprintf(stdout, "A_ASSLASH\n"); return; default: fatald("Unknown dumpAST operator", n->op); } } ================================================ FILE: 47_Sizeof/types.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Types and type handling // Copyright (c) 2019 Warren Toomey, GPL3 // Return true if a type is an int type // of any size, false otherwise int inttype(int type) { return (((type & 0xf) == 0) && (type >= P_CHAR && type <= P_LONG)); } // Return true if a type is of pointer type int ptrtype(int type) { return ((type & 0xf) != 0); } // Given a primitive type, return // the type which is a pointer to it int pointer_to(int type) { if ((type & 0xf) == 0xf) fatald("Unrecognised in pointer_to: type", type); return (type + 1); } // Given a primitive pointer type, return // the type which it points to int value_at(int type) { if ((type & 0xf) == 0x0) 
fatald("Unrecognised in value_at: type", type); return (type - 1); } // Given a type and a composite type pointer, return // the size of this type in bytes int typesize(int type, struct symtable *ctype) { if (type == P_STRUCT || type == P_UNION) return (ctype->size); return (genprimsize(type)); } // Given an AST tree and a type which we want it to become, // possibly modify the tree by widening or scaling so that // it is compatible with this type. Return the original tree // if no changes occurred, a modified tree, or NULL if the // tree is not compatible with the given type. // If this will be part of a binary operation, the AST op is not zero. struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op) { int ltype; int lsize, rsize; ltype = tree->type; // XXX No idea on these yet if (ltype == P_STRUCT || ltype == P_UNION) fatal("Don't know how to do this yet"); if (rtype == P_STRUCT || rtype == P_UNION) fatal("Don't know how to do this yet"); // Compare scalar int types if (inttype(ltype) && inttype(rtype)) { // Both types same, nothing to do if (ltype == rtype) return (tree); // Get the sizes for each type lsize = typesize(ltype, NULL); rsize = typesize(rtype, NULL); // Tree's size is too big if (lsize > rsize) return (NULL); // Widen to the right if (rsize > lsize) return (mkastunary(A_WIDEN, rtype, tree, NULL, 0)); } // For pointers if (ptrtype(ltype) && ptrtype(rtype)) { // We can compare them if (op >= A_EQ && op <= A_GE) return(tree); // A comparison of the same type for a non-binary operation is OK, // or when the left tree is of `void *` type. 
if (op == 0 && (ltype == rtype || ltype == pointer_to(P_VOID))) return (tree); } // We can scale only on A_ADD or A_SUBTRACT operation if (op == A_ADD || op == A_SUBTRACT) { // Left is int type, right is pointer type and the size // of the original type is >1: scale the left if (inttype(ltype) && ptrtype(rtype)) { rsize = genprimsize(value_at(rtype)); if (rsize > 1) return (mkastunary(A_SCALE, rtype, tree, NULL, rsize)); else return (tree); // Size 1, no need to scale } } // If we get here, the types are not compatible return (NULL); } ================================================ FILE: 48_Static/Makefile ================================================ # Define the location of the include directory # and the location to install the compiler binary INCDIR=/tmp/include BINDIR=/tmp HSRCS= data.h decl.h defs.h SRCS= cg.c decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c ARMSRCS= cg_arm.c decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c cwj: $(SRCS) $(HSRCS) cc -o cwj -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCS) compn: $(SRCN) $(HSRCS) cc -D__NASM__ -o compn -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCN) cwjarm: $(ARMSRCS) $(HSRCS) cc -o cwjarm -g -Wall $(ARMSRCS) cp cwjarm cwj install: cwj mkdir -p $(INCDIR) rsync -a include/. $(INCDIR) cp cwj $(BINDIR) chmod +x $(BINDIR)/cwj installn: compn mkdir -p $(INCDIR) rsync -a include/. 
$(INCDIR) cp compn $(BINDIR) chmod +x $(BINDIR)/compn clean: rm -f cwj cwjarm compn *.o *.s out a.out test: install tests/runtests (cd tests; chmod +x runtests; ./runtests) armtest: cwjarm tests/runtests (cd tests; chmod +x runtests; ./runtests) testn: installn tests/runtestsn (cd tests; chmod +x runtestsn; ./runtestsn) ================================================ FILE: 48_Static/Readme.md ================================================ # Part 48: A Subset of `static` In a real C compiler, there are three types of `static` things: + static functions, whose declaration is visible only in the source file where the function appears; + static global variables, whose declaration is visible only in the source file where the variable appears; and + static local variables, which act like global variables except that each static local variable is only visible within the function where the variable appears. The first two should be simple to implement: + add them as a global variable when they are declared, and + remove them from the global symbol table when that source code file is closed The third one is much harder. Here's an example. Let's keep two private counters with functions to increment them: ```c int inc_counter1(void) { static int counter= 0; return(counter++); } int inc_counter2(void) { static int counter= 0; return(counter++); } ``` Both functions see their own `counter` variable, and the values of both counters persist across function calls. The variable persistence makes them "global" (i.e. live outside of function scope) but they are only visible to one function, which makes them sort of "local". I'll drop a reference to [closures](https://en.wikipedia.org/wiki/Closure_(computer_programming)) here, but the theory side is a bit out of scope, mainly because I'm *not* going to implement this third type of static thing. Why not? Mainly because it will be hard to implement something that has both global and local characteristics at the same time. 
But, also, I don't have any static local variables in our compiler (now that I've rewritten some code), and so there's no need for the functionality. Instead, we can concentrate on static global functions and static global variables. ## A New Keyword and Token We have a new keyword `static` and a new token T_STATIC. As always, read through `scan.c` for the changes. ## Parsing `static` The `static` keyword gets parsed in the same place as `extern`. We also want to reject any attempt to use the `static` keyword in a local context. So in `decl.c`, we modify `parse_type()`: ```c // Parse the current token and return a primitive type enum value, // a pointer to any composite type and possibly modify // the class of the type. int parse_type(struct symtable **ctype, int *class) { int type, exstatic = 1; // See if the class has been changed to extern or static while (exstatic) { switch (Token.token) { case T_EXTERN: if (*class == C_STATIC) fatal("Illegal to have extern and static at the same time"); *class = C_EXTERN; scan(&Token); break; case T_STATIC: if (*class == C_LOCAL) fatal("Compiler doesn't support static local declarations"); if (*class == C_EXTERN) fatal("Illegal to have extern and static at the same time"); *class = C_STATIC; scan(&Token); break; default: exstatic = 0; } } ... } ``` If we see either `static` or `extern`, we first check whether this is legal given the current declaration class. Then we update the `class` variable. If we see neither token, we leave the loop. Now that we have a type which is marked as being for a static declaration, how is this added to the global symbol table? We need to change, in nearly every place in the compiler, any use of the C_GLOBAL class to also include C_STATIC. This occurs numerous times across multiple files, but you should look out for code like this: ```c if (class == C_GLOBAL || class == C_STATIC) ... ``` in `cg.c`, `decl.c`, `expr.c` and `gen.c`. 
## Getting Rid of `static` Declarations Once we have finished parsing static declarations, we need to remove them from the global symbol table. In `do_compile()` in `main.c`, just after we close the output file, we now do this: ```c genpreamble(); // Output the preamble global_declarations(); // Parse the global declarations genpostamble(); // Output the postamble fclose(Outfile); // Close the output file freestaticsyms(); // Free any static symbols in the file ``` So let's now look at `freestaticsyms()` in `sym.c`. We walk the global symbol table. For any static node, we relink the list to remove it. I'm not a whiz at linked list code, so I wrote out all the possible alternatives on a sheet of paper to come up with the following code: ```c // Remove all static symbols from the global symbol table void freestaticsyms(void) { // g points at current node, prev at the previous one struct symtable *g, *prev= NULL; // Walk the global table looking for static entries for (g= Globhead; g != NULL; g= g->next) { if (g->class == C_STATIC) { // If there's a previous node, rearrange the prev pointer // to skip over the current node. If not, g is the head, // so do the same to Globhead if (prev != NULL) prev->next= g->next; else Globhead->next= g->next; // If g is the tail, point Globtail at the previous node // (if there is one), or Globhead if (g == Globtail) { if (prev != NULL) Globtail= prev; else Globtail= Globhead; } } } // Point prev at g before we move up to the next node prev= g; } ``` The overall effect is to treat static declarations as global declarations, but to remove them from the symbol table at the end of processing an input file. ## Testing the Changes There are three programs to test the changes, `tests/input116.c` through to `tests/input118.c`. 
Let's look at the first one: ```c #include <stdio.h> static int counter=0; static int fred(void) { return(counter++); } int main(void) { int i; for (i=0; i < 5; i++) printf("%d\n", fred()); return(0); } ``` Let's look at the assembly output for some of this: ``` ... .data counter: .long 0 .text fred: pushq %rbp movq %rsp, %rbp addq $0,%rsp ... ``` Normally, `counter` and `fred` would have been decorated with a `.globl` marking. Now that they are static, they still get labels but we no longer emit the `.globl` directive, so the assembler does not make them globally visible. ## Conclusion and What's Next I was worried about `static`, but once I decided to not implement the really hard third alternative, it wasn't too bad. What caused me some grief was going through the code, finding all C_GLOBAL uses and ensuring that I added appropriate C_STATIC code as well. In the next part of our compiler writing journey, I think it's time that I tackle the [ternary operator](https://en.wikipedia.org/wiki/%3F:). [Next step](../49_Ternary/Readme.md) ================================================ FILE: 48_Static/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section we are outputting in to enum { no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\t.text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\t.data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. 
int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch (type) { case P_CHAR: return (offset); case P_INT: case P_LONG: break; default: if (!ptrtype(type)) fatald("Bad type in cg_align:", type); } // Here we have an int or a long. Align it on a 4-byte offset // I put the generic code here so it can be reused elsewhere. alignment = 4; offset = (offset + direction * (alignment - 1)) & ~(alignment - 1); return (offset); } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. static int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. 
// We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "%r10", "%r11", "%r12", "%r13", "%r9", "%r8", "%rcx", "%rdx", "%rsi", "%rdi" }; static char *breglist[] = { "%r10b", "%r11b", "%r12b", "%r13b", "%r9b", "%r8b", "%cl", "%dl", "%sil", "%dil" }; static char *dreglist[] = { "%r10d", "%r11d", "%r12d", "%r13d", "%r9d", "%r8d", "%ecx", "%edx", "%esi", "%edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); cgtextseg(); fprintf(Outfile, "# internal switch(expr) routine\n" "# %%rsi = switch table, %%rax = expr\n" "# from SubC: http://www.t3x.org/subc/\n" "\n" "switch:\n" " pushq %%rsi\n" " movq %%rdx, %%rsi\n" " movq %%rax, %%rbx\n" " cld\n" " lodsq\n" " movq %%rax, %%rcx\n" "next:\n" " lodsq\n" " movq %%rax, %%rdx\n" " lodsq\n" " cmpq %%rdx, %%rbx\n" " jnz no\n" " popq %%rsi\n" " jmp *%%rax\n" "no:\n" " loop next\n" " lodsq\n" " popq %%rsi\n" " jmp *%%rax\n" "\n"); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(struct symtable *sym) { char *name = sym->name; struct symtable *parm, *locvar; int cnt; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the old %rbp and copy %rsp into %rbp if (sym->class == C_GLOBAL) fprintf(Outfile, "\t.globl\t%s\n" "\t.type\t%s, @function\n", name, name); fprintf(Outfile, "%s:\n" "\tpushq\t%%rbp\n" "\tmovq\t%%rsp, %%rbp\n", name); // Copy any in-register parameters to the stack, up to six of them // The remaining parameters are already on the stack for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) { if (cnt > 6) { parm->st_posn = paramOffset; paramOffset += 8; } else { parm->st_posn = newlocaloffset(parm->type); cgstorlocal(paramReg--, parm); } } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. 
for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) { locvar->st_posn = newlocaloffset(locvar->type); } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\taddq\t$%d,%%rsp\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->st_endlabel); fprintf(Outfile, "\taddq\t$%d,%%rsp\n", stackOffset); fputs("\tpopq %rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadglob(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name); } else // Print out the code to initialise it switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovzbq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name); if (op == 
A_PREDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovslq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadlocal(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->st_posn); fprintf(Outfile, "\tmovq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->st_posn); } else switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->st_posn); fprintf(Outfile, "\tmovzbq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->st_posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->st_posn); fprintf(Outfile, "\tmovslq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->st_posn); break; default: fatald("Bad type in cgloadlocal:", sym->type); } return (r); } // Given the label number of a 
global string, // load its address into a new register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tleaq\tL%d(%%rip), %s\n", label, reglist[r]); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tandq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\torq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txorq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshlq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshrq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { 
fprintf(Outfile, "\tnegq\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnotq\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); return (r); } // Convert an integer value to a boolean value. Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s@PLT\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\taddq\t$%d, %%rsp\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpushq\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmovq\t%s, %s\n", reglist[r], reglist[FIRSTPARAMREG - argposn + 1]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsalq\t$%d, %s\n", val, reglist[r]); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %s(%%rip)\n", reglist[r], sym->name); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %s(%%rip)\n", breglist[r], sym->name); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %s(%%rip)\n", dreglist[r], sym->name); break; default: fatald("Bad type in cgstorglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %d(%%rbp)\n", reglist[r], sym->st_posn); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %d(%%rbp)\n", breglist[r], sym->st_posn); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %d(%%rbp)\n", dreglist[r], sym->st_posn); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size, type; int initvalue; int i; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the variable (or its elements if an array) // and the type of the variable if (node->stype == S_ARRAY) { size= typesize(value_at(node->type), node->ctype); type= value_at(node->type); } else { size = node->size; type= node->type; } // Generate the global identity and the label cgdataseg(); if (node->class == C_GLOBAL) fprintf(Outfile, 
"\t.globl\t%s\n", node->name); fprintf(Outfile, "%s:\n", node->name); // Output space for one or more elements for (i=0; i < node->nelems; i++) { // Get any initial value initvalue= 0; if (node->initlist != NULL) initvalue= node->initlist[i]; // Generate the space for this type switch (size) { case 1: fprintf(Outfile, "\t.byte\t%d\n", initvalue); break; case 4: fprintf(Outfile, "\t.long\t%d\n", initvalue); break; case 8: // Generate the pointer to a string literal. Treat a zero value // as actually zero, not the label L0 if (node->initlist != NULL && type== pointer_to(P_CHAR) && initvalue != 0) fprintf(Outfile, "\t.quad\tL%d\n", initvalue); else fprintf(Outfile, "\t.quad\t%d\n", initvalue); break; default: for (int i = 0; i < size; i++) fprintf(Outfile, "\t.byte\t0\n"); } } } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\t.byte\t%d\n", *cptr); } fprintf(Outfile, "\t.byte\t0\n"); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Deal with pointers here as we can't put them in // the switch statement if (ptrtype(sym->type)) fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); else { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzbl\t%s, %%eax\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %%eax\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } } cgjump(sym->st_endlabel); } // Generate code to load the 
address of an // identifier into a variable. Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL || sym->class == C_STATIC) fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", sym->name, reglist[r]); else fprintf(Outfile, "\tleaq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzbq\t(%s), %s\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovslq\t(%s), %s\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { // Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmovb\t%s, (%s)\n", breglist[r1], reglist[r2]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } // Generate a switch jump table and the code to // load the registers and call the switch() code void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; // Get a label for the switch table label = genlabel(); cglabel(label); // Heuristic. If we have no cases, create one case // which points to the default case if (casecount == 0) { caseval[0] = 0; caselabel[0] = defaultlabel; casecount = 1; } // Generate the switch jump table. 
fprintf(Outfile, "\t.quad\t%d\n", casecount); for (i = 0; i < casecount; i++) fprintf(Outfile, "\t.quad\t%d, L%d\n", caseval[i], caselabel[i]); fprintf(Outfile, "\t.quad\tL%d\n", defaultlabel); // Load the specific registers cglabel(toplabel); fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); fprintf(Outfile, "\tleaq\tL%d(%%rip), %%rdx\n", label); fprintf(Outfile, "\tjmp\tswitch\n"); } ================================================ FILE: 48_Static/cg_arm.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for ARMv6 on Raspberry Pi // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. static int freereg[4]; static char *reglist[4] = { "r4", "r5", "r6", "r7" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // We have to store large integer literal values in memory. // Keep a list of them which will be output in the postamble #define MAXINTS 1024 int Intlist[MAXINTS]; static int Intslot = 0; // Determine the offset of a large integer // literal from the .L3 label. If the integer // isn't in the list, add it. 
static void set_int_offset(int val) { int offset = -1; // See if it is already there for (int i = 0; i < Intslot; i++) { if (Intlist[i] == val) { offset = 4 * i; break; } } // Not in the list, so add it if (offset == -1) { offset = 4 * Intslot; if (Intslot == MAXINTS) fatal("Out of int slots in set_int_offset()"); Intlist[Intslot++] = val; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L3+%d\n", offset); } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Print out the assembly postamble void cgpostamble() { // Print out the global variables fprintf(Outfile, ".L2:\n"); for (int i = 0; i < Globs; i++) { if (Symtable[i].stype == S_VARIABLE) fprintf(Outfile, "\t.word %s\n", Symtable[i].name); } // Print out the integer literals fprintf(Outfile, ".L3:\n"); for (int i = 0; i < Intslot; i++) { fprintf(Outfile, "\t.word %d\n", Intlist[i]); } } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, \%%function\n" "%s:\n" "\tpush\t{fp, lr}\n" "\tadd\tfp, sp, #4\n" "\tsub\tsp, sp, #8\n" "\tstr\tr0, [fp, #-8]\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Symtable[id].endlabel); fputs("\tsub\tsp, fp, #4\n" "\tpop\t{fp, pc}\n" "\t.align\t2\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); // If the literal value is small, do it with one instruction if (value <= 1000) fprintf(Outfile, "\tmov\t%s, #%d\n", reglist[r], value); else { set_int_offset(value); fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); } return (r); } // Determine the offset of a variable from the .L2 // label. Yes, this is inefficient code. static void set_var_offset(int id) { int offset = 0; // Walk the symbol table up to id. 
// Find S_VARIABLEs and add on 4 until // we get to our variable for (int i = 0; i < id; i++) { if (Symtable[i].stype == S_VARIABLE) offset += 4; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L2+%d\n", offset); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tldrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s, %s\n", reglist[r1], reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\tmul\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { // To do a divide: r0 holds the dividend, r1 holds the divisor. // The quotient is in r0. 
fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r1]); fprintf(Outfile, "\tmov\tr1, %s\n", reglist[r2]); fprintf(Outfile, "\tbl\t__aeabi_idiv\n"); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r1]); free_register(r2); return (r1); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]); fprintf(Outfile, "\tbl\t%s\n", Symtable[id].name); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r]); return (r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tlsl\t%s, %s, #%d\n", reglist[r], reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tstr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgstorglob:", Symtable[id].type); } return (r); } // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { if (ptrtype(type)) return (4); switch (type) { case P_CHAR: return (1); case P_INT: case P_LONG: return (4); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Symtable[id].type); fprintf(Outfile, "\t.data\n" "\t.globl\t%s\n", Symtable[id].name); switch (typesize) { case 1: fprintf(Outfile, "%s:\t.byte\t0\n", Symtable[id].name); break; case 4: fprintf(Outfile, "%s:\t.long\t0\n", Symtable[id].name); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "moveq", "movne", "movlt", "movgt", "movle", "movge" }; // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "movne", "moveq", "movge", "movle", "movgt", "movlt" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #1\n", cmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #0\n", invcmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\tuxtb\t%s, %s\n", reglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tb\tL%d\n", l); } // List of inverted branch instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *brlist[] = { "bne", "beq", "bge", "ble", "bgt", "blt" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", brlist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[reg]); cgjump(Symtable[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. Return a new register int cgaddress(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); fprintf(Outfile, "\tmov\t%s, r3\n", reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tldrb\t%s, [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [%s]\n", reglist[r1], reglist[r2]); break; case P_INT: case P_LONG: fprintf(Outfile, "\tstr\t%s, [%s]\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 48_Static/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { no_seg, 
text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\tsection .text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\tsection .data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch (type) { case P_CHAR: return (offset); case P_INT: case P_LONG: break; default: if (!ptrtype(type)) fatald("Bad type in cg_align:", type); } // Here we have an int or a long. Align it on a 4-byte offset // I put the generic code here so it can be reused elsewhere. alignment = 4; offset = (offset + direction * (alignment - 1)) & ~(alignment - 1); return (offset); } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. 
// We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "r10", "r11", "r12", "r13", "r9", "r8", "rcx", "rdx", "rsi", "rdi" }; static char *breglist[] = { "r10b", "r11b", "r12b", "r13b", "r9b", "r8b", "cl", "dl", "sil", "dil" }; static char *dreglist[] = { "r10d", "r11d", "r12d", "r13d", "r9d", "r8d", "ecx", "edx", "esi", "edi" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\textern\tprintint\n", Outfile); fputs("\textern\tprintchar\n", Outfile); fputs("\textern\topen\n", Outfile); fputs("\textern\tclose\n", Outfile); fputs("\textern\tread\n", Outfile); fputs("\textern\twrite\n", Outfile); fputs("\textern\tprintf\n", Outfile); cgtextseg(); fprintf(Outfile, "; internal switch(expr) routine\n" "; rsi = switch table, rax = expr\n" "; from SubC: http://www.t3x.org/subc/\n" "\n" "switch:\n" " push rsi\n" " mov rsi, rdx\n" " mov rbx, rax\n" " cld\n" " lodsq\n" " mov rcx, rax\n" "next:\n" " lodsq\n" " mov rdx, rax\n" " lodsq\n" " cmp rbx, rdx\n" " jnz no\n" " pop rsi\n" " jmp rax\n" "no:\n" " loop next\n" " lodsq\n" " pop rsi\n" " jmp rax\n" "\n"); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(struct symtable *sym) { char *name = sym->name; struct symtable *parm, *locvar; int cnt; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the rsp and rbp if (sym->class == C_GLOBAL) fprintf(Outfile, "\tglobal\t%s\n", name); fprintf(Outfile, "%s:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n", name); // Copy any in-register parameters to the stack, up to six of them // The remaining parameters are already on the stack for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) { if (cnt > 6) { parm->st_posn = paramOffset; paramOffset += 8; } else { parm->st_posn = newlocaloffset(parm->type); cgstorlocal(paramReg--, parm); } } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. 
for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) { locvar->st_posn = newlocaloffset(locvar->type); } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\tadd\trsp, %d\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->st_endlabel); fprintf(Outfile, "\tadd\trsp, %d\n", stackOffset); fputs("\tpop rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadglob(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); } else // Print out the code to initialise it switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_PREDEC) 
fprintf(Outfile, "\tdec\tdword [%s]\n", sym->name); fprintf(Outfile, "\tmovsx\t%s, word [%s]\n", dreglist[r], sym->name); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword [%s]\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadlocal(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->st_posn); fprintf(Outfile, "\tmov\t%s, [rbp+%d]\n", reglist[r], sym->st_posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->st_posn); } else switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->st_posn); fprintf(Outfile, "\tmovzx\t%s, byte [rbp+%d]\n", reglist[r], sym->st_posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->st_posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", sym->st_posn); fprintf(Outfile, "\tmovsx\t%s, dword [rbp+%d]\n", reglist[r], sym->st_posn); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", 
sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", sym->st_posn); break; default: fatald("Bad type in cgloadlocal:", sym->type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, L%d\n", reglist[r], label); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tand\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\tor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshl\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, 
"\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshr\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tneg\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnot\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r], breglist[r]); return (r); } // Convert an integer value to a boolean value. Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, byte %s\n", reglist[r], breglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\tadd\trsp, %d\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmov\t%s, rax\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpush\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmov\t%s, %s\n", reglist[FIRSTPARAMREG - argposn + 1], reglist[r]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsal\t%s, %d\n", reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, dreglist[r]); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\tqword\t[rbp+%d], %s\n", sym->st_posn, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\tbyte\t[rbp+%d], %s\n", sym->st_posn, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\tdword\t[rbp+%d], %s\n", sym->st_posn, dreglist[r]); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size, type; int initvalue; int i; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the variable (or its elements if an array) // and the type of the variable if (node->stype == S_ARRAY) { size= typesize(value_at(node->type), node->ctype); type= value_at(node->type); } else { size = node->size; type= node->type; } // Generate the global identity and the label cgdataseg(); if (node->class == C_GLOBAL) fprintf(Outfile, "\tglobal\t%s\n", 
node->name); fprintf(Outfile, "%s:\n", node->name); // Output space for one or more elements for (i = 0; i < node->nelems; i++) { // Get any initial value initvalue = 0; if (node->initlist != NULL) initvalue = node->initlist[i]; // Generate the space for this type // original version switch(size) { case 1: fprintf(Outfile, "\tdb\t%d\n", initvalue); break; case 4: fprintf(Outfile, "\tdd\t%d\n", initvalue); break; case 8: // Generate the pointer to a string literal. Treat a zero value // as actually zero, not the label L0 if (node->initlist != NULL && type == pointer_to(P_CHAR) && initvalue != 0) fprintf(Outfile, "\tdq\tL%d\n", initvalue); else fprintf(Outfile, "\tdq\t%d\n", initvalue); break; default: for (int i = 0; i < size; i++) fprintf(Outfile, "\tdb\t0\n"); /* compact version using times instead of loop fprintf(Outfile, "\ttimes\t%d\tdb\t0\n", size); */ } } } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\tdb\t%d\n", *cptr); } fprintf(Outfile, "\tdb\t0\n"); /* put string in readable format on a single line // probably went overboard with error checking int comma = 0, quote = 0, start = 1; fprintf(Outfile, "\tdb\t"); for (cptr=strvalue; *cptr; cptr++) { if ( ! isprint(*cptr) ) if (comma || start) { fprintf(Outfile, "%d, ", *cptr); start = 0; comma = 1; } else if (quote) { fprintf(Outfile, "\', %d, ", *cptr); comma = 1; quote = 0; } else { fprintf(Outfile, "%d, ", *cptr); comma = 1; quote = 0; } else if (start || comma) { fprintf(Outfile, "\'%c", *cptr); start = comma = 0; quote = 1; } else { fprintf(Outfile, "%c", *cptr); comma = 0; quote = 1; } } if (comma || start) fprintf(Outfile, "0\n"); else fprintf(Outfile, "\', 0\n"); */ } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Deal with pointers here as we can't put them in // the switch statement if (ptrtype(sym->type)) fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); else { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzx\teax, %s\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmov\teax, %s\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } } cgjump(sym->st_endlabel); } // Generate code to load the address of an // 
identifier into a variable. Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL || sym->class == C_STATIC) fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r], sym->name); else fprintf(Outfile, "\tlea\t%s, [rbp+%d]\n", reglist[r], sym->st_posn); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovsx\t%s, dword [%s]\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { //Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmov\t[%s], byte %s\n", reglist[r2], breglist[r1]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } // Generate a switch jump table and the code to // load the registers and call the switch() code void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; // Get a label for the switch table label = genlabel(); cglabel(label); // Heuristic. If we have no cases, create one case // which points to the default case if (casecount == 0) { caseval[0] = 0; caselabel[0] = defaultlabel; casecount = 1; } // Generate the switch jump table. 
fprintf(Outfile, "\tdq\t%d\n", casecount); for (i = 0; i < casecount; i++) fprintf(Outfile, "\tdq\t%d, L%d\n", caseval[i], caselabel[i]); fprintf(Outfile, "\tdq\tL%d\n", defaultlabel); // Load the specific registers cglabel(toplabel); fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); fprintf(Outfile, "\tmov\trdx, L%d\n", label); fprintf(Outfile, "\tjmp\tswitch\n"); } ================================================ FILE: 48_Static/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Putback; // Character put back by scanner extern_ struct symtable *Functionid; // Symbol ptr of the current function extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ char *Infilename; // Name of file we are parsing extern_ char *Outfilename; // Name of file we opened as Outfile extern_ struct token Token; // Last token scanned extern_ struct token Peektoken; // A look-ahead token extern_ char Text[TEXTLEN + 1]; // Last identifier scanned extern_ int Looplevel; // Depth of nested loops extern_ int Switchlevel; // Depth of nested switches // Symbol table lists extern_ struct symtable *Globhead, *Globtail; // Global variables and functions extern_ struct symtable *Loclhead, *Locltail; // Local variables extern_ struct symtable *Parmhead, *Parmtail; // Local parameters extern_ struct symtable *Membhead, *Membtail; // Temp list of struct/union members extern_ struct symtable *Structhead, *Structtail; // List of struct types extern_ struct symtable *Unionhead, *Uniontail; // List of union types extern_ struct symtable *Enumhead, *Enumtail; // List of enum types and values extern_ struct symtable *Typehead, *Typetail; // List of typedefs // Command-line flags extern_ int O_dumpAST; // If true, dump the AST trees extern_ int O_keepasm; // If true, keep any assembly files extern_ int O_assemble; // If 
true, assemble the assembly files extern_ int O_dolink; // If true, link the object files extern_ int O_verbose; // If true, print info on compilation stages ================================================ FILE: 48_Static/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 static struct symtable *composite_declaration(int type); static int typedef_declaration(struct symtable **ctype); static int type_of_typedef(char *name, struct symtable **ctype); static void enum_declaration(void); // Parse the current token and return a primitive type enum value, // a pointer to any composite type and possibly modify // the class of the type. int parse_type(struct symtable **ctype, int *class) { int type, exstatic = 1; // See if the class has been changed to extern or static while (exstatic) { switch (Token.token) { case T_EXTERN: if (*class == C_STATIC) fatal("Illegal to have extern and static at the same time"); *class = C_EXTERN; scan(&Token); break; case T_STATIC: if (*class == C_LOCAL) fatal("Compiler doesn't support static local declarations"); if (*class == C_EXTERN) fatal("Illegal to have extern and static at the same time"); *class = C_STATIC; scan(&Token); break; default: exstatic = 0; } } // Now work on the actual type keyword switch (Token.token) { case T_VOID: type = P_VOID; scan(&Token); break; case T_CHAR: type = P_CHAR; scan(&Token); break; case T_INT: type = P_INT; scan(&Token); break; case T_LONG: type = P_LONG; scan(&Token); break; // For the following, if we have a ';' after the // parsing then there is no type, so return -1. 
// Example: struct x {int y; int z}; case T_STRUCT: type = P_STRUCT; *ctype = composite_declaration(P_STRUCT); if (Token.token == T_SEMI) type = -1; break; case T_UNION: type = P_UNION; *ctype = composite_declaration(P_UNION); if (Token.token == T_SEMI) type = -1; break; case T_ENUM: type = P_INT; // Enums are really ints enum_declaration(); if (Token.token == T_SEMI) type = -1; break; case T_TYPEDEF: type = typedef_declaration(ctype); if (Token.token == T_SEMI) type = -1; break; case T_IDENT: type = type_of_typedef(Text, ctype); break; default: fatals("Illegal type, token", Token.tokstr); } return (type); } // Given a type parsed by parse_type(), scan in any following // '*' tokens and return the new type int parse_stars(int type) { while (1) { if (Token.token != T_STAR) break; type = pointer_to(type); scan(&Token); } return (type); } // Parse a type which appears inside a cast int parse_cast(void) { int type, class=0; struct symtable *ctype; // Get the type inside the parentheses type= parse_stars(parse_type(&ctype, &class)); // Do some error checking. I'm sure more can be done if (type == P_STRUCT || type == P_UNION || type == P_VOID) fatal("Cannot cast to a struct, union or void type"); return(type); } // Given a type, parse an expression of literals and ensure // that the type of this expression matches the given type. // Parse any type cast that precedes the expression. // If an integer literal, return this value. // If a string literal, return the label number of the string. 
int parse_literal(int type) { struct ASTnode *tree; // Parse the expression and optimise the resulting AST tree tree= optimise(binexpr(0)); // If there's a cast, get the child and // mark it as having the type from the cast if (tree->op == A_CAST) { tree->left->type= tree->type; tree= tree->left; } // The tree must now have an integer or string literal if (tree->op != A_INTLIT && tree->op != A_STRLIT) fatal("Cannot initialise globals with a general expression"); // If the type is char * and if (type == pointer_to(P_CHAR)) { // We have a string literal, return the label number if (tree->op == A_STRLIT) return(tree->a_intvalue); // We have a zero int literal, so that's a NULL if (tree->op == A_INTLIT && tree->a_intvalue==0) return(0); } // We only get here with an integer literal. The input type // is an integer type and is wide enough to hold the literal value if (inttype(type) && typesize(type, NULL) >= typesize(tree->type, NULL)) return(tree->a_intvalue); fatal("Type mismatch: literal vs. variable"); return(0); // Keep -Wall happy } // Given the type, name and class of a scalar variable, // parse any initialisation value and allocate storage for it. // Return the variable's symbol table entry. 
static struct symtable *scalar_declaration(char *varname, int type, struct symtable *ctype, int class, struct ASTnode **tree) { struct symtable *sym=NULL; struct ASTnode *varnode, *exprnode; *tree= NULL; // Add this as a known scalar switch (class) { case C_STATIC: case C_EXTERN: case C_GLOBAL: sym= addglob(varname, type, ctype, S_VARIABLE, class, 1, 0); break; case C_LOCAL: sym= addlocl(varname, type, ctype, S_VARIABLE, 1); break; case C_PARAM: sym= addparm(varname, type, ctype, S_VARIABLE); break; case C_MEMBER: sym= addmemb(varname, type, ctype, S_VARIABLE, 1); break; } // The variable is being initialised if (Token.token == T_ASSIGN) { // Only possible for a global or local if (class != C_GLOBAL && class != C_LOCAL && class != C_STATIC) fatals("Variable can not be initialised", varname); scan(&Token); // Globals must be assigned a literal value if (class == C_GLOBAL || class == C_STATIC) { // Create one initial value for the variable and // parse this value sym->initlist= (int *)malloc(sizeof(int)); sym->initlist[0]= parse_literal(type); } if (class == C_LOCAL) { // Make an A_IDENT AST node with the variable varnode = mkastleaf(A_IDENT, sym->type, sym, 0); // Get the expression for the assignment, make into a rvalue exprnode = binexpr(0); exprnode->rvalue = 1; // Ensure the expression's type matches the variable exprnode = modify_type(exprnode, varnode->type, 0); if (exprnode == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree *tree = mkastnode(A_ASSIGN, exprnode->type, exprnode, NULL, varnode, NULL, 0); } } // Generate any global space if (class == C_GLOBAL || class == C_STATIC) genglobsym(sym); return (sym); } // Given the type, name and class of an variable, parse // the size of the array, if any. Then parse any initialisation // value and allocate storage for it. // Return the variable's symbol table entry. 
static struct symtable *array_declaration(char *varname, int type, struct symtable *ctype, int class) { struct symtable *sym; // New symbol table entry int nelems= -1; // Assume the number of elements won't be given int maxelems; // The maximum number of elements in the init list int *initlist; // The list of initial elements int i=0, j; // Skip past the '[' scan(&Token); // See we have an array size if (Token.token != T_RBRACKET) { nelems= parse_literal(P_INT); if (nelems <= 0) fatald("Array size is illegal", nelems); } // Ensure we have a following ']' match(T_RBRACKET, "]"); // Add this as a known array. We treat the // array as a pointer to its elements' type switch (class) { case C_STATIC: case C_EXTERN: case C_GLOBAL: sym = addglob(varname, pointer_to(type), ctype, S_ARRAY, class, 0, 0); break; default: fatal("For now, declaration of non-global arrays is not implemented"); } // Array initialisation if (Token.token == T_ASSIGN) { if (class != C_GLOBAL && class != C_STATIC) fatals("Variable can not be initialised", varname); scan(&Token); // Get the following left curly bracket match(T_LBRACE, "{"); #define TABLE_INCREMENT 10 // If the array already has nelems, allocate that many elements // in the list. Otherwise, start with TABLE_INCREMENT. 
if (nelems != -1) maxelems= nelems; else maxelems= TABLE_INCREMENT; initlist= (int *)malloc(maxelems *sizeof(int)); // Loop getting a new literal value from the list while (1) { // Check we can add the next value, then parse and add it if (nelems != -1 && i == maxelems) fatal("Too many values in initialisation list"); initlist[i++]= parse_literal(type); // Increase the list size if the original size was // not set and we have hit the end of the current list if (nelems == -1 && i == maxelems) { maxelems += TABLE_INCREMENT; initlist= (int *)realloc(initlist, maxelems *sizeof(int)); } // Leave when we hit the right curly bracket if (Token.token == T_RBRACE) { scan(&Token); break; } // Next token must be a comma, then comma(); } // Zero any unused elements in the initlist. // Attach the list to the symbol table entry for (j=i; j < sym->nelems; j++) initlist[j]=0; if (i > nelems) nelems = i; sym->initlist= initlist; } // Set the size of the array and the number of elements sym->nelems= nelems; sym->size= sym->nelems * typesize(type, ctype); // Generate any global space if (class == C_GLOBAL || class == C_STATIC) genglobsym(sym); return (sym); } // Given a pointer to the new function being declared and // a possibly NULL pointer to the function's previous declaration, // parse a list of parameters and cross-check them against the // previous declaration. Return the count of parameters static int param_declaration_list(struct symtable *oldfuncsym, struct symtable *newfuncsym) { int type, paramcnt = 0; struct symtable *ctype; struct symtable *protoptr = NULL; struct ASTnode *unused; // Get the pointer to the first prototype parameter if (oldfuncsym != NULL) protoptr = oldfuncsym->member; // Loop getting any parameters while (Token.token != T_RPAREN) { // If the first token is 'void' if (Token.token == T_VOID) { // Peek at the next token. If a ')', the function // has no parameters, so leave the loop. 
scan(&Peektoken); if (Peektoken.token == T_RPAREN) { // Move the Peektoken into the Token paramcnt= 0; scan(&Token); break; } } // Get the type of the next parameter type = declaration_list(&ctype, C_PARAM, T_COMMA, T_RPAREN, &unused); if (type == -1) fatal("Bad type in parameter list"); // Ensure the type of this parameter matches the prototype if (protoptr != NULL) { if (type != protoptr->type) fatald("Type doesn't match prototype for parameter", paramcnt + 1); protoptr = protoptr->next; } paramcnt++; // Stop when we hit the right parenthesis if (Token.token == T_RPAREN) break; // We need a comma as separator comma(); } if (oldfuncsym != NULL && paramcnt != oldfuncsym->nelems) fatals("Parameter count mismatch for function", oldfuncsym->name); // Return the count of parameters return (paramcnt); } // // function_declaration: type identifier '(' parameter_list ')' ; // | type identifier '(' parameter_list ')' compound_statement ; // // Parse the declaration of function. static struct symtable *function_declaration(char *funcname, int type, struct symtable *ctype, int class) { struct ASTnode *tree, *finalstmt; struct symtable *oldfuncsym, *newfuncsym = NULL; int endlabel, paramcnt; // Text has the identifier's name. If this exists and is a // function, get the id. Otherwise, set oldfuncsym to NULL. if ((oldfuncsym = findsymbol(funcname)) != NULL) if (oldfuncsym->stype != S_FUNCTION) oldfuncsym = NULL; // If this is a new function declaration, get a // label-id for the end label, and add the function // to the symbol table, if (oldfuncsym == NULL) { endlabel = genlabel(); // Assumtion: functions only return scalar types, so NULL below newfuncsym = addglob(funcname, type, NULL, S_FUNCTION, class, 0, endlabel); } // Scan in the '(', any parameters and the ')'. 
// Pass in any existing function prototype pointer lparen(); paramcnt = param_declaration_list(oldfuncsym, newfuncsym); rparen(); // If this is a new function declaration, update the // function symbol entry with the number of parameters. // Also copy the parameter list into the function's node. if (newfuncsym) { newfuncsym->nelems = paramcnt; newfuncsym->member = Parmhead; oldfuncsym = newfuncsym; } // Clear out the parameter list Parmhead = Parmtail = NULL; // Declaration ends in a semicolon, only a prototype. if (Token.token == T_SEMI) return (oldfuncsym); // This is not just a prototype. // Set the Functionid global to the function's symbol pointer Functionid = oldfuncsym; // Get the AST tree for the compound statement and mark // that we have parsed no loops or switches yet Looplevel = 0; Switchlevel = 0; lbrace(); tree = compound_statement(0); rbrace(); // If the function type isn't P_VOID .. if (type != P_VOID) { // Error if no statements in the function if (tree == NULL) fatal("No statements in function with non-void type"); // Check that the last AST operation in the // compound statement was a return statement finalstmt = (tree->op == A_GLUE) ? tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); } // Build the A_FUNCTION node which has the function's symbol pointer // and the compound statement sub-tree tree = mkastunary(A_FUNCTION, type, tree, oldfuncsym, endlabel); // Do optimisations on the AST tree tree= optimise(tree); // Dump the AST tree if requested if (O_dumpAST) { dumpAST(tree, NOLABEL, 0); fprintf(stdout, "\n\n"); } // Generate the assembly code for it genAST(tree, NOLABEL, NOLABEL, NOLABEL, 0); // Now free the symbols associated // with this function freeloclsyms(); return (oldfuncsym); } // Parse composite type declarations: structs or unions. // Either find an existing struct/union declaration, or build // a struct/union symbol table entry and return its pointer. 
static struct symtable *composite_declaration(int type) { struct symtable *ctype = NULL; struct symtable *m; struct ASTnode *unused; int offset; int t; // Skip the struct/union keyword scan(&Token); // See if there is a following struct/union name if (Token.token == T_IDENT) { // Find any matching composite type if (type == P_STRUCT) ctype = findstruct(Text); else ctype = findunion(Text); scan(&Token); } // If the next token isn't an LBRACE , this is // the usage of an existing struct/union type. // Return the pointer to the type. if (Token.token != T_LBRACE) { if (ctype == NULL) fatals("unknown struct/union type", Text); return (ctype); } // Ensure this struct/union type hasn't been // previously defined if (ctype) fatals("previously defined struct/union", Text); // Build the composite type and skip the left brace if (type == P_STRUCT) ctype = addstruct(Text); else ctype = addunion(Text); scan(&Token); // Scan in the list of members while (1) { // Get the next member. m is used as a dummy t= declaration_list(&m, C_MEMBER, T_SEMI, T_RBRACE, &unused); if (t== -1) fatal("Bad type in member list"); if (Token.token == T_SEMI) scan(&Token); if (Token.token == T_RBRACE) break; } // Attach to the struct type's node rbrace(); if (Membhead==NULL) fatals("No members in struct", ctype->name); ctype->member = Membhead; Membhead = Membtail = NULL; // Set the offset of the initial member // and find the first free byte after it m = ctype->member; m->st_posn = 0; offset = typesize(m->type, m->ctype); // Set the position of each successive member in the composite type // Unions are easy. 
For structs, align the member and find the next free byte for (m = m->next; m != NULL; m = m->next) { // Set the offset for this member if (type == P_STRUCT) m->st_posn = genalign(m->type, offset, 1); else m->st_posn = 0; // Get the offset of the next free byte after this member offset += typesize(m->type, m->ctype); } // Set the overall size of the composite type ctype->size = offset; return (ctype); } // Parse an enum declaration static void enum_declaration(void) { struct symtable *etype = NULL; char *name= NULL; int intval = 0; // Skip the enum keyword. scan(&Token); // If there's a following enum type name, get a // pointer to any existing enum type node. if (Token.token == T_IDENT) { etype = findenumtype(Text); name = strdup(Text); // As it gets tromped soon scan(&Token); } // If the next token isn't a LBRACE, check // that we have an enum type name, then return if (Token.token != T_LBRACE) { if (etype == NULL) fatals("undeclared enum type:", name); return; } // We do have an LBRACE. Skip it scan(&Token); // If we have an enum type name, ensure that it // hasn't been declared before. if (etype != NULL) fatals("enum type redeclared:", etype->name); else // Build an enum type node for this identifier etype = addenum(name, C_ENUMTYPE, 0); // Loop to get all the enum values while (1) { // Ensure we have an identifier // Copy it in case there's an int literal coming up ident(); name = strdup(Text); // Ensure this enum value hasn't been declared before etype = findenumval(name); if (etype != NULL) fatals("enum value redeclared:", Text); // If the next token is an '=', skip it and // get the following int literal if (Token.token == T_ASSIGN) { scan(&Token); if (Token.token != T_INTLIT) fatal("Expected int literal after '='"); intval = Token.intvalue; scan(&Token); } // Build an enum value node for this identifier. // Increment the value for the next enum identifier. 
etype = addenum(name, C_ENUMVAL, intval++); // Bail out on a right curly bracket, else get a comma if (Token.token == T_RBRACE) break; comma(); } scan(&Token); // Skip over the right curly bracket } // Parse a typedef declaration and return the type // and ctype that it represents static int typedef_declaration(struct symtable **ctype) { int type, class = 0; // Skip the typedef keyword. scan(&Token); // Get the actual type following the keyword type = parse_type(ctype, &class); if (class != 0) fatal("Can't have extern in a typedef declaration"); // See if the typedef identifier already exists if (findtypedef(Text) != NULL) fatals("redefinition of typedef", Text); // Get any following '*' tokens type = parse_stars(type); // It doesn't exist so add it to the typedef list addtypedef(Text, type, *ctype); scan(&Token); return (type); } // Given a typedef name, return the type it represents static int type_of_typedef(char *name, struct symtable **ctype) { struct symtable *t; // Look up the typedef in the list t = findtypedef(name); if (t == NULL) fatals("unknown type", name); scan(&Token); *ctype = t->ctype; return (t->type); } // Parse the declaration of a variable or function. // The type and any following '*'s have been scanned, and we // have the identifier in the Token variable. // The class argument is the variable's class. // Return a pointer to the symbol's entry in the symbol table static struct symtable *symbol_declaration(int type, struct symtable *ctype, int class, struct ASTnode **tree) { struct symtable *sym = NULL; char *varname = strdup(Text); // Ensure that we have an identifier. // We copied it above so we can scan more tokens in, e.g. // an assignment expression for a local variable. 
ident(); // Deal with function declarations if (Token.token == T_LPAREN) { return (function_declaration(varname, type, ctype, class)); } // See if this array or scalar variable has already been declared switch (class) { case C_EXTERN: case C_STATIC: case C_GLOBAL: if (findglob(varname) != NULL) fatals("Duplicate global variable declaration", varname); case C_LOCAL: case C_PARAM: if (findlocl(varname) != NULL) fatals("Duplicate local variable declaration", varname); case C_MEMBER: if (findmember(varname) != NULL) fatals("Duplicate struct/union member declaration", varname); } // Add the array or scalar variable to the symbol table if (Token.token == T_LBRACKET) sym = array_declaration(varname, type, ctype, class); else sym = scalar_declaration(varname, type, ctype, class, tree); return (sym); } // Parse a list of symbols where there is an initial type. // Return the type of the symbols. et1 and et2 are end tokens. int declaration_list(struct symtable **ctype, int class, int et1, int et2, struct ASTnode **gluetree) { int inittype, type; struct symtable *sym; struct ASTnode *tree; *gluetree= NULL; // Get the initial type. 
If -1, it was // a composite type definition, return this if ((inittype = parse_type(ctype, &class)) == -1) return (inittype); // Now parse the list of symbols while (1) { // See if this symbol is a pointer type = parse_stars(inittype); // Parse this symbol sym = symbol_declaration(type, *ctype, class, &tree); // We parsed a function, there is no list so leave if (sym->stype == S_FUNCTION) { if (class != C_GLOBAL && class != C_STATIC) fatal("Function definition not at global level"); return (type); } // Glue any AST tree from a local declaration // to build a sequence of assignments to perform if (*gluetree== NULL) *gluetree= tree; else *gluetree = mkastnode(A_GLUE, P_NONE, *gluetree, NULL, tree, NULL, 0); // We are at the end of the list, leave if (Token.token == et1 || Token.token == et2) return (type); // Otherwise, we need a comma as separator comma(); } } // Parse one or more global declarations, either // variables, functions or structs void global_declarations(void) { struct symtable *ctype; struct ASTnode *unused; while (Token.token != T_EOF) { declaration_list(&ctype, C_GLOBAL, T_SEMI, T_EOF, &unused); // Skip any semicolons and right curly brackets if (Token.token == T_SEMI) scan(&Token); } } ================================================ FILE: 48_Static/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c void reject_token(struct token *t); int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue); struct ASTnode *mkastleaf(int op, int type, struct symtable *sym, int intvalue); struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, struct symtable *sym, int intvalue); void dumpAST(struct ASTnode *n, int label, int parentASTop); // gen.c int genlabel(void); int genAST(struct ASTnode *n, int iflabel, int looptoplabel, 
int loopendlabel, int parentASTop); void genpreamble(); void genpostamble(); void genfreeregs(); void genglobsym(struct symtable *node); int genglobstr(char *strvalue); int genprimsize(int type); int genalign(int type, int offset, int direction); void genreturn(int reg, int id); // cg.c int cgprimsize(int type); int cgalign(int type, int offset, int direction); void cgtextseg(); void cgdataseg(); void freeall_registers(void); void cgpreamble(); void cgpostamble(); void cgfuncpreamble(struct symtable *sym); void cgfuncpostamble(struct symtable *sym); int cgloadint(int value, int type); int cgloadglob(struct symtable *sym, int op); int cgloadlocal(struct symtable *sym, int op); int cgloadglobstr(int label); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdiv(int r1, int r2); int cgshlconst(int r, int val); int cgcall(struct symtable *sym, int numargs); void cgcopyarg(int r, int argposn); int cgstorglob(int r, struct symtable *sym); int cgstorlocal(int r, struct symtable *sym); void cgglobsym(struct symtable *node); void cgglobstr(int l, char *strvalue); int cgcompare_and_set(int ASTop, int r1, int r2); int cgcompare_and_jump(int ASTop, int r1, int r2, int label); void cglabel(int l); void cgjump(int l); int cgwiden(int r, int oldtype, int newtype); void cgreturn(int reg, struct symtable *sym); int cgaddress(struct symtable *sym); int cgderef(int r, int type); int cgstorderef(int r1, int r2, int type); int cgnegate(int r); int cginvert(int r); int cglognot(int r); int cgboolean(int r, int op, int label); int cgand(int r1, int r2); int cgor(int r1, int r2); int cgxor(int r1, int r2); int cgshl(int r1, int r2); int cgshr(int r1, int r2); void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel); // expr.c struct ASTnode *expression_list(int endtoken); struct ASTnode *binexpr(int ptp); // stmt.c struct ASTnode *compound_statement(int inswitch); // misc.c void match(int t, char *what); void 
semi(void); void lbrace(void); void rbrace(void); void lparen(void); void rparen(void); void ident(void); void comma(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node); struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn); struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn); struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int nelems); struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype); struct symtable *addstruct(char *name); struct symtable *addunion(char *name); struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int nelems); struct symtable *addenum(char *name, int class, int value); struct symtable *addtypedef(char *name, int type, struct symtable *ctype); struct symtable *findglob(char *s); struct symtable *findlocl(char *s); struct symtable *findsymbol(char *s); struct symtable *findmember(char *s); struct symtable *findstruct(char *s); struct symtable *findunion(char *s); struct symtable *findenumtype(char *s); struct symtable *findenumval(char *s); struct symtable *findtypedef(char *s); void clear_symtable(void); void freeloclsyms(void); void freestaticsyms(void); // decl.c int parse_type(struct symtable **ctype, int *class); int parse_stars(int type); int parse_cast(void); int declaration_list(struct symtable **ctype, int class, int et1, int et2, struct ASTnode **gluetree); void global_declarations(void); // types.c int inttype(int type); int ptrtype(int type); int pointer_to(int type); int value_at(int type); int typesize(int type, struct symtable *ctype); struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op); // opt.c struct ASTnode *optimise(struct ASTnode *n); 
================================================ FILE: 48_Static/defs.h ================================================ #include #include #include #include // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 enum { TEXTLEN = 512 // Length of identifiers in input }; // Commands and default filenames #define AOUT "a.out" #ifdef __NASM__ #define ASCMD "nasm -f elf64 -o " #define LDCMD "cc -no-pie -fno-plt -Wall -o " #else #define ASCMD "as -o " #define LDCMD "cc -o " #endif #define CPPCMD "cpp -nostdinc -isystem " // Token types enum { T_EOF, // Binary operators T_ASSIGN, T_ASPLUS, T_ASMINUS, T_ASSTAR, T_ASSLASH, T_LOGOR, T_LOGAND, T_OR, T_XOR, T_AMPER, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, T_LSHIFT, T_RSHIFT, T_PLUS, T_MINUS, T_STAR, T_SLASH, // Other operators T_INC, T_DEC, T_INVERT, T_LOGNOT, // Type keywords T_VOID, T_CHAR, T_INT, T_LONG, // Other keywords T_IF, T_ELSE, T_WHILE, T_FOR, T_RETURN, T_STRUCT, T_UNION, T_ENUM, T_TYPEDEF, T_EXTERN, T_BREAK, T_CONTINUE, T_SWITCH, T_CASE, T_DEFAULT, T_SIZEOF, T_STATIC, // Structural tokens T_INTLIT, T_STRLIT, T_SEMI, T_IDENT, T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, T_LBRACKET, T_RBRACKET, T_COMMA, T_DOT, T_ARROW, T_COLON }; // Token structure struct token { int token; // Token type, from the enum list above char *tokstr; // String version of the token int intvalue; // For T_INTLIT, the integer value }; // AST node types. The first few line up // with the related tokens enum { A_ASSIGN = 1, A_ASPLUS, A_ASMINUS, A_ASSTAR, A_ASSLASH, A_LOGOR, A_LOGAND, A_OR, A_XOR, A_AND, A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_LSHIFT, A_RSHIFT, A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_INTLIT, A_STRLIT, A_IDENT, A_GLUE, A_IF, A_WHILE, A_FUNCTION, A_WIDEN, A_RETURN, A_FUNCCALL, A_DEREF, A_ADDR, A_SCALE, A_PREINC, A_PREDEC, A_POSTINC, A_POSTDEC, A_NEGATE, A_INVERT, A_LOGNOT, A_TOBOOL, A_BREAK, A_CONTINUE, A_SWITCH, A_CASE, A_DEFAULT, A_CAST }; // Primitive types. 
The bottom 4 bits is an integer // value that represents the level of indirection, // e.g. 0= no pointer, 1= pointer, 2= pointer pointer etc. enum { P_NONE, P_VOID = 16, P_CHAR = 32, P_INT = 48, P_LONG = 64, P_STRUCT=80, P_UNION=96 }; // Structural types enum { S_VARIABLE, S_FUNCTION, S_ARRAY }; // Storage classes enum { C_GLOBAL = 1, // Globally visible symbol C_LOCAL, // Locally visible symbol C_PARAM, // Locally visible function parameter C_EXTERN, // External globally visible symbol C_STATIC, // Static symbol, visible in one file C_STRUCT, // A struct C_UNION, // A union C_MEMBER, // Member of a struct or union C_ENUMTYPE, // A named enumeration type C_ENUMVAL, // A named enumeration value C_TYPEDEF // A named typedef }; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol struct symtable *ctype; // If struct/union, ptr to that type int stype; // Structural type for the symbol int class; // Storage class for the symbol int size; // Total size in bytes of this symbol int nelems; // Functions: # params. 
Arrays: # elements #define st_endlabel st_posn // For functions, the end label int st_posn; // For locals, the negative offset // from the stack base pointer int *initlist; // List of initial values struct symtable *next; // Next symbol in one list struct symtable *member; // First member of a function, struct, }; // union or enum // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates int rvalue; // True if the node is an rvalue struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; struct symtable *sym; // For many AST nodes, the pointer to // the symbol in the symbol table #define a_intvalue a_size // For A_INTLIT, the integer value int a_size; // For A_SCALE, the size to scale by }; enum { NOREG = -1, // Use NOREG when the AST generation // functions have no register to return NOLABEL = 0 // Use NOLABEL when we have no label to // pass to genAST() }; ================================================ FILE: 48_Static/expr.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of expressions // Copyright (c) 2019 Warren Toomey, GPL3 // expression_list: // | expression // | expression ',' expression_list // ; // Parse a list of zero or more comma-separated expressions and // return an AST composed of A_GLUE nodes with the left-hand child // being the sub-tree of previous expressions (or NULL) and the right-hand // child being the next expression. Each A_GLUE node will have size field // set to the number of expressions in the tree at this point. 
If no // expressions are parsed, NULL is returned struct ASTnode *expression_list(int endtoken) { struct ASTnode *tree = NULL; struct ASTnode *child = NULL; int exprcount = 0; // Loop until the end token while (Token.token != endtoken) { // Parse the next expression and increment the expression count child = binexpr(0); exprcount++; // Build an A_GLUE AST node with the previous tree as the left child // and the new expression as the right child. Store the expression count. tree = mkastnode(A_GLUE, P_NONE, tree, NULL, child, NULL, exprcount); // Stop when we reach the end token if (Token.token == endtoken) break; // Must have a ',' at this point match(T_COMMA, ","); } // Return the tree of expressions return (tree); } // Parse a function call and return its AST static struct ASTnode *funccall(void) { struct ASTnode *tree; struct symtable *funcptr; // Check that the identifier has been defined as a function, // then make a leaf node for it. if ((funcptr = findsymbol(Text)) == NULL || funcptr->stype != S_FUNCTION) { fatals("Undeclared function", Text); } // Get the '(' lparen(); // Parse the argument expression list tree = expression_list(T_RPAREN); // XXX Check type of each argument against the function's prototype // Build the function call AST node. Store the // function's return type as this node's type. 
// Also record the function's symbol-id tree = mkastunary(A_FUNCCALL, funcptr->type, tree, funcptr, 0); // Get the ')' rparen(); return (tree); } // Parse the index into an array and return an AST tree for it static struct ASTnode *array_access(void) { struct ASTnode *left, *right; struct symtable *aryptr; // Check that the identifier has been defined as an array // then make a leaf node for it that points at the base if ((aryptr = findsymbol(Text)) == NULL || aryptr->stype != S_ARRAY) { fatals("Undeclared array", Text); } left = mkastleaf(A_ADDR, aryptr->type, aryptr, 0); // Get the '[' scan(&Token); // Parse the following expression right = binexpr(0); // Get the ']' match(T_RBRACKET, "]"); // Ensure that this is of int type if (!inttype(right->type)) fatal("Array index is not of integer type"); // Scale the index by the size of the element's type right = modify_type(right, left->type, A_ADD); // Return an AST tree where the array's base has the offset // added to it, and dereference the element. Still an lvalue // at this point. left = mkastnode(A_ADD, aryptr->type, left, NULL, right, NULL, 0); left = mkastunary(A_DEREF, value_at(left->type), left, NULL, 0); return (left); } // Parse the member reference of a struct or union // and return an AST tree for it. If withpointer is true, // the access is through a pointer to the member. static struct ASTnode *member_access(int withpointer) { struct ASTnode *left, *right; struct symtable *compvar; struct symtable *typeptr; struct symtable *m; // Check that the identifier has been declared as a // struct/union or a struct/union pointer if ((compvar = findsymbol(Text)) == NULL) fatals("Undeclared variable", Text); if (withpointer && compvar->type != pointer_to(P_STRUCT) && compvar->type != pointer_to(P_UNION)) fatals("Undeclared variable", Text); if (!withpointer && compvar->type != P_STRUCT && compvar->type != P_UNION) fatals("Undeclared variable", Text); // If a pointer to a struct/union, get the pointer's value. 
// Otherwise, make a leaf node that points at the base // Either way, it's an rvalue if (withpointer) { left = mkastleaf(A_IDENT, pointer_to(compvar->type), compvar, 0); } else left = mkastleaf(A_ADDR, compvar->type, compvar, 0); left->rvalue = 1; // Get the details of the composite type typeptr = compvar->ctype; // Skip the '.' or '->' token and get the member's name scan(&Token); ident(); // Find the matching member's name in the type // Die if we can't find it for (m = typeptr->member; m != NULL; m = m->next) if (!strcmp(m->name, Text)) break; if (m == NULL) fatals("No member found in struct/union: ", Text); // Build an A_INTLIT node with the offset right = mkastleaf(A_INTLIT, P_INT, NULL, m->st_posn); // Add the member's offset to the base of the struct/union // and dereference it. Still an lvalue at this point left = mkastnode(A_ADD, pointer_to(m->type), left, NULL, right, NULL, 0); left = mkastunary(A_DEREF, m->type, left, NULL, 0); return (left); } // Parse a postfix expression and return // an AST node representing it. The // identifier is already in Text. static struct ASTnode *postfix(void) { struct ASTnode *n; struct symtable *varptr; struct symtable *enumptr; // If the identifier matches an enum value, // return an A_INTLIT node if ((enumptr = findenumval(Text)) != NULL) { scan(&Token); return (mkastleaf(A_INTLIT, P_INT, NULL, enumptr->st_posn)); } // Scan in the next token to see if we have a postfix expression scan(&Token); // Function call if (Token.token == T_LPAREN) return (funccall()); // An array reference if (Token.token == T_LBRACKET) return (array_access()); // Access into a struct or union if (Token.token == T_DOT) return (member_access(0)); if (Token.token == T_ARROW) return (member_access(1)); // A variable. Check that the variable exists. 
if ((varptr = findsymbol(Text)) == NULL || varptr->stype != S_VARIABLE) fatals("Unknown variable", Text); switch (Token.token) { // Post-increment: skip over the token case T_INC: scan(&Token); n = mkastleaf(A_POSTINC, varptr->type, varptr, 0); break; // Post-decrement: skip over the token case T_DEC: scan(&Token); n = mkastleaf(A_POSTDEC, varptr->type, varptr, 0); break; // Just a variable reference default: n = mkastleaf(A_IDENT, varptr->type, varptr, 0); } return (n); } // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; int id; int type=0; int size, class; struct symtable *ctype; switch (Token.token) { case T_STATIC: case T_EXTERN: fatal("Compiler doesn't support static or extern local declarations"); case T_SIZEOF: // Skip the T_SIZEOF and ensure we have a left parenthesis scan(&Token); if (Token.token != T_LPAREN) fatal("Left parenthesis expected after sizeof"); scan(&Token); // Get the type inside the parentheses type= parse_stars(parse_type(&ctype, &class)); // Get the type's size size= typesize(type, ctype); rparen(); // Return a leaf node int literal with the size return (mkastleaf(A_INTLIT, P_INT, NULL, size)); case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. // Make it a P_CHAR if it's within the P_CHAR range if (Token.intvalue >= 0 && Token.intvalue < 256) n = mkastleaf(A_INTLIT, P_CHAR, NULL, Token.intvalue); else n = mkastleaf(A_INTLIT, P_INT, NULL, Token.intvalue); break; case T_STRLIT: // For a STRLIT token, generate the assembly for it. // Then make a leaf AST node for it. id is the string's label. id = genglobstr(Text); n = mkastleaf(A_STRLIT, pointer_to(P_CHAR), NULL, id); break; case T_IDENT: return (postfix()); case T_LPAREN: // Beginning of a parenthesised expression, skip the '('. 
scan(&Token); // If the token after is a type identifier, this is a cast expression switch (Token.token) { case T_IDENT: // We have to see if the identifier matches a typedef. // If not, treat it as an expression. if (findtypedef(Text) == NULL) { n = binexpr(0); break; } case T_VOID: case T_CHAR: case T_INT: case T_LONG: case T_STRUCT: case T_UNION: case T_ENUM: // Get the type inside the parentheses type= parse_cast(); // Skip the closing ')' and then parse the following expression rparen(); default: n = binexpr(0); // Scan in the expression } // We now have at least an expression in n, and possibly a non-zero type in type // if there was a cast. Skip the closing ')' if there was no cast. if (type == 0) rparen(); else // Otherwise, make a unary AST node for the cast n= mkastunary(A_CAST, type, n, NULL, 0); return (n); default: fatals("Expecting a primary expression, got token", Token.tokstr); } // Scan in the next token and return the leaf node scan(&Token); return (n); } // Convert a binary operator token into a binary AST operation. // We rely on a 1:1 mapping from token to AST operation static int binastop(int tokentype) { if (tokentype > T_EOF && tokentype <= T_SLASH) return (tokentype); fatald("Syntax error, token", tokentype); return (0); // Keep -Wall happy } // Return true if a token is right-associative, // false otherwise. static int rightassoc(int tokentype) { if (tokentype >= T_ASSIGN && tokentype <= T_ASSLASH) return (1); return (0); } // Operator precedence for each token. Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 10, // T_EOF, T_ASSIGN, T_ASPLUS, 10, 10, 10, // T_ASMINUS, T_ASSTAR, T_ASSLASH, 20, 30, // T_LOGOR, T_LOGAND 40, 50, 60, // T_OR, T_XOR, T_AMPER 70, 70, // T_EQ, T_NE 80, 80, 80, 80, // T_LT, T_GT, T_LE, T_GE 90, 90, // T_LSHIFT, T_RSHIFT 100, 100, // T_PLUS, T_MINUS 110, 110 // T_STAR, T_SLASH }; // Check that we have a binary operator and // return its precedence. 
static int op_precedence(int tokentype) { int prec; if (tokentype > T_SLASH) fatald("Token with no precedence in op_precedence:", tokentype); prec = OpPrec[tokentype]; if (prec == 0) fatald("Syntax error, token", tokentype); return (prec); } // prefix_expression: primary // | '*' prefix_expression // | '&' prefix_expression // | '-' prefix_expression // | '++' prefix_expression // | '--' prefix_expression // ; // Parse a prefix expression and return // a sub-tree representing it. struct ASTnode *prefix(void) { struct ASTnode *tree; switch (Token.token) { case T_AMPER: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Ensure that it's an identifier if (tree->op != A_IDENT) fatal("& operator must be followed by an identifier"); // Now change the operator to A_ADDR and the type to // a pointer to the original type tree->op = A_ADDR; tree->type = pointer_to(tree->type); break; case T_STAR: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's either another deref or an // identifier if (tree->op != A_IDENT && tree->op != A_DEREF) fatal("* operator must be followed by an identifier or *"); // Prepend an A_DEREF operation to the tree tree = mkastunary(A_DEREF, value_at(tree->type), tree, NULL, 0); break; case T_MINUS: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_NEGATE operation to the tree and // make the child an rvalue. Because chars are unsigned, // also widen this to int so that it's signed tree->rvalue = 1; tree = modify_type(tree, P_INT, 0); tree = mkastunary(A_NEGATE, tree->type, tree, NULL, 0); break; case T_INVERT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_INVERT operation to the tree and // make the child an rvalue. 
tree->rvalue = 1; tree = mkastunary(A_INVERT, tree->type, tree, NULL, 0); break; case T_LOGNOT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_LOGNOT operation to the tree and // make the child an rvalue. tree->rvalue = 1; tree = mkastunary(A_LOGNOT, tree->type, tree, NULL, 0); break; case T_INC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("++ operator must be followed by an identifier"); // Prepend an A_PREINC operation to the tree tree = mkastunary(A_PREINC, tree->type, tree, NULL, 0); break; case T_DEC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("-- operator must be followed by an identifier"); // Prepend an A_PREDEC operation to the tree tree = mkastunary(A_PREDEC, tree->type, tree, NULL, 0); break; default: tree = primary(); } return (tree); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; struct ASTnode *ltemp, *rtemp; int ASTop; int tokentype; // Get the tree on the left. // Fetch the next token at the same time. 
left = prefix(); // If we hit one of several terminating tokens, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON || tokentype == T_RBRACE) { left->rvalue = 1; return (left); } // While the precedence of this token is more than that of the // previous token precedence, or it's right associative and // equal to the previous token's precedence while ((op_precedence(tokentype) > ptp) || (rightassoc(tokentype) && op_precedence(tokentype) == ptp)) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Determine the operation to be performed on the sub-trees ASTop = binastop(tokentype); if (ASTop == A_ASSIGN) { // Assignment // Make the right tree into an rvalue right->rvalue = 1; // Ensure the right's type matches the left right = modify_type(right, left->type, 0); if (right == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree. However, switch // left and right around, so that the right expression's // code will be generated before the left expression ltemp = left; left = right; right = ltemp; } else { // We are not doing an assignment, so both trees should be rvalues // Convert both trees into rvalue if they are lvalue trees left->rvalue = 1; right->rvalue = 1; // Ensure the two types are compatible by trying // to modify each tree to match the other's type. ltemp = modify_type(left, right->type, ASTop); rtemp = modify_type(right, left->type, ASTop); if (ltemp == NULL && rtemp == NULL) fatal("Incompatible types in binary expression"); if (ltemp != NULL) left = ltemp; if (rtemp != NULL) right = rtemp; } // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. 
left = mkastnode(binastop(tokentype), left->type, left, NULL, right, NULL, 0); // Update the details of the current token. // If we hit a terminating token, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON || tokentype == T_RBRACE) { left->rvalue = 1; return (left); } } // Return the tree we have when the precedence // is the same or lower left->rvalue = 1; return (left); } ================================================ FILE: 48_Static/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number static int labelid = 1; int genlabel(void) { return (labelid++); } // Generate the code for an IF statement // and an optional ELSE clause static int genIF(struct ASTnode *n, int looptoplabel, int loopendlabel) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. // When there is no ELSE clause, Lfalse _is_ // the ending label! Lfalse = genlabel(); if (n->right) Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. 
genAST(n->left, Lfalse, NOLABEL, NOLABEL, n->op); genfreeregs(); // Generate the true compound statement genAST(n->mid, NOLABEL, looptoplabel, loopendlabel, n->op); genfreeregs(); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); genfreeregs(); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = genlabel(); Lend = genlabel(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. genAST(n->left, Lend, Lstart, Lend, n->op); genfreeregs(); // Generate the compound statement for the body genAST(n->right, NOLABEL, Lstart, Lend, n->op); genfreeregs(); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Generate the code for a SWITCH statement static int genSWITCH(struct ASTnode *n) { int *caseval, *caselabel; int Ljumptop, Lend; int i, reg, defaultlabel = 0, casecount = 0; struct ASTnode *c; // Create arrays for the case values and associated labels. // Ensure that we have at least one position in each array. caseval = (int *) malloc((n->a_intvalue + 1) * sizeof(int)); caselabel = (int *) malloc((n->a_intvalue + 1) * sizeof(int)); // Generate labels for the top of the jump table, and the // end of the switch statement. Set a default label for // the end of the switch, in case we don't have a default. 
Ljumptop = genlabel(); Lend = genlabel(); defaultlabel = Lend; // Output the code to calculate the switch condition reg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, 0); cgjump(Ljumptop); genfreeregs(); // Walk the right-child linked list to // generate the code for each case for (i = 0, c = n->right; c != NULL; i++, c = c->right) { // Get a label for this case. Store it // and the case value in the arrays. // Record if it is the default case. caselabel[i] = genlabel(); caseval[i] = c->a_intvalue; cglabel(caselabel[i]); if (c->op == A_DEFAULT) defaultlabel = caselabel[i]; else casecount++; // Generate the case code. Pass in the end label for the breaks genAST(c->left, NOLABEL, NOLABEL, Lend, 0); genfreeregs(); } // Ensure the last case jumps past the switch table cgjump(Lend); // Now output the switch table and the end label. cgswitch(reg, casecount, Ljumptop, caselabel, caseval, defaultlabel); cglabel(Lend); return (NOREG); } // Generate the code to copy the arguments of a // function call to its parameters, then call the // function itself. Return the register that holds // the function's return value. static int gen_funccall(struct ASTnode *n) { struct ASTnode *gluetree = n->left; int reg; int numargs = 0; // If there is a list of arguments, walk this list // from the last argument (right-hand child) to the // first while (gluetree) { // Calculate the expression's value reg = genAST(gluetree->right, NOLABEL, NOLABEL, NOLABEL, gluetree->op); // Copy this into the n'th function parameter: size is 1, 2, 3, ... cgcopyarg(reg, gluetree->a_size); // Keep the first (highest) number of arguments if (numargs == 0) numargs = gluetree->a_size; genfreeregs(); gluetree = gluetree->left; } // Call the function, clean up the stack (based on numargs), // and return its result return (cgcall(n->sym, numargs)); } // Given an AST, an optional label, and the AST op // of the parent, generate assembly code recursively. // Return the register id with the tree's final value. 
int genAST(struct ASTnode *n, int iflabel, int looptoplabel, int loopendlabel, int parentASTop) { int leftreg, rightreg; // We have some specific AST node handling at the top // so that we don't evaluate the child sub-trees immediately switch (n->op) { case A_IF: return (genIF(n, looptoplabel, loopendlabel)); case A_WHILE: return (genWHILE(n)); case A_SWITCH: return (genSWITCH(n)); case A_FUNCCALL: return (gen_funccall(n)); case A_GLUE: // Do each child statement, and free the // registers after each child if (n->left != NULL) genAST(n->left, iflabel, looptoplabel, loopendlabel, n->op); genfreeregs(); if (n->right != NULL) genAST(n->right, iflabel, looptoplabel, loopendlabel, n->op); genfreeregs(); return (NOREG); case A_FUNCTION: // Generate the function's preamble before the code // in the child sub-tree cgfuncpreamble(n->sym); genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op); cgfuncpostamble(n->sym); return (NOREG); } // General AST node handling below // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op); if (n->right) rightreg = genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdiv(leftreg, rightreg)); case A_AND: return (cgand(leftreg, rightreg)); case A_OR: return (cgor(leftreg, rightreg)); case A_XOR: return (cgxor(leftreg, rightreg)); case A_LSHIFT: return (cgshl(leftreg, rightreg)); case A_RSHIFT: return (cgshr(leftreg, rightreg)); case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, compare registers // and set one to 1 or 0 based on the comparison. 
if (parentASTop == A_IF || parentASTop == A_WHILE) return (cgcompare_and_jump(n->op, leftreg, rightreg, iflabel)); else return (cgcompare_and_set(n->op, leftreg, rightreg)); case A_INTLIT: return (cgloadint(n->a_intvalue, n->type)); case A_STRLIT: return (cgloadglobstr(n->a_intvalue)); case A_IDENT: // Load our value if we are an rvalue // or we are being dereferenced if (n->rvalue || parentASTop == A_DEREF) { if (n->sym->class == C_GLOBAL || n->sym->class == C_STATIC) { return (cgloadglob(n->sym, n->op)); } else { return (cgloadlocal(n->sym, n->op)); } } else return (NOREG); case A_ASPLUS: case A_ASMINUS: case A_ASSTAR: case A_ASSLASH: case A_ASSIGN: // For the '+=' and friends operators, generate suitable code // and get the register with the result. Then take the left child, // make it the right child so that we can fall into the assignment code. switch (n->op) { case A_ASPLUS: leftreg= cgadd(leftreg, rightreg); n->right= n->left; break; case A_ASMINUS: leftreg= cgsub(leftreg, rightreg); n->right= n->left; break; case A_ASSTAR: leftreg= cgmul(leftreg, rightreg); n->right= n->left; break; case A_ASSLASH: leftreg= cgdiv(leftreg, rightreg); n->right= n->left; break; } // Now into the assignment code // Are we assigning to an identifier or through a pointer? 
switch (n->right->op) { case A_IDENT: if (n->right->sym->class == C_GLOBAL || n->right->sym->class == C_STATIC) return (cgstorglob(leftreg, n->right->sym)); else return (cgstorlocal(leftreg, n->right->sym)); case A_DEREF: return (cgstorderef(leftreg, rightreg, n->right->type)); default: fatald("Can't A_ASSIGN in genAST(), op", n->op); } case A_WIDEN: // Widen the child's type to the parent's type return (cgwiden(leftreg, n->left->type, n->type)); case A_RETURN: cgreturn(leftreg, Functionid); return (NOREG); case A_ADDR: return (cgaddress(n->sym)); case A_DEREF: // If we are an rvalue, dereference to get the value we point at, // otherwise leave it for A_ASSIGN to store through the pointer if (n->rvalue) return (cgderef(leftreg, n->left->type)); else return (leftreg); case A_SCALE: // Small optimisation: use shift if the // scale value is a known power of two switch (n->a_size) { case 2: return (cgshlconst(leftreg, 1)); case 4: return (cgshlconst(leftreg, 2)); case 8: return (cgshlconst(leftreg, 3)); default: // Load a register with the size and // multiply the leftreg by this size rightreg = cgloadint(n->a_size, P_INT); return (cgmul(leftreg, rightreg)); } case A_POSTINC: case A_POSTDEC: // Load and decrement the variable's value into a register // and post increment/decrement it if (n->sym->class == C_GLOBAL || n->sym->class == C_STATIC) return (cgloadglob(n->sym, n->op)); else return (cgloadlocal(n->sym, n->op)); case A_PREINC: case A_PREDEC: // Load and decrement the variable's value into a register // and pre increment/decrement it if (n->left->sym->class == C_GLOBAL || n->left->sym->class == C_STATIC) return (cgloadglob(n->left->sym, n->op)); else return (cgloadlocal(n->left->sym, n->op)); case A_NEGATE: return (cgnegate(leftreg)); case A_INVERT: return (cginvert(leftreg)); case A_LOGNOT: return (cglognot(leftreg)); case A_TOBOOL: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. 
Otherwise, set the register // to 0 or 1 based on it's zeroeness or non-zeroeness return (cgboolean(leftreg, parentASTop, iflabel)); case A_BREAK: cgjump(loopendlabel); return (NOREG); case A_CONTINUE: cgjump(looptoplabel); return (NOREG); case A_CAST: return (leftreg); // Not much to do default: fatald("Unknown AST operator", n->op); } return (NOREG); // Keep -Wall happy } void genpreamble() { cgpreamble(); } void genpostamble() { cgpostamble(); } void genfreeregs() { freeall_registers(); } void genglobsym(struct symtable *node) { cgglobsym(node); } int genglobstr(char *strvalue) { int l = genlabel(); cgglobstr(l, strvalue); return (l); } int genprimsize(int type) { return (cgprimsize(type)); } int genalign(int type, int offset, int direction) { return (cgalign(type, offset, direction)); } ================================================ FILE: 48_Static/include/ctype.h ================================================ #ifndef _CTYPE_H_ # define _CTYPE_H_ int isalnum(int c); int isalpha(int c); int iscntrl(int c); int isdigit(int c); int isgraph(int c); int islower(int c); int isprint(int c); int ispunct(int c); int isspace(int c); int isupper(int c); int isxdigit(int c); int isascii(int c); int isblank(int c); #endif // _CTYPE_H_ ================================================ FILE: 48_Static/include/errno.h ================================================ #ifndef _ERRNO_H_ # define _ERRNO_H_ #endif // _ERRNO_H_ ================================================ FILE: 48_Static/include/fcntl.h ================================================ #ifndef _FCNTL_H_ # define _FCNTL_H_ #define O_RDONLY 00 #define O_WRONLY 01 #define O_RDWR 02 int open(char *pathname, int flags); #endif // _FCNTL_H_ ================================================ FILE: 48_Static/include/stddef.h ================================================ #ifndef _STDDEF_H_ # define _STDDEF_H_ #ifndef NULL # define NULL (void *)0 #endif typedef long size_t; #endif //_STDDEF_H_ 
================================================ FILE: 48_Static/include/stdio.h ================================================ #ifndef _STDIO_H_ # define _STDIO_H_ #include #ifndef NULL # define NULL (void *)0 #endif // This FILE definition will do for now typedef char * FILE; FILE *fopen(char *pathname, char *mode); size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream); size_t fwrite(void *ptr, size_t size, size_t nmemb, FILE *stream); int fclose(FILE *stream); int printf(char *format); int fprintf(FILE *stream, char *format); int fputc(int c, FILE *stream); int fputs(char *s, FILE *stream); int putc(int c, FILE *stream); int putchar(int c); int puts(char *s); extern FILE *stdin; extern FILE *stdout; extern FILE *stderr; #endif // _STDIO_H_ ================================================ FILE: 48_Static/include/stdlib.h ================================================ #ifndef _STDLIB_H_ # define _STDLIB_H_ void exit(int status); void _Exit(int status); void *malloc(int size); void free(void *ptr); void *calloc(int nmemb, int size); void *realloc(void *ptr, int size); #endif // _STDLIB_H_ ================================================ FILE: 48_Static/include/string.h ================================================ #ifndef _STRING_H_ # define _STRING_H_ char *strdup(char *s); char *strchr(char *s, int c); char *strrchr(char *s, int c); #endif // _STRING_H_ ================================================ FILE: 48_Static/include/unistd.h ================================================ #ifndef _UNISTD_H_ # define _UNISTD_H_ void _exit(int status); int unlink(char *pathname); #endif // _UNISTD_H_ ================================================ FILE: 48_Static/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Given a string with a '.' 
and at least a 1-character suffix // after the '.', change the suffix to be the given character. // Return the new string or NULL if the original string could // not be modified char *alter_suffix(char *str, char suffix) { char *posn; char *newstr; // Clone the string if ((newstr = strdup(str)) == NULL) return (NULL); // Find the '.' if ((posn = strrchr(newstr, '.')) == NULL) return (NULL); // Ensure there is a suffix posn++; if (*posn == '\0') return (NULL); // Change the suffix and NUL-terminate the string *posn = suffix; posn++; *posn = '\0'; return (newstr); } // Given an input filename, compile that file // down to assembly code. Return the new file's name static char *do_compile(char *filename) { char cmd[TEXTLEN]; // Change the input file's suffix to .s Outfilename = alter_suffix(filename, 's'); if (Outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .c on the end\n", filename); exit(1); } // Generate the pre-processor command snprintf(cmd, TEXTLEN, "%s %s %s", CPPCMD, INCDIR, filename); // Open up the pre-processor pipe if ((Infile = popen(cmd, "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", filename, strerror(errno)); exit(1); } Infilename = filename; // Create the output file if ((Outfile = fopen(Outfilename, "w")) == NULL) { fprintf(stderr, "Unable to create %s: %s\n", Outfilename, strerror(errno)); exit(1); } Line = 1; // Reset the scanner Putback = '\n'; clear_symtable(); // Clear the symbol table if (O_verbose) printf("compiling %s\n", filename); scan(&Token); // Get the first token from the input Peektoken.token= 0; // and set there is no lookahead token genpreamble(); // Output the preamble global_declarations(); // Parse the global declarations genpostamble(); // Output the postamble fclose(Outfile); // Close the output file freestaticsyms(); // Free any static symbols in the file return (Outfilename); } // Given an input filename, assemble that file // down to object code. 
Return the object filename char *do_assemble(char *filename) { char cmd[TEXTLEN]; int err; char *outfilename = alter_suffix(filename, 'o'); if (outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .s on the end\n", filename); exit(1); } // Build the assembly command and run it snprintf(cmd, TEXTLEN, "%s %s %s", ASCMD, outfilename, filename); if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Assembly of %s failed\n", filename); exit(1); } return (outfilename); } // Given a list of object files and an output filename, // link all of the object filenames together. void do_link(char *outfilename, char *objlist[]) { int cnt, size = TEXTLEN; char cmd[TEXTLEN], *cptr; int err; // Start with the linker command and the output file cptr = cmd; cnt = snprintf(cptr, size, "%s %s ", LDCMD, outfilename); cptr += cnt; size -= cnt; // Now append each object file while (*objlist != NULL) { cnt = snprintf(cptr, size, "%s ", *objlist); cptr += cnt; size -= cnt; objlist++; } if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Linking failed\n"); exit(1); } } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s [-vcST] [-o outfile] file [file ...]\n", prog); fprintf(stderr, " -v give verbose output of the compilation stages\n"); fprintf(stderr, " -c generate object files but don't link them\n"); fprintf(stderr, " -S generate assembly files but don't link them\n"); fprintf(stderr, " -T dump the AST trees for each input file\n"); fprintf(stderr, " -o outfile, produce the outfile executable file\n"); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. 
enum { MAXOBJ = 100 }; int main(int argc, char *argv[]) { char *outfilename = AOUT; char *asmfile, *objfile; char *objlist[MAXOBJ]; int i, objcnt = 0; // Initialise our variables O_dumpAST = 0; O_keepasm = 0; O_assemble = 0; O_verbose = 0; O_dolink = 1; // Scan for command-line options for (i = 1; i < argc; i++) { // No leading '-', stop scanning for options if (*argv[i] != '-') break; // For each option in this argument for (int j = 1; (*argv[i] == '-') && argv[i][j]; j++) { switch (argv[i][j]) { case 'o': outfilename = argv[++i]; // Save & skip to next argument break; case 'T': O_dumpAST = 1; break; case 'c': O_assemble = 1; O_keepasm = 0; O_dolink = 0; break; case 'S': O_keepasm = 1; O_assemble = 0; O_dolink = 0; break; case 'v': O_verbose = 1; break; default: usage(argv[0]); } } } // Ensure we have at lease one input file argument if (i >= argc) usage(argv[0]); // Work on each input file in turn while (i < argc) { asmfile = do_compile(argv[i]); // Compile the source file if (O_dolink || O_assemble) { objfile = do_assemble(asmfile); // Assemble it to object forma if (objcnt == (MAXOBJ - 2)) { fprintf(stderr, "Too many object files for the compiler to handle\n"); exit(1); } objlist[objcnt++] = objfile; // Add the object file's name objlist[objcnt] = NULL; // to the list of object files } if (!O_keepasm) // Remove the assembly file if unlink(asmfile); // we don't need to keep it i++; } // Now link all the object files together if (O_dolink) { do_link(outfilename, objlist); // If we don't need to keep the object // files, then remove them if (!O_assemble) { for (i = 0; objlist[i] != NULL; i++) unlink(objlist[i]); } } return (0); } ================================================ FILE: 48_Static/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" #include #include // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current token is t, // and fetch the next token. 
Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Match a comma and fetch the next token void comma(void) { match(T_COMMA, "comma"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d of %s\n", s, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d of %s\n", s1, s2, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d of %s\n", s, d, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d of %s\n", s, c, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } ================================================ FILE: 48_Static/opt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST Tree Optimisation Code // Copyright (c) 2019 Warren Toomey, GPL3 // Fold an AST tree with a binary operator // and two A_INTLIT children. Return either // the original tree or a new leaf node. static struct ASTnode *fold2(struct ASTnode *n) { int val, leftval, rightval; // Get the values from each child leftval = n->left->a_intvalue; rightval = n->right->a_intvalue; // Perform some of the binary operations. 
// For any AST op we can't do, return // the original tree. switch (n->op) { case A_ADD: val = leftval + rightval; break; case A_SUBTRACT: val = leftval - rightval; break; case A_MULTIPLY: val = leftval * rightval; break; case A_DIVIDE: // Don't try to divide by zero. if (rightval == 0) return (n); val = leftval / rightval; break; default: return (n); } // Return a leaf node with the new value return (mkastleaf(A_INTLIT, n->type, NULL, val)); } // Fold an AST tree with a unary operator // and one INTLIT children. Return either // the original tree or a new leaf node. static struct ASTnode *fold1(struct ASTnode *n) { int val; // Get the child value. Do the // operation if recognised. // Return the new leaf node. val = n->left->a_intvalue; switch (n->op) { case A_WIDEN: break; case A_INVERT: val = ~val; break; case A_LOGNOT: val = !val; break; default: return (n); } // Return a leaf node with the new value return (mkastleaf(A_INTLIT, n->type, NULL, val)); } // Attempt to do constant folding on // the AST tree with the root node n static struct ASTnode *fold(struct ASTnode *n) { if (n == NULL) return (NULL); // Fold on the left child, then // do the same on the right child n->left = fold(n->left); n->right = fold(n->right); // If both children are A_INTLITs, do a fold2() if (n->left && n->left->op == A_INTLIT) { if (n->right && n->right->op == A_INTLIT) n = fold2(n); else // If only the left is A_INTLIT, do a fold1() n = fold1(n); } // Return the possibly modified tree return (n); } // Optimise an AST tree by // constant folding in all sub-trees struct ASTnode *optimise(struct ASTnode *n) { n = fold(n); return (n); } ================================================ FILE: 48_Static/scan.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { char *p; 
p = strchr(s, c); return (p ? p - s : -1); } // Get the next character from the input file. static int next(void) { int c, l; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return (c); } c = fgetc(Infile); // Read from input file while (c == '#') { // We've hit a pre-processor statement scan(&Token); // Get the line number into l if (Token.token != T_INTLIT) fatals("Expecting pre-processor line number, got:", Text); l = Token.intvalue; scan(&Token); // Get the filename in Text if (Token.token != T_STRLIT) fatals("Expecting pre-processor file name, got:", Text); if (Text[0] != '<') { // If this is a real filename if (strcmp(Text, Infilename)) // and not the one we have now Infilename = strdup(Text); // save it. Then update the line num Line = l; } while ((c = fgetc(Infile)) != '\n'); // Skip to the end of the line c = fgetc(Infile); // and get the next character } if ('\n' == c) Line++; // Increment line count return (c); } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. 
static int skip(void) {
  int ch;

  // Keep reading until the character is not
  // a space, tab, newline, return or form feed
  do {
    ch = next();
  } while (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' || ch == '\f');

  return (ch);
}

// Read the hexadecimal digits that follow a '\x' escape
// and return their value. It is a fatal error if there are
// no digits or if the value is above 255.
static int hexchar(void) {
  int ch, value = 0, seen = 0;

  // Accumulate the value one hex digit at a time
  for (ch = next(); isxdigit(ch); ch = next()) {
    value = value * 16 + chrpos("0123456789abcdef", tolower(ch));
    seen = 1;
  }

  // The character that ended the run isn't ours, put it back
  putback(ch);

  if (!seen)
    fatal("missing digits after '\\x'");
  if (value > 255)
    fatal("value out of range after '\\x'");
  return value;
}

// Return the next character from a character
// or string literal, decoding a backslash escape
// sequence into the character that it stands for.
static int scanch(void) {
  // The letter after the backslash, and in the same
  // position the character that the escape produces
  char *escnames = "abfnrtv\\\"'";
  char *escvalues = "\a\b\f\n\r\t\v\\\"'";
  char *hit;
  int ch, value, ndigits;

  // Anything that isn't a backslash is
  // just an ordinary old character
  ch = next();
  if (ch != '\\')
    return (ch);

  ch = next();

  // Simple one-character escapes. A NUL is excluded because
  // strchr() would match it against the string terminator.
  hit = (ch != '\0') ? strchr(escnames, ch) : NULL;
  if (hit != NULL)
    return (escvalues[hit - escnames]);

  // Octal constants: read in characters until we hit a
  // non-octal digit, permitting only 3 octal digits.
  if (ch >= '0' && ch <= '7') {
    value = 0;
    ndigits = 0;
    while (isdigit(ch) && ch < '8') {
      if (++ndigits > 3)
	break;
      value = value * 8 + (ch - '0');
      ch = next();
    }
    putback(ch);		// Put back the first non-octal char
    return (value);
  }

  // Hexadecimal constants
  if (ch == 'x')
    return hexchar();

  fatalc("unknown escape sequence", ch);
  return (ch);
}

// Scan and return an integer literal
// value from the input file.
static int scanint(int c) { int k, val = 0, radix = 10; // Assume the radix is 10, but if it starts with 0 if (c == '0') { // and the next character is 'x', it's radix 16 if ((c = next()) == 'x') { radix = 16; c = next(); } else // Otherwise, it's radix 8 radix = 8; } // Convert each character into an int value while ((k = chrpos("0123456789abcdef", tolower(c))) >= 0) { if (k >= radix) fatalc("invalid digit in integer literal", c); val = val * radix + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan in a string literal from the input file, // and store it in buf[]. Return the length of // the string. static int scanstr(char *buf) { int i, c; // Loop while we have enough buffer space for (i = 0; i < TEXTLEN - 1; i++) { // Get the next char and append to buf // Return when we hit the ending double quote if ((c = scanch()) == '"') { buf[i] = 0; return (i); } buf[i] = c; } // Ran out of buf[] space fatal("String literal too long"); return (0); } // Scan an identifier from the input file and // store it in buf[]. Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { fatal("Identifier too long"); } else if (i < lim - 1) { buf[i++] = c; } c = next(); } // We hit a non-valid character, put it back. // NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. 
static int keyword(char *s) { switch (*s) { case 'b': if (!strcmp(s, "break")) return (T_BREAK); break; case 'c': if (!strcmp(s, "case")) return (T_CASE); if (!strcmp(s, "char")) return (T_CHAR); if (!strcmp(s, "continue")) return (T_CONTINUE); break; case 'd': if (!strcmp(s, "default")) return (T_DEFAULT); break; case 'e': if (!strcmp(s, "else")) return (T_ELSE); if (!strcmp(s, "enum")) return (T_ENUM); if (!strcmp(s, "extern")) return (T_EXTERN); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'l': if (!strcmp(s, "long")) return (T_LONG); break; case 'r': if (!strcmp(s, "return")) return (T_RETURN); break; case 's': if (!strcmp(s, "sizeof")) return (T_SIZEOF); if (!strcmp(s, "static")) return (T_STATIC); if (!strcmp(s, "struct")) return (T_STRUCT); if (!strcmp(s, "switch")) return (T_SWITCH); break; case 't': if (!strcmp(s, "typedef")) return (T_TYPEDEF); break; case 'u': if (!strcmp(s, "union")) return (T_UNION); break; case 'v': if (!strcmp(s, "void")) return (T_VOID); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; } return (0); } // List of token strings, for debugging purposes char *Tstring[] = { "EOF", "=", "+=", "-=", "*=", "/=", "||", "&&", "|", "^", "&", "==", "!=", ",", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", "default", "sizeof", "static", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":" }; // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. 
int scan(struct token *t) { int c, tokentype; // If we have a lookahead token, return this token if (Peektoken.token != 0) { t->token = Peektoken.token; t->tokstr = Peektoken.tokstr; t->intvalue = Peektoken.intvalue; Peektoken.token = 0; return (1); } // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': if ((c = next()) == '+') { t->token = T_INC; } else if (c == '=') { t->token = T_ASPLUS; } else { putback(c); t->token = T_PLUS; } break; case '-': if ((c = next()) == '-') { t->token = T_DEC; } else if (c == '>') { t->token = T_ARROW; } else if (c == '=') { t->token = T_ASMINUS; } else if (isdigit(c)) { // Negative int literal t->intvalue = -scanint(c); t->token = T_INTLIT; } else { putback(c); t->token = T_MINUS; } break; case '*': if ((c = next()) == '=') { t->token = T_ASSTAR; } else { putback(c); t->token = T_STAR; } break; case '/': if ((c = next()) == '=') { t->token = T_ASSLASH; } else { putback(c); t->token = T_SLASH; } break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case '[': t->token = T_LBRACKET; break; case ']': t->token = T_RBRACKET; break; case '~': t->token = T_INVERT; break; case '^': t->token = T_XOR; break; case ',': t->token = T_COMMA; break; case '.': t->token = T_DOT; break; case ':': t->token = T_COLON; break; case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { putback(c); t->token = T_LOGNOT; } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else if (c == '<') { t->token = T_LSHIFT; } else { putback(c); t->token = T_LT; } break; case '>': if ((c = next()) == '=') { t->token = T_GE; } else if (c == '>') { t->token = T_RSHIFT; } else { putback(c); t->token = T_GT; } break; case 
'&': if ((c = next()) == '&') { t->token = T_LOGAND; } else { putback(c); t->token = T_AMPER; } break; case '|': if ((c = next()) == '|') { t->token = T_LOGOR; } else { putback(c); t->token = T_OR; } break; case '\'': // If it's a quote, scan in the // literal character value and // the trailing quote t->intvalue = scanch(); t->token = T_INTLIT; if (next() != '\'') fatal("Expected '\\'' at end of char literal"); break; case '"': // Scan in a literal string scanstr(Text); t->token = T_STRLIT; break; default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if ((tokentype = keyword(Text)) != 0) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token t->tokstr = Tstring[t->token]; return (1); } ================================================ FILE: 48_Static/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // Prototypes static struct ASTnode *single_statement(void); // compound_statement: // empty, i.e. 
no statement // | statement // | statement statements // ; // // statement: declaration // | expression_statement // | function_call // | if_statement // | while_statement // | for_statement // | return_statement // ; // if_statement: if_head // | if_head 'else' statement // ; // // if_head: 'if' '(' true_false_expression ')' statement ; // // Parse an IF statement including any // optional ELSE clause and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); rparen(); // Get the AST for the statement trueAST = single_statement(); // If we have an 'else', skip it // and get the AST for the statement if (Token.token == T_ELSE) { scan(&Token); falseAST = single_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, P_NONE, condAST, trueAST, falseAST, NULL, 0)); } // while_statement: 'while' '(' true_false_expression ')' statement ; // // Parse a WHILE statement and return its AST static struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); rparen(); // Get the AST for the statement. 
// Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Build and return the AST for this statement return (mkastnode(A_WHILE, P_NONE, condAST, NULL, bodyAST, NULL, 0)); } // for_statement: 'for' '(' expression_list ';' // true_false_expression ';' // expression_list ')' statement ; // // Parse a FOR statement and return its AST static struct ASTnode *for_statement(void) { struct ASTnode *condAST, *bodyAST; struct ASTnode *preopAST, *postopAST; struct ASTnode *tree; // Ensure we have 'for' '(' match(T_FOR, "for"); lparen(); // Get the pre_op expression and the ';' preopAST = expression_list(T_SEMI); semi(); // Get the condition and the ';'. // Force a non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); semi(); // Get the post_op expression and the ')' postopAST = expression_list(T_RPAREN); rparen(); // Get the statement which is the body // Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Glue the statement and the postop tree tree = mkastnode(A_GLUE, P_NONE, bodyAST, NULL, postopAST, NULL, 0); // Make a WHILE loop with the condition and this new body tree = mkastnode(A_WHILE, P_NONE, condAST, NULL, tree, NULL, 0); // And glue the preop tree to the A_WHILE tree return (mkastnode(A_GLUE, P_NONE, preopAST, NULL, tree, NULL, 0)); } // return_statement: 'return' '(' expression ')' ; // // Parse a return statement and return its AST static struct ASTnode *return_statement(void) { struct ASTnode *tree; // Can't return a value if function returns P_VOID if (Functionid->type == P_VOID) fatal("Can't return from a void function"); // Ensure we have 'return' '(' match(T_RETURN, "return"); lparen(); // Parse the following expression tree = binexpr(0); // Ensure this is compatible with the function's type tree = modify_type(tree, 
Functionid->type, 0); if (tree == NULL) fatal("Incompatible type to return"); // Add on the A_RETURN node tree = mkastunary(A_RETURN, P_NONE, tree, NULL, 0); // Get the ')' and ';' rparen(); semi(); return (tree); } // break_statement: 'break' ; // // Parse a break statement and return its AST static struct ASTnode *break_statement(void) { if (Looplevel == 0 && Switchlevel == 0) fatal("no loop or switch to break out from"); scan(&Token); semi(); return (mkastleaf(A_BREAK, 0, NULL, 0)); } // continue_statement: 'continue' ; // // Parse a continue statement and return its AST static struct ASTnode *continue_statement(void) { if (Looplevel == 0) fatal("no loop to continue to"); scan(&Token); semi(); return (mkastleaf(A_CONTINUE, 0, NULL, 0)); } // Parse a switch statement and return its AST static struct ASTnode *switch_statement(void) { struct ASTnode *left, *n, *c, *casetree= NULL, *casetail; int inloop=1, casecount=0; int seendefault=0; int ASTop, casevalue; // Skip the 'switch' and '(' scan(&Token); lparen(); // Get the switch expression, the ')' and the '{' left= binexpr(0); rparen(); lbrace(); // Ensure that this is of int type if (!inttype(left->type)) fatal("Switch expression is not of integer type"); // Build an A_SWITCH subtree with the expression as // the child n= mkastunary(A_SWITCH, 0, left, NULL, 0); // Now parse the cases Switchlevel++; while (inloop) { switch(Token.token) { // Leave the loop when we hit a '}' case T_RBRACE: if (casecount==0) fatal("No cases in switch"); inloop=0; break; case T_CASE: case T_DEFAULT: // Ensure this isn't after a previous 'default' if (seendefault) fatal("case or default after existing default"); // Set the AST operation. 
Scan the case value if required if (Token.token==T_DEFAULT) { ASTop= A_DEFAULT; seendefault= 1; scan(&Token); } else { ASTop= A_CASE; scan(&Token); left= binexpr(0); // Ensure the case value is an integer literal if (left->op != A_INTLIT) fatal("Expecting integer literal for case value"); casevalue= left->a_intvalue; // Walk the list of existing case values to ensure // that there isn't a duplicate case value for (c= casetree; c != NULL; c= c -> right) if (casevalue == c->a_intvalue) fatal("Duplicate case value"); } // Scan the ':' and get the compound expression match(T_COLON, ":"); left= compound_statement(1); casecount++; // Build a sub-tree with the compound statement as the left child // and link it in to the growing A_CASE tree if (casetree==NULL) { casetree= casetail= mkastunary(ASTop, 0, left, NULL, casevalue); } else { casetail->right= mkastunary(ASTop, 0, left, NULL, casevalue); casetail= casetail->right; } break; default: fatals("Unexpected token in switch", Token.tokstr); } } Switchlevel--; // We have a sub-tree with the cases and any default. Put the // case count into the A_SWITCH node and attach the case tree. n->a_intvalue= casecount; n->right= casetree; rbrace(); return(n); } // Parse a single statement and return its AST. static struct ASTnode *single_statement(void) { struct ASTnode *stmt; struct symtable *ctype; switch (Token.token) { case T_LBRACE: // We have a '{', so this is a compound statement lbrace(); stmt = compound_statement(0); rbrace(); return(stmt); case T_IDENT: // We have to see if the identifier matches a typedef. // If not, treat it as an expression. // Otherwise, fall down to the parse_type() call. if (findtypedef(Text) == NULL) { stmt= binexpr(0); semi(); return(stmt); } case T_CHAR: case T_INT: case T_LONG: case T_STRUCT: case T_UNION: case T_ENUM: case T_TYPEDEF: // The beginning of a variable declaration list. 
declaration_list(&ctype, C_LOCAL, T_SEMI, T_EOF, &stmt); semi(); return (stmt); // Any assignments from the declarations case T_IF: return (if_statement()); case T_WHILE: return (while_statement()); case T_FOR: return (for_statement()); case T_RETURN: return (return_statement()); case T_BREAK: return (break_statement()); case T_CONTINUE: return (continue_statement()); case T_SWITCH: return (switch_statement()); default: // For now, see if this is an expression. // This catches assignment statements. stmt= binexpr(0); semi(); return(stmt); } return (NULL); // Keep -Wall happy } // Parse a compound statement // and return its AST. If inswitch is true, // we look for a '}', 'case' or 'default' token // to end the parsing. Otherwise, look for // just a '}' to end the parsing. struct ASTnode *compound_statement(int inswitch) { struct ASTnode *left = NULL; struct ASTnode *tree; while (1) { // Parse a single statement tree = single_statement(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, P_NONE, left, NULL, tree, NULL, 0); } // Leave if we've hit the end token if (Token.token == T_RBRACE) return(left); if (inswitch && (Token.token == T_CASE || Token.token == T_DEFAULT)) return(left); } } ================================================ FILE: 48_Static/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 // Append a node to the singly-linked list pointed to by head or tail void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node) { // Check for valid pointers if (head == NULL || tail == NULL || node == NULL) fatal("Either head, tail or node is NULL in appendsym"); // Append to the list if (*tail) { (*tail)->next = node; *tail = node; } else *head = *tail = node; node->next = 
NULL; } // Create a symbol node to be added to a symbol table list. // Set up the node's: // + type: char, int etc. // + ctype: composite type pointer for struct/union // + structural type: var, function, array etc. // + size: number of elements, or endlabel: end label for a function // + posn: Position information for local symbols // Return a pointer to the new node struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn) { // Get a new node struct symtable *node = (struct symtable *) malloc(sizeof(struct symtable)); if (node == NULL) fatal("Unable to malloc a symbol table node in newsym"); // Fill in the values if (name == NULL) node->name = NULL; else node->name = strdup(name); node->type = type; node->ctype = ctype; node->stype = stype; node->class = class; node->nelems = nelems; // For pointers and integer types, set the size // of the symbol. structs and union declarations // manually set this up themselves. if (ptrtype(type) || inttype(type)) node->size = nelems * typesize(type, ctype); node->st_posn = posn; node->next = NULL; node->member = NULL; node->initlist = NULL; return (node); } // Add a symbol to the global symbol list struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn) { struct symtable *sym = newsym(name, type, ctype, stype, class, nelems, posn); // For structs and unions, copy the size from the type node if (type== P_STRUCT || type== P_UNION) sym->size = ctype->size; appendsym(&Globhead, &Globtail, sym); return (sym); } // Add a symbol to the local symbol list struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int nelems) { struct symtable *sym = newsym(name, type, ctype, stype, C_LOCAL, nelems, 0); // For structs and unions, copy the size from the type node if (type== P_STRUCT || type== P_UNION) sym->size = ctype->size; appendsym(&Loclhead, &Locltail, sym); return (sym); } // Add a symbol to the 
parameter list struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype) { struct symtable *sym = newsym(name, type, ctype, stype, C_PARAM, 1, 0); appendsym(&Parmhead, &Parmtail, sym); return (sym); } // Add a symbol to the temporary member list struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int nelems) { struct symtable *sym = newsym(name, type, ctype, stype, C_MEMBER, nelems, 0); // For structs and unions, copy the size from the type node if (type== P_STRUCT || type== P_UNION) sym->size = ctype->size; appendsym(&Membhead, &Membtail, sym); return (sym); } // Add a struct to the struct list struct symtable *addstruct(char *name) { struct symtable *sym = newsym(name, P_STRUCT, NULL, 0, C_STRUCT, 0, 0); appendsym(&Structhead, &Structtail, sym); return (sym); } // Add a struct to the union list struct symtable *addunion(char *name) { struct symtable *sym = newsym(name, P_UNION, NULL, 0, C_UNION, 0, 0); appendsym(&Unionhead, &Uniontail, sym); return (sym); } // Add an enum type or value to the enum list. // Class is C_ENUMTYPE or C_ENUMVAL. // Use posn to store the int value. struct symtable *addenum(char *name, int class, int value) { struct symtable *sym = newsym(name, P_INT, NULL, 0, class, 0, value); appendsym(&Enumhead, &Enumtail, sym); return (sym); } // Add a typedef to the typedef list struct symtable *addtypedef(char *name, int type, struct symtable *ctype) { struct symtable *sym = newsym(name, type, ctype, 0, C_TYPEDEF, 0, 0); appendsym(&Typehead, &Typetail, sym); return (sym); } // Search for a symbol in a specific list. // Return a pointer to the found node or NULL if not found. 
// If class is not zero, also match on the given class static struct symtable *findsyminlist(char *s, struct symtable *list, int class) { for (; list != NULL; list = list->next) if ((list->name != NULL) && !strcmp(s, list->name)) if (class==0 || class== list->class) return (list); return (NULL); } // Determine if the symbol s is in the global symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findglob(char *s) { return (findsyminlist(s, Globhead, 0)); } // Determine if the symbol s is in the local symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findlocl(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } return (findsyminlist(s, Loclhead, 0)); } // Determine if the symbol s is in the symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findsymbol(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } // Otherwise, try the local and global symbol lists node = findsyminlist(s, Loclhead, 0); if (node) return (node); return (findsyminlist(s, Globhead, 0)); } // Find a member in the member list // Return a pointer to the found node or NULL if not found. struct symtable *findmember(char *s) { return (findsyminlist(s, Membhead, 0)); } // Find a struct in the struct list // Return a pointer to the found node or NULL if not found. struct symtable *findstruct(char *s) { return (findsyminlist(s, Structhead, 0)); } // Find a struct in the union list // Return a pointer to the found node or NULL if not found. struct symtable *findunion(char *s) { return (findsyminlist(s, Unionhead, 0)); } // Find an enum type in the enum list // Return a pointer to the found node or NULL if not found. 
struct symtable *findenumtype(char *s) { return (findsyminlist(s, Enumhead, C_ENUMTYPE)); } // Find an enum value in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumval(char *s) { return (findsyminlist(s, Enumhead, C_ENUMVAL)); } // Find a type in the tyedef list // Return a pointer to the found node or NULL if not found. struct symtable *findtypedef(char *s) { return (findsyminlist(s, Typehead, 0)); } // Reset the contents of the symbol table void clear_symtable(void) { Globhead = Globtail = NULL; Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Membhead = Membtail = NULL; Structhead = Structtail = NULL; Unionhead = Uniontail = NULL; Enumhead = Enumtail = NULL; Typehead = Typetail = NULL; } // Clear all the entries in the local symbol table void freeloclsyms(void) { Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Functionid = NULL; } // Remove all static symbols from the global symbol table void freestaticsyms(void) { // g points at current node, prev at the previous one struct symtable *g, *prev= NULL; // Walk the global table looking for static entries for (g= Globhead; g != NULL; g= g->next) { if (g->class == C_STATIC) { // If there's a previous node, rearrange the prev pointer // to skip over the current node. 
If not, g is the head, // so do the same to Globhead if (prev != NULL) prev->next= g->next; else Globhead->next= g->next; // If g is the tail, point Globtail at the previous node // (if there is one), or Globhead if (g == Globtail) { if (prev != NULL) Globtail= prev; else Globtail= Globhead; } } } // Point prev at g before we move up to the next node prev= g; } ================================================ FILE: 48_Static/tests/err.input031.c ================================================ Expecting a primary expression, got token:+ on line 5 of input031.c ================================================ FILE: 48_Static/tests/err.input032.c ================================================ Unknown variable:cow on line 4 of input032.c ================================================ FILE: 48_Static/tests/err.input033.c ================================================ Incompatible type to return on line 4 of input033.c ================================================ FILE: 48_Static/tests/err.input034.c ================================================ For now, declaration of non-global arrays is not implemented on line 4 of input034.c ================================================ FILE: 48_Static/tests/err.input035.c ================================================ Duplicate local variable declaration:a on line 4 of input035.c ================================================ FILE: 48_Static/tests/err.input036.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input036.c ================================================ FILE: 48_Static/tests/err.input037.c ================================================ Expected:comma on line 3 of input037.c ================================================ FILE: 48_Static/tests/err.input038.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input038.c ================================================ FILE: 
48_Static/tests/err.input039.c ================================================ No statements in function with non-void type on line 4 of input039.c ================================================ FILE: 48_Static/tests/err.input040.c ================================================ No return for function with non-void type on line 4 of input040.c ================================================ FILE: 48_Static/tests/err.input041.c ================================================ Can't return from a void function on line 3 of input041.c ================================================ FILE: 48_Static/tests/err.input042.c ================================================ Undeclared function:fred on line 3 of input042.c ================================================ FILE: 48_Static/tests/err.input043.c ================================================ Undeclared array:b on line 3 of input043.c ================================================ FILE: 48_Static/tests/err.input044.c ================================================ Unknown variable:z on line 3 of input044.c ================================================ FILE: 48_Static/tests/err.input045.c ================================================ & operator must be followed by an identifier on line 3 of input045.c ================================================ FILE: 48_Static/tests/err.input046.c ================================================ * operator must be followed by an identifier or * on line 3 of input046.c ================================================ FILE: 48_Static/tests/err.input047.c ================================================ ++ operator must be followed by an identifier on line 3 of input047.c ================================================ FILE: 48_Static/tests/err.input048.c ================================================ -- operator must be followed by an identifier on line 3 of input048.c ================================================ FILE: 48_Static/tests/err.input049.c 
================================================ Incompatible expression in assignment on line 6 of input049.c ================================================ FILE: 48_Static/tests/err.input050.c ================================================ Incompatible types in binary expression on line 6 of input050.c ================================================ FILE: 48_Static/tests/err.input051.c ================================================ Expected '\'' at end of char literal on line 4 of input051.c ================================================ FILE: 48_Static/tests/err.input052.c ================================================ Unrecognised character:$ on line 5 of input052.c ================================================ FILE: 48_Static/tests/err.input056.c ================================================ unknown struct/union type:var1 on line 2 of input056.c ================================================ FILE: 48_Static/tests/err.input057.c ================================================ previously defined struct/union:fred on line 2 of input057.c ================================================ FILE: 48_Static/tests/err.input059.c ================================================ Undeclared variable:y on line 3 of input059.c ================================================ FILE: 48_Static/tests/err.input060.c ================================================ Undeclared variable:x on line 3 of input060.c ================================================ FILE: 48_Static/tests/err.input061.c ================================================ Undeclared variable:x on line 3 of input061.c ================================================ FILE: 48_Static/tests/err.input064.c ================================================ undeclared enum type::fred on line 1 of input064.c ================================================ FILE: 48_Static/tests/err.input065.c ================================================ enum type redeclared::fred on line 2 of input065.c 
================================================ FILE: 48_Static/tests/err.input066.c ================================================ enum value redeclared::z on line 2 of input066.c ================================================ FILE: 48_Static/tests/err.input068.c ================================================ redefinition of typedef:FOO on line 2 of input068.c ================================================ FILE: 48_Static/tests/err.input069.c ================================================ unknown type:FLOO on line 2 of input069.c ================================================ FILE: 48_Static/tests/err.input072.c ================================================ no loop or switch to break out from on line 1 of input072.c ================================================ FILE: 48_Static/tests/err.input073.c ================================================ no loop to continue to on line 1 of input073.c ================================================ FILE: 48_Static/tests/err.input075.c ================================================ Unexpected token in switch:if on line 4 of input075.c ================================================ FILE: 48_Static/tests/err.input076.c ================================================ No cases in switch on line 3 of input076.c ================================================ FILE: 48_Static/tests/err.input077.c ================================================ case or default after existing default on line 6 of input077.c ================================================ FILE: 48_Static/tests/err.input078.c ================================================ case or default after existing default on line 6 of input078.c ================================================ FILE: 48_Static/tests/err.input079.c ================================================ Duplicate case value on line 6 of input079.c ================================================ FILE: 48_Static/tests/err.input085.c 
================================================ Bad type in parameter list on line 1 of input085.c ================================================ FILE: 48_Static/tests/err.input086.c ================================================ Function definition not at global level on line 3 of input086.c ================================================ FILE: 48_Static/tests/err.input087.c ================================================ Bad type in member list on line 4 of input087.c ================================================ FILE: 48_Static/tests/err.input092.c ================================================ Type mismatch: literal vs. variable on line 1 of input092.c ================================================ FILE: 48_Static/tests/err.input093.c ================================================ Unknown variable:fred on line 1 of input093.c ================================================ FILE: 48_Static/tests/err.input094.c ================================================ Type mismatch: literal vs. 
variable on line 1 of input094.c ================================================ FILE: 48_Static/tests/err.input095.c ================================================ Variable can not be initialised:x on line 1 of input095.c ================================================ FILE: 48_Static/tests/err.input096.c ================================================ Array size is illegal:0 on line 1 of input096.c ================================================ FILE: 48_Static/tests/err.input097.c ================================================ For now, declaration of non-global arrays is not implemented on line 2 of input097.c ================================================ FILE: 48_Static/tests/err.input098.c ================================================ Too many values in initialisation list on line 1 of input098.c ================================================ FILE: 48_Static/tests/err.input102.c ================================================ Cannot cast to a struct, union or void type on line 3 of input102.c ================================================ FILE: 48_Static/tests/err.input103.c ================================================ Cannot cast to a struct, union or void type on line 3 of input103.c ================================================ FILE: 48_Static/tests/err.input104.c ================================================ Cannot cast to a struct, union or void type on line 2 of input104.c ================================================ FILE: 48_Static/tests/err.input105.c ================================================ Incompatible expression in assignment on line 4 of input105.c ================================================ FILE: 48_Static/tests/err.input118.c ================================================ Compiler doesn't support static or extern local declarations on line 2 of input118.c ================================================ FILE: 48_Static/tests/input001.c ================================================ int printf(char 
*fmt); void main() { printf("%d\n", 12 * 3); printf("%d\n", 18 - 2 * 4); printf("%d\n", 1 + 2 + 9 - 5/2 + 3*5); } ================================================ FILE: 48_Static/tests/input002.c ================================================ int printf(char *fmt); void main() { int fred; int jim; fred= 5; jim= 12; printf("%d\n", fred + jim); } ================================================ FILE: 48_Static/tests/input003.c ================================================ int printf(char *fmt); void main() { int x; x= 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); } ================================================ FILE: 48_Static/tests/input004.c ================================================ int printf(char *fmt); void main() { int x; x= 7 < 9; printf("%d\n", x); x= 7 <= 9; printf("%d\n", x); x= 7 != 9; printf("%d\n", x); x= 7 == 7; printf("%d\n", x); x= 7 >= 7; printf("%d\n", x); x= 7 <= 7; printf("%d\n", x); x= 9 > 7; printf("%d\n", x); x= 9 >= 7; printf("%d\n", x); x= 9 != 7; printf("%d\n", x); } ================================================ FILE: 48_Static/tests/input005.c ================================================ int printf(char *fmt); void main() { int i; int j; i=6; j=12; if (i < j) { printf("%d\n", i); } else { printf("%d\n", j); } } ================================================ FILE: 48_Static/tests/input006.c ================================================ int printf(char *fmt); void main() { int i; i=1; while (i <= 10) { printf("%d\n", i); i= i + 1; } } ================================================ FILE: 48_Static/tests/input007.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 48_Static/tests/input008.c ================================================ int printf(char *fmt); void main() { 
int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 48_Static/tests/input009.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { printf("%d\n", 2 * b - a); } } ================================================ FILE: 48_Static/tests/input010.c ================================================ int printf(char *fmt); void main() { int i; char j; j= 20; printf("%d\n", j); i= 10; printf("%d\n", i); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 2; j= j + 1) { printf("%d\n", j); } } ================================================ FILE: 48_Static/tests/input011.c ================================================ int printf(char *fmt); int main() { int i; char j; long k; i= 10; printf("%d\n", i); j= 20; printf("%d\n", j); k= 30; printf("%d\n", k); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 4; j= j + 1) { printf("%d\n", j); } for (k= 1; k <= 5; k= k + 1) { printf("%d\n", k); } return(i); printf("%d\n", 12345); return(3); } ================================================ FILE: 48_Static/tests/input012.c ================================================ int printf(char *fmt); int fred() { return(5); } void main() { int x; x= fred(2); printf("%d\n", x); } ================================================ FILE: 48_Static/tests/input013.c ================================================ int printf(char *fmt); int fred() { return(56); } void main() { int dummy; int result; dummy= printf("%d\n", 23); result= fred(10); dummy= printf("%d\n", result); } ================================================ FILE: 48_Static/tests/input014.c ================================================ int printf(char *fmt); int fred() { return(20); } int main() { int result; printf("%d\n", 10); result= fred(15); printf("%d\n", result); 
printf("%d\n", fred(15)+10); return(0); } ================================================ FILE: 48_Static/tests/input015.c ================================================ int printf(char *fmt); int main() { char a; char *b; char c; int d; int *e; int f; a= 18; printf("%d\n", a); b= &a; c= *b; printf("%d\n", c); d= 12; printf("%d\n", d); e= &d; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 48_Static/tests/input016.c ================================================ int printf(char *fmt); int c; int d; int *e; int f; int main() { c= 12; d=18; printf("%d\n", c); e= &c + 1; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 48_Static/tests/input017.c ================================================ int printf(char *fmt); int main() { char a; char *b; int d; int *e; b= &a; *b= 19; printf("%d\n", a); e= &d; *e= 12; printf("%d\n", d); return(0); } ================================================ FILE: 48_Static/tests/input018.c ================================================ int printf(char *fmt); int main() { int a; int b; a= b= 34; printf("%d\n", a); printf("%d\n", b); return(0); } ================================================ FILE: 48_Static/tests/input018a.c ================================================ int printf(char *fmt); int a; int *b; char c; char *d; int main() { b= &a; *b= 15; printf("%d\n", a); d= &c; *d= 16; printf("%d\n", c); return(0); } ================================================ FILE: 48_Static/tests/input019.c ================================================ int printf(char *fmt); int a; int b; int c; int d; int e; int main() { a= 2; b= 4; c= 3; d= 2; e= (a+b) * (c+d); printf("%d\n", e); return(0); } ================================================ FILE: 48_Static/tests/input020.c ================================================ int printf(char *fmt); int a; int b[25]; int main() { b[3]= 12; a= b[3]; printf("%d\n", a); return(0); } 
================================================ FILE: 48_Static/tests/input021.c ================================================ int printf(char *fmt); char c; char *str; int main() { c= '\n'; printf("%d\n", c); for (str= "Hello world\n"; *str != 0; str= str + 1) { printf("%c", *str); } return(0); } ================================================ FILE: 48_Static/tests/input022.c ================================================ int printf(char *fmt); char a; char b; char c; int d; int e; int f; long g; long h; long i; int main() { b= 5; c= 7; a= b + c++; printf("%d\n", a); e= 5; f= 7; d= e + f++; printf("%d\n", d); h= 5; i= 7; g= h + i++; printf("%d\n", g); a= b-- + c; printf("%d\n", a); d= e-- + f; printf("%d\n", d); g= h-- + i; printf("%d\n", g); a= ++b + c; printf("%d\n", a); d= ++e + f; printf("%d\n", d); g= ++h + i; printf("%d\n", g); a= b * --c; printf("%d\n", a); d= e * --f; printf("%d\n", d); g= h * --i; printf("%d\n", g); return(0); } ================================================ FILE: 48_Static/tests/input023.c ================================================ int printf(char *fmt); char *str; int x; int main() { x= -23; printf("%d\n", x); printf("%d\n", -10 * -10); x= 1; x= ~x; printf("%d\n", x); x= 2 > 5; printf("%d\n", x); x= !x; printf("%d\n", x); x= !x; printf("%d\n", x); x= 13; if (x) { printf("%d\n", 13); } x= 0; if (!x) { printf("%d\n", 14); } for (str= "Hello world\n"; *str; str++) { printf("%c", *str); } return(0); } ================================================ FILE: 48_Static/tests/input024.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { a= 42; b= 19; printf("%d\n", a & b); printf("%d\n", a | b); printf("%d\n", a ^ b); printf("%d\n", 1 << 3); printf("%d\n", 63 >> 3); return(0); } ================================================ FILE: 48_Static/tests/input025.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { char z; int 
y; int x; x= 10; y= 20; z= 30; printf("%d\n", x); printf("%d\n", y); printf("%d\n", z); a= 5; b= 15; c= 25; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); return(0); } ================================================ FILE: 48_Static/tests/input026.c ================================================ int printf(char *fmt); int main(int a, char b, long c, int d, int e, int f, int g, int h) { int i; int j; int k; a= 13; printf("%d\n", a); b= 23; printf("%d\n", b); c= 34; printf("%d\n", c); d= 44; printf("%d\n", d); e= 54; printf("%d\n", e); f= 64; printf("%d\n", f); g= 74; printf("%d\n", g); h= 84; printf("%d\n", h); i= 94; printf("%d\n", i); j= 95; printf("%d\n", j); k= 96; printf("%d\n", k); return(0); } ================================================ FILE: 48_Static/tests/input027.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int param5(int a, int b, int c, int d, int e) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param2(int a, int b) { int c; int d; int e; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param0() { int a; int b; int c; int d; int e; a= 1; b= 2; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int main() { param8(1,2,3,4,5,6,7,8); param5(1,2,3,4,5); param2(1,2); param0(); return(0); } ================================================ FILE: 48_Static/tests/input028.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); 
printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 48_Static/tests/input029.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h); int fred(int a, int b, int c); int main(); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 48_Static/tests/input030.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input030.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 48_Static/tests/input031.c ================================================ int printf(char *fmt); int main() { int x; x= 2 + + 3 - * / ; } ================================================ FILE: 48_Static/tests/input032.c ================================================ int printf(char *fmt); int main() { pizza cow llama sausage; } ================================================ FILE: 48_Static/tests/input033.c ================================================ int printf(char *fmt); int main() { char *z; return(z); } 
================================================ FILE: 48_Static/tests/input034.c ================================================ int printf(char *fmt); int main() { int a[12]; return(0); } ================================================ FILE: 48_Static/tests/input035.c ================================================ int printf(char *fmt); int fred(int a, int b) { int a; return(a); } ================================================ FILE: 48_Static/tests/input036.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c); ================================================ FILE: 48_Static/tests/input037.c ================================================ int printf(char *fmt); int fred(int a, char b +, int z); ================================================ FILE: 48_Static/tests/input038.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c, int g); ================================================ FILE: 48_Static/tests/input039.c ================================================ int printf(char *fmt); int main() { int a; } ================================================ FILE: 48_Static/tests/input040.c ================================================ int printf(char *fmt); int main() { int a; a= 5; } ================================================ FILE: 48_Static/tests/input041.c ================================================ int printf(char *fmt); void fred() { return(5); } ================================================ FILE: 48_Static/tests/input042.c ================================================ int printf(char *fmt); int main() { fred(5); } ================================================ FILE: 48_Static/tests/input043.c ================================================ int printf(char *fmt); int main() { int a; a= b[4]; } ================================================ FILE: 
48_Static/tests/input044.c ================================================ int printf(char *fmt); int main() { int a; a= z; } ================================================ FILE: 48_Static/tests/input045.c ================================================ int printf(char *fmt); int main() { int a; a= &5; } ================================================ FILE: 48_Static/tests/input046.c ================================================ int printf(char *fmt); int main() { int a; a= *5; } ================================================ FILE: 48_Static/tests/input047.c ================================================ int printf(char *fmt); int main() { int a; a= ++5; } ================================================ FILE: 48_Static/tests/input048.c ================================================ int printf(char *fmt); int main() { int a; a= --5; } ================================================ FILE: 48_Static/tests/input049.c ================================================ int printf(char *fmt); int main() { int x; char y; y= x; } ================================================ FILE: 48_Static/tests/input050.c ================================================ int printf(char *fmt); int main() { char *a; char *b; a= a + b; } ================================================ FILE: 48_Static/tests/input051.c ================================================ int printf(char *fmt); int main() { char a; a= 'fred'; } ================================================ FILE: 48_Static/tests/input052.c ================================================ int printf(char *fmt); int main() { int a; a= $5.00; } ================================================ FILE: 48_Static/tests/input053.c ================================================ int printf(char *fmt); int main() { printf("Hello world, %d\n", 23); return(0); } ================================================ FILE: 48_Static/tests/input054.c ================================================ int printf(char *fmt); int main() 
{ int i; for (i=0; i < 20; i++) { printf("Hello world, %d\n", i); } return(0); } ================================================ FILE: 48_Static/tests/input055.c ================================================ int printf(char *fmt); int main(int argc, char **argv) { int i; char *argument; printf("Hello world\n"); for (i=0; i < argc; i++) { argument= *argv; argv= argv + 1; printf("Argument %d is %s\n", i, argument); } return(0); } ================================================ FILE: 48_Static/tests/input056.c ================================================ struct foo { int x; }; struct mary var1; ================================================ FILE: 48_Static/tests/input057.c ================================================ struct fred { int x; } ; struct fred { char y; } ; ================================================ FILE: 48_Static/tests/input058.c ================================================ int printf(char *fmt); struct fred { int x; char y; long z; }; struct fred var2; struct fred *varptr; int main() { long result; var2.x= 12; printf("%d\n", var2.x); var2.y= 'c'; printf("%d\n", var2.y); var2.z= 4005; printf("%d\n", var2.z); result= var2.x + var2.y + var2.z; printf("%d\n", result); varptr= &var2; result= varptr->x + varptr->y + varptr->z; printf("%d\n", result); return(0); } ================================================ FILE: 48_Static/tests/input059.c ================================================ int main() { int x; x= y.foo; } ================================================ FILE: 48_Static/tests/input060.c ================================================ int main() { int x; x= x.foo; } ================================================ FILE: 48_Static/tests/input061.c ================================================ int main() { int x; x= x->foo; } ================================================ FILE: 48_Static/tests/input062.c ================================================ int printf(char *fmt); union fred { char w; int x; int y; long z; 
}; union fred var1; union fred *varptr; int main() { var1.x= 65; printf("%d\n", var1.x); var1.x= 66; printf("%d\n", var1.x); printf("%d\n", var1.y); printf("The next two depend on the endian of the platform\n"); printf("%d\n", var1.w); printf("%d\n", var1.z); varptr= &var1; varptr->x= 67; printf("%d\n", varptr->x); printf("%d\n", varptr->y); return(0); } ================================================ FILE: 48_Static/tests/input063.c ================================================ int printf(char *fmt); enum fred { apple=1, banana, carrot, pear=10, peach, mango, papaya }; enum jane { aple=1, bnana, crrot, par=10, pech, mago, paaya }; enum fred var1; enum jane var2; enum fred var3; int main() { var1= carrot + pear + mango; printf("%d\n", var1); return(0); } ================================================ FILE: 48_Static/tests/input064.c ================================================ enum fred var3; ================================================ FILE: 48_Static/tests/input065.c ================================================ enum fred { x, y, z }; enum fred { a, b }; ================================================ FILE: 48_Static/tests/input066.c ================================================ enum fred { x, y, z }; enum mary { a, b, z }; ================================================ FILE: 48_Static/tests/input067.c ================================================ int printf(char *fmt); typedef int FOO; FOO var1; struct bar { int x; int y} ; typedef struct bar BAR; BAR var2; int main() { var1= 5; printf("%d\n", var1); var2.x= 7; var2.y= 10; printf("%d\n", var2.x + var2.y); return(0); } ================================================ FILE: 48_Static/tests/input068.c ================================================ typedef int FOO; typedef char FOO; ================================================ FILE: 48_Static/tests/input069.c ================================================ typedef int FOO; FLOO y; ================================================ 
FILE: 48_Static/tests/input070.c ================================================ #include typedef int FOO; int main() { FOO x; x= 56; printf("%d\n", x); return(0); } ================================================ FILE: 48_Static/tests/input071.c ================================================ #include int main() { int x; x = 0; while (x < 100) { if (x == 5) { x = x + 2; continue; } printf("%d\n", x); if (x == 14) { break; } x = x + 1; } printf("Done\n"); return (0); } ================================================ FILE: 48_Static/tests/input072.c ================================================ int main() { break; } ================================================ FILE: 48_Static/tests/input073.c ================================================ int main() { continue; } ================================================ FILE: 48_Static/tests/input074.c ================================================ #include int main() { int x; int y; y= 0; for (x=0; x < 5; x++) { switch(x) { case 1: { y= 5; break; } case 2: { y= 7; break; } case 3: { y= 9; } default: { y= 100; } } printf("%d\n", y); } return(0); } ================================================ FILE: 48_Static/tests/input075.c ================================================ int main() { int x; switch(x) { if (x<5); } } ================================================ FILE: 48_Static/tests/input076.c ================================================ int main() { int x; switch(x) { } } ================================================ FILE: 48_Static/tests/input077.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } case 4: { x= 2; } } } ================================================ FILE: 48_Static/tests/input078.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } default: { x= 2; } } } ================================================ FILE: 48_Static/tests/input079.c 
================================================ int main() { int x; switch(x) { case 1: { x= 2; } case 2: { x= 2; } case 1: { x= 2; } default: { x= 2; } } } ================================================ FILE: 48_Static/tests/input080.c ================================================ #include int x; int y; int main() { for (x=0, y=1; x < 6; x++, y=y+2) { printf("%d %d\n", x, y); } return(0); } ================================================ FILE: 48_Static/tests/input081.c ================================================ #include int x; int y; int main() { x= 0; y=1; for (;x<5;) { printf("%d %d\n", x, y); x=x+1; y=y+2; } return(0); } ================================================ FILE: 48_Static/tests/input082.c ================================================ #include int main() { int x; x= 10; // Dangling else test if (x > 5) if (x > 15) printf("x > 15\n"); else printf("15 >= x > 5\n"); else printf("5 >= x\n"); return(0); } ================================================ FILE: 48_Static/tests/input083.c ================================================ #include int main() { int x; // Dangling else test. 
// We should not print anything for x<= 5 for (x=0; x < 12; x++) if (x > 5) if (x > 10) printf("10 < %2d\n", x); else printf(" 5 < %2d <= 10\n", x); return(0); } ================================================ FILE: 48_Static/tests/input084.c ================================================ #include int main() { int x, y; x=2; y=3; printf("%d %d\n", x, y); char a, *b; a= 'f'; b= &a; printf("%c %c\n", a, *b); return(0); } ================================================ FILE: 48_Static/tests/input085.c ================================================ int main(struct foo { int x; }; , int a) { return(0); } ================================================ FILE: 48_Static/tests/input086.c ================================================ int main() { int fred() { return(5); } int x; x=2; return(x); } ================================================ FILE: 48_Static/tests/input087.c ================================================ struct foo { int x; int y; struct blah { int g; }; }; ================================================ FILE: 48_Static/tests/input088.c ================================================ #include struct foo { int x; int y; } fred, mary; struct foo james; int main() { fred.x= 5; mary.y= 6; printf("%d %d\n", fred.x, mary.y); return(0); } ================================================ FILE: 48_Static/tests/input089.c ================================================ #include int x= 23; char y= 'H'; char *z= "Hello world"; int main() { printf("%d %c %s\n", x, y, z); return(0); } ================================================ FILE: 48_Static/tests/input090.c ================================================ #include int a = 23, b = 100; char y = 'H', *z = "Hello world"; int main() { printf("%d %d %c %s\n", a, b, y, z); return (0); } ================================================ FILE: 48_Static/tests/input091.c ================================================ #include int fred[] = { 1, 2, 3, 4, 5 }; int jim[10] = { 1, 2, 3, 4, 5 }; int main() { 
int i; for (i=0; i < 5; i++) printf("%d\n", fred[i]); for (i=0; i < 10; i++) printf("%d\n", jim[i]); return(0); } ================================================ FILE: 48_Static/tests/input092.c ================================================ char x= 3000; ================================================ FILE: 48_Static/tests/input093.c ================================================ char x= fred; ================================================ FILE: 48_Static/tests/input094.c ================================================ char *s= 54; ================================================ FILE: 48_Static/tests/input095.c ================================================ int fred(int x=2) { return(x); } ================================================ FILE: 48_Static/tests/input096.c ================================================ int fred[0]; ================================================ FILE: 48_Static/tests/input097.c ================================================ int main() { int x[45]; return(0); } ================================================ FILE: 48_Static/tests/input098.c ================================================ int fred[3]= { 1, 2, 3, 4, 5 }; ================================================ FILE: 48_Static/tests/input099.c ================================================ #include // List of token strings, for debugging purposes. 
// As yet, we can't store a NULL into the list char *Tstring[] = { "EOF", "=", "||", "&&", "|", "^", "&", "==", "!=", ",", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", "default", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":", "" }; int main() { int i; char *str; i=0; while (1) { str= Tstring[i]; if (*str == 0) break; printf("%s\n", str); i++; } return(0); } ================================================ FILE: 48_Static/tests/input100.c ================================================ #include int main() { int x= 3, y=14; int z= 2 * x + y; char *str= "Hello world"; printf("%s %d %d\n", str, x+y, z); return(0); } ================================================ FILE: 48_Static/tests/input101.c ================================================ #include int main() { int x= 65535; char y; char *str; y= (char )x; printf("0x%x\n", y); str= (char *)0; printf("0x%lx\n", (long)str); return(0); } ================================================ FILE: 48_Static/tests/input102.c ================================================ int main() { struct foo { int p; }; int y= (struct foo) x; } ================================================ FILE: 48_Static/tests/input103.c ================================================ int main() { union foo { int p; }; int y= (union foo) x; } ================================================ FILE: 48_Static/tests/input104.c ================================================ int main() { int y= (void) x; } ================================================ FILE: 48_Static/tests/input105.c ================================================ int main() { int x; char *y; y= (char) x; } ================================================ FILE: 48_Static/tests/input106.c ================================================ #include char *y= 
(char *)0; int main() { printf("0x%lx\n", (long)y); return(0); } ================================================ FILE: 48_Static/tests/input107.c ================================================ #include char *y[] = { "fish", "cow", NULL }; char *z= NULL; int main() { int i; char *ptr; for (i=0; i < 3; i++) { ptr= y[i]; if (ptr != (char *)0) printf("%s\n", y[i]); else printf("NULL\n"); } return(0); } ================================================ FILE: 48_Static/tests/input108.c ================================================ int main() { char *str= (void *)0; return(0); } ================================================ FILE: 48_Static/tests/input109.c ================================================ #include int main() { int x= 17 - 1; printf("%d\n", x); return(0); } ================================================ FILE: 48_Static/tests/input110.c ================================================ #include int x; int y; int main() { x= 3; y= 15; y += x; printf("%d\n", y); x= 3; y= 15; y -= x; printf("%d\n", y); x= 3; y= 15; y *= x; printf("%d\n", y); x= 3; y= 15; y /= x; printf("%d\n", y); return(0); } ================================================ FILE: 48_Static/tests/input111.c ================================================ #include int main() { int x= 2000 + 3 + 4 * 5 + 6; printf("%d\n", x); return(0); } ================================================ FILE: 48_Static/tests/input112.c ================================================ #include char* y = NULL; int x= 10 + 6; int fred [ 2 + 3 ]; int main() { fred[2]= x; printf("%d\n", fred[2]); return(0); } ================================================ FILE: 48_Static/tests/input113.c ================================================ #include void fred(void); void fred(void) { printf("fred says hello\n"); } int main(void) { fred(); return(0); } ================================================ FILE: 48_Static/tests/input114.c ================================================ #include int main() { int x= 
0x4a; printf("%c\n", x); return(0); } ================================================ FILE: 48_Static/tests/input115.c ================================================ #include struct foo { int x; char y; long z; }; typedef struct foo blah; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol struct symtable *ctype; // If struct/union, ptr to that type int stype; // Structural type for the symbol int class; // Storage class for the symbol int size; // Total size in bytes of this symbol int nelems; // Functions: # params. Arrays: # elements #define st_endlabel st_posn // For functions, the end label int st_posn; // For locals, the negative offset // from the stack base pointer int *initlist; // List of initial values struct symtable *next; // Next symbol in one list struct symtable *member; // First member of a function, struct, }; // union or enum // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates int rvalue; // True if the node is an rvalue struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; struct symtable *sym; // For many AST nodes, the pointer to // the symbol in the symbol table #define a_intvalue a_size // For A_INTLIT, the integer value int a_size; // For A_SCALE, the size to scale by }; int main() { printf("%ld\n", sizeof(char)); printf("%ld\n", sizeof(int)); printf("%ld\n", sizeof(long)); printf("%ld\n", sizeof(char *)); printf("%ld\n", sizeof(blah)); printf("%ld\n", sizeof(struct symtable)); printf("%ld\n", sizeof(struct ASTnode)); return(0); } ================================================ FILE: 48_Static/tests/input116.c ================================================ #include static int counter=0; static int fred(void) { return(counter++); } int main(void) { int i; for (i=0; i < 5; i++) printf("%d\n", fred()); return(0); } 
================================================ FILE: 48_Static/tests/input117.c ================================================ #include static char *fred(void) { return("Hello"); } int main(void) { printf("%s\n", fred()); return(0); } ================================================ FILE: 48_Static/tests/input118.c ================================================ int main(void) { static int x; } ================================================ FILE: 48_Static/tests/mktests ================================================ #!/bin/sh # Make the output files for each test # Build our compiler if needed if [ ! -f ../cwj ] then (cd ..; make install) fi for i in input*c do if [ ! -f "out.$i" -a ! -f "err.$i" ] then ../cwj -o out $i 2> "err.$i" # If the err file is empty if [ ! -s "err.$i" ] then rm -f "err.$i" cc -o out $i ./out > "out.$i" fi fi rm -f out out.s done ================================================ FILE: 48_Static/tests/out.input001.c ================================================ 36 10 25 ================================================ FILE: 48_Static/tests/out.input002.c ================================================ 17 ================================================ FILE: 48_Static/tests/out.input003.c ================================================ 1 2 3 4 5 ================================================ FILE: 48_Static/tests/out.input004.c ================================================ 1 1 1 1 1 1 1 1 1 ================================================ FILE: 48_Static/tests/out.input005.c ================================================ 6 ================================================ FILE: 48_Static/tests/out.input006.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 48_Static/tests/out.input007.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 48_Static/tests/out.input008.c 
================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 48_Static/tests/out.input009.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 48_Static/tests/out.input010.c ================================================ 20 10 1 2 3 4 5 253 254 255 0 1 ================================================ FILE: 48_Static/tests/out.input011.c ================================================ 10 20 30 1 2 3 4 5 253 254 255 0 1 2 3 1 2 3 4 5 ================================================ FILE: 48_Static/tests/out.input012.c ================================================ 5 ================================================ FILE: 48_Static/tests/out.input013.c ================================================ 23 56 ================================================ FILE: 48_Static/tests/out.input014.c ================================================ 10 20 30 ================================================ FILE: 48_Static/tests/out.input015.c ================================================ 18 18 12 12 ================================================ FILE: 48_Static/tests/out.input016.c ================================================ 12 18 ================================================ FILE: 48_Static/tests/out.input017.c ================================================ 19 12 ================================================ FILE: 48_Static/tests/out.input018.c ================================================ 34 34 ================================================ FILE: 48_Static/tests/out.input018a.c ================================================ 15 16 ================================================ FILE: 48_Static/tests/out.input019.c ================================================ 30 ================================================ FILE: 48_Static/tests/out.input020.c ================================================ 12 
================================================ FILE: 48_Static/tests/out.input021.c ================================================ 10 Hello world ================================================ FILE: 48_Static/tests/out.input022.c ================================================ 12 12 12 13 13 13 13 13 13 35 35 35 ================================================ FILE: 48_Static/tests/out.input023.c ================================================ -23 100 -2 0 1 0 13 14 Hello world ================================================ FILE: 48_Static/tests/out.input024.c ================================================ 2 59 57 8 7 ================================================ FILE: 48_Static/tests/out.input025.c ================================================ 10 20 30 5 15 25 ================================================ FILE: 48_Static/tests/out.input026.c ================================================ 13 23 34 44 54 64 74 84 94 95 96 ================================================ FILE: 48_Static/tests/out.input027.c ================================================ 1 2 3 4 5 6 7 8 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 ================================================ FILE: 48_Static/tests/out.input028.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 48_Static/tests/out.input029.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 48_Static/tests/out.input030.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input030.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ 
FILE: 48_Static/tests/out.input053.c ================================================ Hello world, 23 ================================================ FILE: 48_Static/tests/out.input054.c ================================================ Hello world, 0 Hello world, 1 Hello world, 2 Hello world, 3 Hello world, 4 Hello world, 5 Hello world, 6 Hello world, 7 Hello world, 8 Hello world, 9 Hello world, 10 Hello world, 11 Hello world, 12 Hello world, 13 Hello world, 14 Hello world, 15 Hello world, 16 Hello world, 17 Hello world, 18 Hello world, 19 ================================================ FILE: 48_Static/tests/out.input055.c ================================================ Hello world Argument 0 is ./out ================================================ FILE: 48_Static/tests/out.input058.c ================================================ 12 99 4005 4116 4116 ================================================ FILE: 48_Static/tests/out.input062.c ================================================ 65 66 66 The next two depend on the endian of the platform 66 66 67 67 ================================================ FILE: 48_Static/tests/out.input063.c ================================================ 25 ================================================ FILE: 48_Static/tests/out.input067.c ================================================ 5 17 ================================================ FILE: 48_Static/tests/out.input070.c ================================================ 56 ================================================ FILE: 48_Static/tests/out.input071.c ================================================ 0 1 2 3 4 7 8 9 10 11 12 13 14 Done ================================================ FILE: 48_Static/tests/out.input074.c ================================================ 100 5 7 100 100 ================================================ FILE: 48_Static/tests/out.input080.c ================================================ 0 1 1 3 2 5 3 7 4 9 5 11 
================================================ FILE: 48_Static/tests/out.input081.c ================================================ 0 1 1 3 2 5 3 7 4 9 ================================================ FILE: 48_Static/tests/out.input082.c ================================================ 15 >= x > 5 ================================================ FILE: 48_Static/tests/out.input083.c ================================================ 5 < 6 <= 10 5 < 7 <= 10 5 < 8 <= 10 5 < 9 <= 10 5 < 10 <= 10 10 < 11 ================================================ FILE: 48_Static/tests/out.input084.c ================================================ 2 3 f f ================================================ FILE: 48_Static/tests/out.input088.c ================================================ 5 6 ================================================ FILE: 48_Static/tests/out.input089.c ================================================ 23 H Hello world ================================================ FILE: 48_Static/tests/out.input090.c ================================================ 23 100 H Hello world ================================================ FILE: 48_Static/tests/out.input091.c ================================================ 1 2 3 4 5 1 2 3 4 5 0 0 0 0 0 ================================================ FILE: 48_Static/tests/out.input099.c ================================================ EOF = || && | ^ & == != , > <= >= << >> + - * / ++ -- ~ ! void char int long if else while for return struct union enum typedef extern break continue switch case default intlit strlit ; identifier { } ( ) [ ] , . 
-> : ================================================ FILE: 48_Static/tests/out.input100.c ================================================ Hello world 17 20 ================================================ FILE: 48_Static/tests/out.input101.c ================================================ 0xff 0x0 ================================================ FILE: 48_Static/tests/out.input106.c ================================================ 0x0 ================================================ FILE: 48_Static/tests/out.input107.c ================================================ fish cow NULL ================================================ FILE: 48_Static/tests/out.input108.c ================================================ ================================================ FILE: 48_Static/tests/out.input109.c ================================================ 16 ================================================ FILE: 48_Static/tests/out.input110.c ================================================ 18 12 45 5 ================================================ FILE: 48_Static/tests/out.input111.c ================================================ 2029 ================================================ FILE: 48_Static/tests/out.input112.c ================================================ 16 ================================================ FILE: 48_Static/tests/out.input113.c ================================================ fred says hello ================================================ FILE: 48_Static/tests/out.input114.c ================================================ J ================================================ FILE: 48_Static/tests/out.input115.c ================================================ 1 4 8 8 13 64 48 ================================================ FILE: 48_Static/tests/out.input116.c ================================================ 0 1 2 3 4 ================================================ FILE: 48_Static/tests/out.input117.c 
#!/bin/sh
# Run each test and compare
# against known good output

# Compare the known-good file $1 against the trial file $2 and finish
# the line that already carries the test name. Any non-zero status
# from cmp counts as a failure: cmp exits with 1 when the files differ
# and with a status greater than 1 when it could not compare them, and
# neither case is a pass.
report() {
  if cmp -s "$1" "$2"
  then
    # No failure, so announce success
    echo ": OK"
  else
    # Announce failure and print out the difference
    echo ": failed"
    diff -c "$1" "$2"
    echo
  fi
}

# Build our compiler if needed
if [ ! -f ../cwj ]
then (cd ..; make install)
fi

# Try to use each input source file
for i in input*c
do
  if [ -f "out.$i" ]
  then
    # Output file: compile the source, run it and
    # capture the output, and compare it against
    # the known-good output.
    # printf is used for the test name because the behaviour of
    # "echo -n" is not the same in every /bin/sh
    printf '%s' "$i"
    ../cwj -o out "$i"
    ./out > "trial.$i"
    report "out.$i" "trial.$i"

  elif [ -f "err.$i" ]
  then
    # Error file: compile the source and
    # capture the error messages. Compare
    # against the known-bad output. Same
    # mechanism as before
    printf '%s' "$i"
    ../cwj "$i" 2> "trial.$i"
    report "err.$i" "trial.$i"

  else
    # We can't do anything if there's no file to test against
    echo "Can't run test on $i, no output file!"
  fi

  rm -f out out.s "trial.$i"
done
# Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../compn -o out $i ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../compn $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.o out.s "trial.$i" done ================================================ FILE: 48_Static/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->type = type; n->left = left; n->mid = mid; n->right = right; n->sym = sym; n->a_intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, struct symtable *sym, int intvalue) { return (mkastnode(op, type, NULL, NULL, NULL, sym, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, struct symtable *sym, int intvalue) { return (mkastnode(op, type, left, NULL, NULL, sym, 
intvalue)); } // Generate and return a new label number // just for AST dumping purposes static int dumpid = 1; static int gendumplabel(void) { return (dumpid++); } // Given an AST tree, print it out and follow the // traversal of the tree that genAST() follows void dumpAST(struct ASTnode *n, int label, int level) { int Lfalse, Lstart, Lend; switch (n->op) { case A_IF: Lfalse = gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_IF"); if (n->right) { Lend = gendumplabel(); fprintf(stdout, ", end L%d", Lend); } fprintf(stdout, "\n"); dumpAST(n->left, Lfalse, level + 2); dumpAST(n->mid, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); return; case A_WHILE: Lstart = gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_WHILE, start L%d\n", Lstart); Lend = gendumplabel(); dumpAST(n->left, Lend, level + 2); dumpAST(n->right, NOLABEL, level + 2); return; } // Reset level to -2 for A_GLUE if (n->op == A_GLUE) level = -2; // General AST node handling if (n->left) dumpAST(n->left, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); for (int i = 0; i < level; i++) fprintf(stdout, " "); switch (n->op) { case A_GLUE: fprintf(stdout, "\n\n"); return; case A_FUNCTION: fprintf(stdout, "A_FUNCTION %s\n", n->sym->name); return; case A_ADD: fprintf(stdout, "A_ADD\n"); return; case A_SUBTRACT: fprintf(stdout, "A_SUBTRACT\n"); return; case A_MULTIPLY: fprintf(stdout, "A_MULTIPLY\n"); return; case A_DIVIDE: fprintf(stdout, "A_DIVIDE\n"); return; case A_EQ: fprintf(stdout, "A_EQ\n"); return; case A_NE: fprintf(stdout, "A_NE\n"); return; case A_LT: fprintf(stdout, "A_LE\n"); return; case A_GT: fprintf(stdout, "A_GT\n"); return; case A_LE: fprintf(stdout, "A_LE\n"); return; case A_GE: fprintf(stdout, "A_GE\n"); return; case A_INTLIT: fprintf(stdout, "A_INTLIT %d\n", n->a_intvalue); return; case A_STRLIT: fprintf(stdout, "A_STRLIT rval label L%d\n", n->a_intvalue); return; 
case A_IDENT: if (n->rvalue) fprintf(stdout, "A_IDENT rval %s\n", n->sym->name); else fprintf(stdout, "A_IDENT %s\n", n->sym->name); return; case A_ASSIGN: fprintf(stdout, "A_ASSIGN\n"); return; case A_WIDEN: fprintf(stdout, "A_WIDEN\n"); return; case A_RETURN: fprintf(stdout, "A_RETURN\n"); return; case A_FUNCCALL: fprintf(stdout, "A_FUNCCALL %s\n", n->sym->name); return; case A_ADDR: fprintf(stdout, "A_ADDR %s\n", n->sym->name); return; case A_DEREF: if (n->rvalue) fprintf(stdout, "A_DEREF rval\n"); else fprintf(stdout, "A_DEREF\n"); return; case A_SCALE: fprintf(stdout, "A_SCALE %d\n", n->a_size); return; case A_PREINC: fprintf(stdout, "A_PREINC %s\n", n->sym->name); return; case A_PREDEC: fprintf(stdout, "A_PREDEC %s\n", n->sym->name); return; case A_POSTINC: fprintf(stdout, "A_POSTINC\n"); return; case A_POSTDEC: fprintf(stdout, "A_POSTDEC\n"); return; case A_NEGATE: fprintf(stdout, "A_NEGATE\n"); return; case A_BREAK: fprintf(stdout, "A_BREAK\n"); return; case A_CONTINUE: fprintf(stdout, "A_CONTINUE\n"); return; case A_CASE: fprintf(stdout, "A_CASE %d\n", n->a_intvalue); return; case A_DEFAULT: fprintf(stdout, "A_DEFAULT\n"); return; case A_SWITCH: fprintf(stdout, "A_SWITCH\n"); return; case A_CAST: fprintf(stdout, "A_CAST %d\n", n->type); return; case A_ASPLUS: fprintf(stdout, "A_ASPLUS\n"); return; case A_ASMINUS: fprintf(stdout, "A_ASMINUS\n"); return; case A_ASSTAR: fprintf(stdout, "A_ASSTAR\n"); return; case A_ASSLASH: fprintf(stdout, "A_ASSLASH\n"); return; default: fatald("Unknown dumpAST operator", n->op); } } ================================================ FILE: 48_Static/types.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Types and type handling // Copyright (c) 2019 Warren Toomey, GPL3 // Return true if a type is an int type // of any size, false otherwise int inttype(int type) { return (((type & 0xf) == 0) && (type >= P_CHAR && type <= P_LONG)); } // Return true if a type is of pointer 
type int ptrtype(int type) { return ((type & 0xf) != 0); } // Given a primitive type, return // the type which is a pointer to it int pointer_to(int type) { if ((type & 0xf) == 0xf) fatald("Unrecognised in pointer_to: type", type); return (type + 1); } // Given a primitive pointer type, return // the type which it points to int value_at(int type) { if ((type & 0xf) == 0x0) fatald("Unrecognised in value_at: type", type); return (type - 1); } // Given a type and a composite type pointer, return // the size of this type in bytes int typesize(int type, struct symtable *ctype) { if (type == P_STRUCT || type == P_UNION) return (ctype->size); return (genprimsize(type)); } // Given an AST tree and a type which we want it to become, // possibly modify the tree by widening or scaling so that // it is compatible with this type. Return the original tree // if no changes occurred, a modified tree, or NULL if the // tree is not compatible with the given type. // If this will be part of a binary operation, the AST op is not zero. struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op) { int ltype; int lsize, rsize; ltype = tree->type; // XXX No idea on these yet if (ltype == P_STRUCT || ltype == P_UNION) fatal("Don't know how to do this yet"); if (rtype == P_STRUCT || rtype == P_UNION) fatal("Don't know how to do this yet"); // Compare scalar int types if (inttype(ltype) && inttype(rtype)) { // Both types same, nothing to do if (ltype == rtype) return (tree); // Get the sizes for each type lsize = typesize(ltype, NULL); rsize = typesize(rtype, NULL); // Tree's size is too big if (lsize > rsize) return (NULL); // Widen to the right if (rsize > lsize) return (mkastunary(A_WIDEN, rtype, tree, NULL, 0)); } // For pointers if (ptrtype(ltype) && ptrtype(rtype)) { // We can compare them if (op >= A_EQ && op <= A_GE) return(tree); // A comparison of the same type for a non-binary operation is OK, // or when the left tree is of `void *` type. 
if (op == 0 && (ltype == rtype || ltype == pointer_to(P_VOID))) return (tree); } // We can scale only on A_ADD or A_SUBTRACT operation if (op == A_ADD || op == A_SUBTRACT) { // Left is int type, right is pointer type and the size // of the original type is >1: scale the left if (inttype(ltype) && ptrtype(rtype)) { rsize = genprimsize(value_at(rtype)); if (rsize > 1) return (mkastunary(A_SCALE, rtype, tree, NULL, rsize)); else return (tree); // Size 1, no need to scale } } // If we get here, the types are not compatible return (NULL); } ================================================ FILE: 49_Ternary/Makefile ================================================ # Define the location of the include directory # and the location to install the compiler binary INCDIR=/tmp/include BINDIR=/tmp HSRCS= data.h decl.h defs.h SRCS= cg.c decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c ARMSRCS= cg_arm.c decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c cwj: $(SRCS) $(HSRCS) cc -o cwj -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCS) compn: $(SRCN) $(HSRCS) cc -D__NASM__ -o compn -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCN) cwjarm: $(ARMSRCS) $(HSRCS) cc -o cwjarm -g -Wall $(ARMSRCS) cp cwjarm cwj install: cwj mkdir -p $(INCDIR) rsync -a include/. $(INCDIR) cp cwj $(BINDIR) chmod +x $(BINDIR)/cwj installn: compn mkdir -p $(INCDIR) rsync -a include/. 
$(INCDIR) cp compn $(BINDIR) chmod +x $(BINDIR)/compn clean: rm -f cwj cwjarm compn *.o *.s out a.out test: install tests/runtests (cd tests; chmod +x runtests; ./runtests) armtest: cwjarm tests/runtests (cd tests; chmod +x runtests; ./runtests) testn: installn tests/runtestsn (cd tests; chmod +x runtestsn; ./runtestsn) ================================================ FILE: 49_Ternary/Readme.md ================================================ # Part 49: The Ternary Operator In this part of our compiler writing journey, I've implemented the [ternary operator](https://en.wikipedia.org/wiki/%3F:). This is one of the really natty operators in the C language which can reduce lines of code in your source file. The basic syntax is: ``` ternary_expression: logical_expression '?' true_expression ':' false_expression ; ``` We evaluate the logical expression. If this is true, we then evaluate only the true expression. Otherwise, we only evaluate the false expression. The result of either the true or the false expression becomes the result of the whole expression. One subtlety here is that, for example, in: ```c x= y != 5 ? y++ : ++y; ``` If `y != 5` then `x= y++`, otherwise `x= ++y`. Either way, `y` is only incremented once. We can rewrite the above code as an IF statement: ```c if (y != 5) x= y++; else x= ++y; ``` However, the ternary operator is an expression, so we can also do: ```c x= 23 * (y != 5 ? y++ : ++y) - 18; ``` This can't be easily converted into an IF statement now. However, we can borrow some of the mechanics from the IF code generator to use for the ternary operator. ## Tokens, Operators and Operator Precedence We already have ':' in our grammar as a token; now we need to add the '?' token. This is going to be treated as an operator, so we'd better set its precedence. According to [this list of C operators](https://en.cppreference.com/w/c/language/operator_precedence), the '?' operator has precedence just above the assignment operators. 
The way we have designed our precedence, our operator tokens must be in precedence order and the AST operators must correspond to the tokens. Thus, in `defs.h`, we now have: ```c // Token types enum { T_EOF, // Binary operators T_ASSIGN, T_ASPLUS, T_ASMINUS, T_ASSTAR, T_ASSLASH, T_QUESTION, // The '?' token ... enum { A_ASSIGN = 1, A_ASPLUS, A_ASMINUS, A_ASSTAR, A_ASSLASH, A_TERNARY, // The ternary AST operator ... ``` And in `expr.c`, we now have: ```c static int OpPrec[] = { 0, 10, 10, // T_EOF, T_ASSIGN, T_ASPLUS, 10, 10, 10, // T_ASMINUS, T_ASSTAR, T_ASSLASH, 15, // T_QUESTION ... ``` As always, I will leave you to browse the changes in `scan.c` for the new T_QUESTION token. ## Parsing the Ternary Operator Even though the ternary operator isn't a binary operator, because it has precedence, we need to implement it in `binexpr()` with the binary operators. Here's the code: ```c struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; struct ASTnode *ltemp, ... switch (ASTop) { case A_TERNARY: // Ensure we have a ':' token, scan in the expression after it match(T_COLON, ":"); ltemp= binexpr(0); // Build and return the AST for this statement. Use the middle // expression's type as the return type. XXX We should also // consider the third expression's type. return (mkastnode(A_TERNARY, right->type, left, right, ltemp, NULL, 0)); ... } ... } ``` When we hit the A_TERNARY case, we have the AST tree of the logical expression stored in `left`, the true expression in `right` and we have parsed the '?' token. Now we need to parse the ':' token and the false expression. With all three tokens parsed, we can now build an AST node to hold all three. One problem is how to determine the type of this node. As you can see, it's easy to choose the type of the middle token. To do it properly, I should see, of the true and false expressions, which one is wider and choose that one. I'll leave it for now and revisit it. 
## Generating The Assembly Code: Issues Generating the assembly code for the ternary operator is very similar to that of the IF statement: we evaluate a logical expression. If true, we evaluate one expression; if false, we evaluate the other. We are going to need some labels, and we are going to have to insert jumps to these labels as required. I did actually try to modify the `genIF()` code in `gen.c` to do both IF and the ternary operator, but it was easier just to write another function. There is one wrinkle to the generation of the assembly code. Consider: ```c x= (y > 4) ? 2 * y - 18 : y * z - 3 * a; ``` We have three expressions, and we need to allocate registers to evaluate each one. After the logical expression is evaluated and we have jumped to the correct next section of code, we can free all the registers used in the evaluation. For the true and false expressions, we can free all the registers *except one*: the register holding the expression's rvalue. We also can't predict which register this will be, because each expression has different operands and operators; thus, the number of registers used will differ, and the (last) register allocated to hold the result may be different. But we need to know which register does hold the result of both the true and false expressions, so when we jump to the code that will use this result, it knows which register to access. Thus, we need to do three things: + allocate a register to hold the result *before* we run either the true or false expression, + copy the true and false expression result into this register, and + free all registers *except* the register with the result. ## Freeing Registers We already have a function to free all registers, `freeall_registers()`, which takes no arguments. Our registers are numbered zero upwards. I've modified this function to take, as an argument, the register we want to *keep*.
And, in order to free *all* registers, we pass it NOREG which is defined to be the number `-1`: ```c // Set all registers as available. // But if reg is positive, don't free that one. void freeall_registers(int keepreg) { int i; for (i = 0; i < NUMFREEREGS; i++) if (i != keepreg) freereg[i] = 1; } ``` Throughout the compiler, you will now see `freeall_registers(-1)` to replace what used to be `freeall_registers()`. ## Generating The Assembly Code We now have a function in `gen.c` to deal with the ternary operator. It gets called from the top of `genAST()`: ```c // We have some specific AST node handling at the top // so that we don't evaluate the child sub-trees immediately switch (n->op) { ... case A_TERNARY: return (gen_ternary(n)); ``` Let's have a look at the function in stages. ```c // Generate code for a ternary expression static int gen_ternary(struct ASTnode *n) { int Lfalse, Lend; int reg, expreg; // Generate two labels: one for the // false expression, and one for the // end of the overall expression Lfalse = genlabel(); Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. genAST(n->left, Lfalse, NOLABEL, NOLABEL, n->op); genfreeregs(-1); ``` This is pretty much exactly the same as the IF generating code. We pass the logical expression sub-tree, the false label and the A_TERNARY operator into `genAST()`. When `genAST()` sees this, it knows to generate a jump if false to this label. ```c // Get a register to hold the result of the two expressions reg = alloc_register(); // Generate the true expression and the false label. // Move the expression result into the known register. expreg = genAST(n->mid, NOLABEL, NOLABEL, NOLABEL, n->op); cgmove(expreg, reg); // Don't free the register holding the result, though! genfreeregs(reg); cgjump(Lend); cglabel(Lfalse); ``` With the logical expression done, we can now allocate the register to hold both the true and false expression results. 
We call `genAST()` to generate the true expression code, and we get back the register with the result. We now have to move this register's value into the known register. With this done, we can free all registers except the known register. If we did the true expression, we now jump to the end of the ternary assembly code. ```c // Generate the false expression and the end label. // Move the expression result into the known register. expreg = genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); cgmove(expreg, reg); // Don't free the register holding the result, though! genfreeregs(reg); cglabel(Lend); return (reg); } ``` And the code to evaluate the false expression is very similar. Either way, execution will get to the end label and, once we get here, we know that the ternary result is in the known register. ## Testing the New Code I was worried about nested ternary operators, which I've used quite a bit in other code. The ternary operator is *right associative*, which means we bind the '?' to the right more tightly than to the left. Fortunately, as we greedily seek out the ':' token and the false expression once we have parsed the '?' token, our parser is already treating the ternary operator as right associative. `tests/input121.c` is an example of a nested ternary operator: ```c #include int x; int y= 3; int main() { for (y= 0; y < 10; y++) { x= (y < 4) ? y + 2 : (y > 7) ? 1000 : y + 9; printf("%d\n", x); } return(0); } ``` If `y<4`, then `x` becomes `y+2`. If not, we evaluate the second ternary operator. If `y>7`, `x` becomes 1000, otherwise it becomes `y+9`. The effect is to do `y+2` for `y` values 0 to 3, `y+9` for `y` values 4 to 7, and 1000 for higher y values: ``` 2 3 4 5 13 14 15 16 1000 1000 ``` ## Conclusion and What's Next Like a few of the steps so far, I was apprehensive to tackle the ternary operator because I thought it would be very difficult. I did have problems putting it into the IF generating code, so I stepped back a bit. 
Actually, I went out to see a movie with my wife and this gave me a chance to mull things over. I realised that I had to free all but one registers, and I should write a separate function. After that, writing the code was straight forward. It's always good to step away from the keyboard now and then. In the next part of our compiler writing journey, I will feed the compiler to itself, look at the parse errors I get and choose one or more of them to fix. > P.S. We've reached 5,000 lines of code and 90,000 words in the Readme files. We must be nearly there! [Next step](../50_Mop_up_pt1/Readme.md) ================================================ FILE: 49_Ternary/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\t.text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\t.data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. 
This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch (type) { case P_CHAR: return (offset); case P_INT: case P_LONG: break; default: if (!ptrtype(type)) fatald("Bad type in cg_align:", type); } // Here we have an int or a long. Align it on a 4-byte offset // I put the generic code here so it can be reused elsewhere. alignment = 4; offset = (offset + direction * (alignment - 1)) & ~(alignment - 1); return (offset); } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. static int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. // We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "%r10", "%r11", "%r12", "%r13", "%r9", "%r8", "%rcx", "%rdx", "%rsi", "%rdi" }; static char *breglist[] = { "%r10b", "%r11b", "%r12b", "%r13b", "%r9b", "%r8b", "%cl", "%dl", "%sil", "%dil" }; static char *dreglist[] = { "%r10d", "%r11d", "%r12d", "%r13d", "%r9d", "%r8d", "%ecx", "%edx", "%esi", "%edi" }; // Set all registers as available. // But if reg is positive, don't free that one. void freeall_registers(int keepreg) { int i; for (i = 0; i < NUMFREEREGS; i++) if (i != keepreg) freereg[i] = 1; } // Allocate a free register. Return the number of // the register. 
Die if no available registers. int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(NOREG); cgtextseg(); fprintf(Outfile, "# internal switch(expr) routine\n" "# %%rsi = switch table, %%rax = expr\n" "# from SubC: http://www.t3x.org/subc/\n" "\n" "switch:\n" " pushq %%rsi\n" " movq %%rdx, %%rsi\n" " movq %%rax, %%rbx\n" " cld\n" " lodsq\n" " movq %%rax, %%rcx\n" "next:\n" " lodsq\n" " movq %%rax, %%rdx\n" " lodsq\n" " cmpq %%rdx, %%rbx\n" " jnz no\n" " popq %%rsi\n" " jmp *%%rax\n" "no:\n" " loop next\n" " lodsq\n" " popq %%rsi\n" " jmp *%%rax\n" "\n"); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(struct symtable *sym) { char *name = sym->name; struct symtable *parm, *locvar; int cnt; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the %rsp and %rsp if (sym->class == C_GLOBAL) fprintf(Outfile, "\t.globl\t%s\n" "\t.type\t%s, @function\n", name, name); fprintf(Outfile, "%s:\n" "\tpushq\t%%rbp\n" "\tmovq\t%%rsp, %%rbp\n", name); // Copy any in-register parameters to the stack, up to six of them // The remaining parameters are already on the stack for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) { if (cnt > 6) { parm->st_posn = paramOffset; paramOffset += 8; } else { parm->st_posn = newlocaloffset(parm->type); cgstorlocal(paramReg--, parm); } } // For the 
remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) { locvar->st_posn = newlocaloffset(locvar->type); } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\taddq\t$%d,%%rsp\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->st_endlabel); fprintf(Outfile, "\taddq\t$%d,%%rsp\n", stackOffset); fputs("\tpopq %rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. 
int cgloadglob(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name); } else // Print out the code to initialise it switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovzbq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovslq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. 
int cgloadlocal(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->st_posn); fprintf(Outfile, "\tmovq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->st_posn); } else switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->st_posn); fprintf(Outfile, "\tmovzbq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->st_posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->st_posn); fprintf(Outfile, "\tmovslq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->st_posn); break; default: fatald("Bad type in cgloadlocal:", sym->type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tleaq\tL%d(%%rip), %s\n", label, reglist[r]); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int 
cgsub(int r1, int r2) { fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tandq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\torq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txorq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshlq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshrq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tnegq\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnotq\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); return (r); } // Convert an integer value to a boolean value. 
Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s@PLT\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\taddq\t$%d, %%rsp\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpushq\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmovq\t%s, %s\n", reglist[r], reglist[FIRSTPARAMREG - argposn + 1]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsalq\t$%d, %s\n", val, reglist[r]); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %s(%%rip)\n", reglist[r], sym->name); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %s(%%rip)\n", breglist[r], sym->name); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %s(%%rip)\n", dreglist[r], sym->name); break; default: fatald("Bad type in cgstorglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %d(%%rbp)\n", reglist[r], sym->st_posn); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %d(%%rbp)\n", breglist[r], sym->st_posn); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %d(%%rbp)\n", dreglist[r], sym->st_posn); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size, type; int initvalue; int i; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the variable (or its elements if an array) // and the type of the variable if (node->stype == S_ARRAY) { size = typesize(value_at(node->type), node->ctype); type = value_at(node->type); } else { size = node->size; type = node->type; } // Generate the global identity and the label cgdataseg(); if (node->class == C_GLOBAL) fprintf(Outfile, 
"\t.globl\t%s\n", node->name); fprintf(Outfile, "%s:\n", node->name); // Output space for one or more elements for (i = 0; i < node->nelems; i++) { // Get any initial value initvalue = 0; if (node->initlist != NULL) initvalue = node->initlist[i]; // Generate the space for this type switch (size) { case 1: fprintf(Outfile, "\t.byte\t%d\n", initvalue); break; case 4: fprintf(Outfile, "\t.long\t%d\n", initvalue); break; case 8: // Generate the pointer to a string literal. Treat a zero value // as actually zero, not the label L0 if (node->initlist != NULL && type == pointer_to(P_CHAR) && initvalue != 0) fprintf(Outfile, "\t.quad\tL%d\n", initvalue); else fprintf(Outfile, "\t.quad\t%d\n", initvalue); break; default: for (int i = 0; i < size; i++) fprintf(Outfile, "\t.byte\t0\n"); } } } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\t.byte\t%d\n", *cptr); } fprintf(Outfile, "\t.byte\t0\n"); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(NOREG); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Deal with pointers here as we can't put them in // the switch statement if (ptrtype(sym->type)) fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); else { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzbl\t%s, %%eax\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %%eax\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } } cgjump(sym->st_endlabel); } // Generate code to load the 
address of an // identifier into a variable. Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL || sym->class == C_STATIC) fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", sym->name, reglist[r]); else fprintf(Outfile, "\tleaq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzbq\t(%s), %s\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovslq\t(%s), %s\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { // Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmovb\t%s, (%s)\n", breglist[r1], reglist[r2]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } // Generate a switch jump table and the code to // load the registers and call the switch() code void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; // Get a label for the switch table label = genlabel(); cglabel(label); // Heuristic. If we have no cases, create one case // which points to the default case if (casecount == 0) { caseval[0] = 0; caselabel[0] = defaultlabel; casecount = 1; } // Generate the switch jump table. 
fprintf(Outfile, "\t.quad\t%d\n", casecount); for (i = 0; i < casecount; i++) fprintf(Outfile, "\t.quad\t%d, L%d\n", caseval[i], caselabel[i]); fprintf(Outfile, "\t.quad\tL%d\n", defaultlabel); // Load the specific registers cglabel(toplabel); fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); fprintf(Outfile, "\tleaq\tL%d(%%rip), %%rdx\n", label); fprintf(Outfile, "\tjmp\tswitch\n"); } // Move value between registers void cgmove(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s, %s\n", reglist[r1], reglist[r2]); } ================================================ FILE: 49_Ternary/cg_arm.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for ARMv6 on Raspberry Pi // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. static int freereg[4]; static char *reglist[4] = { "r4", "r5", "r6", "r7" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // We have to store large integer literal values in memory. // Keep a list of them which will be output in the postamble #define MAXINTS 1024 int Intlist[MAXINTS]; static int Intslot = 0; // Determine the offset of a large integer // literal from the .L3 label. If the integer // isn't in the list, add it. 
static void set_int_offset(int val) { int offset = -1; // See if it is already there for (int i = 0; i < Intslot; i++) { if (Intlist[i] == val) { offset = 4 * i; break; } } // Not in the list, so add it if (offset == -1) { offset = 4 * Intslot; if (Intslot == MAXINTS) fatal("Out of int slots in set_int_offset()"); Intlist[Intslot++] = val; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L3+%d\n", offset); } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Print out the assembly postamble void cgpostamble() { // Print out the global variables fprintf(Outfile, ".L2:\n"); for (int i = 0; i < Globs; i++) { if (Symtable[i].stype == S_VARIABLE) fprintf(Outfile, "\t.word %s\n", Symtable[i].name); } // Print out the integer literals fprintf(Outfile, ".L3:\n"); for (int i = 0; i < Intslot; i++) { fprintf(Outfile, "\t.word %d\n", Intlist[i]); } } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, \%%function\n" "%s:\n" "\tpush\t{fp, lr}\n" "\tadd\tfp, sp, #4\n" "\tsub\tsp, sp, #8\n" "\tstr\tr0, [fp, #-8]\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Symtable[id].endlabel); fputs("\tsub\tsp, fp, #4\n" "\tpop\t{fp, pc}\n" "\t.align\t2\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); // If the literal value is small, do it with one instruction if (value <= 1000) fprintf(Outfile, "\tmov\t%s, #%d\n", reglist[r], value); else { set_int_offset(value); fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); } return (r); } // Determine the offset of a variable from the .L2 // label. Yes, this is inefficient code. static void set_var_offset(int id) { int offset = 0; // Walk the symbol table up to id. 
// Find S_VARIABLEs and add on 4 until // we get to our variable for (int i = 0; i < id; i++) { if (Symtable[i].stype == S_VARIABLE) offset += 4; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L2+%d\n", offset); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tldrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s, %s\n", reglist[r1], reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\tmul\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { // To do a divide: r0 holds the dividend, r1 holds the divisor. // The quotient is in r0. 
fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r1]); fprintf(Outfile, "\tmov\tr1, %s\n", reglist[r2]); fprintf(Outfile, "\tbl\t__aeabi_idiv\n"); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r1]); free_register(r2); return (r1); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]); fprintf(Outfile, "\tbl\t%s\n", Symtable[id].name); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r]); return (r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tlsl\t%s, %s, #%d\n", reglist[r], reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tstr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgstorglob:", Symtable[id].type); } return (r); } // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { if (ptrtype(type)) return (4); switch (type) { case P_CHAR: return (1); case P_INT: case P_LONG: return (4); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Symtable[id].type); fprintf(Outfile, "\t.data\n" "\t.globl\t%s\n", Symtable[id].name); switch (typesize) { case 1: fprintf(Outfile, "%s:\t.byte\t0\n", Symtable[id].name); break; case 4: fprintf(Outfile, "%s:\t.long\t0\n", Symtable[id].name); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "moveq", "movne", "movlt", "movgt", "movle", "movge" }; // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "movne", "moveq", "movge", "movle", "movgt", "movlt" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #1\n", cmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #0\n", invcmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\tuxtb\t%s, %s\n", reglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tb\tL%d\n", l); } // List of inverted branch instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *brlist[] = { "bne", "beq", "bge", "ble", "bgt", "blt" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", brlist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[reg]); cgjump(Symtable[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. Return a new register int cgaddress(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); fprintf(Outfile, "\tmov\t%s, r3\n", reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tldrb\t%s, [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [%s]\n", reglist[r1], reglist[r2]); break; case P_INT: case P_LONG: fprintf(Outfile, "\tstr\t%s, [%s]\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 49_Ternary/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { no_seg, 
text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\tsection .text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\tsection .data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch (type) { case P_CHAR: return (offset); case P_INT: case P_LONG: break; default: if (!ptrtype(type)) fatald("Bad type in cg_align:", type); } // Here we have an int or a long. Align it on a 4-byte offset // I put the generic code here so it can be reused elsewhere. alignment = 4; offset = (offset + direction * (alignment - 1)) & ~(alignment - 1); return (offset); } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. 
// We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "r10", "r11", "r12", "r13", "r9", "r8", "rcx", "rdx", "rsi", "rdi" }; static char *breglist[] = { "r10b", "r11b", "r12b", "r13b", "r9b", "r8b", "cl", "dl", "sil", "dil" }; static char *dreglist[] = { "r10d", "r11d", "r12d", "r13d", "r9d", "r8d", "ecx", "edx", "esi", "edi" }; // Set all registers as available // But if reg is positive, don't free that one. void freeall_registers(int keepreg) { int i; for (i = 0; i < NUMFREEREGS; i++) if (i != keepreg) freereg[i] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(NOREG); fputs("\textern\tprintint\n", Outfile); fputs("\textern\tprintchar\n", Outfile); fputs("\textern\topen\n", Outfile); fputs("\textern\tclose\n", Outfile); fputs("\textern\tread\n", Outfile); fputs("\textern\twrite\n", Outfile); fputs("\textern\tprintf\n", Outfile); cgtextseg(); fprintf(Outfile, "; internal switch(expr) routine\n" "; rsi = switch table, rax = expr\n" "; from SubC: http://www.t3x.org/subc/\n" "\n" "switch:\n" " push rsi\n" " mov rsi, rdx\n" " mov rbx, rax\n" " cld\n" " lodsq\n" " mov rcx, rax\n" "next:\n" " lodsq\n" " mov rdx, rax\n" " lodsq\n" " cmp rbx, rdx\n" " jnz no\n" " pop rsi\n" " jmp rax\n" "no:\n" " loop next\n" " lodsq\n" " pop rsi\n" " jmp rax\n" "\n"); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(struct symtable *sym) { char *name = sym->name; struct symtable *parm, *locvar; int cnt; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the rsp and rbp if (sym->class == C_GLOBAL) fprintf(Outfile, "\tglobal\t%s\n", name); fprintf(Outfile, "%s:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n", name); // Copy any in-register parameters to the stack, up to six of them // The remaining parameters are already on the stack for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) { if (cnt > 6) { parm->st_posn = paramOffset; paramOffset += 8; } else { parm->st_posn = newlocaloffset(parm->type); cgstorlocal(paramReg--, parm); } } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. 
for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) { locvar->st_posn = newlocaloffset(locvar->type); } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\tadd\trsp, %d\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->st_endlabel); fprintf(Outfile, "\tadd\trsp, %d\n", stackOffset); fputs("\tpop rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadglob(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); } else // Print out the code to initialise it switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_PREDEC) 
fprintf(Outfile, "\tdec\tdword [%s]\n", sym->name); fprintf(Outfile, "\tmovsx\t%s, word [%s]\n", dreglist[r], sym->name); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword [%s]\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadlocal(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->st_posn); fprintf(Outfile, "\tmov\t%s, [rbp+%d]\n", reglist[r], sym->st_posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->st_posn); } else switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->st_posn); fprintf(Outfile, "\tmovzx\t%s, byte [rbp+%d]\n", reglist[r], sym->st_posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->st_posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", sym->st_posn); fprintf(Outfile, "\tmovsx\t%s, dword [rbp+%d]\n", reglist[r], sym->st_posn); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", 
sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", sym->st_posn); break; default: fatald("Bad type in cgloadlocal:", sym->type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, L%d\n", reglist[r], label); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tand\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\tor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshl\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, 
"\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshr\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tneg\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnot\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r], breglist[r]); return (r); } // Convert an integer value to a boolean value. Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, byte %s\n", reglist[r], breglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\tadd\trsp, %d\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmov\t%s, rax\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpush\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmov\t%s, %s\n", reglist[FIRSTPARAMREG - argposn + 1], reglist[r]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsal\t%s, %d\n", reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, dreglist[r]); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\tqword\t[rbp+%d], %s\n", sym->st_posn, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\tbyte\t[rbp+%d], %s\n", sym->st_posn, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\tdword\t[rbp+%d], %s\n", sym->st_posn, dreglist[r]); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size, type; int initvalue; int i; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the variable (or its elements if an array) // and the type of the variable if (node->stype == S_ARRAY) { size = typesize(value_at(node->type), node->ctype); type = value_at(node->type); } else { size = node->size; type = node->type; } // Generate the global identity and the label cgdataseg(); if (node->class == C_GLOBAL) fprintf(Outfile, "\tglobal\t%s\n", 
node->name); fprintf(Outfile, "%s:\n", node->name); // Output space for one or more elements for (i = 0; i < node->nelems; i++) { // Get any initial value initvalue = 0; if (node->initlist != NULL) initvalue = node->initlist[i]; // Generate the space for this type // original version switch(size) { case 1: fprintf(Outfile, "\tdb\t%d\n", initvalue); break; case 4: fprintf(Outfile, "\tdd\t%d\n", initvalue); break; case 8: // Generate the pointer to a string literal. Treat a zero value // as actually zero, not the label L0 if (node->initlist != NULL && type == pointer_to(P_CHAR) && initvalue != 0) fprintf(Outfile, "\tdq\tL%d\n", initvalue); else fprintf(Outfile, "\tdq\t%d\n", initvalue); break; default: for (int i = 0; i < size; i++) fprintf(Outfile, "\tdb\t0\n"); /* compact version using times instead of loop fprintf(Outfile, "\ttimes\t%d\tdb\t0\n", size); */ } } } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\tdb\t%d\n", *cptr); } fprintf(Outfile, "\tdb\t0\n"); /* put string in readable format on a single line // probably went overboard with error checking int comma = 0, quote = 0, start = 1; fprintf(Outfile, "\tdb\t"); for (cptr=strvalue; *cptr; cptr++) { if ( ! isprint(*cptr) ) if (comma || start) { fprintf(Outfile, "%d, ", *cptr); start = 0; comma = 1; } else if (quote) { fprintf(Outfile, "\', %d, ", *cptr); comma = 1; quote = 0; } else { fprintf(Outfile, "%d, ", *cptr); comma = 1; quote = 0; } else if (start || comma) { fprintf(Outfile, "\'%c", *cptr); start = comma = 0; quote = 1; } else { fprintf(Outfile, "%c", *cptr); comma = 0; quote = 1; } } if (comma || start) fprintf(Outfile, "0\n"); else fprintf(Outfile, "\', 0\n"); */ } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(NOREG); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Deal with pointers here as we can't put them in // the switch statement if (ptrtype(sym->type)) fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); else { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzx\teax, %s\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmov\teax, %s\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } } cgjump(sym->st_endlabel); } // Generate code to load the address of an 
// identifier into a variable. Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL || sym->class == C_STATIC) fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r], sym->name); else fprintf(Outfile, "\tlea\t%s, [rbp+%d]\n", reglist[r], sym->st_posn); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovsx\t%s, dword [%s]\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { //Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmov\t[%s], byte %s\n", reglist[r2], breglist[r1]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } // Generate a switch jump table and the code to // load the registers and call the switch() code void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; // Get a label for the switch table label = genlabel(); cglabel(label); // Heuristic. If we have no cases, create one case // which points to the default case if (casecount == 0) { caseval[0] = 0; caselabel[0] = defaultlabel; casecount = 1; } // Generate the switch jump table. 
fprintf(Outfile, "\tdq\t%d\n", casecount); for (i = 0; i < casecount; i++) fprintf(Outfile, "\tdq\t%d, L%d\n", caseval[i], caselabel[i]); fprintf(Outfile, "\tdq\tL%d\n", defaultlabel); // Load the specific registers cglabel(toplabel); fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); fprintf(Outfile, "\tmov\trdx, L%d\n", label); fprintf(Outfile, "\tjmp\tswitch\n"); } // Move value between registers void cgmove(int r1, int r2) { fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r2], reglist[r1]); } ================================================ FILE: 49_Ternary/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Putback; // Character put back by scanner extern_ struct symtable *Functionid; // Symbol ptr of the current function extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ char *Infilename; // Name of file we are parsing extern_ char *Outfilename; // Name of file we opened as Outfile extern_ struct token Token; // Last token scanned extern_ struct token Peektoken; // A look-ahead token extern_ char Text[TEXTLEN + 1]; // Last identifier scanned extern_ int Looplevel; // Depth of nested loops extern_ int Switchlevel; // Depth of nested switches // Symbol table lists extern_ struct symtable *Globhead, *Globtail; // Global variables and functions extern_ struct symtable *Loclhead, *Locltail; // Local variables extern_ struct symtable *Parmhead, *Parmtail; // Local parameters extern_ struct symtable *Membhead, *Membtail; // Temp list of struct/union members extern_ struct symtable *Structhead, *Structtail; // List of struct types extern_ struct symtable *Unionhead, *Uniontail; // List of union types extern_ struct symtable *Enumhead, *Enumtail; // List of enum types and values extern_ struct symtable *Typehead, *Typetail; // List of typedefs // Command-line flags extern_ int 
O_dumpAST; // If true, dump the AST trees extern_ int O_keepasm; // If true, keep any assembly files extern_ int O_assemble; // If true, assemble the assembly files extern_ int O_dolink; // If true, link the object files extern_ int O_verbose; // If true, print info on compilation stages ================================================ FILE: 49_Ternary/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 static struct symtable *composite_declaration(int type); static int typedef_declaration(struct symtable **ctype); static int type_of_typedef(char *name, struct symtable **ctype); static void enum_declaration(void); // Parse the current token and return a primitive type enum value, // a pointer to any composite type and possibly modify // the class of the type. int parse_type(struct symtable **ctype, int *class) { int type, exstatic = 1; // See if the class has been changed to extern or static while (exstatic) { switch (Token.token) { case T_EXTERN: if (*class == C_STATIC) fatal("Illegal to have extern and static at the same time"); *class = C_EXTERN; scan(&Token); break; case T_STATIC: if (*class == C_LOCAL) fatal("Compiler doesn't support static local declarations"); if (*class == C_EXTERN) fatal("Illegal to have extern and static at the same time"); *class = C_STATIC; scan(&Token); break; default: exstatic = 0; } } // Now work on the actual type keyword switch (Token.token) { case T_VOID: type = P_VOID; scan(&Token); break; case T_CHAR: type = P_CHAR; scan(&Token); break; case T_INT: type = P_INT; scan(&Token); break; case T_LONG: type = P_LONG; scan(&Token); break; // For the following, if we have a ';' after the // parsing then there is no type, so return -1. 
// Example: struct x {int y; int z}; case T_STRUCT: type = P_STRUCT; *ctype = composite_declaration(P_STRUCT); if (Token.token == T_SEMI) type = -1; break; case T_UNION: type = P_UNION; *ctype = composite_declaration(P_UNION); if (Token.token == T_SEMI) type = -1; break; case T_ENUM: type = P_INT; // Enums are really ints enum_declaration(); if (Token.token == T_SEMI) type = -1; break; case T_TYPEDEF: type = typedef_declaration(ctype); if (Token.token == T_SEMI) type = -1; break; case T_IDENT: type = type_of_typedef(Text, ctype); break; default: fatals("Illegal type, token", Token.tokstr); } return (type); } // Given a type parsed by parse_type(), scan in any following // '*' tokens and return the new type int parse_stars(int type) { while (1) { if (Token.token != T_STAR) break; type = pointer_to(type); scan(&Token); } return (type); } // Parse a type which appears inside a cast int parse_cast(void) { int type, class=0; struct symtable *ctype; // Get the type inside the parentheses type= parse_stars(parse_type(&ctype, &class)); // Do some error checking. I'm sure more can be done if (type == P_STRUCT || type == P_UNION || type == P_VOID) fatal("Cannot cast to a struct, union or void type"); return(type); } // Given a type, parse an expression of literals and ensure // that the type of this expression matches the given type. // Parse any type cast that precedes the expression. // If an integer literal, return this value. // If a string literal, return the label number of the string. 
int parse_literal(int type) { struct ASTnode *tree; // Parse the expression and optimise the resulting AST tree tree= optimise(binexpr(0)); // If there's a cast, get the child and // mark it as having the type from the cast if (tree->op == A_CAST) { tree->left->type= tree->type; tree= tree->left; } // The tree must now have an integer or string literal if (tree->op != A_INTLIT && tree->op != A_STRLIT) fatal("Cannot initialise globals with a general expression"); // If the type is char * and if (type == pointer_to(P_CHAR)) { // We have a string literal, return the label number if (tree->op == A_STRLIT) return(tree->a_intvalue); // We have a zero int literal, so that's a NULL if (tree->op == A_INTLIT && tree->a_intvalue==0) return(0); } // We only get here with an integer literal. The input type // is an integer type and is wide enough to hold the literal value if (inttype(type) && typesize(type, NULL) >= typesize(tree->type, NULL)) return(tree->a_intvalue); fatal("Type mismatch: literal vs. variable"); return(0); // Keep -Wall happy } // Given the type, name and class of a scalar variable, // parse any initialisation value and allocate storage for it. // Return the variable's symbol table entry. 
static struct symtable *scalar_declaration(char *varname, int type, struct symtable *ctype, int class, struct ASTnode **tree) { struct symtable *sym=NULL; struct ASTnode *varnode, *exprnode; *tree= NULL; // Add this as a known scalar switch (class) { case C_STATIC: case C_EXTERN: case C_GLOBAL: sym= addglob(varname, type, ctype, S_VARIABLE, class, 1, 0); break; case C_LOCAL: sym= addlocl(varname, type, ctype, S_VARIABLE, 1); break; case C_PARAM: sym= addparm(varname, type, ctype, S_VARIABLE); break; case C_MEMBER: sym= addmemb(varname, type, ctype, S_VARIABLE, 1); break; } // The variable is being initialised if (Token.token == T_ASSIGN) { // Only possible for a global or local if (class != C_GLOBAL && class != C_LOCAL && class != C_STATIC) fatals("Variable can not be initialised", varname); scan(&Token); // Globals must be assigned a literal value if (class == C_GLOBAL || class == C_STATIC) { // Create one initial value for the variable and // parse this value sym->initlist= (int *)malloc(sizeof(int)); sym->initlist[0]= parse_literal(type); } if (class == C_LOCAL) { // Make an A_IDENT AST node with the variable varnode = mkastleaf(A_IDENT, sym->type, sym, 0); // Get the expression for the assignment, make into a rvalue exprnode = binexpr(0); exprnode->rvalue = 1; // Ensure the expression's type matches the variable exprnode = modify_type(exprnode, varnode->type, 0); if (exprnode == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree *tree = mkastnode(A_ASSIGN, exprnode->type, exprnode, NULL, varnode, NULL, 0); } } // Generate any global space if (class == C_GLOBAL || class == C_STATIC) genglobsym(sym); return (sym); } // Given the type, name and class of an variable, parse // the size of the array, if any. Then parse any initialisation // value and allocate storage for it. // Return the variable's symbol table entry. 
static struct symtable *array_declaration(char *varname, int type, struct symtable *ctype, int class) { struct symtable *sym; // New symbol table entry int nelems= -1; // Assume the number of elements won't be given int maxelems; // The maximum number of elements in the init list int *initlist; // The list of initial elements int i=0, j; // Skip past the '[' scan(&Token); // See we have an array size if (Token.token != T_RBRACKET) { nelems= parse_literal(P_INT); if (nelems <= 0) fatald("Array size is illegal", nelems); } // Ensure we have a following ']' match(T_RBRACKET, "]"); // Add this as a known array. We treat the // array as a pointer to its elements' type switch (class) { case C_STATIC: case C_EXTERN: case C_GLOBAL: sym = addglob(varname, pointer_to(type), ctype, S_ARRAY, class, 0, 0); break; default: fatal("For now, declaration of non-global arrays is not implemented"); } // Array initialisation if (Token.token == T_ASSIGN) { if (class != C_GLOBAL && class != C_STATIC) fatals("Variable can not be initialised", varname); scan(&Token); // Get the following left curly bracket match(T_LBRACE, "{"); #define TABLE_INCREMENT 10 // If the array already has nelems, allocate that many elements // in the list. Otherwise, start with TABLE_INCREMENT. 
if (nelems != -1) maxelems= nelems; else maxelems= TABLE_INCREMENT; initlist= (int *)malloc(maxelems *sizeof(int)); // Loop getting a new literal value from the list while (1) { // Check we can add the next value, then parse and add it if (nelems != -1 && i == maxelems) fatal("Too many values in initialisation list"); initlist[i++]= parse_literal(type); // Increase the list size if the original size was // not set and we have hit the end of the current list if (nelems == -1 && i == maxelems) { maxelems += TABLE_INCREMENT; initlist= (int *)realloc(initlist, maxelems *sizeof(int)); } // Leave when we hit the right curly bracket if (Token.token == T_RBRACE) { scan(&Token); break; } // Next token must be a comma, then comma(); } // Zero any unused elements in the initlist. // Attach the list to the symbol table entry for (j=i; j < sym->nelems; j++) initlist[j]=0; if (i > nelems) nelems = i; sym->initlist= initlist; } // Set the size of the array and the number of elements sym->nelems= nelems; sym->size= sym->nelems * typesize(type, ctype); // Generate any global space if (class == C_GLOBAL || class == C_STATIC) genglobsym(sym); return (sym); } // Given a pointer to the new function being declared and // a possibly NULL pointer to the function's previous declaration, // parse a list of parameters and cross-check them against the // previous declaration. Return the count of parameters static int param_declaration_list(struct symtable *oldfuncsym, struct symtable *newfuncsym) { int type, paramcnt = 0; struct symtable *ctype; struct symtable *protoptr = NULL; struct ASTnode *unused; // Get the pointer to the first prototype parameter if (oldfuncsym != NULL) protoptr = oldfuncsym->member; // Loop getting any parameters while (Token.token != T_RPAREN) { // If the first token is 'void' if (Token.token == T_VOID) { // Peek at the next token. If a ')', the function // has no parameters, so leave the loop. 
scan(&Peektoken); if (Peektoken.token == T_RPAREN) { // Move the Peektoken into the Token paramcnt= 0; scan(&Token); break; } } // Get the type of the next parameter type = declaration_list(&ctype, C_PARAM, T_COMMA, T_RPAREN, &unused); if (type == -1) fatal("Bad type in parameter list"); // Ensure the type of this parameter matches the prototype if (protoptr != NULL) { if (type != protoptr->type) fatald("Type doesn't match prototype for parameter", paramcnt + 1); protoptr = protoptr->next; } paramcnt++; // Stop when we hit the right parenthesis if (Token.token == T_RPAREN) break; // We need a comma as separator comma(); } if (oldfuncsym != NULL && paramcnt != oldfuncsym->nelems) fatals("Parameter count mismatch for function", oldfuncsym->name); // Return the count of parameters return (paramcnt); } // // function_declaration: type identifier '(' parameter_list ')' ; // | type identifier '(' parameter_list ')' compound_statement ; // // Parse the declaration of function. static struct symtable *function_declaration(char *funcname, int type, struct symtable *ctype, int class) { struct ASTnode *tree, *finalstmt; struct symtable *oldfuncsym, *newfuncsym = NULL; int endlabel, paramcnt; // Text has the identifier's name. If this exists and is a // function, get the id. Otherwise, set oldfuncsym to NULL. if ((oldfuncsym = findsymbol(funcname)) != NULL) if (oldfuncsym->stype != S_FUNCTION) oldfuncsym = NULL; // If this is a new function declaration, get a // label-id for the end label, and add the function // to the symbol table, if (oldfuncsym == NULL) { endlabel = genlabel(); // Assumtion: functions only return scalar types, so NULL below newfuncsym = addglob(funcname, type, NULL, S_FUNCTION, class, 0, endlabel); } // Scan in the '(', any parameters and the ')'. 
// Pass in any existing function prototype pointer lparen(); paramcnt = param_declaration_list(oldfuncsym, newfuncsym); rparen(); // If this is a new function declaration, update the // function symbol entry with the number of parameters. // Also copy the parameter list into the function's node. if (newfuncsym) { newfuncsym->nelems = paramcnt; newfuncsym->member = Parmhead; oldfuncsym = newfuncsym; } // Clear out the parameter list Parmhead = Parmtail = NULL; // Declaration ends in a semicolon, only a prototype. if (Token.token == T_SEMI) return (oldfuncsym); // This is not just a prototype. // Set the Functionid global to the function's symbol pointer Functionid = oldfuncsym; // Get the AST tree for the compound statement and mark // that we have parsed no loops or switches yet Looplevel = 0; Switchlevel = 0; lbrace(); tree = compound_statement(0); rbrace(); // If the function type isn't P_VOID .. if (type != P_VOID) { // Error if no statements in the function if (tree == NULL) fatal("No statements in function with non-void type"); // Check that the last AST operation in the // compound statement was a return statement finalstmt = (tree->op == A_GLUE) ? tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); } // Build the A_FUNCTION node which has the function's symbol pointer // and the compound statement sub-tree tree = mkastunary(A_FUNCTION, type, tree, oldfuncsym, endlabel); // Do optimisations on the AST tree tree= optimise(tree); // Dump the AST tree if requested if (O_dumpAST) { dumpAST(tree, NOLABEL, 0); fprintf(stdout, "\n\n"); } // Generate the assembly code for it genAST(tree, NOLABEL, NOLABEL, NOLABEL, 0); // Now free the symbols associated // with this function freeloclsyms(); return (oldfuncsym); } // Parse composite type declarations: structs or unions. // Either find an existing struct/union declaration, or build // a struct/union symbol table entry and return its pointer. 
static struct symtable *composite_declaration(int type) { struct symtable *ctype = NULL; struct symtable *m; struct ASTnode *unused; int offset; int t; // Skip the struct/union keyword scan(&Token); // See if there is a following struct/union name if (Token.token == T_IDENT) { // Find any matching composite type if (type == P_STRUCT) ctype = findstruct(Text); else ctype = findunion(Text); scan(&Token); } // If the next token isn't an LBRACE , this is // the usage of an existing struct/union type. // Return the pointer to the type. if (Token.token != T_LBRACE) { if (ctype == NULL) fatals("unknown struct/union type", Text); return (ctype); } // Ensure this struct/union type hasn't been // previously defined if (ctype) fatals("previously defined struct/union", Text); // Build the composite type and skip the left brace if (type == P_STRUCT) ctype = addstruct(Text); else ctype = addunion(Text); scan(&Token); // Scan in the list of members while (1) { // Get the next member. m is used as a dummy t= declaration_list(&m, C_MEMBER, T_SEMI, T_RBRACE, &unused); if (t== -1) fatal("Bad type in member list"); if (Token.token == T_SEMI) scan(&Token); if (Token.token == T_RBRACE) break; } // Attach to the struct type's node rbrace(); if (Membhead==NULL) fatals("No members in struct", ctype->name); ctype->member = Membhead; Membhead = Membtail = NULL; // Set the offset of the initial member // and find the first free byte after it m = ctype->member; m->st_posn = 0; offset = typesize(m->type, m->ctype); // Set the position of each successive member in the composite type // Unions are easy. 
For structs, align the member and find the next free byte for (m = m->next; m != NULL; m = m->next) { // Set the offset for this member if (type == P_STRUCT) m->st_posn = genalign(m->type, offset, 1); else m->st_posn = 0; // Get the offset of the next free byte after this member offset += typesize(m->type, m->ctype); } // Set the overall size of the composite type ctype->size = offset; return (ctype); } // Parse an enum declaration static void enum_declaration(void) { struct symtable *etype = NULL; char *name= NULL; int intval = 0; // Skip the enum keyword. scan(&Token); // If there's a following enum type name, get a // pointer to any existing enum type node. if (Token.token == T_IDENT) { etype = findenumtype(Text); name = strdup(Text); // As it gets tromped soon scan(&Token); } // If the next token isn't a LBRACE, check // that we have an enum type name, then return if (Token.token != T_LBRACE) { if (etype == NULL) fatals("undeclared enum type:", name); return; } // We do have an LBRACE. Skip it scan(&Token); // If we have an enum type name, ensure that it // hasn't been declared before. if (etype != NULL) fatals("enum type redeclared:", etype->name); else // Build an enum type node for this identifier etype = addenum(name, C_ENUMTYPE, 0); // Loop to get all the enum values while (1) { // Ensure we have an identifier // Copy it in case there's an int literal coming up ident(); name = strdup(Text); // Ensure this enum value hasn't been declared before etype = findenumval(name); if (etype != NULL) fatals("enum value redeclared:", Text); // If the next token is an '=', skip it and // get the following int literal if (Token.token == T_ASSIGN) { scan(&Token); if (Token.token != T_INTLIT) fatal("Expected int literal after '='"); intval = Token.intvalue; scan(&Token); } // Build an enum value node for this identifier. // Increment the value for the next enum identifier. 
etype = addenum(name, C_ENUMVAL, intval++); // Bail out on a right curly bracket, else get a comma if (Token.token == T_RBRACE) break; comma(); } scan(&Token); // Skip over the right curly bracket } // Parse a typedef declaration and return the type // and ctype that it represents static int typedef_declaration(struct symtable **ctype) { int type, class = 0; // Skip the typedef keyword. scan(&Token); // Get the actual type following the keyword type = parse_type(ctype, &class); if (class != 0) fatal("Can't have extern in a typedef declaration"); // See if the typedef identifier already exists if (findtypedef(Text) != NULL) fatals("redefinition of typedef", Text); // Get any following '*' tokens type = parse_stars(type); // It doesn't exist so add it to the typedef list addtypedef(Text, type, *ctype); scan(&Token); return (type); } // Given a typedef name, return the type it represents static int type_of_typedef(char *name, struct symtable **ctype) { struct symtable *t; // Look up the typedef in the list t = findtypedef(name); if (t == NULL) fatals("unknown type", name); scan(&Token); *ctype = t->ctype; return (t->type); } // Parse the declaration of a variable or function. // The type and any following '*'s have been scanned, and we // have the identifier in the Token variable. // The class argument is the variable's class. // Return a pointer to the symbol's entry in the symbol table static struct symtable *symbol_declaration(int type, struct symtable *ctype, int class, struct ASTnode **tree) { struct symtable *sym = NULL; char *varname = strdup(Text); // Ensure that we have an identifier. // We copied it above so we can scan more tokens in, e.g. // an assignment expression for a local variable. 
ident(); // Deal with function declarations if (Token.token == T_LPAREN) { return (function_declaration(varname, type, ctype, class)); } // See if this array or scalar variable has already been declared switch (class) { case C_EXTERN: case C_STATIC: case C_GLOBAL: if (findglob(varname) != NULL) fatals("Duplicate global variable declaration", varname); case C_LOCAL: case C_PARAM: if (findlocl(varname) != NULL) fatals("Duplicate local variable declaration", varname); case C_MEMBER: if (findmember(varname) != NULL) fatals("Duplicate struct/union member declaration", varname); } // Add the array or scalar variable to the symbol table if (Token.token == T_LBRACKET) sym = array_declaration(varname, type, ctype, class); else sym = scalar_declaration(varname, type, ctype, class, tree); return (sym); } // Parse a list of symbols where there is an initial type. // Return the type of the symbols. et1 and et2 are end tokens. int declaration_list(struct symtable **ctype, int class, int et1, int et2, struct ASTnode **gluetree) { int inittype, type; struct symtable *sym; struct ASTnode *tree; *gluetree= NULL; // Get the initial type. 
If -1, it was // a composite type definition, return this if ((inittype = parse_type(ctype, &class)) == -1) return (inittype); // Now parse the list of symbols while (1) { // See if this symbol is a pointer type = parse_stars(inittype); // Parse this symbol sym = symbol_declaration(type, *ctype, class, &tree); // We parsed a function, there is no list so leave if (sym->stype == S_FUNCTION) { if (class != C_GLOBAL && class != C_STATIC) fatal("Function definition not at global level"); return (type); } // Glue any AST tree from a local declaration // to build a sequence of assignments to perform if (*gluetree== NULL) *gluetree= tree; else *gluetree = mkastnode(A_GLUE, P_NONE, *gluetree, NULL, tree, NULL, 0); // We are at the end of the list, leave if (Token.token == et1 || Token.token == et2) return (type); // Otherwise, we need a comma as separator comma(); } } // Parse one or more global declarations, either // variables, functions or structs void global_declarations(void) { struct symtable *ctype; struct ASTnode *unused; while (Token.token != T_EOF) { declaration_list(&ctype, C_GLOBAL, T_SEMI, T_EOF, &unused); // Skip any semicolons and right curly brackets if (Token.token == T_SEMI) scan(&Token); } } ================================================ FILE: 49_Ternary/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c void reject_token(struct token *t); int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue); struct ASTnode *mkastleaf(int op, int type, struct symtable *sym, int intvalue); struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, struct symtable *sym, int intvalue); void dumpAST(struct ASTnode *n, int label, int parentASTop); // gen.c int genlabel(void); int genAST(struct ASTnode *n, int iflabel, int looptoplabel, 
int loopendlabel, int parentASTop); void genpreamble(); void genpostamble(); void genfreeregs(int keepreg); void genglobsym(struct symtable *node); int genglobstr(char *strvalue); int genprimsize(int type); int genalign(int type, int offset, int direction); void genreturn(int reg, int id); // cg.c int cgprimsize(int type); int cgalign(int type, int offset, int direction); void cgtextseg(); void cgdataseg(); int alloc_register(void); void freeall_registers(int keepreg); void cgpreamble(); void cgpostamble(); void cgfuncpreamble(struct symtable *sym); void cgfuncpostamble(struct symtable *sym); int cgloadint(int value, int type); int cgloadglob(struct symtable *sym, int op); int cgloadlocal(struct symtable *sym, int op); int cgloadglobstr(int label); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdiv(int r1, int r2); int cgshlconst(int r, int val); int cgcall(struct symtable *sym, int numargs); void cgcopyarg(int r, int argposn); int cgstorglob(int r, struct symtable *sym); int cgstorlocal(int r, struct symtable *sym); void cgglobsym(struct symtable *node); void cgglobstr(int l, char *strvalue); int cgcompare_and_set(int ASTop, int r1, int r2); int cgcompare_and_jump(int ASTop, int r1, int r2, int label); void cglabel(int l); void cgjump(int l); int cgwiden(int r, int oldtype, int newtype); void cgreturn(int reg, struct symtable *sym); int cgaddress(struct symtable *sym); int cgderef(int r, int type); int cgstorderef(int r1, int r2, int type); int cgnegate(int r); int cginvert(int r); int cglognot(int r); int cgboolean(int r, int op, int label); int cgand(int r1, int r2); int cgor(int r1, int r2); int cgxor(int r1, int r2); int cgshl(int r1, int r2); int cgshr(int r1, int r2); void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel); void cgmove(int r1, int r2); // expr.c struct ASTnode *expression_list(int endtoken); struct ASTnode *binexpr(int ptp); // stmt.c struct ASTnode 
*compound_statement(int inswitch); // misc.c void match(int t, char *what); void semi(void); void lbrace(void); void rbrace(void); void lparen(void); void rparen(void); void ident(void); void comma(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node); struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn); struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn); struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int nelems); struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype); struct symtable *addstruct(char *name); struct symtable *addunion(char *name); struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int nelems); struct symtable *addenum(char *name, int class, int value); struct symtable *addtypedef(char *name, int type, struct symtable *ctype); struct symtable *findglob(char *s); struct symtable *findlocl(char *s); struct symtable *findsymbol(char *s); struct symtable *findmember(char *s); struct symtable *findstruct(char *s); struct symtable *findunion(char *s); struct symtable *findenumtype(char *s); struct symtable *findenumval(char *s); struct symtable *findtypedef(char *s); void clear_symtable(void); void freeloclsyms(void); void freestaticsyms(void); // decl.c int parse_type(struct symtable **ctype, int *class); int parse_stars(int type); int parse_cast(void); int declaration_list(struct symtable **ctype, int class, int et1, int et2, struct ASTnode **gluetree); void global_declarations(void); // types.c int inttype(int type); int ptrtype(int type); int pointer_to(int type); int value_at(int type); int typesize(int type, struct symtable *ctype); struct ASTnode *modify_type(struct ASTnode 
*tree, int rtype, int op); // opt.c struct ASTnode *optimise(struct ASTnode *n); ================================================ FILE: 49_Ternary/defs.h ================================================ #include #include #include #include // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 enum { TEXTLEN = 512 // Length of identifiers in input }; // Commands and default filenames #define AOUT "a.out" #ifdef __NASM__ #define ASCMD "nasm -f elf64 -o " #define LDCMD "cc -no-pie -fno-plt -Wall -o " #else #define ASCMD "as -o " #define LDCMD "cc -o " #endif #define CPPCMD "cpp -nostdinc -isystem " // Token types enum { T_EOF, // Binary operators T_ASSIGN, T_ASPLUS, T_ASMINUS, T_ASSTAR, T_ASSLASH, T_QUESTION, T_LOGOR, T_LOGAND, T_OR, T_XOR, T_AMPER, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, T_LSHIFT, T_RSHIFT, T_PLUS, T_MINUS, T_STAR, T_SLASH, // Other operators T_INC, T_DEC, T_INVERT, T_LOGNOT, // Type keywords T_VOID, T_CHAR, T_INT, T_LONG, // Other keywords T_IF, T_ELSE, T_WHILE, T_FOR, T_RETURN, T_STRUCT, T_UNION, T_ENUM, T_TYPEDEF, T_EXTERN, T_BREAK, T_CONTINUE, T_SWITCH, T_CASE, T_DEFAULT, T_SIZEOF, T_STATIC, // Structural tokens T_INTLIT, T_STRLIT, T_SEMI, T_IDENT, T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, T_LBRACKET, T_RBRACKET, T_COMMA, T_DOT, T_ARROW, T_COLON }; // Token structure struct token { int token; // Token type, from the enum list above char *tokstr; // String version of the token int intvalue; // For T_INTLIT, the integer value }; // AST node types. 
// The first few line up with the related tokens: A_ASSIGN .. A_DIVIDE
// have the same values as T_ASSIGN .. T_SLASH, which is what lets
// binastop() in expr.c return the token value as the AST operation.
enum {
  A_ASSIGN = 1, A_ASPLUS, A_ASMINUS, A_ASSTAR, A_ASSLASH,
  A_TERNARY, A_LOGOR, A_LOGAND, A_OR, A_XOR, A_AND,
  A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_LSHIFT, A_RSHIFT,
  A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE,
  A_INTLIT, A_STRLIT, A_IDENT, A_GLUE,
  A_IF, A_WHILE, A_FUNCTION, A_WIDEN, A_RETURN,
  A_FUNCCALL, A_DEREF, A_ADDR, A_SCALE,
  A_PREINC, A_PREDEC, A_POSTINC, A_POSTDEC,
  A_NEGATE, A_INVERT, A_LOGNOT, A_TOBOOL, A_BREAK,
  A_CONTINUE, A_SWITCH, A_CASE, A_DEFAULT, A_CAST
};

// Primitive types. The bottom 4 bits are an integer
// value that represents the level of indirection,
// e.g. 0= no pointer, 1= pointer, 2= pointer pointer etc.
// The base types are therefore spaced 16 apart.
enum {
  P_NONE, P_VOID = 16, P_CHAR = 32, P_INT = 48, P_LONG = 64,
  P_STRUCT=80, P_UNION=96
};

// Structural types
enum {
  S_VARIABLE, S_FUNCTION, S_ARRAY
};

// Storage classes
enum {
  C_GLOBAL = 1,                 // Globally visible symbol
  C_LOCAL,                      // Locally visible symbol
  C_PARAM,                      // Locally visible function parameter
  C_EXTERN,                     // External globally visible symbol
  C_STATIC,                     // Static symbol, visible in one file
  C_STRUCT,                     // A struct
  C_UNION,                      // A union
  C_MEMBER,                     // Member of a struct or union
  C_ENUMTYPE,                   // A named enumeration type
  C_ENUMVAL,                    // A named enumeration value
  C_TYPEDEF                     // A named typedef
};

// Symbol table structure. One node describes a variable, function,
// struct/union type, member, enum type/value or typedef; nodes are
// chained into lists through the next pointer.
struct symtable {
  char *name;                   // Name of a symbol
  int type;                     // Primitive type for the symbol
  struct symtable *ctype;       // If struct/union, ptr to that type
  int stype;                    // Structural type for the symbol
  int class;                    // Storage class for the symbol
  int size;                     // Total size in bytes of this symbol
  int nelems;                   // Functions: # params. Arrays: # elements
#define st_endlabel st_posn     // For functions, the end label
  int st_posn;                  // For locals, the negative offset
                                // from the stack base pointer.
                                // Also holds a struct/union member's
                                // offset and an enum value's value.
  int *initlist;                // List of initial values
  struct symtable *next;        // Next symbol in one list
  struct symtable *member;      // First member of a function, struct,
};                              // union or enum

// Abstract Syntax Tree structure
struct ASTnode {
  int op;                       // "Operation" to be performed on this tree
  int type;                     // Type of any expression this tree generates
  int rvalue;                   // True if the node is an rvalue
  struct ASTnode *left;         // Left, middle and right child trees
  struct ASTnode *mid;
  struct ASTnode *right;
  struct symtable *sym;         // For many AST nodes, the pointer to
                                // the symbol in the symbol table
#define a_intvalue a_size       // For A_INTLIT, the integer value
  int a_size;                   // For A_SCALE, the size to scale by
};

enum {
  NOREG = -1,                   // Use NOREG when the AST generation
                                // functions have no register to return
  NOLABEL = 0                   // Use NOLABEL when we have no label to
                                // pass to genAST()
};

================================================
FILE: 49_Ternary/expr.c
================================================
#include "defs.h"
#include "data.h"
#include "decl.h"

// Parsing of expressions
// Copyright (c) 2019 Warren Toomey, GPL3

// expression_list: (empty)
//        | expression
//        | expression ',' expression_list
//        ;

// Parse a list of zero or more comma-separated expressions and
// return an AST composed of A_GLUE nodes with the left-hand child
// being the sub-tree of previous expressions (or NULL) and the right-hand
// child being the next expression. Each A_GLUE node will have size field
// set to the number of expressions in the tree at this point.
// If no expressions are parsed, NULL is returned.
// endtoken is the token that terminates the list; it is not consumed.
struct ASTnode *expression_list(int endtoken) {
  struct ASTnode *tree = NULL;
  struct ASTnode *child = NULL;
  int exprcount = 0;

  // Loop until the end token
  while (Token.token != endtoken) {

    // Parse the next expression and increment the expression count
    child = binexpr(0);
    exprcount++;

    // Build an A_GLUE AST node with the previous tree as the left child
    // and the new expression as the right child. Store the expression count.
    tree = mkastnode(A_GLUE, P_NONE, tree, NULL, child, NULL, exprcount);

    // Stop when we reach the end token
    if (Token.token == endtoken)
      break;

    // Must have a ',' at this point
    match(T_COMMA, ",");
  }

  // Return the tree of expressions
  return (tree);
}

// Parse a function call and return its AST.
// The function's name is in Text and the current token is the '('.
static struct ASTnode *funccall(void) {
  struct ASTnode *tree;
  struct symtable *funcptr;

  // Check that the identifier has been defined as a function,
  // then make a leaf node for it.
  if ((funcptr = findsymbol(Text)) == NULL || funcptr->stype != S_FUNCTION) {
    fatals("Undeclared function", Text);
  }
  // Get the '('
  lparen();

  // Parse the argument expression list
  tree = expression_list(T_RPAREN);

  // XXX Check type of each argument against the function's prototype

  // Build the function call AST node. Store the
  // function's return type as this node's type.
  // Also record the function's symbol-id
  tree = mkastunary(A_FUNCCALL, funcptr->type, tree, funcptr, 0);

  // Get the ')'
  rparen();
  return (tree);
}

// Parse the index into an array and return an AST tree for it.
// The array's name is in Text and the current token is the '['.
static struct ASTnode *array_access(void) {
  struct ASTnode *left, *right;
  struct symtable *aryptr;

  // Check that the identifier has been defined as an array
  // then make a leaf node for it that points at the base
  if ((aryptr = findsymbol(Text)) == NULL || aryptr->stype != S_ARRAY) {
    fatals("Undeclared array", Text);
  }
  left = mkastleaf(A_ADDR, aryptr->type, aryptr, 0);

  // Get the '['
  scan(&Token);

  // Parse the following expression
  right = binexpr(0);

  // Get the ']'
  match(T_RBRACKET, "]");

  // Ensure that this is of int type
  if (!inttype(right->type))
    fatal("Array index is not of integer type");

  // Scale the index by the size of the element's type
  right = modify_type(right, left->type, A_ADD);

  // Return an AST tree where the array's base has the offset
  // added to it, and dereference the element. Still an lvalue
  // at this point.
  left = mkastnode(A_ADD, aryptr->type, left, NULL, right, NULL, 0);
  left = mkastunary(A_DEREF, value_at(left->type), left, NULL, 0);
  return (left);
}

// Parse the member reference of a struct or union
// and return an AST tree for it. If withpointer is true,
// the access is through a pointer to the member.
// The variable's name is in Text and the current token is
// the '.' or '->'.
static struct ASTnode *member_access(int withpointer) {
  struct ASTnode *left, *right;
  struct symtable *compvar;
  struct symtable *typeptr;
  struct symtable *m;

  // Check that the identifier has been declared as a
  // struct/union or a struct/union pointer.
  // Note that a variable of the wrong type is also
  // reported as "Undeclared variable".
  if ((compvar = findsymbol(Text)) == NULL)
    fatals("Undeclared variable", Text);
  if (withpointer && compvar->type != pointer_to(P_STRUCT)
      && compvar->type != pointer_to(P_UNION))
    fatals("Undeclared variable", Text);
  if (!withpointer && compvar->type != P_STRUCT && compvar->type != P_UNION)
    fatals("Undeclared variable", Text);

  // If a pointer to a struct/union, get the pointer's value.
  // Otherwise, make a leaf node that points at the base
  // Either way, it's an rvalue
  if (withpointer) {
    left = mkastleaf(A_IDENT, pointer_to(compvar->type), compvar, 0);
  } else
    left = mkastleaf(A_ADDR, compvar->type, compvar, 0);
  left->rvalue = 1;

  // Get the details of the composite type
  typeptr = compvar->ctype;

  // Skip the '.' or '->' token and get the member's name
  scan(&Token);
  ident();

  // Find the matching member's name in the type
  // Die if we can't find it
  for (m = typeptr->member; m != NULL; m = m->next)
    if (!strcmp(m->name, Text))
      break;

  if (m == NULL)
    fatals("No member found in struct/union: ", Text);

  // Build an A_INTLIT node with the offset
  right = mkastleaf(A_INTLIT, P_INT, NULL, m->st_posn);

  // Add the member's offset to the base of the struct/union
  // and dereference it. Still an lvalue at this point
  left = mkastnode(A_ADD, pointer_to(m->type), left, NULL, right, NULL, 0);
  left = mkastunary(A_DEREF, m->type, left, NULL, 0);
  return (left);
}

// Parse a postfix expression and return
// an AST node representing it. The
// identifier is already in Text.
static struct ASTnode *postfix(void) {
  struct ASTnode *n;
  struct symtable *varptr;
  struct symtable *enumptr;

  // If the identifier matches an enum value,
  // return an A_INTLIT node
  if ((enumptr = findenumval(Text)) != NULL) {
    scan(&Token);
    return (mkastleaf(A_INTLIT, P_INT, NULL, enumptr->st_posn));
  }
  // Scan in the next token to see if we have a postfix expression
  scan(&Token);

  // Function call
  if (Token.token == T_LPAREN)
    return (funccall());

  // An array reference
  if (Token.token == T_LBRACKET)
    return (array_access());

  // Access into a struct or union
  if (Token.token == T_DOT)
    return (member_access(0));
  if (Token.token == T_ARROW)
    return (member_access(1));

  // A variable. Check that the variable exists.
  if ((varptr = findsymbol(Text)) == NULL || varptr->stype != S_VARIABLE)
    fatals("Unknown variable", Text);

  switch (Token.token) {
      // Post-increment: skip over the token
    case T_INC:
      scan(&Token);
      n = mkastleaf(A_POSTINC, varptr->type, varptr, 0);
      break;

      // Post-decrement: skip over the token
    case T_DEC:
      scan(&Token);
      n = mkastleaf(A_POSTDEC, varptr->type, varptr, 0);
      break;

      // Just a variable reference
    default:
      n = mkastleaf(A_IDENT, varptr->type, varptr, 0);
  }
  return (n);
}

// Parse a primary factor and return an
// AST node representing it.
static struct ASTnode *primary(void) {
  struct ASTnode *n;
  int id;
  int type = 0;                 // Stays 0 unless a cast was parsed
  int size, class;
  struct symtable *ctype;

  switch (Token.token) {
    case T_STATIC:
    case T_EXTERN:
      // No break after this case: presumably fatal() does not
      // return -- confirm in misc.c
      fatal("Compiler doesn't support static or extern local declarations");
    case T_SIZEOF:
      // Skip the T_SIZEOF and ensure we have a left parenthesis
      scan(&Token);
      if (Token.token != T_LPAREN)
        fatal("Left parenthesis expected after sizeof");
      scan(&Token);

      // Get the type inside the parentheses
      type = parse_stars(parse_type(&ctype, &class));
      // Get the type's size
      size = typesize(type, ctype);
      rparen();
      // Return a leaf node int literal with the size
      return (mkastleaf(A_INTLIT, P_INT, NULL, size));

    case T_INTLIT:
      // For an INTLIT token, make a leaf AST node for it.
      // Make it a P_CHAR if it's within the P_CHAR range
      if (Token.intvalue >= 0 && Token.intvalue < 256)
        n = mkastleaf(A_INTLIT, P_CHAR, NULL, Token.intvalue);
      else
        n = mkastleaf(A_INTLIT, P_INT, NULL, Token.intvalue);
      break;

    case T_STRLIT:
      // For a STRLIT token, generate the assembly for it.
      // Then make a leaf AST node for it. id is the string's label.
      id = genglobstr(Text);
      n = mkastleaf(A_STRLIT, pointer_to(P_CHAR), NULL, id);
      break;

    case T_IDENT:
      return (postfix());

    case T_LPAREN:
      // Beginning of a parenthesised expression, skip the '('.
      scan(&Token);

      // If the token after is a type identifier, this is a cast expression
      switch (Token.token) {
        case T_IDENT:
          // We have to see if the identifier matches a typedef.
          // If not, treat it as an expression.
          if (findtypedef(Text) == NULL) {
            n = binexpr(0);
            break;
          }
          // Otherwise it names a type: deliberately fall through
          // to the cast handling below
        case T_VOID:
        case T_CHAR:
        case T_INT:
        case T_LONG:
        case T_STRUCT:
        case T_UNION:
        case T_ENUM:
          // Get the type inside the parentheses
          type = parse_cast();
          // Skip the closing ')' and then parse the following expression
          rparen();
          // Deliberately fall through to parse the expression being cast

        default:
          n = binexpr(0);       // Scan in the expression
      }

      // We now have at least an expression in n, and possibly a non-zero type in type
      // if there was a cast. Skip the closing ')' if there was no cast.
      if (type == 0)
        rparen();
      else
        // Otherwise, make a unary AST node for the cast
        n = mkastunary(A_CAST, type, n, NULL, 0);
      return (n);

    default:
      fatals("Expecting a primary expression, got token", Token.tokstr);
  }

  // Scan in the next token and return the leaf node
  scan(&Token);
  return (n);
}

// Convert a binary operator token into a binary AST operation.
// We rely on a 1:1 mapping from token to AST operation
static int binastop(int tokentype) {
  if (tokentype > T_EOF && tokentype <= T_SLASH)
    return (tokentype);
  fatald("Syntax error, token", tokentype);
  return (0);                   // Keep -Wall happy
}

// Return true if a token is right-associative,
// false otherwise.
static int rightassoc(int tokentype) {
  if (tokentype >= T_ASSIGN && tokentype <= T_ASSLASH)
    return (1);
  return (0);
}

// Operator precedence for each token. Must
// match up with the order of tokens in defs.h
static int OpPrec[] = {
  0, 10, 10,                    // T_EOF, T_ASSIGN, T_ASPLUS,
  10, 10, 10,                   // T_ASMINUS, T_ASSTAR, T_ASSLASH,
  15,                           // T_QUESTION,
  20, 30,                       // T_LOGOR, T_LOGAND
  40, 50, 60,                   // T_OR, T_XOR, T_AMPER
  70, 70,                       // T_EQ, T_NE
  80, 80, 80, 80,               // T_LT, T_GT, T_LE, T_GE
  90, 90,                       // T_LSHIFT, T_RSHIFT
  100, 100,                     // T_PLUS, T_MINUS
  110, 110                      // T_STAR, T_SLASH
};

// Check that we have a binary operator and
// return its precedence.
static int op_precedence(int tokentype) { int prec; if (tokentype > T_SLASH) fatald("Token with no precedence in op_precedence:", tokentype); prec = OpPrec[tokentype]; if (prec == 0) fatald("Syntax error, token", tokentype); return (prec); } // prefix_expression: primary // | '*' prefix_expression // | '&' prefix_expression // | '-' prefix_expression // | '++' prefix_expression // | '--' prefix_expression // ; // Parse a prefix expression and return // a sub-tree representing it. struct ASTnode *prefix(void) { struct ASTnode *tree; switch (Token.token) { case T_AMPER: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Ensure that it's an identifier if (tree->op != A_IDENT) fatal("& operator must be followed by an identifier"); // Now change the operator to A_ADDR and the type to // a pointer to the original type tree->op = A_ADDR; tree->type = pointer_to(tree->type); break; case T_STAR: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's either another deref or an // identifier if (tree->op != A_IDENT && tree->op != A_DEREF) fatal("* operator must be followed by an identifier or *"); // Prepend an A_DEREF operation to the tree tree = mkastunary(A_DEREF, value_at(tree->type), tree, NULL, 0); break; case T_MINUS: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_NEGATE operation to the tree and // make the child an rvalue. Because chars are unsigned, // also widen this to int so that it's signed tree->rvalue = 1; tree = modify_type(tree, P_INT, 0); tree = mkastunary(A_NEGATE, tree->type, tree, NULL, 0); break; case T_INVERT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_INVERT operation to the tree and // make the child an rvalue. 
tree->rvalue = 1; tree = mkastunary(A_INVERT, tree->type, tree, NULL, 0); break; case T_LOGNOT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_LOGNOT operation to the tree and // make the child an rvalue. tree->rvalue = 1; tree = mkastunary(A_LOGNOT, tree->type, tree, NULL, 0); break; case T_INC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("++ operator must be followed by an identifier"); // Prepend an A_PREINC operation to the tree tree = mkastunary(A_PREINC, tree->type, tree, NULL, 0); break; case T_DEC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("-- operator must be followed by an identifier"); // Prepend an A_PREDEC operation to the tree tree = mkastunary(A_PREDEC, tree->type, tree, NULL, 0); break; default: tree = primary(); } return (tree); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; struct ASTnode *ltemp, *rtemp; int ASTop; int tokentype; // Get the tree on the left. // Fetch the next token at the same time. 
left = prefix(); // If we hit one of several terminating tokens, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON || tokentype == T_RBRACE) { left->rvalue = 1; return (left); } // While the precedence of this token is more than that of the // previous token precedence, or it's right associative and // equal to the previous token's precedence while ((op_precedence(tokentype) > ptp) || (rightassoc(tokentype) && op_precedence(tokentype) == ptp)) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Determine the operation to be performed on the sub-trees ASTop = binastop(tokentype); switch (ASTop) { case A_TERNARY: // Ensure we have a ':' token, scan in the expression after it match(T_COLON, ":"); ltemp= binexpr(0); // Build and return the AST for this statement. Use the middle // expression's type as the return type. XXX We should also // consider the third expression's type. return (mkastnode(A_TERNARY, right->type, left, right, ltemp, NULL, 0)); case A_ASSIGN: // Assignment // Make the right tree into an rvalue right->rvalue = 1; // Ensure the right's type matches the left right = modify_type(right, left->type, 0); if (right == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree. However, switch // left and right around, so that the right expression's // code will be generated before the left expression ltemp = left; left = right; right = ltemp; break; default: // We are not doing a ternary or assignment, so both trees should // be rvalues. Convert both trees into rvalue if they are lvalue trees left->rvalue = 1; right->rvalue = 1; // Ensure the two types are compatible by trying // to modify each tree to match the other's type. 
ltemp = modify_type(left, right->type, ASTop); rtemp = modify_type(right, left->type, ASTop); if (ltemp == NULL && rtemp == NULL) fatal("Incompatible types in binary expression"); if (ltemp != NULL) left = ltemp; if (rtemp != NULL) right = rtemp; } // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. left = mkastnode(binastop(tokentype), left->type, left, NULL, right, NULL, 0); // Update the details of the current token. // If we hit a terminating token, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON || tokentype == T_RBRACE) { left->rvalue = 1; return (left); } } // Return the tree we have when the precedence // is the same or lower left->rvalue = 1; return (left); } ================================================ FILE: 49_Ternary/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number static int labelid = 1; int genlabel(void) { return (labelid++); } // Generate the code for an IF statement // and an optional ELSE clause. static int genIF(struct ASTnode *n, int looptoplabel, int loopendlabel) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. // When there is no ELSE clause, Lfalse _is_ // the ending label! Lfalse = genlabel(); if (n->right) Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. 
genAST(n->left, Lfalse, NOLABEL, NOLABEL, n->op); genfreeregs(NOREG); // Generate the true compound statement genAST(n->mid, NOLABEL, looptoplabel, loopendlabel, n->op); genfreeregs(NOREG); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); genfreeregs(NOREG); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = genlabel(); Lend = genlabel(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. genAST(n->left, Lend, Lstart, Lend, n->op); genfreeregs(NOREG); // Generate the compound statement for the body genAST(n->right, NOLABEL, Lstart, Lend, n->op); genfreeregs(NOREG); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Generate the code for a SWITCH statement static int genSWITCH(struct ASTnode *n) { int *caseval, *caselabel; int Ljumptop, Lend; int i, reg, defaultlabel = 0, casecount = 0; struct ASTnode *c; // Create arrays for the case values and associated labels. // Ensure that we have at least one position in each array. caseval = (int *) malloc((n->a_intvalue + 1) * sizeof(int)); caselabel = (int *) malloc((n->a_intvalue + 1) * sizeof(int)); // Generate labels for the top of the jump table, and the // end of the switch statement. Set a default label for // the end of the switch, in case we don't have a default. 
Ljumptop = genlabel(); Lend = genlabel(); defaultlabel = Lend; // Output the code to calculate the switch condition reg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, 0); cgjump(Ljumptop); genfreeregs(reg); // Walk the right-child linked list to // generate the code for each case for (i = 0, c = n->right; c != NULL; i++, c = c->right) { // Get a label for this case. Store it // and the case value in the arrays. // Record if it is the default case. caselabel[i] = genlabel(); caseval[i] = c->a_intvalue; cglabel(caselabel[i]); if (c->op == A_DEFAULT) defaultlabel = caselabel[i]; else casecount++; // Generate the case code. Pass in the end label for the breaks genAST(c->left, NOLABEL, NOLABEL, Lend, 0); genfreeregs(NOREG); } // Ensure the last case jumps past the switch table cgjump(Lend); // Now output the switch table and the end label. cgswitch(reg, casecount, Ljumptop, caselabel, caseval, defaultlabel); cglabel(Lend); return (NOREG); } // Generate the code to copy the arguments of a // function call to its parameters, then call the // function itself. Return the register that holds // the function's return value. static int gen_funccall(struct ASTnode *n) { struct ASTnode *gluetree = n->left; int reg; int numargs = 0; // If there is a list of arguments, walk this list // from the last argument (right-hand child) to the // first while (gluetree) { // Calculate the expression's value reg = genAST(gluetree->right, NOLABEL, NOLABEL, NOLABEL, gluetree->op); // Copy this into the n'th function parameter: size is 1, 2, 3, ... 
cgcopyarg(reg, gluetree->a_size); // Keep the first (highest) number of arguments if (numargs == 0) numargs = gluetree->a_size; genfreeregs(NOREG); gluetree = gluetree->left; } // Call the function, clean up the stack (based on numargs), // and return its result return (cgcall(n->sym, numargs)); } // Generate code for a ternary expression static int gen_ternary(struct ASTnode *n) { int Lfalse, Lend; int reg, expreg; // Generate two labels: one for the // false expression, and one for the // end of the overall expression Lfalse = genlabel(); Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. genAST(n->left, Lfalse, NOLABEL, NOLABEL, n->op); genfreeregs(NOREG); // Get a register to hold the result of the two expressions reg = alloc_register(); // Generate the true expression and the false label. // Move the expression result into the known register. expreg = genAST(n->mid, NOLABEL, NOLABEL, NOLABEL, n->op); cgmove(expreg, reg); // Don't free the register holding the result, though! genfreeregs(reg); cgjump(Lend); cglabel(Lfalse); // Generate the false expression and the end label. // Move the expression result into the known register. expreg = genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); cgmove(expreg, reg); // Don't free the register holding the result, though! genfreeregs(reg); cglabel(Lend); return (reg); } // Given an AST, an optional label, and the AST op // of the parent, generate assembly code recursively. // Return the register id with the tree's final value. 
int genAST(struct ASTnode *n, int iflabel, int looptoplabel,
           int loopendlabel, int parentASTop) {
  // n:            root of the (sub-)tree to generate code for
  // iflabel:      label that a comparison or A_TOBOOL is given to jump to
  // looptoplabel: label that A_CONTINUE jumps to
  // loopendlabel: label that A_BREAK jumps to
  // parentASTop:  op of the parent node; selects compare-and-jump versus
  //               compare-and-set, and whether an A_IDENT is loaded
  int leftreg, rightreg;

  // We have some specific AST node handling at the top
  // so that we don't evaluate the child sub-trees immediately
  switch (n->op) {
    case A_IF:
      return (genIF(n, looptoplabel, loopendlabel));
    case A_WHILE:
      return (genWHILE(n));
    case A_SWITCH:
      return (genSWITCH(n));
    case A_FUNCCALL:
      return (gen_funccall(n));
    case A_TERNARY:
      return (gen_ternary(n));
    case A_GLUE:
      // Do each child statement, and free the
      // registers after each child
      if (n->left != NULL)
        genAST(n->left, iflabel, looptoplabel, loopendlabel, n->op);
      genfreeregs(NOREG);
      if (n->right != NULL)
        genAST(n->right, iflabel, looptoplabel, loopendlabel, n->op);
      genfreeregs(NOREG);
      return (NOREG);
    case A_FUNCTION:
      // Generate the function's preamble before the code
      // in the child sub-tree
      cgfuncpreamble(n->sym);
      genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op);
      cgfuncpostamble(n->sym);
      return (NOREG);
  }

  // General AST node handling below

  // Get the left and right sub-tree values.
  // leftreg/rightreg stay unset when the matching child is missing;
  // the cases below only read the ones their node type provides.
  if (n->left)
    leftreg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op);
  if (n->right)
    rightreg = genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op);

  switch (n->op) {
    case A_ADD:
      return (cgadd(leftreg, rightreg));
    case A_SUBTRACT:
      return (cgsub(leftreg, rightreg));
    case A_MULTIPLY:
      return (cgmul(leftreg, rightreg));
    case A_DIVIDE:
      return (cgdiv(leftreg, rightreg));
    case A_AND:
      return (cgand(leftreg, rightreg));
    case A_OR:
      return (cgor(leftreg, rightreg));
    case A_XOR:
      return (cgxor(leftreg, rightreg));
    case A_LSHIFT:
      return (cgshl(leftreg, rightreg));
    case A_RSHIFT:
      return (cgshr(leftreg, rightreg));
    case A_EQ:
    case A_NE:
    case A_LT:
    case A_GT:
    case A_LE:
    case A_GE:
      // If the parent AST node is an A_IF, A_WHILE or A_TERNARY,
      // generate a compare followed by a jump. Otherwise, compare
      // registers and set one to 1 or 0 based on the comparison.
      if (parentASTop == A_IF || parentASTop == A_WHILE ||
          parentASTop == A_TERNARY)
        return (cgcompare_and_jump(n->op, leftreg, rightreg, iflabel));
      else
        return (cgcompare_and_set(n->op, leftreg, rightreg));
    case A_INTLIT:
      return (cgloadint(n->a_intvalue, n->type));
    case A_STRLIT:
      return (cgloadglobstr(n->a_intvalue));
    case A_IDENT:
      // Load our value if we are an rvalue
      // or we are being dereferenced
      if (n->rvalue || parentASTop == A_DEREF) {
        if (n->sym->class == C_GLOBAL || n->sym->class == C_STATIC) {
          return (cgloadglob(n->sym, n->op));
        } else {
          return (cgloadlocal(n->sym, n->op));
        }
      } else
        return (NOREG);
    case A_ASPLUS:
    case A_ASMINUS:
    case A_ASSTAR:
    case A_ASSLASH:
    case A_ASSIGN:

      // For the '+=' and friends operators, generate suitable code
      // and get the register with the result. Then take the left child,
      // make it the right child so that we can fall into the assignment code.
      // Note that this rewrites n->right in the tree itself.
      switch (n->op) {
        case A_ASPLUS:
          leftreg = cgadd(leftreg, rightreg);
          n->right = n->left;
          break;
        case A_ASMINUS:
          leftreg = cgsub(leftreg, rightreg);
          n->right = n->left;
          break;
        case A_ASSTAR:
          leftreg = cgmul(leftreg, rightreg);
          n->right = n->left;
          break;
        case A_ASSLASH:
          leftreg = cgdiv(leftreg, rightreg);
          n->right = n->left;
          break;
      }

      // Now into the assignment code
      // Are we assigning to an identifier or through a pointer?
      switch (n->right->op) {
        case A_IDENT:
          if (n->right->sym->class == C_GLOBAL ||
              n->right->sym->class == C_STATIC)
            return (cgstorglob(leftreg, n->right->sym));
          else
            return (cgstorlocal(leftreg, n->right->sym));
        case A_DEREF:
          return (cgstorderef(leftreg, rightreg, n->right->type));
        default:
          // No break follows this call; presumably fatald() does not
          // return, otherwise control falls into A_WIDEN below
          fatald("Can't A_ASSIGN in genAST(), op", n->op);
      }
    case A_WIDEN:
      // Widen the child's type to the parent's type
      return (cgwiden(leftreg, n->left->type, n->type));
    case A_RETURN:
      cgreturn(leftreg, Functionid);
      return (NOREG);
    case A_ADDR:
      return (cgaddress(n->sym));
    case A_DEREF:
      // If we are an rvalue, dereference to get the value we point at,
      // otherwise leave it for A_ASSIGN to store through the pointer
      if (n->rvalue)
        return (cgderef(leftreg, n->left->type));
      else
        return (leftreg);
    case A_SCALE:
      // Small optimisation: use shift if the
      // scale value is a known power of two
      switch (n->a_size) {
        case 2:
          return (cgshlconst(leftreg, 1));
        case 4:
          return (cgshlconst(leftreg, 2));
        case 8:
          return (cgshlconst(leftreg, 3));
        default:
          // Load a register with the size and
          // multiply the leftreg by this size
          rightreg = cgloadint(n->a_size, P_INT);
          return (cgmul(leftreg, rightreg));
      }
    case A_POSTINC:
    case A_POSTDEC:
      // Load the variable's value into a register and
      // post increment/decrement it. The op is passed to
      // the loader so that it can pick the right variant.
      if (n->sym->class == C_GLOBAL || n->sym->class == C_STATIC)
        return (cgloadglob(n->sym, n->op));
      else
        return (cgloadlocal(n->sym, n->op));
    case A_PREINC:
    case A_PREDEC:
      // Load the variable's value into a register and
      // pre increment/decrement it. Here the symbol comes
      // from the left child, not from the node itself.
      if (n->left->sym->class == C_GLOBAL || n->left->sym->class == C_STATIC)
        return (cgloadglob(n->left->sym, n->op));
      else
        return (cgloadlocal(n->left->sym, n->op));
    case A_NEGATE:
      return (cgnegate(leftreg));
    case A_INVERT:
      return (cginvert(leftreg));
    case A_LOGNOT:
      return (cglognot(leftreg));
    case A_TOBOOL:
      // If the parent AST node is an A_IF or A_WHILE, generate
      // a compare followed by a jump. Otherwise, set the register
      // to 0 or 1 based on its zeroness or non-zeroness
      return (cgboolean(leftreg, parentASTop, iflabel));
    case A_BREAK:
      cgjump(loopendlabel);
      return (NOREG);
    case A_CONTINUE:
      cgjump(looptoplabel);
      return (NOREG);
    case A_CAST:
      return (leftreg);         // Not much to do
    default:
      fatald("Unknown AST operator", n->op);
  }
  return (NOREG);               // Keep -Wall happy
}

// The functions below are thin wrappers that forward
// to the cg*() / register functions of the same purpose.

// Output the assembly preamble
void genpreamble() {
  cgpreamble();
}

// Output the assembly postamble
void genpostamble() {
  cgpostamble();
}

// Free all registers except keepreg
void genfreeregs(int keepreg) {
  freeall_registers(keepreg);
}

// Generate the storage for a global symbol
void genglobsym(struct symtable *node) {
  cgglobsym(node);
}

// Generate a global string under a fresh label
// and return that label
int genglobstr(char *strvalue) {
  int l = genlabel();
  cgglobstr(l, strvalue);
  return (l);
}

// Return the size of a primitive type
int genprimsize(int type) {
  return (cgprimsize(type));
}

// Forward an alignment request to cgalign() and return its result
int genalign(int type, int offset, int direction) {
  return (cgalign(type, offset, direction));
}

================================================
FILE: 49_Ternary/include/ctype.h
================================================
#ifndef _CTYPE_H_
# define _CTYPE_H_

// Character classification prototypes
int isalnum(int c);
int isalpha(int c);
int iscntrl(int c);
int isdigit(int c);
int isgraph(int c);
int islower(int c);
int isprint(int c);
int ispunct(int c);
int isspace(int c);
int isupper(int c);
int isxdigit(int c);
int isascii(int c);
int isblank(int c);

#endif	// _CTYPE_H_

================================================
FILE: 49_Ternary/include/errno.h
================================================
#ifndef _ERRNO_H_
# define _ERRNO_H_

// Intentionally empty: only the include guard is provided

#endif	// _ERRNO_H_

================================================
FILE: 49_Ternary/include/fcntl.h
================================================
#ifndef _FCNTL_H_
# define _FCNTL_H_

// Access modes for open()
#define O_RDONLY 00
#define O_WRONLY 01
#define O_RDWR 02

int open(char *pathname, int flags);

#endif	// _FCNTL_H_

================================================
FILE: 49_Ternary/include/stddef.h
================================================
#ifndef _STDDEF_H_
# define _STDDEF_H_

#ifndef NULL
# define NULL (void *)0
#endif

// size_t is declared as a plain long here
typedef long size_t;

#endif	//_STDDEF_H_
================================================ FILE: 49_Ternary/include/stdio.h ================================================ #ifndef _STDIO_H_ # define _STDIO_H_ #include #ifndef NULL # define NULL (void *)0 #endif // This FILE definition will do for now typedef char * FILE; FILE *fopen(char *pathname, char *mode); size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream); size_t fwrite(void *ptr, size_t size, size_t nmemb, FILE *stream); int fclose(FILE *stream); int printf(char *format); int fprintf(FILE *stream, char *format); int fputc(int c, FILE *stream); int fputs(char *s, FILE *stream); int putc(int c, FILE *stream); int putchar(int c); int puts(char *s); extern FILE *stdin; extern FILE *stdout; extern FILE *stderr; #endif // _STDIO_H_ ================================================ FILE: 49_Ternary/include/stdlib.h ================================================ #ifndef _STDLIB_H_ # define _STDLIB_H_ void exit(int status); void _Exit(int status); void *malloc(int size); void free(void *ptr); void *calloc(int nmemb, int size); void *realloc(void *ptr, int size); #endif // _STDLIB_H_ ================================================ FILE: 49_Ternary/include/string.h ================================================ #ifndef _STRING_H_ # define _STRING_H_ char *strdup(char *s); char *strchr(char *s, int c); char *strrchr(char *s, int c); #endif // _STRING_H_ ================================================ FILE: 49_Ternary/include/unistd.h ================================================ #ifndef _UNISTD_H_ # define _UNISTD_H_ void _exit(int status); int unlink(char *pathname); #endif // _UNISTD_H_ ================================================ FILE: 49_Ternary/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Given a string with a '.' 
and at least a 1-character suffix // after the '.', change the suffix to be the given character. // Return the new string or NULL if the original string could // not be modified char *alter_suffix(char *str, char suffix) { char *posn; char *newstr; // Clone the string if ((newstr = strdup(str)) == NULL) return (NULL); // Find the '.' if ((posn = strrchr(newstr, '.')) == NULL) return (NULL); // Ensure there is a suffix posn++; if (*posn == '\0') return (NULL); // Change the suffix and NUL-terminate the string *posn = suffix; posn++; *posn = '\0'; return (newstr); } // Given an input filename, compile that file // down to assembly code. Return the new file's name static char *do_compile(char *filename) { char cmd[TEXTLEN]; // Change the input file's suffix to .s Outfilename = alter_suffix(filename, 's'); if (Outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .c on the end\n", filename); exit(1); } // Generate the pre-processor command snprintf(cmd, TEXTLEN, "%s %s %s", CPPCMD, INCDIR, filename); // Open up the pre-processor pipe if ((Infile = popen(cmd, "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", filename, strerror(errno)); exit(1); } Infilename = filename; // Create the output file if ((Outfile = fopen(Outfilename, "w")) == NULL) { fprintf(stderr, "Unable to create %s: %s\n", Outfilename, strerror(errno)); exit(1); } Line = 1; // Reset the scanner Putback = '\n'; clear_symtable(); // Clear the symbol table if (O_verbose) printf("compiling %s\n", filename); scan(&Token); // Get the first token from the input Peektoken.token= 0; // and set there is no lookahead token genpreamble(); // Output the preamble global_declarations(); // Parse the global declarations genpostamble(); // Output the postamble fclose(Outfile); // Close the output file freestaticsyms(); // Free any static symbols in the file return (Outfilename); } // Given an input filename, assemble that file // down to object code. 
Return the object filename char *do_assemble(char *filename) { char cmd[TEXTLEN]; int err; char *outfilename = alter_suffix(filename, 'o'); if (outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .s on the end\n", filename); exit(1); } // Build the assembly command and run it snprintf(cmd, TEXTLEN, "%s %s %s", ASCMD, outfilename, filename); if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Assembly of %s failed\n", filename); exit(1); } return (outfilename); } // Given a list of object files and an output filename, // link all of the object filenames together. void do_link(char *outfilename, char *objlist[]) { int cnt, size = TEXTLEN; char cmd[TEXTLEN], *cptr; int err; // Start with the linker command and the output file cptr = cmd; cnt = snprintf(cptr, size, "%s %s ", LDCMD, outfilename); cptr += cnt; size -= cnt; // Now append each object file while (*objlist != NULL) { cnt = snprintf(cptr, size, "%s ", *objlist); cptr += cnt; size -= cnt; objlist++; } if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Linking failed\n"); exit(1); } } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s [-vcST] [-o outfile] file [file ...]\n", prog); fprintf(stderr, " -v give verbose output of the compilation stages\n"); fprintf(stderr, " -c generate object files but don't link them\n"); fprintf(stderr, " -S generate assembly files but don't link them\n"); fprintf(stderr, " -T dump the AST trees for each input file\n"); fprintf(stderr, " -o outfile, produce the outfile executable file\n"); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. 
enum { MAXOBJ = 100 }; int main(int argc, char *argv[]) { char *outfilename = AOUT; char *asmfile, *objfile; char *objlist[MAXOBJ]; int i, objcnt = 0; // Initialise our variables O_dumpAST = 0; O_keepasm = 0; O_assemble = 0; O_verbose = 0; O_dolink = 1; // Scan for command-line options for (i = 1; i < argc; i++) { // No leading '-', stop scanning for options if (*argv[i] != '-') break; // For each option in this argument for (int j = 1; (*argv[i] == '-') && argv[i][j]; j++) { switch (argv[i][j]) { case 'o': outfilename = argv[++i]; // Save & skip to next argument break; case 'T': O_dumpAST = 1; break; case 'c': O_assemble = 1; O_keepasm = 0; O_dolink = 0; break; case 'S': O_keepasm = 1; O_assemble = 0; O_dolink = 0; break; case 'v': O_verbose = 1; break; default: usage(argv[0]); } } } // Ensure we have at lease one input file argument if (i >= argc) usage(argv[0]); // Work on each input file in turn while (i < argc) { asmfile = do_compile(argv[i]); // Compile the source file if (O_dolink || O_assemble) { objfile = do_assemble(asmfile); // Assemble it to object forma if (objcnt == (MAXOBJ - 2)) { fprintf(stderr, "Too many object files for the compiler to handle\n"); exit(1); } objlist[objcnt++] = objfile; // Add the object file's name objlist[objcnt] = NULL; // to the list of object files } if (!O_keepasm) // Remove the assembly file if unlink(asmfile); // we don't need to keep it i++; } // Now link all the object files together if (O_dolink) { do_link(outfilename, objlist); // If we don't need to keep the object // files, then remove them if (!O_assemble) { for (i = 0; objlist[i] != NULL; i++) unlink(objlist[i]); } } return (0); } ================================================ FILE: 49_Ternary/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" #include #include // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current token is t, // and fetch the next token. 
Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Match a comma and fetch the next token void comma(void) { match(T_COMMA, "comma"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d of %s\n", s, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d of %s\n", s1, s2, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d of %s\n", s, d, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d of %s\n", s, c, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } ================================================ FILE: 49_Ternary/opt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST Tree Optimisation Code // Copyright (c) 2019 Warren Toomey, GPL3 // Fold an AST tree with a binary operator // and two A_INTLIT children. Return either // the original tree or a new leaf node. static struct ASTnode *fold2(struct ASTnode *n) { int val, leftval, rightval; // Get the values from each child leftval = n->left->a_intvalue; rightval = n->right->a_intvalue; // Perform some of the binary operations. 
// For any AST op we can't do, return // the original tree. switch (n->op) { case A_ADD: val = leftval + rightval; break; case A_SUBTRACT: val = leftval - rightval; break; case A_MULTIPLY: val = leftval * rightval; break; case A_DIVIDE: // Don't try to divide by zero. if (rightval == 0) return (n); val = leftval / rightval; break; default: return (n); } // Return a leaf node with the new value return (mkastleaf(A_INTLIT, n->type, NULL, val)); } // Fold an AST tree with a unary operator // and one INTLIT children. Return either // the original tree or a new leaf node. static struct ASTnode *fold1(struct ASTnode *n) { int val; // Get the child value. Do the // operation if recognised. // Return the new leaf node. val = n->left->a_intvalue; switch (n->op) { case A_WIDEN: break; case A_INVERT: val = ~val; break; case A_LOGNOT: val = !val; break; default: return (n); } // Return a leaf node with the new value return (mkastleaf(A_INTLIT, n->type, NULL, val)); } // Attempt to do constant folding on // the AST tree with the root node n static struct ASTnode *fold(struct ASTnode *n) { if (n == NULL) return (NULL); // Fold on the left child, then // do the same on the right child n->left = fold(n->left); n->right = fold(n->right); // If both children are A_INTLITs, do a fold2() if (n->left && n->left->op == A_INTLIT) { if (n->right && n->right->op == A_INTLIT) n = fold2(n); else // If only the left is A_INTLIT, do a fold1() n = fold1(n); } // Return the possibly modified tree return (n); } // Optimise an AST tree by // constant folding in all sub-trees struct ASTnode *optimise(struct ASTnode *n) { n = fold(n); return (n); } ================================================ FILE: 49_Ternary/scan.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { char *p; 
p = strchr(s, c); return (p ? p - s : -1); } // Get the next character from the input file. static int next(void) { int c, l; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return (c); } c = fgetc(Infile); // Read from input file while (c == '#') { // We've hit a pre-processor statement scan(&Token); // Get the line number into l if (Token.token != T_INTLIT) fatals("Expecting pre-processor line number, got:", Text); l = Token.intvalue; scan(&Token); // Get the filename in Text if (Token.token != T_STRLIT) fatals("Expecting pre-processor file name, got:", Text); if (Text[0] != '<') { // If this is a real filename if (strcmp(Text, Infilename)) // and not the one we have now Infilename = strdup(Text); // save it. Then update the line num Line = l; } while ((c = fgetc(Infile)) != '\n'); // Skip to the end of the line c = fgetc(Infile); // and get the next character } if ('\n' == c) Line++; // Increment line count return (c); } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. 
static int skip(void) {
  int ch;

  // Read at least one character, and keep reading while it is
  // a space, tab, newline, carriage return or form feed
  do {
    ch = next();
  } while (' ' == ch || '\t' == ch || '\n' == ch || '\r' == ch || '\f' == ch);

  return (ch);
}

// Read in a hexadecimal constant from the input.
// Used for the digits that follow '\x' in a literal:
// at least one digit is required and the value must fit in 0..255.
static int hexchar(void) {
  int ch, value = 0, sawdigit = 0;

  // Accumulate hex digits, most significant first
  for (ch = next(); isxdigit(ch); ch = next()) {
    value = value * 16 + chrpos("0123456789abcdef", tolower(ch));
    sawdigit = 1;
  }

  // The character that ended the run is not ours
  putback(ch);

  if (!sawdigit)
    fatal("missing digits after '\\x'");
  if (value > 255)
    fatal("value out of range after '\\x'");
  return value;
}

// Return the next character from a character
// or string literal, translating backslash escapes.
static int scanch(void) {
  int ch, ndigits, value;

  // Anything other than a backslash is returned unchanged
  ch = next();
  if (ch != '\\')
    return (ch);

  // Interpret the character after the backslash
  ch = next();
  switch (ch) {
    case 'a':
      return ('\a');
    case 'b':
      return ('\b');
    case 'f':
      return ('\f');
    case 'n':
      return ('\n');
    case 'r':
      return ('\r');
    case 't':
      return ('\t');
    case 'v':
      return ('\v');
    case '\\':
      return ('\\');
    case '"':
      return ('"');
    case '\'':
      return ('\'');

    // Octal constant: consume octal digits, at most three of
    // them, building the value up as we go.
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
      value = 0;
      ndigits = 0;
      while (isdigit(ch) && ch < '8') {
        if (++ndigits > 3)
          break;
        value = value * 8 + (ch - '0');
        ch = next();
      }
      putback(ch);              // Put back the first unused char
      return (value);

    case 'x':
      return hexchar();

    default:
      fatalc("unknown escape sequence", ch);
  }
  return (ch);
}

// Scan and return an integer literal
// value from the input file.
static int scanint(int c) { int k, val = 0, radix = 10; // Assume the radix is 10, but if it starts with 0 if (c == '0') { // and the next character is 'x', it's radix 16 if ((c = next()) == 'x') { radix = 16; c = next(); } else // Otherwise, it's radix 8 radix = 8; } // Convert each character into an int value while ((k = chrpos("0123456789abcdef", tolower(c))) >= 0) { if (k >= radix) fatalc("invalid digit in integer literal", c); val = val * radix + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan in a string literal from the input file, // and store it in buf[]. Return the length of // the string. static int scanstr(char *buf) { int i, c; // Loop while we have enough buffer space for (i = 0; i < TEXTLEN - 1; i++) { // Get the next char and append to buf // Return when we hit the ending double quote if ((c = scanch()) == '"') { buf[i] = 0; return (i); } buf[i] = c; } // Ran out of buf[] space fatal("String literal too long"); return (0); } // Scan an identifier from the input file and // store it in buf[]. Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { fatal("Identifier too long"); } else if (i < lim - 1) { buf[i++] = c; } c = next(); } // We hit a non-valid character, put it back. // NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. 
static int keyword(char *s) { switch (*s) { case 'b': if (!strcmp(s, "break")) return (T_BREAK); break; case 'c': if (!strcmp(s, "case")) return (T_CASE); if (!strcmp(s, "char")) return (T_CHAR); if (!strcmp(s, "continue")) return (T_CONTINUE); break; case 'd': if (!strcmp(s, "default")) return (T_DEFAULT); break; case 'e': if (!strcmp(s, "else")) return (T_ELSE); if (!strcmp(s, "enum")) return (T_ENUM); if (!strcmp(s, "extern")) return (T_EXTERN); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'l': if (!strcmp(s, "long")) return (T_LONG); break; case 'r': if (!strcmp(s, "return")) return (T_RETURN); break; case 's': if (!strcmp(s, "sizeof")) return (T_SIZEOF); if (!strcmp(s, "static")) return (T_STATIC); if (!strcmp(s, "struct")) return (T_STRUCT); if (!strcmp(s, "switch")) return (T_SWITCH); break; case 't': if (!strcmp(s, "typedef")) return (T_TYPEDEF); break; case 'u': if (!strcmp(s, "union")) return (T_UNION); break; case 'v': if (!strcmp(s, "void")) return (T_VOID); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; } return (0); } // List of token strings, for debugging purposes char *Tstring[] = { "EOF", "=", "+=", "-=", "*=", "/=", "?", "||", "&&", "|", "^", "&", "==", "!=", ",", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", "default", "sizeof", "static", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":" }; // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. 
// The token number is stored in t->token and its printable
// form in t->tokstr. Integer and character literals also
// set t->intvalue. String literals and identifiers leave
// their text in the global Text buffer.
// NOTE: on end of input we return before t->tokstr is updated.
int scan(struct token *t) {
  int c, tokentype;

  // If we have a lookahead token, return this token
  // and mark the lookahead slot as empty
  if (Peektoken.token != 0) {
    t->token = Peektoken.token;
    t->tokstr = Peektoken.tokstr;
    t->intvalue = Peektoken.intvalue;
    Peektoken.token = 0;
    return (1);
  }
  // Skip whitespace
  c = skip();

  // Determine the token based on
  // the input character. For operators that can be
  // one or two characters long, read one more character
  // and put it back if it isn't part of the operator.
  switch (c) {
    case EOF:
      t->token = T_EOF;
      return (0);
    case '+':
      if ((c = next()) == '+') {
	t->token = T_INC;
      } else if (c == '=') {
	t->token = T_ASPLUS;
      } else {
	putback(c);
	t->token = T_PLUS;
      }
      break;
    case '-':
      if ((c = next()) == '-') {
	t->token = T_DEC;
      } else if (c == '>') {
	t->token = T_ARROW;
      } else if (c == '=') {
	t->token = T_ASMINUS;
      } else if (isdigit(c)) {	// Negative int literal
	// A '-' immediately followed by a digit is always
	// scanned as part of the literal, not as an operator
	t->intvalue = -scanint(c);
	t->token = T_INTLIT;
      } else {
	putback(c);
	t->token = T_MINUS;
      }
      break;
    case '*':
      if ((c = next()) == '=') {
	t->token = T_ASSTAR;
      } else {
	putback(c);
	t->token = T_STAR;
      }
      break;
    case '/':
      if ((c = next()) == '=') {
	t->token = T_ASSLASH;
      } else {
	putback(c);
	t->token = T_SLASH;
      }
      break;
    // Single-character tokens
    case ';':
      t->token = T_SEMI;
      break;
    case '{':
      t->token = T_LBRACE;
      break;
    case '}':
      t->token = T_RBRACE;
      break;
    case '(':
      t->token = T_LPAREN;
      break;
    case ')':
      t->token = T_RPAREN;
      break;
    case '[':
      t->token = T_LBRACKET;
      break;
    case ']':
      t->token = T_RBRACKET;
      break;
    case '~':
      t->token = T_INVERT;
      break;
    case '^':
      t->token = T_XOR;
      break;
    case ',':
      t->token = T_COMMA;
      break;
    case '.':
      t->token = T_DOT;
      break;
    case ':':
      t->token = T_COLON;
      break;
    case '?':
      t->token = T_QUESTION;
      break;
    case '=':
      if ((c = next()) == '=') {
	t->token = T_EQ;
      } else {
	putback(c);
	t->token = T_ASSIGN;
      }
      break;
    case '!':
      if ((c = next()) == '=') {
	t->token = T_NE;
      } else {
	putback(c);
	t->token = T_LOGNOT;
      }
      break;
    case '<':
      if ((c = next()) == '=') {
	t->token = T_LE;
      } else if (c == '<') {
	t->token = T_LSHIFT;
      } else {
	putback(c);
	t->token = T_LT;
      }
      break;
    case '>':
      if ((c = next()) == '=') {
	t->token = T_GE;
      } else if (c == '>') {
	t->token = T_RSHIFT;
      } else {
	putback(c);
	t->token = T_GT;
      }
      break;
    case '&':
      if ((c = next()) == '&') {
	t->token = T_LOGAND;
      } else {
	putback(c);
	t->token = T_AMPER;
      }
      break;
    case '|':
      if ((c = next()) == '|') {
	t->token = T_LOGOR;
      } else {
	putback(c);
	t->token = T_OR;
      }
      break;
    case '\'':
      // If it's a quote, scan in the
      // literal character value and
      // the trailing quote. A character
      // literal becomes an integer literal token.
      t->intvalue = scanch();
      t->token = T_INTLIT;
      if (next() != '\'')
	fatal("Expected '\\'' at end of char literal");
      break;
    case '"':
      // Scan in a literal string. The text is
      // left in Text; the length is not kept.
      scanstr(Text);
      t->token = T_STRLIT;
      break;
    default:
      // If it's a digit, scan the
      // literal integer value in
      if (isdigit(c)) {
	t->intvalue = scanint(c);
	t->token = T_INTLIT;
	break;
      } else if (isalpha(c) || '_' == c) {
	// Read in a keyword or identifier
	scanident(c, Text, TEXTLEN);

	// If it's a recognised keyword, return that token
	if ((tokentype = keyword(Text)) != 0) {
	  t->token = tokentype;
	  break;
	}
	// Not a recognised keyword, so it must be an identifier
	t->token = T_IDENT;
	break;
      }
      // The character isn't part of any recognised token, error
      fatalc("Unrecognised character", c);
  }

  // We found a token. Look up its printable
  // form in the token string table.
  t->tokstr = Tstring[t->token];
  return (1);
}

================================================
FILE: 49_Ternary/stmt.c
================================================
#include "defs.h"
#include "data.h"
#include "decl.h"

// Parsing of statements
// Copyright (c) 2019 Warren Toomey, GPL3

// Prototypes
static struct ASTnode *single_statement(void);

// compound_statement:          // empty, i.e.
no statement // | statement // | statement statements // ; // // statement: declaration // | expression_statement // | function_call // | if_statement // | while_statement // | for_statement // | return_statement // ; // if_statement: if_head // | if_head 'else' statement // ; // // if_head: 'if' '(' true_false_expression ')' statement ; // // Parse an IF statement including any // optional ELSE clause and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); rparen(); // Get the AST for the statement trueAST = single_statement(); // If we have an 'else', skip it // and get the AST for the statement if (Token.token == T_ELSE) { scan(&Token); falseAST = single_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, P_NONE, condAST, trueAST, falseAST, NULL, 0)); } // while_statement: 'while' '(' true_false_expression ')' statement ; // // Parse a WHILE statement and return its AST static struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); rparen(); // Get the AST for the statement. 
// Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Build and return the AST for this statement return (mkastnode(A_WHILE, P_NONE, condAST, NULL, bodyAST, NULL, 0)); } // for_statement: 'for' '(' expression_list ';' // true_false_expression ';' // expression_list ')' statement ; // // Parse a FOR statement and return its AST static struct ASTnode *for_statement(void) { struct ASTnode *condAST, *bodyAST; struct ASTnode *preopAST, *postopAST; struct ASTnode *tree; // Ensure we have 'for' '(' match(T_FOR, "for"); lparen(); // Get the pre_op expression and the ';' preopAST = expression_list(T_SEMI); semi(); // Get the condition and the ';'. // Force a non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); semi(); // Get the post_op expression and the ')' postopAST = expression_list(T_RPAREN); rparen(); // Get the statement which is the body // Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Glue the statement and the postop tree tree = mkastnode(A_GLUE, P_NONE, bodyAST, NULL, postopAST, NULL, 0); // Make a WHILE loop with the condition and this new body tree = mkastnode(A_WHILE, P_NONE, condAST, NULL, tree, NULL, 0); // And glue the preop tree to the A_WHILE tree return (mkastnode(A_GLUE, P_NONE, preopAST, NULL, tree, NULL, 0)); } // return_statement: 'return' '(' expression ')' ; // // Parse a return statement and return its AST static struct ASTnode *return_statement(void) { struct ASTnode *tree; // Can't return a value if function returns P_VOID if (Functionid->type == P_VOID) fatal("Can't return from a void function"); // Ensure we have 'return' '(' match(T_RETURN, "return"); lparen(); // Parse the following expression tree = binexpr(0); // Ensure this is compatible with the function's type tree = modify_type(tree, 
Functionid->type, 0); if (tree == NULL) fatal("Incompatible type to return"); // Add on the A_RETURN node tree = mkastunary(A_RETURN, P_NONE, tree, NULL, 0); // Get the ')' and ';' rparen(); semi(); return (tree); } // break_statement: 'break' ; // // Parse a break statement and return its AST static struct ASTnode *break_statement(void) { if (Looplevel == 0 && Switchlevel == 0) fatal("no loop or switch to break out from"); scan(&Token); semi(); return (mkastleaf(A_BREAK, 0, NULL, 0)); } // continue_statement: 'continue' ; // // Parse a continue statement and return its AST static struct ASTnode *continue_statement(void) { if (Looplevel == 0) fatal("no loop to continue to"); scan(&Token); semi(); return (mkastleaf(A_CONTINUE, 0, NULL, 0)); } // Parse a switch statement and return its AST static struct ASTnode *switch_statement(void) { struct ASTnode *left, *n, *c, *casetree= NULL, *casetail; int inloop=1, casecount=0; int seendefault=0; int ASTop, casevalue; // Skip the 'switch' and '(' scan(&Token); lparen(); // Get the switch expression, the ')' and the '{' left= binexpr(0); rparen(); lbrace(); // Ensure that this is of int type if (!inttype(left->type)) fatal("Switch expression is not of integer type"); // Build an A_SWITCH subtree with the expression as // the child n= mkastunary(A_SWITCH, 0, left, NULL, 0); // Now parse the cases Switchlevel++; while (inloop) { switch(Token.token) { // Leave the loop when we hit a '}' case T_RBRACE: if (casecount==0) fatal("No cases in switch"); inloop=0; break; case T_CASE: case T_DEFAULT: // Ensure this isn't after a previous 'default' if (seendefault) fatal("case or default after existing default"); // Set the AST operation. 
Scan the case value if required if (Token.token==T_DEFAULT) { ASTop= A_DEFAULT; seendefault= 1; scan(&Token); } else { ASTop= A_CASE; scan(&Token); left= binexpr(0); // Ensure the case value is an integer literal if (left->op != A_INTLIT) fatal("Expecting integer literal for case value"); casevalue= left->a_intvalue; // Walk the list of existing case values to ensure // that there isn't a duplicate case value for (c= casetree; c != NULL; c= c -> right) if (casevalue == c->a_intvalue) fatal("Duplicate case value"); } // Scan the ':' and get the compound expression match(T_COLON, ":"); left= compound_statement(1); casecount++; // Build a sub-tree with the compound statement as the left child // and link it in to the growing A_CASE tree if (casetree==NULL) { casetree= casetail= mkastunary(ASTop, 0, left, NULL, casevalue); } else { casetail->right= mkastunary(ASTop, 0, left, NULL, casevalue); casetail= casetail->right; } break; default: fatals("Unexpected token in switch", Token.tokstr); } } Switchlevel--; // We have a sub-tree with the cases and any default. Put the // case count into the A_SWITCH node and attach the case tree. n->a_intvalue= casecount; n->right= casetree; rbrace(); return(n); } // Parse a single statement and return its AST. static struct ASTnode *single_statement(void) { struct ASTnode *stmt; struct symtable *ctype; switch (Token.token) { case T_LBRACE: // We have a '{', so this is a compound statement lbrace(); stmt = compound_statement(0); rbrace(); return(stmt); case T_IDENT: // We have to see if the identifier matches a typedef. // If not, treat it as an expression. // Otherwise, fall down to the parse_type() call. if (findtypedef(Text) == NULL) { stmt= binexpr(0); semi(); return(stmt); } case T_CHAR: case T_INT: case T_LONG: case T_STRUCT: case T_UNION: case T_ENUM: case T_TYPEDEF: // The beginning of a variable declaration list. 
declaration_list(&ctype, C_LOCAL, T_SEMI, T_EOF, &stmt); semi(); return (stmt); // Any assignments from the declarations case T_IF: return (if_statement()); case T_WHILE: return (while_statement()); case T_FOR: return (for_statement()); case T_RETURN: return (return_statement()); case T_BREAK: return (break_statement()); case T_CONTINUE: return (continue_statement()); case T_SWITCH: return (switch_statement()); default: // For now, see if this is an expression. // This catches assignment statements. stmt= binexpr(0); semi(); return(stmt); } return (NULL); // Keep -Wall happy } // Parse a compound statement // and return its AST. If inswitch is true, // we look for a '}', 'case' or 'default' token // to end the parsing. Otherwise, look for // just a '}' to end the parsing. struct ASTnode *compound_statement(int inswitch) { struct ASTnode *left = NULL; struct ASTnode *tree; while (1) { // Parse a single statement tree = single_statement(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, P_NONE, left, NULL, tree, NULL, 0); } // Leave if we've hit the end token if (Token.token == T_RBRACE) return(left); if (inswitch && (Token.token == T_CASE || Token.token == T_DEFAULT)) return(left); } } ================================================ FILE: 49_Ternary/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 // Append a node to the singly-linked list pointed to by head or tail void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node) { // Check for valid pointers if (head == NULL || tail == NULL || node == NULL) fatal("Either head, tail or node is NULL in appendsym"); // Append to the list if (*tail) { (*tail)->next = node; *tail = node; } else *head = *tail = node; node->next = 
NULL; } // Create a symbol node to be added to a symbol table list. // Set up the node's: // + type: char, int etc. // + ctype: composite type pointer for struct/union // + structural type: var, function, array etc. // + size: number of elements, or endlabel: end label for a function // + posn: Position information for local symbols // Return a pointer to the new node struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn) { // Get a new node struct symtable *node = (struct symtable *) malloc(sizeof(struct symtable)); if (node == NULL) fatal("Unable to malloc a symbol table node in newsym"); // Fill in the values if (name == NULL) node->name = NULL; else node->name = strdup(name); node->type = type; node->ctype = ctype; node->stype = stype; node->class = class; node->nelems = nelems; // For pointers and integer types, set the size // of the symbol. structs and union declarations // manually set this up themselves. if (ptrtype(type) || inttype(type)) node->size = nelems * typesize(type, ctype); node->st_posn = posn; node->next = NULL; node->member = NULL; node->initlist = NULL; return (node); } // Add a symbol to the global symbol list struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn) { struct symtable *sym = newsym(name, type, ctype, stype, class, nelems, posn); // For structs and unions, copy the size from the type node if (type== P_STRUCT || type== P_UNION) sym->size = ctype->size; appendsym(&Globhead, &Globtail, sym); return (sym); } // Add a symbol to the local symbol list struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int nelems) { struct symtable *sym = newsym(name, type, ctype, stype, C_LOCAL, nelems, 0); // For structs and unions, copy the size from the type node if (type== P_STRUCT || type== P_UNION) sym->size = ctype->size; appendsym(&Loclhead, &Locltail, sym); return (sym); } // Add a symbol to the 
parameter list struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype) { struct symtable *sym = newsym(name, type, ctype, stype, C_PARAM, 1, 0); appendsym(&Parmhead, &Parmtail, sym); return (sym); } // Add a symbol to the temporary member list struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int nelems) { struct symtable *sym = newsym(name, type, ctype, stype, C_MEMBER, nelems, 0); // For structs and unions, copy the size from the type node if (type== P_STRUCT || type== P_UNION) sym->size = ctype->size; appendsym(&Membhead, &Membtail, sym); return (sym); } // Add a struct to the struct list struct symtable *addstruct(char *name) { struct symtable *sym = newsym(name, P_STRUCT, NULL, 0, C_STRUCT, 0, 0); appendsym(&Structhead, &Structtail, sym); return (sym); } // Add a struct to the union list struct symtable *addunion(char *name) { struct symtable *sym = newsym(name, P_UNION, NULL, 0, C_UNION, 0, 0); appendsym(&Unionhead, &Uniontail, sym); return (sym); } // Add an enum type or value to the enum list. // Class is C_ENUMTYPE or C_ENUMVAL. // Use posn to store the int value. struct symtable *addenum(char *name, int class, int value) { struct symtable *sym = newsym(name, P_INT, NULL, 0, class, 0, value); appendsym(&Enumhead, &Enumtail, sym); return (sym); } // Add a typedef to the typedef list struct symtable *addtypedef(char *name, int type, struct symtable *ctype) { struct symtable *sym = newsym(name, type, ctype, 0, C_TYPEDEF, 0, 0); appendsym(&Typehead, &Typetail, sym); return (sym); } // Search for a symbol in a specific list. // Return a pointer to the found node or NULL if not found. 
// If class is not zero, also match on the given class static struct symtable *findsyminlist(char *s, struct symtable *list, int class) { for (; list != NULL; list = list->next) if ((list->name != NULL) && !strcmp(s, list->name)) if (class==0 || class== list->class) return (list); return (NULL); } // Determine if the symbol s is in the global symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findglob(char *s) { return (findsyminlist(s, Globhead, 0)); } // Determine if the symbol s is in the local symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findlocl(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } return (findsyminlist(s, Loclhead, 0)); } // Determine if the symbol s is in the symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findsymbol(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } // Otherwise, try the local and global symbol lists node = findsyminlist(s, Loclhead, 0); if (node) return (node); return (findsyminlist(s, Globhead, 0)); } // Find a member in the member list // Return a pointer to the found node or NULL if not found. struct symtable *findmember(char *s) { return (findsyminlist(s, Membhead, 0)); } // Find a struct in the struct list // Return a pointer to the found node or NULL if not found. struct symtable *findstruct(char *s) { return (findsyminlist(s, Structhead, 0)); } // Find a struct in the union list // Return a pointer to the found node or NULL if not found. struct symtable *findunion(char *s) { return (findsyminlist(s, Unionhead, 0)); } // Find an enum type in the enum list // Return a pointer to the found node or NULL if not found. 
struct symtable *findenumtype(char *s) { return (findsyminlist(s, Enumhead, C_ENUMTYPE)); } // Find an enum value in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumval(char *s) { return (findsyminlist(s, Enumhead, C_ENUMVAL)); } // Find a type in the tyedef list // Return a pointer to the found node or NULL if not found. struct symtable *findtypedef(char *s) { return (findsyminlist(s, Typehead, 0)); } // Reset the contents of the symbol table void clear_symtable(void) { Globhead = Globtail = NULL; Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Membhead = Membtail = NULL; Structhead = Structtail = NULL; Unionhead = Uniontail = NULL; Enumhead = Enumtail = NULL; Typehead = Typetail = NULL; } // Clear all the entries in the local symbol table void freeloclsyms(void) { Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Functionid = NULL; } // Remove all static symbols from the global symbol table void freestaticsyms(void) { // g points at current node, prev at the previous one struct symtable *g, *prev= NULL; // Walk the global table looking for static entries for (g= Globhead; g != NULL; g= g->next) { if (g->class == C_STATIC) { // If there's a previous node, rearrange the prev pointer // to skip over the current node. 
If not, g is the head, // so do the same to Globhead if (prev != NULL) prev->next= g->next; else Globhead->next= g->next; // If g is the tail, point Globtail at the previous node // (if there is one), or Globhead if (g == Globtail) { if (prev != NULL) Globtail= prev; else Globtail= Globhead; } } } // Point prev at g before we move up to the next node prev= g; } ================================================ FILE: 49_Ternary/tests/err.input031.c ================================================ Expecting a primary expression, got token:+ on line 5 of input031.c ================================================ FILE: 49_Ternary/tests/err.input032.c ================================================ Unknown variable:cow on line 4 of input032.c ================================================ FILE: 49_Ternary/tests/err.input033.c ================================================ Incompatible type to return on line 4 of input033.c ================================================ FILE: 49_Ternary/tests/err.input034.c ================================================ For now, declaration of non-global arrays is not implemented on line 4 of input034.c ================================================ FILE: 49_Ternary/tests/err.input035.c ================================================ Duplicate local variable declaration:a on line 4 of input035.c ================================================ FILE: 49_Ternary/tests/err.input036.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input036.c ================================================ FILE: 49_Ternary/tests/err.input037.c ================================================ Expected:comma on line 3 of input037.c ================================================ FILE: 49_Ternary/tests/err.input038.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input038.c ================================================ FILE: 
49_Ternary/tests/err.input039.c ================================================ No statements in function with non-void type on line 4 of input039.c ================================================ FILE: 49_Ternary/tests/err.input040.c ================================================ No return for function with non-void type on line 4 of input040.c ================================================ FILE: 49_Ternary/tests/err.input041.c ================================================ Can't return from a void function on line 3 of input041.c ================================================ FILE: 49_Ternary/tests/err.input042.c ================================================ Undeclared function:fred on line 3 of input042.c ================================================ FILE: 49_Ternary/tests/err.input043.c ================================================ Undeclared array:b on line 3 of input043.c ================================================ FILE: 49_Ternary/tests/err.input044.c ================================================ Unknown variable:z on line 3 of input044.c ================================================ FILE: 49_Ternary/tests/err.input045.c ================================================ & operator must be followed by an identifier on line 3 of input045.c ================================================ FILE: 49_Ternary/tests/err.input046.c ================================================ * operator must be followed by an identifier or * on line 3 of input046.c ================================================ FILE: 49_Ternary/tests/err.input047.c ================================================ ++ operator must be followed by an identifier on line 3 of input047.c ================================================ FILE: 49_Ternary/tests/err.input048.c ================================================ -- operator must be followed by an identifier on line 3 of input048.c ================================================ FILE: 
49_Ternary/tests/err.input049.c ================================================ Incompatible expression in assignment on line 6 of input049.c ================================================ FILE: 49_Ternary/tests/err.input050.c ================================================ Incompatible types in binary expression on line 6 of input050.c ================================================ FILE: 49_Ternary/tests/err.input051.c ================================================ Expected '\'' at end of char literal on line 4 of input051.c ================================================ FILE: 49_Ternary/tests/err.input052.c ================================================ Unrecognised character:$ on line 5 of input052.c ================================================ FILE: 49_Ternary/tests/err.input056.c ================================================ unknown struct/union type:var1 on line 2 of input056.c ================================================ FILE: 49_Ternary/tests/err.input057.c ================================================ previously defined struct/union:fred on line 2 of input057.c ================================================ FILE: 49_Ternary/tests/err.input059.c ================================================ Undeclared variable:y on line 3 of input059.c ================================================ FILE: 49_Ternary/tests/err.input060.c ================================================ Undeclared variable:x on line 3 of input060.c ================================================ FILE: 49_Ternary/tests/err.input061.c ================================================ Undeclared variable:x on line 3 of input061.c ================================================ FILE: 49_Ternary/tests/err.input064.c ================================================ undeclared enum type::fred on line 1 of input064.c ================================================ FILE: 49_Ternary/tests/err.input065.c ================================================ enum type 
redeclared::fred on line 2 of input065.c ================================================ FILE: 49_Ternary/tests/err.input066.c ================================================ enum value redeclared::z on line 2 of input066.c ================================================ FILE: 49_Ternary/tests/err.input068.c ================================================ redefinition of typedef:FOO on line 2 of input068.c ================================================ FILE: 49_Ternary/tests/err.input069.c ================================================ unknown type:FLOO on line 2 of input069.c ================================================ FILE: 49_Ternary/tests/err.input072.c ================================================ no loop or switch to break out from on line 1 of input072.c ================================================ FILE: 49_Ternary/tests/err.input073.c ================================================ no loop to continue to on line 1 of input073.c ================================================ FILE: 49_Ternary/tests/err.input075.c ================================================ Unexpected token in switch:if on line 4 of input075.c ================================================ FILE: 49_Ternary/tests/err.input076.c ================================================ No cases in switch on line 3 of input076.c ================================================ FILE: 49_Ternary/tests/err.input077.c ================================================ case or default after existing default on line 6 of input077.c ================================================ FILE: 49_Ternary/tests/err.input078.c ================================================ case or default after existing default on line 6 of input078.c ================================================ FILE: 49_Ternary/tests/err.input079.c ================================================ Duplicate case value on line 6 of input079.c ================================================ FILE: 
49_Ternary/tests/err.input085.c ================================================ Bad type in parameter list on line 1 of input085.c ================================================ FILE: 49_Ternary/tests/err.input086.c ================================================ Function definition not at global level on line 3 of input086.c ================================================ FILE: 49_Ternary/tests/err.input087.c ================================================ Bad type in member list on line 4 of input087.c ================================================ FILE: 49_Ternary/tests/err.input092.c ================================================ Type mismatch: literal vs. variable on line 1 of input092.c ================================================ FILE: 49_Ternary/tests/err.input093.c ================================================ Unknown variable:fred on line 1 of input093.c ================================================ FILE: 49_Ternary/tests/err.input094.c ================================================ Type mismatch: literal vs. 
variable on line 1 of input094.c ================================================ FILE: 49_Ternary/tests/err.input095.c ================================================ Variable can not be initialised:x on line 1 of input095.c ================================================ FILE: 49_Ternary/tests/err.input096.c ================================================ Array size is illegal:0 on line 1 of input096.c ================================================ FILE: 49_Ternary/tests/err.input097.c ================================================ For now, declaration of non-global arrays is not implemented on line 2 of input097.c ================================================ FILE: 49_Ternary/tests/err.input098.c ================================================ Too many values in initialisation list on line 1 of input098.c ================================================ FILE: 49_Ternary/tests/err.input102.c ================================================ Cannot cast to a struct, union or void type on line 3 of input102.c ================================================ FILE: 49_Ternary/tests/err.input103.c ================================================ Cannot cast to a struct, union or void type on line 3 of input103.c ================================================ FILE: 49_Ternary/tests/err.input104.c ================================================ Cannot cast to a struct, union or void type on line 2 of input104.c ================================================ FILE: 49_Ternary/tests/err.input105.c ================================================ Incompatible expression in assignment on line 4 of input105.c ================================================ FILE: 49_Ternary/tests/err.input118.c ================================================ Compiler doesn't support static or extern local declarations on line 2 of input118.c ================================================ FILE: 49_Ternary/tests/input001.c ================================================ int 
printf(char *fmt); void main() { printf("%d\n", 12 * 3); printf("%d\n", 18 - 2 * 4); printf("%d\n", 1 + 2 + 9 - 5/2 + 3*5); } ================================================ FILE: 49_Ternary/tests/input002.c ================================================ int printf(char *fmt); void main() { int fred; int jim; fred= 5; jim= 12; printf("%d\n", fred + jim); } ================================================ FILE: 49_Ternary/tests/input003.c ================================================ int printf(char *fmt); void main() { int x; x= 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); } ================================================ FILE: 49_Ternary/tests/input004.c ================================================ int printf(char *fmt); void main() { int x; x= 7 < 9; printf("%d\n", x); x= 7 <= 9; printf("%d\n", x); x= 7 != 9; printf("%d\n", x); x= 7 == 7; printf("%d\n", x); x= 7 >= 7; printf("%d\n", x); x= 7 <= 7; printf("%d\n", x); x= 9 > 7; printf("%d\n", x); x= 9 >= 7; printf("%d\n", x); x= 9 != 7; printf("%d\n", x); } ================================================ FILE: 49_Ternary/tests/input005.c ================================================ int printf(char *fmt); void main() { int i; int j; i=6; j=12; if (i < j) { printf("%d\n", i); } else { printf("%d\n", j); } } ================================================ FILE: 49_Ternary/tests/input006.c ================================================ int printf(char *fmt); void main() { int i; i=1; while (i <= 10) { printf("%d\n", i); i= i + 1; } } ================================================ FILE: 49_Ternary/tests/input007.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 49_Ternary/tests/input008.c ================================================ int printf(char 
*fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 49_Ternary/tests/input009.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { printf("%d\n", 2 * b - a); } } ================================================ FILE: 49_Ternary/tests/input010.c ================================================ int printf(char *fmt); void main() { int i; char j; j= 20; printf("%d\n", j); i= 10; printf("%d\n", i); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 2; j= j + 1) { printf("%d\n", j); } } ================================================ FILE: 49_Ternary/tests/input011.c ================================================ int printf(char *fmt); int main() { int i; char j; long k; i= 10; printf("%d\n", i); j= 20; printf("%d\n", j); k= 30; printf("%d\n", k); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 4; j= j + 1) { printf("%d\n", j); } for (k= 1; k <= 5; k= k + 1) { printf("%d\n", k); } return(i); printf("%d\n", 12345); return(3); } ================================================ FILE: 49_Ternary/tests/input012.c ================================================ int printf(char *fmt); int fred() { return(5); } void main() { int x; x= fred(2); printf("%d\n", x); } ================================================ FILE: 49_Ternary/tests/input013.c ================================================ int printf(char *fmt); int fred() { return(56); } void main() { int dummy; int result; dummy= printf("%d\n", 23); result= fred(10); dummy= printf("%d\n", result); } ================================================ FILE: 49_Ternary/tests/input014.c ================================================ int printf(char *fmt); int fred() { return(20); } int main() { int result; printf("%d\n", 10); result= fred(15); 
printf("%d\n", result); printf("%d\n", fred(15)+10); return(0); } ================================================ FILE: 49_Ternary/tests/input015.c ================================================ int printf(char *fmt); int main() { char a; char *b; char c; int d; int *e; int f; a= 18; printf("%d\n", a); b= &a; c= *b; printf("%d\n", c); d= 12; printf("%d\n", d); e= &d; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 49_Ternary/tests/input016.c ================================================ int printf(char *fmt); int c; int d; int *e; int f; int main() { c= 12; d=18; printf("%d\n", c); e= &c + 1; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 49_Ternary/tests/input017.c ================================================ int printf(char *fmt); int main() { char a; char *b; int d; int *e; b= &a; *b= 19; printf("%d\n", a); e= &d; *e= 12; printf("%d\n", d); return(0); } ================================================ FILE: 49_Ternary/tests/input018.c ================================================ int printf(char *fmt); int main() { int a; int b; a= b= 34; printf("%d\n", a); printf("%d\n", b); return(0); } ================================================ FILE: 49_Ternary/tests/input018a.c ================================================ int printf(char *fmt); int a; int *b; char c; char *d; int main() { b= &a; *b= 15; printf("%d\n", a); d= &c; *d= 16; printf("%d\n", c); return(0); } ================================================ FILE: 49_Ternary/tests/input019.c ================================================ int printf(char *fmt); int a; int b; int c; int d; int e; int main() { a= 2; b= 4; c= 3; d= 2; e= (a+b) * (c+d); printf("%d\n", e); return(0); } ================================================ FILE: 49_Ternary/tests/input020.c ================================================ int printf(char *fmt); int a; int b[25]; int main() { b[3]= 12; a= b[3]; printf("%d\n", a); 
return(0); } ================================================ FILE: 49_Ternary/tests/input021.c ================================================ int printf(char *fmt); char c; char *str; int main() { c= '\n'; printf("%d\n", c); for (str= "Hello world\n"; *str != 0; str= str + 1) { printf("%c", *str); } return(0); } ================================================ FILE: 49_Ternary/tests/input022.c ================================================ int printf(char *fmt); char a; char b; char c; int d; int e; int f; long g; long h; long i; int main() { b= 5; c= 7; a= b + c++; printf("%d\n", a); e= 5; f= 7; d= e + f++; printf("%d\n", d); h= 5; i= 7; g= h + i++; printf("%d\n", g); a= b-- + c; printf("%d\n", a); d= e-- + f; printf("%d\n", d); g= h-- + i; printf("%d\n", g); a= ++b + c; printf("%d\n", a); d= ++e + f; printf("%d\n", d); g= ++h + i; printf("%d\n", g); a= b * --c; printf("%d\n", a); d= e * --f; printf("%d\n", d); g= h * --i; printf("%d\n", g); return(0); } ================================================ FILE: 49_Ternary/tests/input023.c ================================================ int printf(char *fmt); char *str; int x; int main() { x= -23; printf("%d\n", x); printf("%d\n", -10 * -10); x= 1; x= ~x; printf("%d\n", x); x= 2 > 5; printf("%d\n", x); x= !x; printf("%d\n", x); x= !x; printf("%d\n", x); x= 13; if (x) { printf("%d\n", 13); } x= 0; if (!x) { printf("%d\n", 14); } for (str= "Hello world\n"; *str; str++) { printf("%c", *str); } return(0); } ================================================ FILE: 49_Ternary/tests/input024.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { a= 42; b= 19; printf("%d\n", a & b); printf("%d\n", a | b); printf("%d\n", a ^ b); printf("%d\n", 1 << 3); printf("%d\n", 63 >> 3); return(0); } ================================================ FILE: 49_Ternary/tests/input025.c ================================================ int printf(char *fmt); int a; int b; int c; int 
main() { char z; int y; int x; x= 10; y= 20; z= 30; printf("%d\n", x); printf("%d\n", y); printf("%d\n", z); a= 5; b= 15; c= 25; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); return(0); } ================================================ FILE: 49_Ternary/tests/input026.c ================================================ int printf(char *fmt); int main(int a, char b, long c, int d, int e, int f, int g, int h) { int i; int j; int k; a= 13; printf("%d\n", a); b= 23; printf("%d\n", b); c= 34; printf("%d\n", c); d= 44; printf("%d\n", d); e= 54; printf("%d\n", e); f= 64; printf("%d\n", f); g= 74; printf("%d\n", g); h= 84; printf("%d\n", h); i= 94; printf("%d\n", i); j= 95; printf("%d\n", j); k= 96; printf("%d\n", k); return(0); } ================================================ FILE: 49_Ternary/tests/input027.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int param5(int a, int b, int c, int d, int e) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param2(int a, int b) { int c; int d; int e; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param0() { int a; int b; int c; int d; int e; a= 1; b= 2; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int main() { param8(1,2,3,4,5,6,7,8); param5(1,2,3,4,5); param2(1,2); param0(); return(0); } ================================================ FILE: 49_Ternary/tests/input028.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); 
printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 49_Ternary/tests/input029.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h); int fred(int a, int b, int c); int main(); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 49_Ternary/tests/input030.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input030.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 49_Ternary/tests/input031.c ================================================ int printf(char *fmt); int main() { int x; x= 2 + + 3 - * / ; } ================================================ FILE: 49_Ternary/tests/input032.c ================================================ int printf(char *fmt); int main() { pizza cow llama sausage; } ================================================ FILE: 49_Ternary/tests/input033.c ================================================ int printf(char *fmt); int 
main() { char *z; return(z); } ================================================ FILE: 49_Ternary/tests/input034.c ================================================ int printf(char *fmt); int main() { int a[12]; return(0); } ================================================ FILE: 49_Ternary/tests/input035.c ================================================ int printf(char *fmt); int fred(int a, int b) { int a; return(a); } ================================================ FILE: 49_Ternary/tests/input036.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c); ================================================ FILE: 49_Ternary/tests/input037.c ================================================ int printf(char *fmt); int fred(int a, char b +, int z); ================================================ FILE: 49_Ternary/tests/input038.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c, int g); ================================================ FILE: 49_Ternary/tests/input039.c ================================================ int printf(char *fmt); int main() { int a; } ================================================ FILE: 49_Ternary/tests/input040.c ================================================ int printf(char *fmt); int main() { int a; a= 5; } ================================================ FILE: 49_Ternary/tests/input041.c ================================================ int printf(char *fmt); void fred() { return(5); } ================================================ FILE: 49_Ternary/tests/input042.c ================================================ int printf(char *fmt); int main() { fred(5); } ================================================ FILE: 49_Ternary/tests/input043.c ================================================ int printf(char *fmt); int main() { int a; a= b[4]; } 
================================================ FILE: 49_Ternary/tests/input044.c ================================================ int printf(char *fmt); int main() { int a; a= z; } ================================================ FILE: 49_Ternary/tests/input045.c ================================================ int printf(char *fmt); int main() { int a; a= &5; } ================================================ FILE: 49_Ternary/tests/input046.c ================================================ int printf(char *fmt); int main() { int a; a= *5; } ================================================ FILE: 49_Ternary/tests/input047.c ================================================ int printf(char *fmt); int main() { int a; a= ++5; } ================================================ FILE: 49_Ternary/tests/input048.c ================================================ int printf(char *fmt); int main() { int a; a= --5; } ================================================ FILE: 49_Ternary/tests/input049.c ================================================ int printf(char *fmt); int main() { int x; char y; y= x; } ================================================ FILE: 49_Ternary/tests/input050.c ================================================ int printf(char *fmt); int main() { char *a; char *b; a= a + b; } ================================================ FILE: 49_Ternary/tests/input051.c ================================================ int printf(char *fmt); int main() { char a; a= 'fred'; } ================================================ FILE: 49_Ternary/tests/input052.c ================================================ int printf(char *fmt); int main() { int a; a= $5.00; } ================================================ FILE: 49_Ternary/tests/input053.c ================================================ int printf(char *fmt); int main() { printf("Hello world, %d\n", 23); return(0); } ================================================ FILE: 49_Ternary/tests/input054.c 
================================================ int printf(char *fmt); int main() { int i; for (i=0; i < 20; i++) { printf("Hello world, %d\n", i); } return(0); } ================================================ FILE: 49_Ternary/tests/input055.c ================================================ int printf(char *fmt); int main(int argc, char **argv) { int i; char *argument; printf("Hello world\n"); for (i=0; i < argc; i++) { argument= *argv; argv= argv + 1; printf("Argument %d is %s\n", i, argument); } return(0); } ================================================ FILE: 49_Ternary/tests/input056.c ================================================ struct foo { int x; }; struct mary var1; ================================================ FILE: 49_Ternary/tests/input057.c ================================================ struct fred { int x; } ; struct fred { char y; } ; ================================================ FILE: 49_Ternary/tests/input058.c ================================================ int printf(char *fmt); struct fred { int x; char y; long z; }; struct fred var2; struct fred *varptr; int main() { long result; var2.x= 12; printf("%d\n", var2.x); var2.y= 'c'; printf("%d\n", var2.y); var2.z= 4005; printf("%d\n", var2.z); result= var2.x + var2.y + var2.z; printf("%d\n", result); varptr= &var2; result= varptr->x + varptr->y + varptr->z; printf("%d\n", result); return(0); } ================================================ FILE: 49_Ternary/tests/input059.c ================================================ int main() { int x; x= y.foo; } ================================================ FILE: 49_Ternary/tests/input060.c ================================================ int main() { int x; x= x.foo; } ================================================ FILE: 49_Ternary/tests/input061.c ================================================ int main() { int x; x= x->foo; } ================================================ FILE: 49_Ternary/tests/input062.c 
================================================ int printf(char *fmt); union fred { char w; int x; int y; long z; }; union fred var1; union fred *varptr; int main() { var1.x= 65; printf("%d\n", var1.x); var1.x= 66; printf("%d\n", var1.x); printf("%d\n", var1.y); printf("The next two depend on the endian of the platform\n"); printf("%d\n", var1.w); printf("%d\n", var1.z); varptr= &var1; varptr->x= 67; printf("%d\n", varptr->x); printf("%d\n", varptr->y); return(0); } ================================================ FILE: 49_Ternary/tests/input063.c ================================================ int printf(char *fmt); enum fred { apple=1, banana, carrot, pear=10, peach, mango, papaya }; enum jane { aple=1, bnana, crrot, par=10, pech, mago, paaya }; enum fred var1; enum jane var2; enum fred var3; int main() { var1= carrot + pear + mango; printf("%d\n", var1); return(0); } ================================================ FILE: 49_Ternary/tests/input064.c ================================================ enum fred var3; ================================================ FILE: 49_Ternary/tests/input065.c ================================================ enum fred { x, y, z }; enum fred { a, b }; ================================================ FILE: 49_Ternary/tests/input066.c ================================================ enum fred { x, y, z }; enum mary { a, b, z }; ================================================ FILE: 49_Ternary/tests/input067.c ================================================ int printf(char *fmt); typedef int FOO; FOO var1; struct bar { int x; int y} ; typedef struct bar BAR; BAR var2; int main() { var1= 5; printf("%d\n", var1); var2.x= 7; var2.y= 10; printf("%d\n", var2.x + var2.y); return(0); } ================================================ FILE: 49_Ternary/tests/input068.c ================================================ typedef int FOO; typedef char FOO; ================================================ FILE: 49_Ternary/tests/input069.c 
================================================ typedef int FOO; FLOO y; ================================================ FILE: 49_Ternary/tests/input070.c ================================================ #include typedef int FOO; int main() { FOO x; x= 56; printf("%d\n", x); return(0); } ================================================ FILE: 49_Ternary/tests/input071.c ================================================ #include int main() { int x; x = 0; while (x < 100) { if (x == 5) { x = x + 2; continue; } printf("%d\n", x); if (x == 14) { break; } x = x + 1; } printf("Done\n"); return (0); } ================================================ FILE: 49_Ternary/tests/input072.c ================================================ int main() { break; } ================================================ FILE: 49_Ternary/tests/input073.c ================================================ int main() { continue; } ================================================ FILE: 49_Ternary/tests/input074.c ================================================ #include int main() { int x; int y; y= 0; for (x=0; x < 5; x++) { switch(x) { case 1: { y= 5; break; } case 2: { y= 7; break; } case 3: { y= 9; } default: { y= 100; } } printf("%d\n", y); } return(0); } ================================================ FILE: 49_Ternary/tests/input075.c ================================================ int main() { int x; switch(x) { if (x<5); } } ================================================ FILE: 49_Ternary/tests/input076.c ================================================ int main() { int x; switch(x) { } } ================================================ FILE: 49_Ternary/tests/input077.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } case 4: { x= 2; } } } ================================================ FILE: 49_Ternary/tests/input078.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } 
default: { x= 3; } default: { x= 2; } } } ================================================ FILE: 49_Ternary/tests/input079.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } case 2: { x= 2; } case 1: { x= 2; } default: { x= 2; } } } ================================================ FILE: 49_Ternary/tests/input080.c ================================================ #include int x; int y; int main() { for (x=0, y=1; x < 6; x++, y=y+2) { printf("%d %d\n", x, y); } return(0); } ================================================ FILE: 49_Ternary/tests/input081.c ================================================ #include int x; int y; int main() { x= 0; y=1; for (;x<5;) { printf("%d %d\n", x, y); x=x+1; y=y+2; } return(0); } ================================================ FILE: 49_Ternary/tests/input082.c ================================================ #include int main() { int x; x= 10; // Dangling else test if (x > 5) if (x > 15) printf("x > 15\n"); else printf("15 >= x > 5\n"); else printf("5 >= x\n"); return(0); } ================================================ FILE: 49_Ternary/tests/input083.c ================================================ #include int main() { int x; // Dangling else test. 
// We should not print anything for x<= 5 for (x=0; x < 12; x++) if (x > 5) if (x > 10) printf("10 < %2d\n", x); else printf(" 5 < %2d <= 10\n", x); return(0); } ================================================ FILE: 49_Ternary/tests/input084.c ================================================ #include int main() { int x, y; x=2; y=3; printf("%d %d\n", x, y); char a, *b; a= 'f'; b= &a; printf("%c %c\n", a, *b); return(0); } ================================================ FILE: 49_Ternary/tests/input085.c ================================================ int main(struct foo { int x; }; , int a) { return(0); } ================================================ FILE: 49_Ternary/tests/input086.c ================================================ int main() { int fred() { return(5); } int x; x=2; return(x); } ================================================ FILE: 49_Ternary/tests/input087.c ================================================ struct foo { int x; int y; struct blah { int g; }; }; ================================================ FILE: 49_Ternary/tests/input088.c ================================================ #include struct foo { int x; int y; } fred, mary; struct foo james; int main() { fred.x= 5; mary.y= 6; printf("%d %d\n", fred.x, mary.y); return(0); } ================================================ FILE: 49_Ternary/tests/input089.c ================================================ #include int x= 23; char y= 'H'; char *z= "Hello world"; int main() { printf("%d %c %s\n", x, y, z); return(0); } ================================================ FILE: 49_Ternary/tests/input090.c ================================================ #include int a = 23, b = 100; char y = 'H', *z = "Hello world"; int main() { printf("%d %d %c %s\n", a, b, y, z); return (0); } ================================================ FILE: 49_Ternary/tests/input091.c ================================================ #include int fred[] = { 1, 2, 3, 4, 5 }; int jim[10] = { 1, 2, 3, 4, 5 }; int 
main() { int i; for (i=0; i < 5; i++) printf("%d\n", fred[i]); for (i=0; i < 10; i++) printf("%d\n", jim[i]); return(0); } ================================================ FILE: 49_Ternary/tests/input092.c ================================================ char x= 3000; ================================================ FILE: 49_Ternary/tests/input093.c ================================================ char x= fred; ================================================ FILE: 49_Ternary/tests/input094.c ================================================ char *s= 54; ================================================ FILE: 49_Ternary/tests/input095.c ================================================ int fred(int x=2) { return(x); } ================================================ FILE: 49_Ternary/tests/input096.c ================================================ int fred[0]; ================================================ FILE: 49_Ternary/tests/input097.c ================================================ int main() { int x[45]; return(0); } ================================================ FILE: 49_Ternary/tests/input098.c ================================================ int fred[3]= { 1, 2, 3, 4, 5 }; ================================================ FILE: 49_Ternary/tests/input099.c ================================================ #include // List of token strings, for debugging purposes. 
// As yet, we can't store a NULL into the list char *Tstring[] = { "EOF", "=", "||", "&&", "|", "^", "&", "==", "!=", ",", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", "default", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":", "" }; int main() { int i; char *str; i=0; while (1) { str= Tstring[i]; if (*str == 0) break; printf("%s\n", str); i++; } return(0); } ================================================ FILE: 49_Ternary/tests/input100.c ================================================ #include int main() { int x= 3, y=14; int z= 2 * x + y; char *str= "Hello world"; printf("%s %d %d\n", str, x+y, z); return(0); } ================================================ FILE: 49_Ternary/tests/input101.c ================================================ #include int main() { int x= 65535; char y; char *str; y= (char )x; printf("0x%x\n", y); str= (char *)0; printf("0x%lx\n", (long)str); return(0); } ================================================ FILE: 49_Ternary/tests/input102.c ================================================ int main() { struct foo { int p; }; int y= (struct foo) x; } ================================================ FILE: 49_Ternary/tests/input103.c ================================================ int main() { union foo { int p; }; int y= (union foo) x; } ================================================ FILE: 49_Ternary/tests/input104.c ================================================ int main() { int y= (void) x; } ================================================ FILE: 49_Ternary/tests/input105.c ================================================ int main() { int x; char *y; y= (char) x; } ================================================ FILE: 49_Ternary/tests/input106.c ================================================ #include char 
*y= (char *)0; int main() { printf("0x%lx\n", (long)y); return(0); } ================================================ FILE: 49_Ternary/tests/input107.c ================================================ #include char *y[] = { "fish", "cow", NULL }; char *z= NULL; int main() { int i; char *ptr; for (i=0; i < 3; i++) { ptr= y[i]; if (ptr != (char *)0) printf("%s\n", y[i]); else printf("NULL\n"); } return(0); } ================================================ FILE: 49_Ternary/tests/input108.c ================================================ int main() { char *str= (void *)0; return(0); } ================================================ FILE: 49_Ternary/tests/input109.c ================================================ #include int main() { int x= 17 - 1; printf("%d\n", x); return(0); } ================================================ FILE: 49_Ternary/tests/input110.c ================================================ #include int x; int y; int main() { x= 3; y= 15; y += x; printf("%d\n", y); x= 3; y= 15; y -= x; printf("%d\n", y); x= 3; y= 15; y *= x; printf("%d\n", y); x= 3; y= 15; y /= x; printf("%d\n", y); return(0); } ================================================ FILE: 49_Ternary/tests/input111.c ================================================ #include int main() { int x= 2000 + 3 + 4 * 5 + 6; printf("%d\n", x); return(0); } ================================================ FILE: 49_Ternary/tests/input112.c ================================================ #include char* y = NULL; int x= 10 + 6; int fred [ 2 + 3 ]; int main() { fred[2]= x; printf("%d\n", fred[2]); return(0); } ================================================ FILE: 49_Ternary/tests/input113.c ================================================ #include void fred(void); void fred(void) { printf("fred says hello\n"); } int main(void) { fred(); return(0); } ================================================ FILE: 49_Ternary/tests/input114.c ================================================ #include int main() 
{ int x= 0x4a; printf("%c\n", x); return(0); } ================================================ FILE: 49_Ternary/tests/input115.c ================================================ #include struct foo { int x; char y; long z; }; typedef struct foo blah; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol struct symtable *ctype; // If struct/union, ptr to that type int stype; // Structural type for the symbol int class; // Storage class for the symbol int size; // Total size in bytes of this symbol int nelems; // Functions: # params. Arrays: # elements #define st_endlabel st_posn // For functions, the end label int st_posn; // For locals, the negative offset // from the stack base pointer int *initlist; // List of initial values struct symtable *next; // Next symbol in one list struct symtable *member; // First member of a function, struct, }; // union or enum // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates int rvalue; // True if the node is an rvalue struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; struct symtable *sym; // For many AST nodes, the pointer to // the symbol in the symbol table #define a_intvalue a_size // For A_INTLIT, the integer value int a_size; // For A_SCALE, the size to scale by }; int main() { printf("%ld\n", sizeof(char)); printf("%ld\n", sizeof(int)); printf("%ld\n", sizeof(long)); printf("%ld\n", sizeof(char *)); printf("%ld\n", sizeof(blah)); printf("%ld\n", sizeof(struct symtable)); printf("%ld\n", sizeof(struct ASTnode)); return(0); } ================================================ FILE: 49_Ternary/tests/input116.c ================================================ #include static int counter=0; static int fred(void) { return(counter++); } int main(void) { int i; for (i=0; i < 5; i++) printf("%d\n", fred()); 
return(0); } ================================================ FILE: 49_Ternary/tests/input117.c ================================================ #include static char *fred(void) { return("Hello"); } int main(void) { printf("%s\n", fred()); return(0); } ================================================ FILE: 49_Ternary/tests/input118.c ================================================ int main(void) { static int x; } ================================================ FILE: 49_Ternary/tests/input119.c ================================================ #include int x; int y= 3; int main() { x= y != 3 ? 6 : 8; printf("%d\n", x); x= (y == 3) ? 6 : 8; printf("%d\n", x); return(0); } ================================================ FILE: 49_Ternary/tests/input120.c ================================================ #include int x; int y= 3; int main() { for (y= 0; y < 10; y++) { x= y > 4 ? y + 2 : y + 9; printf("%d\n", x); } return(0); } ================================================ FILE: 49_Ternary/tests/input121.c ================================================ #include int x; int y= 3; int main() { for (y= 0; y < 10; y++) { x= (y < 4) ? y + 2 : (y > 7) ? 1000 : y + 9; printf("%d\n", x); } return(0); } ================================================ FILE: 49_Ternary/tests/mktests ================================================ #!/bin/sh # Make the output files for each test # Build our compiler if needed if [ ! -f ../cwj ] then (cd ..; make install) fi for i in input*c do if [ ! -f "out.$i" -a ! -f "err.$i" ] then ../cwj -o out $i 2> "err.$i" # If the err file is empty if [ ! 
-s "err.$i" ] then rm -f "err.$i" cc -o out $i ./out > "out.$i" fi fi rm -f out out.s done ================================================ FILE: 49_Ternary/tests/out.input001.c ================================================ 36 10 25 ================================================ FILE: 49_Ternary/tests/out.input002.c ================================================ 17 ================================================ FILE: 49_Ternary/tests/out.input003.c ================================================ 1 2 3 4 5 ================================================ FILE: 49_Ternary/tests/out.input004.c ================================================ 1 1 1 1 1 1 1 1 1 ================================================ FILE: 49_Ternary/tests/out.input005.c ================================================ 6 ================================================ FILE: 49_Ternary/tests/out.input006.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 49_Ternary/tests/out.input007.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 49_Ternary/tests/out.input008.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 49_Ternary/tests/out.input009.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 49_Ternary/tests/out.input010.c ================================================ 20 10 1 2 3 4 5 253 254 255 0 1 ================================================ FILE: 49_Ternary/tests/out.input011.c ================================================ 10 20 30 1 2 3 4 5 253 254 255 0 1 2 3 1 2 3 4 5 ================================================ FILE: 49_Ternary/tests/out.input012.c ================================================ 5 ================================================ FILE: 
49_Ternary/tests/out.input013.c ================================================ 23 56 ================================================ FILE: 49_Ternary/tests/out.input014.c ================================================ 10 20 30 ================================================ FILE: 49_Ternary/tests/out.input015.c ================================================ 18 18 12 12 ================================================ FILE: 49_Ternary/tests/out.input016.c ================================================ 12 18 ================================================ FILE: 49_Ternary/tests/out.input017.c ================================================ 19 12 ================================================ FILE: 49_Ternary/tests/out.input018.c ================================================ 34 34 ================================================ FILE: 49_Ternary/tests/out.input018a.c ================================================ 15 16 ================================================ FILE: 49_Ternary/tests/out.input019.c ================================================ 30 ================================================ FILE: 49_Ternary/tests/out.input020.c ================================================ 12 ================================================ FILE: 49_Ternary/tests/out.input021.c ================================================ 10 Hello world ================================================ FILE: 49_Ternary/tests/out.input022.c ================================================ 12 12 12 13 13 13 13 13 13 35 35 35 ================================================ FILE: 49_Ternary/tests/out.input023.c ================================================ -23 100 -2 0 1 0 13 14 Hello world ================================================ FILE: 49_Ternary/tests/out.input024.c ================================================ 2 59 57 8 7 ================================================ FILE: 49_Ternary/tests/out.input025.c 
================================================ 10 20 30 5 15 25 ================================================ FILE: 49_Ternary/tests/out.input026.c ================================================ 13 23 34 44 54 64 74 84 94 95 96 ================================================ FILE: 49_Ternary/tests/out.input027.c ================================================ 1 2 3 4 5 6 7 8 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 ================================================ FILE: 49_Ternary/tests/out.input028.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 49_Ternary/tests/out.input029.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 49_Ternary/tests/out.input030.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input030.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 49_Ternary/tests/out.input053.c ================================================ Hello world, 23 ================================================ FILE: 49_Ternary/tests/out.input054.c ================================================ Hello world, 0 Hello world, 1 Hello world, 2 Hello world, 3 Hello world, 4 Hello world, 5 Hello world, 6 Hello world, 7 Hello world, 8 Hello world, 9 Hello world, 10 Hello world, 11 Hello world, 12 Hello world, 13 Hello world, 14 Hello world, 15 Hello world, 16 Hello world, 17 Hello world, 18 Hello world, 19 ================================================ FILE: 49_Ternary/tests/out.input055.c ================================================ Hello world Argument 0 is ./out 
================================================ FILE: 49_Ternary/tests/out.input058.c ================================================ 12 99 4005 4116 4116 ================================================ FILE: 49_Ternary/tests/out.input062.c ================================================ 65 66 66 The next two depend on the endian of the platform 66 66 67 67 ================================================ FILE: 49_Ternary/tests/out.input063.c ================================================ 25 ================================================ FILE: 49_Ternary/tests/out.input067.c ================================================ 5 17 ================================================ FILE: 49_Ternary/tests/out.input070.c ================================================ 56 ================================================ FILE: 49_Ternary/tests/out.input071.c ================================================ 0 1 2 3 4 7 8 9 10 11 12 13 14 Done ================================================ FILE: 49_Ternary/tests/out.input074.c ================================================ 100 5 7 100 100 ================================================ FILE: 49_Ternary/tests/out.input080.c ================================================ 0 1 1 3 2 5 3 7 4 9 5 11 ================================================ FILE: 49_Ternary/tests/out.input081.c ================================================ 0 1 1 3 2 5 3 7 4 9 ================================================ FILE: 49_Ternary/tests/out.input082.c ================================================ 15 >= x > 5 ================================================ FILE: 49_Ternary/tests/out.input083.c ================================================ 5 < 6 <= 10 5 < 7 <= 10 5 < 8 <= 10 5 < 9 <= 10 5 < 10 <= 10 10 < 11 ================================================ FILE: 49_Ternary/tests/out.input084.c ================================================ 2 3 f f ================================================ FILE: 
49_Ternary/tests/out.input088.c ================================================ 5 6 ================================================ FILE: 49_Ternary/tests/out.input089.c ================================================ 23 H Hello world ================================================ FILE: 49_Ternary/tests/out.input090.c ================================================ 23 100 H Hello world ================================================ FILE: 49_Ternary/tests/out.input091.c ================================================ 1 2 3 4 5 1 2 3 4 5 0 0 0 0 0 ================================================ FILE: 49_Ternary/tests/out.input099.c ================================================ EOF = || && | ^ & == != , > <= >= << >> + - * / ++ -- ~ ! void char int long if else while for return struct union enum typedef extern break continue switch case default intlit strlit ; identifier { } ( ) [ ] , . -> : ================================================ FILE: 49_Ternary/tests/out.input100.c ================================================ Hello world 17 20 ================================================ FILE: 49_Ternary/tests/out.input101.c ================================================ 0xff 0x0 ================================================ FILE: 49_Ternary/tests/out.input106.c ================================================ 0x0 ================================================ FILE: 49_Ternary/tests/out.input107.c ================================================ fish cow NULL ================================================ FILE: 49_Ternary/tests/out.input108.c ================================================ ================================================ FILE: 49_Ternary/tests/out.input109.c ================================================ 16 ================================================ FILE: 49_Ternary/tests/out.input110.c ================================================ 18 12 45 5 ================================================ FILE: 
#!/bin/sh
# Run each test and compare
# against known good output

# Build our compiler if needed
if [ ! -f ../cwj ]
then (cd ..; make install)
fi

# report EXPECTED TRIAL
# Print ": OK" when the two files are identical, otherwise print
# ": failed" followed by a context diff and a blank line.
# Any non-zero status from cmp is a failure: cmp exits with 1 when
# the files differ and with a value greater than 1 when it could not
# compare them at all, and that second case must not count as a pass.
report() {
  if cmp -s "$1" "$2"
  then
    echo ": OK"
  else
    echo ": failed"
    diff -c "$1" "$2"
    echo
  fi
}

# Try to use each input source file
for i in input*c
do
  # Output file: compile the source, run it and
  # capture the output, and compare it against
  # the known-good output
  if [ -f "out.$i" ]
  then
    # Print the test name without a newline. printf is used because
    # "echo -n" is not portable across /bin/sh implementations.
    printf '%s' "$i"
    ../cwj -o out "$i"
    ./out > "trial.$i"
    report "out.$i" "trial.$i"

  # Error file: compile the source and
  # capture the error messages. Compare
  # against the known-bad output.
  elif [ -f "err.$i" ]
  then
    printf '%s' "$i"
    ../cwj "$i" 2> "trial.$i"
    report "err.$i" "trial.$i"

  # We can't do anything if there's no file to test against
  else
    echo "Can't run test on $i, no output file!"
  fi

  rm -f out out.s "trial.$i"
done
Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../compn $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.o out.s "trial.$i" done ================================================ FILE: 49_Ternary/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->type = type; n->left = left; n->mid = mid; n->right = right; n->sym = sym; n->a_intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, struct symtable *sym, int intvalue) { return (mkastnode(op, type, NULL, NULL, NULL, sym, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, struct symtable *sym, int intvalue) { return (mkastnode(op, type, left, NULL, NULL, sym, intvalue)); } // Generate and return a new label number // just for AST dumping purposes static int dumpid = 1; static int gendumplabel(void) { return (dumpid++); } // Given an AST tree, print it out and follow the // traversal of the tree that genAST() follows void dumpAST(struct ASTnode *n, int label, int level) { int Lfalse, Lstart, Lend; switch (n->op) { case A_IF: Lfalse = gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_IF"); if (n->right) { Lend = gendumplabel(); fprintf(stdout, ", end L%d", Lend); } fprintf(stdout, "\n"); dumpAST(n->left, Lfalse, level + 2); 
dumpAST(n->mid, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); return; case A_WHILE: Lstart = gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_WHILE, start L%d\n", Lstart); Lend = gendumplabel(); dumpAST(n->left, Lend, level + 2); dumpAST(n->right, NOLABEL, level + 2); return; } // Reset level to -2 for A_GLUE if (n->op == A_GLUE) level = -2; // General AST node handling if (n->left) dumpAST(n->left, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); for (int i = 0; i < level; i++) fprintf(stdout, " "); switch (n->op) { case A_GLUE: fprintf(stdout, "\n\n"); return; case A_FUNCTION: fprintf(stdout, "A_FUNCTION %s\n", n->sym->name); return; case A_ADD: fprintf(stdout, "A_ADD\n"); return; case A_SUBTRACT: fprintf(stdout, "A_SUBTRACT\n"); return; case A_MULTIPLY: fprintf(stdout, "A_MULTIPLY\n"); return; case A_DIVIDE: fprintf(stdout, "A_DIVIDE\n"); return; case A_EQ: fprintf(stdout, "A_EQ\n"); return; case A_NE: fprintf(stdout, "A_NE\n"); return; case A_LT: fprintf(stdout, "A_LE\n"); return; case A_GT: fprintf(stdout, "A_GT\n"); return; case A_LE: fprintf(stdout, "A_LE\n"); return; case A_GE: fprintf(stdout, "A_GE\n"); return; case A_INTLIT: fprintf(stdout, "A_INTLIT %d\n", n->a_intvalue); return; case A_STRLIT: fprintf(stdout, "A_STRLIT rval label L%d\n", n->a_intvalue); return; case A_IDENT: if (n->rvalue) fprintf(stdout, "A_IDENT rval %s\n", n->sym->name); else fprintf(stdout, "A_IDENT %s\n", n->sym->name); return; case A_ASSIGN: fprintf(stdout, "A_ASSIGN\n"); return; case A_WIDEN: fprintf(stdout, "A_WIDEN\n"); return; case A_RETURN: fprintf(stdout, "A_RETURN\n"); return; case A_FUNCCALL: fprintf(stdout, "A_FUNCCALL %s\n", n->sym->name); return; case A_ADDR: fprintf(stdout, "A_ADDR %s\n", n->sym->name); return; case A_DEREF: if (n->rvalue) fprintf(stdout, "A_DEREF rval\n"); else fprintf(stdout, "A_DEREF\n"); return; case A_SCALE: fprintf(stdout, "A_SCALE %d\n", n->a_size); 
return; case A_PREINC: fprintf(stdout, "A_PREINC %s\n", n->sym->name); return; case A_PREDEC: fprintf(stdout, "A_PREDEC %s\n", n->sym->name); return; case A_POSTINC: fprintf(stdout, "A_POSTINC\n"); return; case A_POSTDEC: fprintf(stdout, "A_POSTDEC\n"); return; case A_NEGATE: fprintf(stdout, "A_NEGATE\n"); return; case A_BREAK: fprintf(stdout, "A_BREAK\n"); return; case A_CONTINUE: fprintf(stdout, "A_CONTINUE\n"); return; case A_CASE: fprintf(stdout, "A_CASE %d\n", n->a_intvalue); return; case A_DEFAULT: fprintf(stdout, "A_DEFAULT\n"); return; case A_SWITCH: fprintf(stdout, "A_SWITCH\n"); return; case A_CAST: fprintf(stdout, "A_CAST %d\n", n->type); return; case A_ASPLUS: fprintf(stdout, "A_ASPLUS\n"); return; case A_ASMINUS: fprintf(stdout, "A_ASMINUS\n"); return; case A_ASSTAR: fprintf(stdout, "A_ASSTAR\n"); return; case A_ASSLASH: fprintf(stdout, "A_ASSLASH\n"); return; default: fatald("Unknown dumpAST operator", n->op); } } ================================================ FILE: 49_Ternary/types.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Types and type handling // Copyright (c) 2019 Warren Toomey, GPL3 // Return true if a type is an int type // of any size, false otherwise int inttype(int type) { return (((type & 0xf) == 0) && (type >= P_CHAR && type <= P_LONG)); } // Return true if a type is of pointer type int ptrtype(int type) { return ((type & 0xf) != 0); } // Given a primitive type, return // the type which is a pointer to it int pointer_to(int type) { if ((type & 0xf) == 0xf) fatald("Unrecognised in pointer_to: type", type); return (type + 1); } // Given a primitive pointer type, return // the type which it points to int value_at(int type) { if ((type & 0xf) == 0x0) fatald("Unrecognised in value_at: type", type); return (type - 1); } // Given a type and a composite type pointer, return // the size of this type in bytes int typesize(int type, struct symtable *ctype) { if (type == P_STRUCT || 
type == P_UNION) return (ctype->size); return (genprimsize(type)); } // Given an AST tree and a type which we want it to become, // possibly modify the tree by widening or scaling so that // it is compatible with this type. Return the original tree // if no changes occurred, a modified tree, or NULL if the // tree is not compatible with the given type. // If this will be part of a binary operation, the AST op is not zero. struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op) { int ltype; int lsize, rsize; ltype = tree->type; // XXX No idea on these yet if (ltype == P_STRUCT || ltype == P_UNION) fatal("Don't know how to do this yet"); if (rtype == P_STRUCT || rtype == P_UNION) fatal("Don't know how to do this yet"); // Compare scalar int types if (inttype(ltype) && inttype(rtype)) { // Both types same, nothing to do if (ltype == rtype) return (tree); // Get the sizes for each type lsize = typesize(ltype, NULL); rsize = typesize(rtype, NULL); // Tree's size is too big if (lsize > rsize) return (NULL); // Widen to the right if (rsize > lsize) return (mkastunary(A_WIDEN, rtype, tree, NULL, 0)); } // For pointers if (ptrtype(ltype) && ptrtype(rtype)) { // We can compare them if (op >= A_EQ && op <= A_GE) return(tree); // A comparison of the same type for a non-binary operation is OK, // or when the left tree is of `void *` type. 
if (op == 0 && (ltype == rtype || ltype == pointer_to(P_VOID))) return (tree); } // We can scale only on A_ADD or A_SUBTRACT operation if (op == A_ADD || op == A_SUBTRACT) { // Left is int type, right is pointer type and the size // of the original type is >1: scale the left if (inttype(ltype) && ptrtype(rtype)) { rsize = genprimsize(value_at(rtype)); if (rsize > 1) return (mkastunary(A_SCALE, rtype, tree, NULL, rsize)); else return (tree); // Size 1, no need to scale } } // If we get here, the types are not compatible return (NULL); } ================================================ FILE: 50_Mop_up_pt1/Makefile ================================================ # Define the location of the include directory # and the location to install the compiler binary INCDIR=/tmp/include BINDIR=/tmp HSRCS= data.h decl.h defs.h SRCS= cg.c decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c ARMSRCS= cg_arm.c decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c cwj: $(SRCS) $(HSRCS) cc -o cwj -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCS) compn: $(SRCN) $(HSRCS) cc -D__NASM__ -o compn -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCN) cwjarm: $(ARMSRCS) $(HSRCS) cc -o cwjarm -g -Wall $(ARMSRCS) cp cwjarm cwj install: cwj mkdir -p $(INCDIR) rsync -a include/. $(INCDIR) cp cwj $(BINDIR) chmod +x $(BINDIR)/cwj installn: compn mkdir -p $(INCDIR) rsync -a include/. 
$(INCDIR) cp compn $(BINDIR) chmod +x $(BINDIR)/compn clean: rm -f cwj cwjarm compn *.o *.s out a.out test: install tests/runtests (cd tests; chmod +x runtests; ./runtests) armtest: cwjarm tests/runtests (cd tests; chmod +x runtests; ./runtests) testn: installn tests/runtestsn (cd tests; chmod +x runtestsn; ./runtestsn) ================================================ FILE: 50_Mop_up_pt1/Readme.md ================================================ # Part 50: Mopping Up, part 1 We have definitely reached the "mopping up" phase, as in this part of our compiler writing journey I don't introduce any major feature. Instead, I fix a couple of problems and add a couple of minor functions. ## Consecutive Cases At present, the compiler can't parse ```c switch(x) { case 1: case 2: printf("Hello\n"); } ``` because the parser expects a compound statement after the ':' token. In `switch_statement()` in `stmt.c`: ```c // Scan the ':' and get the compound expression match(T_COLON, ":"); left= compound_statement(1); casecount++; ... // Build a sub-tree with the compound statement as the left child casetail->right= mkastunary(ASTop, 0, left, NULL, casevalue); ``` What we want is to allow an empty compound statement, so that any case with a missing compound statement falls down into the next existing compound statement. The change in `switch_statement()` is: ```c // Scan the ':' and increment the casecount match(T_COLON, ":"); casecount++; // If the next token is a T_CASE, the existing case will fall // into the next case. Otherwise, parse the case body. if (Token.token == T_CASE) body= NULL; else body= compound_statement(1); ``` This is, however, only half the story. Now in the code generation section, we have to catch the NULL compound statement and do something about it. In `genSWITCH()` in `gen.c`: ```c // Walk the right-child linked list to // generate the code for each case for (i = 0, c = n->right; c != NULL; i++, c = c->right) { ... // Generate the case code. 
Pass in the end label for the breaks. // If case has no body, we will fall into the following body. if (c->left) genAST(c->left, NOLABEL, NOLABEL, Lend, 0); genfreeregs(NOREG); } ``` So, this was a nice and simple fix. `tests/input123.c` is the test program to confirm this change works. ## Dumping the Symbol Table While I was trying to work out why the global `Text` variable wasn't visible to the compiler, I added code in `sym.c` to dump the symbol table at the end of every source code file. There is an `-M` command line argument to enable the functionality. I won't go through the code, but here is an example of its output: ``` Symbols for misc.c Global -------- void exit(): global, 1 params int status: param, size 4 void _Exit(): global, 1 params int status: param, size 4 void *malloc(): global, 1 params int size: param, size 4 ... int Line: extern, size 4 int Putback: extern, size 4 struct symtable *Functionid: extern, size 8 char **Infile: extern, size 8 char **Outfile: extern, size 8 char *Text[]: extern, 513 elems, size 513 struct symtable *Globhead: extern, size 8 struct symtable *Globtail: extern, size 8 ... struct mkastleaf *mkastleaf(): global, 4 params int op: param, size 4 int type: param, size 4 struct symtable *sym: param, size 8 int intvalue: param, size 4 ... Enums -------- int (null): enumtype, size 0 int TEXTLEN: enumval, value 512 int (null): enumtype, size 0 int T_EOF: enumval, value 0 int T_ASSIGN: enumval, value 1 int T_ASPLUS: enumval, value 2 int T_ASMINUS: enumval, value 3 int T_ASSTAR: enumval, value 4 int T_ASSLASH: enumval, value 5 ... Typedefs -------- long size_t: typedef, size 0 char *FILE: typedef, size 0 ``` ## Passing Arrays as Arguments I made the following change, but in hindsight I realise that I probably need to rethink how I deal with arrays completely. Anyway ... when I compile `decl.c` with the compiler, I get the error: ``` Unknown variable:Text on line 87 of decl.c ``` which prompted me to write the symbol dumping code. 
`Text` is in the global symbol table, so why is the parser complaining that it's missing? The answer is that `postfix()` in `expr.c`, after finding an identifier, consults the following token. If it is a '[', then the identifier must be an array. If there is no '[', then the identifier must be a variable: ```c // A variable. Check that the variable exists. if ((varptr = findsymbol(Text)) == NULL || varptr->stype != S_VARIABLE) fatals("Unknown variable", Text); ``` This is preventing the passing of an array reference as an argument to a function. The "offending" line that prompts the error message is in `decl.c`: ```c type = type_of_typedef(Text, ctype); ``` We are passing the address of the base of `Text` as an argument. But with no following '[', our compiler thinks that it's a scalar variable, and complains that there is no scalar variable `Text`. I made the change to allow S_ARRAY as well as S_VARIABLE here, but this is just the tip of a bigger problem: arrays and pointers in our compiler are not as interchangeable as they should be. I'll tackle this in the next part. ## Missing Operators In our compiler, we've had these tokens and AST operators since part 21 of the journey: + `||`, T_LOGOR, A_LOGOR + `&&`, T_LOGAND, A_LOGAND Somehow, I'd never implemented them! So, it's time to do them. For A_LOGAND, we have two expressions. If both evaluate to true, we need to set a register to the rvalue of 1, otherwise 0. For A_LOGOR, if either evaluates to true, we need to set a register to the rvalue of 1, otherwise 0. The `binexpr()` code in `expr.c` already parses the tokens and builds the A_LOGOR and A_LOGAND AST nodes. So we need to fix up the code generator. In `genAST()` in `gen.c`, we now have: ```c case A_LOGOR: return (cglogor(leftreg, rightreg)); case A_LOGAND: return (cglogand(leftreg, rightreg)); ``` with two corresponding functions in `cg.c`.
Before we look at the `cg.c` functions, let's just see an example C expression and the assembly code that will be produced. ```c int x, y, z; ... z= x || y; ``` when compiled, results in: ``` movslq x(%rip), %r10 # Load x's rvalue movslq y(%rip), %r11 # Load y's rvalue test %r10, %r10 # Test x's boolean value jne L13 # True, jump to L13 test %r11, %r11 # Test y's boolean value jne L13 # True, jump to L13 movq $0, %r10 # Neither true, set %r10 to false jmp L14 # and jump to L14 L13: movq $1, %r10 # Set %r10 to true L14: movl %r10d, z(%rip) # Save boolean result to z ``` We test each expression, jump based on the boolean result and store either 0 or 1 into our output register. The assembly for A_LOGAND is similar, except that the conditional jumps are `je` (jump if equal to zero) and the `movq $0` and `movq $1` are swapped around. So, without further comment, here are the new `cg.c` functions: ```c // Logically OR two registers and return a // register with the result, 1 or 0 int cglogor(int r1, int r2) { // Generate two labels int Ltrue = genlabel(); int Lend = genlabel(); // Test r1 and jump to true label if true fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r1], reglist[r1]); fprintf(Outfile, "\tjne\tL%d\n", Ltrue); // Test r2 and jump to true label if true fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r2], reglist[r2]); fprintf(Outfile, "\tjne\tL%d\n", Ltrue); // Didn't jump, so result is false fprintf(Outfile, "\tmovq\t$0, %s\n", reglist[r1]); fprintf(Outfile, "\tjmp\tL%d\n", Lend); // Someone jumped to the true label, so result is true cglabel(Ltrue); fprintf(Outfile, "\tmovq\t$1, %s\n", reglist[r1]); cglabel(Lend); free_register(r2); return(r1); } ``` ```c // Logically AND two registers and return a // register with the result, 1 or 0 int cglogand(int r1, int r2) { // Generate two labels int Lfalse = genlabel(); int Lend = genlabel(); // Test r1 and jump to false label if not true fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r1], reglist[r1]); fprintf(Outfile,
"\tje\tL%d\n", Lfalse); // Test r2 and jump to false label if not true fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r2], reglist[r2]); fprintf(Outfile, "\tje\tL%d\n", Lfalse); // Didn't jump, so result is true fprintf(Outfile, "\tmovq\t$1, %s\n", reglist[r1]); fprintf(Outfile, "\tjmp\tL%d\n", Lend); // Someone jumped to the false label, so result is false cglabel(Lfalse); fprintf(Outfile, "\tmovq\t$0, %s\n", reglist[r1]); cglabel(Lend); free_register(r2); return(r1); } ``` The program `tests/input122.c` is the test to confirm that this new functionality works. ## Conclusion and What's Next So that's a few small things fixed up in this part of our journey. What I will do now is step back, rethink the array/pointer design and try to fix this up in the next part of our compiler writing journey. [Next step](../51_Arrays_pt2/Readme.md) ================================================ FILE: 50_Mop_up_pt1/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\t.text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\t.data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. 
This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch (type) { case P_CHAR: return (offset); case P_INT: case P_LONG: break; default: if (!ptrtype(type)) fatald("Bad type in cg_align:", type); } // Here we have an int or a long. Align it on a 4-byte offset // I put the generic code here so it can be reused elsewhere. alignment = 4; offset = (offset + direction * (alignment - 1)) & ~(alignment - 1); return (offset); } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. static int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. // We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "%r10", "%r11", "%r12", "%r13", "%r9", "%r8", "%rcx", "%rdx", "%rsi", "%rdi" }; static char *breglist[] = { "%r10b", "%r11b", "%r12b", "%r13b", "%r9b", "%r8b", "%cl", "%dl", "%sil", "%dil" }; static char *dreglist[] = { "%r10d", "%r11d", "%r12d", "%r13d", "%r9d", "%r8d", "%ecx", "%edx", "%esi", "%edi" }; // Set all registers as available. // But if reg is positive, don't free that one. void freeall_registers(int keepreg) { int i; for (i = 0; i < NUMFREEREGS; i++) if (i != keepreg) freereg[i] = 1; } // Allocate a free register. Return the number of // the register. 
Die if no available registers. int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(NOREG); cgtextseg(); fprintf(Outfile, "# internal switch(expr) routine\n" "# %%rsi = switch table, %%rax = expr\n" "# from SubC: http://www.t3x.org/subc/\n" "\n" "switch:\n" " pushq %%rsi\n" " movq %%rdx, %%rsi\n" " movq %%rax, %%rbx\n" " cld\n" " lodsq\n" " movq %%rax, %%rcx\n" "next:\n" " lodsq\n" " movq %%rax, %%rdx\n" " lodsq\n" " cmpq %%rdx, %%rbx\n" " jnz no\n" " popq %%rsi\n" " jmp *%%rax\n" "no:\n" " loop next\n" " lodsq\n" " popq %%rsi\n" " jmp *%%rax\n" "\n"); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(struct symtable *sym) { char *name = sym->name; struct symtable *parm, *locvar; int cnt; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the %rsp and %rsp if (sym->class == C_GLOBAL) fprintf(Outfile, "\t.globl\t%s\n" "\t.type\t%s, @function\n", name, name); fprintf(Outfile, "%s:\n" "\tpushq\t%%rbp\n" "\tmovq\t%%rsp, %%rbp\n", name); // Copy any in-register parameters to the stack, up to six of them // The remaining parameters are already on the stack for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) { if (cnt > 6) { parm->st_posn = paramOffset; paramOffset += 8; } else { parm->st_posn = newlocaloffset(parm->type); cgstorlocal(paramReg--, parm); } } // For the 
remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) { locvar->st_posn = newlocaloffset(locvar->type); } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\taddq\t$%d,%%rsp\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->st_endlabel); fprintf(Outfile, "\taddq\t$%d,%%rsp\n", stackOffset); fputs("\tpopq %rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. 
int cgloadglob(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name); } else // Print out the code to initialise it switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovzbq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovslq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. 
int cgloadlocal(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->st_posn); fprintf(Outfile, "\tmovq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->st_posn); } else switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->st_posn); fprintf(Outfile, "\tmovzbq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->st_posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->st_posn); fprintf(Outfile, "\tmovslq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->st_posn); break; default: fatald("Bad type in cgloadlocal:", sym->type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tleaq\tL%d(%%rip), %s\n", label, reglist[r]); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int 
cgsub(int r1, int r2) { fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tandq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\torq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txorq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshlq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshrq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tnegq\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnotq\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); return (r); } // Logically OR two registers and return a // register with the result, 1 or 0 int cglogor(int r1, int r2) { // Generate two 
labels int Ltrue = genlabel(); int Lend = genlabel(); // Test r1 and jump to true label if true fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r1], reglist[r1]); fprintf(Outfile, "\tjne\tL%d\n", Ltrue); // Test r2 and jump to true label if true fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r2], reglist[r2]); fprintf(Outfile, "\tjne\tL%d\n", Ltrue); // Didn't jump, so result is false fprintf(Outfile, "\tmovq\t$0, %s\n", reglist[r1]); fprintf(Outfile, "\tjmp\tL%d\n", Lend); // Someone jumped to the true label, so result is true cglabel(Ltrue); fprintf(Outfile, "\tmovq\t$1, %s\n", reglist[r1]); cglabel(Lend); free_register(r2); return(r1); } // Logically AND two registers and return a // register with the result, 1 or 0 int cglogand(int r1, int r2) { // Generate two labels int Lfalse = genlabel(); int Lend = genlabel(); // Test r1 and jump to false label if not true fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r1], reglist[r1]); fprintf(Outfile, "\tje\tL%d\n", Lfalse); // Test r2 and jump to false label if not true fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r2], reglist[r2]); fprintf(Outfile, "\tje\tL%d\n", Lfalse); // Didn't jump, so result is true fprintf(Outfile, "\tmovq\t$1, %s\n", reglist[r1]); fprintf(Outfile, "\tjmp\tL%d\n", Lend); // Someone jumped to the false label, so result is false cglabel(Lfalse); fprintf(Outfile, "\tmovq\t$0, %s\n", reglist[r1]); cglabel(Lend); free_register(r2); return(r1); } // Convert an integer value to a boolean value. 
Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s@PLT\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\taddq\t$%d, %%rsp\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpushq\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmovq\t%s, %s\n", reglist[r], reglist[FIRSTPARAMREG - argposn + 1]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsalq\t$%d, %s\n", val, reglist[r]); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %s(%%rip)\n", reglist[r], sym->name); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %s(%%rip)\n", breglist[r], sym->name); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %s(%%rip)\n", dreglist[r], sym->name); break; default: fatald("Bad type in cgstorglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %d(%%rbp)\n", reglist[r], sym->st_posn); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %d(%%rbp)\n", breglist[r], sym->st_posn); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %d(%%rbp)\n", dreglist[r], sym->st_posn); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size, type; int initvalue; int i; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the variable (or its elements if an array) // and the type of the variable if (node->stype == S_ARRAY) { size = typesize(value_at(node->type), node->ctype); type = value_at(node->type); } else { size = node->size; type = node->type; } // Generate the global identity and the label cgdataseg(); if (node->class == C_GLOBAL) fprintf(Outfile, 
"\t.globl\t%s\n", node->name); fprintf(Outfile, "%s:\n", node->name); // Output space for one or more elements for (i = 0; i < node->nelems; i++) { // Get any initial value initvalue = 0; if (node->initlist != NULL) initvalue = node->initlist[i]; // Generate the space for this type switch (size) { case 1: fprintf(Outfile, "\t.byte\t%d\n", initvalue); break; case 4: fprintf(Outfile, "\t.long\t%d\n", initvalue); break; case 8: // Generate the pointer to a string literal. Treat a zero value // as actually zero, not the label L0 if (node->initlist != NULL && type == pointer_to(P_CHAR) && initvalue != 0) fprintf(Outfile, "\t.quad\tL%d\n", initvalue); else fprintf(Outfile, "\t.quad\t%d\n", initvalue); break; default: for (int i = 0; i < size; i++) fprintf(Outfile, "\t.byte\t0\n"); } } } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\t.byte\t%d\n", *cptr); } fprintf(Outfile, "\t.byte\t0\n"); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(NOREG); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Deal with pointers here as we can't put them in // the switch statement if (ptrtype(sym->type)) fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); else { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzbl\t%s, %%eax\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %%eax\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } } cgjump(sym->st_endlabel); } // Generate code to load the 
address of an // identifier into a variable. Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL || sym->class == C_STATIC) fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", sym->name, reglist[r]); else fprintf(Outfile, "\tleaq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzbq\t(%s), %s\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovslq\t(%s), %s\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { // Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmovb\t%s, (%s)\n", breglist[r1], reglist[r2]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } // Generate a switch jump table and the code to // load the registers and call the switch() code void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; // Get a label for the switch table label = genlabel(); cglabel(label); // Heuristic. If we have no cases, create one case // which points to the default case if (casecount == 0) { caseval[0] = 0; caselabel[0] = defaultlabel; casecount = 1; } // Generate the switch jump table. 
fprintf(Outfile, "\t.quad\t%d\n", casecount); for (i = 0; i < casecount; i++) fprintf(Outfile, "\t.quad\t%d, L%d\n", caseval[i], caselabel[i]); fprintf(Outfile, "\t.quad\tL%d\n", defaultlabel); // Load the specific registers cglabel(toplabel); fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); fprintf(Outfile, "\tleaq\tL%d(%%rip), %%rdx\n", label); fprintf(Outfile, "\tjmp\tswitch\n"); } // Move value between registers void cgmove(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s, %s\n", reglist[r1], reglist[r2]); } ================================================ FILE: 50_Mop_up_pt1/cg_arm.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for ARMv6 on Raspberry Pi // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. static int freereg[4]; static char *reglist[4] = { "r4", "r5", "r6", "r7" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // We have to store large integer literal values in memory. // Keep a list of them which will be output in the postamble #define MAXINTS 1024 int Intlist[MAXINTS]; static int Intslot = 0; // Determine the offset of a large integer // literal from the .L3 label. If the integer // isn't in the list, add it. 
static void set_int_offset(int val) { int offset = -1; // See if it is already there for (int i = 0; i < Intslot; i++) { if (Intlist[i] == val) { offset = 4 * i; break; } } // Not in the list, so add it if (offset == -1) { offset = 4 * Intslot; if (Intslot == MAXINTS) fatal("Out of int slots in set_int_offset()"); Intlist[Intslot++] = val; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L3+%d\n", offset); } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Print out the assembly postamble void cgpostamble() { // Print out the global variables fprintf(Outfile, ".L2:\n"); for (int i = 0; i < Globs; i++) { if (Symtable[i].stype == S_VARIABLE) fprintf(Outfile, "\t.word %s\n", Symtable[i].name); } // Print out the integer literals fprintf(Outfile, ".L3:\n"); for (int i = 0; i < Intslot; i++) { fprintf(Outfile, "\t.word %d\n", Intlist[i]); } } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, \%%function\n" "%s:\n" "\tpush\t{fp, lr}\n" "\tadd\tfp, sp, #4\n" "\tsub\tsp, sp, #8\n" "\tstr\tr0, [fp, #-8]\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Symtable[id].endlabel); fputs("\tsub\tsp, fp, #4\n" "\tpop\t{fp, pc}\n" "\t.align\t2\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); // If the literal value is small, do it with one instruction if (value <= 1000) fprintf(Outfile, "\tmov\t%s, #%d\n", reglist[r], value); else { set_int_offset(value); fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); } return (r); } // Determine the offset of a variable from the .L2 // label. Yes, this is inefficient code. static void set_var_offset(int id) { int offset = 0; // Walk the symbol table up to id. 
// Find S_VARIABLEs and add on 4 until // we get to our variable for (int i = 0; i < id; i++) { if (Symtable[i].stype == S_VARIABLE) offset += 4; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L2+%d\n", offset); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tldrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s, %s\n", reglist[r1], reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\tmul\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { // To do a divide: r0 holds the dividend, r1 holds the divisor. // The quotient is in r0. 
fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r1]); fprintf(Outfile, "\tmov\tr1, %s\n", reglist[r2]); fprintf(Outfile, "\tbl\t__aeabi_idiv\n"); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r1]); free_register(r2); return (r1); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]); fprintf(Outfile, "\tbl\t%s\n", Symtable[id].name); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r]); return (r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tlsl\t%s, %s, #%d\n", reglist[r], reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tstr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgstorglob:", Symtable[id].type); } return (r); } // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { if (ptrtype(type)) return (4); switch (type) { case P_CHAR: return (1); case P_INT: case P_LONG: return (4); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Symtable[id].type); fprintf(Outfile, "\t.data\n" "\t.globl\t%s\n", Symtable[id].name); switch (typesize) { case 1: fprintf(Outfile, "%s:\t.byte\t0\n", Symtable[id].name); break; case 4: fprintf(Outfile, "%s:\t.long\t0\n", Symtable[id].name); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "moveq", "movne", "movlt", "movgt", "movle", "movge" }; // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "movne", "moveq", "movge", "movle", "movgt", "movlt" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #1\n", cmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #0\n", invcmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\tuxtb\t%s, %s\n", reglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tb\tL%d\n", l); } // List of inverted branch instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *brlist[] = { "bne", "beq", "bge", "ble", "bgt", "blt" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", brlist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[reg]); cgjump(Symtable[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. Return a new register int cgaddress(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); fprintf(Outfile, "\tmov\t%s, r3\n", reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tldrb\t%s, [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [%s]\n", reglist[r1], reglist[r2]); break; case P_INT: case P_LONG: fprintf(Outfile, "\tstr\t%s, [%s]\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 50_Mop_up_pt1/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { no_seg, 
text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\tsection .text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\tsection .data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch (type) { case P_CHAR: return (offset); case P_INT: case P_LONG: break; default: if (!ptrtype(type)) fatald("Bad type in cg_align:", type); } // Here we have an int or a long. Align it on a 4-byte offset // I put the generic code here so it can be reused elsewhere. alignment = 4; offset = (offset + direction * (alignment - 1)) & ~(alignment - 1); return (offset); } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. 
// We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "r10", "r11", "r12", "r13", "r9", "r8", "rcx", "rdx", "rsi", "rdi" }; static char *breglist[] = { "r10b", "r11b", "r12b", "r13b", "r9b", "r8b", "cl", "dl", "sil", "dil" }; static char *dreglist[] = { "r10d", "r11d", "r12d", "r13d", "r9d", "r8d", "ecx", "edx", "esi", "edi" }; // Set all registers as available // But if reg is positive, don't free that one. void freeall_registers(int keepreg) { int i; for (i = 0; i < NUMFREEREGS; i++) if (i != keepreg) freereg[i] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. int alloc_register(void) { for (int i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(NOREG); fputs("\textern\tprintint\n", Outfile); fputs("\textern\tprintchar\n", Outfile); fputs("\textern\topen\n", Outfile); fputs("\textern\tclose\n", Outfile); fputs("\textern\tread\n", Outfile); fputs("\textern\twrite\n", Outfile); fputs("\textern\tprintf\n", Outfile); cgtextseg(); fprintf(Outfile, "; internal switch(expr) routine\n" "; rsi = switch table, rax = expr\n" "; from SubC: http://www.t3x.org/subc/\n" "\n" "switch:\n" " push rsi\n" " mov rsi, rdx\n" " mov rbx, rax\n" " cld\n" " lodsq\n" " mov rcx, rax\n" "next:\n" " lodsq\n" " mov rdx, rax\n" " lodsq\n" " cmp rbx, rdx\n" " jnz no\n" " pop rsi\n" " jmp rax\n" "no:\n" " loop next\n" " lodsq\n" " pop rsi\n" " jmp rax\n" "\n"); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(struct symtable *sym) { char *name = sym->name; struct symtable *parm, *locvar; int cnt; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the rsp and rbp if (sym->class == C_GLOBAL) fprintf(Outfile, "\tglobal\t%s\n", name); fprintf(Outfile, "%s:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n", name); // Copy any in-register parameters to the stack, up to six of them // The remaining parameters are already on the stack for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) { if (cnt > 6) { parm->st_posn = paramOffset; paramOffset += 8; } else { parm->st_posn = newlocaloffset(parm->type); cgstorlocal(paramReg--, parm); } } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. 
for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) { locvar->st_posn = newlocaloffset(locvar->type); } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\tadd\trsp, %d\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->st_endlabel); fprintf(Outfile, "\tadd\trsp, %d\n", stackOffset); fputs("\tpop rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadglob(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); } else // Print out the code to initialise it switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_PREDEC) 
fprintf(Outfile, "\tdec\tdword [%s]\n", sym->name); fprintf(Outfile, "\tmovsx\t%s, word [%s]\n", dreglist[r], sym->name); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword [%s]\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadlocal(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->st_posn); fprintf(Outfile, "\tmov\t%s, [rbp+%d]\n", reglist[r], sym->st_posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->st_posn); } else switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->st_posn); fprintf(Outfile, "\tmovzx\t%s, byte [rbp+%d]\n", reglist[r], sym->st_posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->st_posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", sym->st_posn); fprintf(Outfile, "\tmovsx\t%s, dword [rbp+%d]\n", reglist[r], sym->st_posn); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", 
sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", sym->st_posn); break; default: fatald("Bad type in cgloadlocal:", sym->type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, L%d\n", reglist[r], label); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tand\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\tor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshl\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, 
"\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshr\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tneg\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnot\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r], breglist[r]); return (r); } // Logically OR two registers and return a // register with the result, 1 or 0 int cglogor(int r1, int r2) { // Generate two labels int Ltrue = genlabel(); int Lend = genlabel(); // Test r1 and jump to true label if true fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r1], reglist[r1]); fprintf(Outfile, "\tjne\tL%d\n", Ltrue); // Test r2 and jump to true label if true fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r2], reglist[r2]); fprintf(Outfile, "\tjne\tL%d\n", Ltrue); // Didn't jump, so result is false fprintf(Outfile, "\tmov\t%s, 0\n", reglist[r1]); fprintf(Outfile, "\tjmp\tL%d\n", Lend); // Someone jumped to the true label, so result is true cglabel(Ltrue); fprintf(Outfile, "\tmov\t%s, 1\n", reglist[r1]); cglabel(Lend); free_register(r2); return(r1); } // Logically AND two registers and return a // register with the result, 1 or 0 int cglogand(int r1, int r2) { // Generate two labels int Lfalse = genlabel(); int Lend = genlabel(); // Test r1 and jump to false label if not true fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r1], reglist[r1]); fprintf(Outfile, "\tje\tL%d\n", Lfalse); // Test r2 and jump to false label if not true fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r2], reglist[r2]); fprintf(Outfile, "\tje\tL%d\n", Lfalse); // Didn't jump, so result is true fprintf(Outfile, "\tmov\t%s, 1\n", reglist[r1]); fprintf(Outfile, "\tjmp\tL%d\n", Lend); // Someone jumped to the false label, 
so result is false cglabel(Lfalse); fprintf(Outfile, "\tmov\t%s, 0\n", reglist[r1]); cglabel(Lend); free_register(r2); return(r1); } // Convert an integer value to a boolean value. Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, byte %s\n", reglist[r], breglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\tadd\trsp, %d\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmov\t%s, rax\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpush\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmov\t%s, %s\n", reglist[FIRSTPARAMREG - argposn + 1], reglist[r]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsal\t%s, %d\n", reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, dreglist[r]); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\tqword\t[rbp+%d], %s\n", sym->st_posn, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\tbyte\t[rbp+%d], %s\n", sym->st_posn, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\tdword\t[rbp+%d], %s\n", sym->st_posn, dreglist[r]); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size, type; int initvalue; int i; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the variable (or its elements if an array) // and the type of the variable if (node->stype == S_ARRAY) { size = typesize(value_at(node->type), node->ctype); type = value_at(node->type); } else { size = node->size; type = node->type; } // Generate the global identity and the label cgdataseg(); if (node->class == C_GLOBAL) fprintf(Outfile, "\tglobal\t%s\n", 
node->name); fprintf(Outfile, "%s:\n", node->name); // Output space for one or more elements for (i = 0; i < node->nelems; i++) { // Get any initial value initvalue = 0; if (node->initlist != NULL) initvalue = node->initlist[i]; // Generate the space for this type // original version switch(size) { case 1: fprintf(Outfile, "\tdb\t%d\n", initvalue); break; case 4: fprintf(Outfile, "\tdd\t%d\n", initvalue); break; case 8: // Generate the pointer to a string literal. Treat a zero value // as actually zero, not the label L0 if (node->initlist != NULL && type == pointer_to(P_CHAR) && initvalue != 0) fprintf(Outfile, "\tdq\tL%d\n", initvalue); else fprintf(Outfile, "\tdq\t%d\n", initvalue); break; default: for (int i = 0; i < size; i++) fprintf(Outfile, "\tdb\t0\n"); /* compact version using times instead of loop fprintf(Outfile, "\ttimes\t%d\tdb\t0\n", size); */ } } } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\tdb\t%d\n", *cptr); } fprintf(Outfile, "\tdb\t0\n"); /* put string in readable format on a single line // probably went overboard with error checking int comma = 0, quote = 0, start = 1; fprintf(Outfile, "\tdb\t"); for (cptr=strvalue; *cptr; cptr++) { if ( ! isprint(*cptr) ) if (comma || start) { fprintf(Outfile, "%d, ", *cptr); start = 0; comma = 1; } else if (quote) { fprintf(Outfile, "\', %d, ", *cptr); comma = 1; quote = 0; } else { fprintf(Outfile, "%d, ", *cptr); comma = 1; quote = 0; } else if (start || comma) { fprintf(Outfile, "\'%c", *cptr); start = comma = 0; quote = 1; } else { fprintf(Outfile, "%c", *cptr); comma = 0; quote = 1; } } if (comma || start) fprintf(Outfile, "0\n"); else fprintf(Outfile, "\', 0\n"); */ } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(NOREG); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Deal with pointers here as we can't put them in // the switch statement if (ptrtype(sym->type)) fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); else { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzx\teax, %s\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmov\teax, %s\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } } cgjump(sym->st_endlabel); } // Generate code to load the address of an 
// identifier into a variable. Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL || sym->class == C_STATIC) fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r], sym->name); else fprintf(Outfile, "\tlea\t%s, [rbp+%d]\n", reglist[r], sym->st_posn); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovsx\t%s, dword [%s]\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { //Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmov\t[%s], byte %s\n", reglist[r2], breglist[r1]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } // Generate a switch jump table and the code to // load the registers and call the switch() code void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; // Get a label for the switch table label = genlabel(); cglabel(label); // Heuristic. If we have no cases, create one case // which points to the default case if (casecount == 0) { caseval[0] = 0; caselabel[0] = defaultlabel; casecount = 1; } // Generate the switch jump table. 
fprintf(Outfile, "\tdq\t%d\n", casecount); for (i = 0; i < casecount; i++) fprintf(Outfile, "\tdq\t%d, L%d\n", caseval[i], caselabel[i]); fprintf(Outfile, "\tdq\tL%d\n", defaultlabel); // Load the specific registers cglabel(toplabel); fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); fprintf(Outfile, "\tmov\trdx, L%d\n", label); fprintf(Outfile, "\tjmp\tswitch\n"); } // Move value between registers void cgmove(int r1, int r2) { fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r2], reglist[r1]); } ================================================ FILE: 50_Mop_up_pt1/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Putback; // Character put back by scanner extern_ struct symtable *Functionid; // Symbol ptr of the current function extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ char *Infilename; // Name of file we are parsing extern_ char *Outfilename; // Name of file we opened as Outfile extern_ struct token Token; // Last token scanned extern_ struct token Peektoken; // A look-ahead token extern_ char Text[TEXTLEN + 1]; // Last identifier scanned extern_ int Looplevel; // Depth of nested loops extern_ int Switchlevel; // Depth of nested switches // Symbol table lists extern_ struct symtable *Globhead, *Globtail; // Global variables and functions extern_ struct symtable *Loclhead, *Locltail; // Local variables extern_ struct symtable *Parmhead, *Parmtail; // Local parameters extern_ struct symtable *Membhead, *Membtail; // Temp list of struct/union members extern_ struct symtable *Structhead, *Structtail; // List of struct types extern_ struct symtable *Unionhead, *Uniontail; // List of union types extern_ struct symtable *Enumhead, *Enumtail; // List of enum types and values extern_ struct symtable *Typehead, *Typetail; // List of typedefs // Command-line flags extern_ int 
O_dumpAST; // If true, dump the AST trees extern_ int O_dumpsym; // If true, dump the symbol table extern_ int O_keepasm; // If true, keep any assembly files extern_ int O_assemble; // If true, assemble the assembly files extern_ int O_dolink; // If true, link the object files extern_ int O_verbose; // If true, print info on compilation stages ================================================ FILE: 50_Mop_up_pt1/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 static struct symtable *composite_declaration(int type); static int typedef_declaration(struct symtable **ctype); static int type_of_typedef(char *name, struct symtable **ctype); static void enum_declaration(void); // Parse the current token and return a primitive type enum value, // a pointer to any composite type and possibly modify // the class of the type. int parse_type(struct symtable **ctype, int *class) { int type, exstatic = 1; // See if the class has been changed to extern or static while (exstatic) { switch (Token.token) { case T_EXTERN: if (*class == C_STATIC) fatal("Illegal to have extern and static at the same time"); *class = C_EXTERN; scan(&Token); break; case T_STATIC: if (*class == C_LOCAL) fatal("Compiler doesn't support static local declarations"); if (*class == C_EXTERN) fatal("Illegal to have extern and static at the same time"); *class = C_STATIC; scan(&Token); break; default: exstatic = 0; } } // Now work on the actual type keyword switch (Token.token) { case T_VOID: type = P_VOID; scan(&Token); break; case T_CHAR: type = P_CHAR; scan(&Token); break; case T_INT: type = P_INT; scan(&Token); break; case T_LONG: type = P_LONG; scan(&Token); break; // For the following, if we have a ';' after the // parsing then there is no type, so return -1. 
// Example: struct x {int y; int z}; case T_STRUCT: type = P_STRUCT; *ctype = composite_declaration(P_STRUCT); if (Token.token == T_SEMI) type = -1; break; case T_UNION: type = P_UNION; *ctype = composite_declaration(P_UNION); if (Token.token == T_SEMI) type = -1; break; case T_ENUM: type = P_INT; // Enums are really ints enum_declaration(); if (Token.token == T_SEMI) type = -1; break; case T_TYPEDEF: type = typedef_declaration(ctype); if (Token.token == T_SEMI) type = -1; break; case T_IDENT: type = type_of_typedef(Text, ctype); break; default: fatals("Illegal type, token", Token.tokstr); } return (type); } // Given a type parsed by parse_type(), scan in any following // '*' tokens and return the new type int parse_stars(int type) { while (1) { if (Token.token != T_STAR) break; type = pointer_to(type); scan(&Token); } return (type); } // Parse a type which appears inside a cast int parse_cast(void) { int type, class=0; struct symtable *ctype; // Get the type inside the parentheses type= parse_stars(parse_type(&ctype, &class)); // Do some error checking. I'm sure more can be done if (type == P_STRUCT || type == P_UNION || type == P_VOID) fatal("Cannot cast to a struct, union or void type"); return(type); } // Given a type, parse an expression of literals and ensure // that the type of this expression matches the given type. // Parse any type cast that precedes the expression. // If an integer literal, return this value. // If a string literal, return the label number of the string. 
int parse_literal(int type) { struct ASTnode *tree; // Parse the expression and optimise the resulting AST tree tree= optimise(binexpr(0)); // If there's a cast, get the child and // mark it as having the type from the cast if (tree->op == A_CAST) { tree->left->type= tree->type; tree= tree->left; } // The tree must now have an integer or string literal if (tree->op != A_INTLIT && tree->op != A_STRLIT) fatal("Cannot initialise globals with a general expression"); // If the type is char * and if (type == pointer_to(P_CHAR)) { // We have a string literal, return the label number if (tree->op == A_STRLIT) return(tree->a_intvalue); // We have a zero int literal, so that's a NULL if (tree->op == A_INTLIT && tree->a_intvalue==0) return(0); } // We only get here with an integer literal. The input type // is an integer type and is wide enough to hold the literal value if (inttype(type) && typesize(type, NULL) >= typesize(tree->type, NULL)) return(tree->a_intvalue); fatal("Type mismatch: literal vs. variable"); return(0); // Keep -Wall happy } // Given the type, name and class of a scalar variable, // parse any initialisation value and allocate storage for it. // Return the variable's symbol table entry. 
static struct symtable *scalar_declaration(char *varname, int type, struct symtable *ctype, int class, struct ASTnode **tree) { struct symtable *sym=NULL; struct ASTnode *varnode, *exprnode; *tree= NULL; // Add this as a known scalar switch (class) { case C_STATIC: case C_EXTERN: case C_GLOBAL: sym= addglob(varname, type, ctype, S_VARIABLE, class, 1, 0); break; case C_LOCAL: sym= addlocl(varname, type, ctype, S_VARIABLE, 1); break; case C_PARAM: sym= addparm(varname, type, ctype, S_VARIABLE); break; case C_MEMBER: sym= addmemb(varname, type, ctype, S_VARIABLE, 1); break; } // The variable is being initialised if (Token.token == T_ASSIGN) { // Only possible for a global or local if (class != C_GLOBAL && class != C_LOCAL && class != C_STATIC) fatals("Variable can not be initialised", varname); scan(&Token); // Globals must be assigned a literal value if (class == C_GLOBAL || class == C_STATIC) { // Create one initial value for the variable and // parse this value sym->initlist= (int *)malloc(sizeof(int)); sym->initlist[0]= parse_literal(type); } if (class == C_LOCAL) { // Make an A_IDENT AST node with the variable varnode = mkastleaf(A_IDENT, sym->type, sym, 0); // Get the expression for the assignment, make into a rvalue exprnode = binexpr(0); exprnode->rvalue = 1; // Ensure the expression's type matches the variable exprnode = modify_type(exprnode, varnode->type, 0); if (exprnode == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree *tree = mkastnode(A_ASSIGN, exprnode->type, exprnode, NULL, varnode, NULL, 0); } } // Generate any global space if (class == C_GLOBAL || class == C_STATIC) genglobsym(sym); return (sym); } // Given the type, name and class of an variable, parse // the size of the array, if any. Then parse any initialisation // value and allocate storage for it. // Return the variable's symbol table entry. 
static struct symtable *array_declaration(char *varname, int type, struct symtable *ctype, int class) { struct symtable *sym; // New symbol table entry int nelems= -1; // Assume the number of elements won't be given int maxelems; // The maximum number of elements in the init list int *initlist; // The list of initial elements int i=0, j; // Skip past the '[' scan(&Token); // See we have an array size if (Token.token != T_RBRACKET) { nelems= parse_literal(P_INT); if (nelems <= 0) fatald("Array size is illegal", nelems); } // Ensure we have a following ']' match(T_RBRACKET, "]"); // Add this as a known array. We treat the // array as a pointer to its elements' type switch (class) { case C_STATIC: case C_EXTERN: case C_GLOBAL: sym = addglob(varname, pointer_to(type), ctype, S_ARRAY, class, 0, 0); break; default: fatal("For now, declaration of non-global arrays is not implemented"); } // Array initialisation if (Token.token == T_ASSIGN) { if (class != C_GLOBAL && class != C_STATIC) fatals("Variable can not be initialised", varname); scan(&Token); // Get the following left curly bracket match(T_LBRACE, "{"); #define TABLE_INCREMENT 10 // If the array already has nelems, allocate that many elements // in the list. Otherwise, start with TABLE_INCREMENT. 
if (nelems != -1) maxelems= nelems; else maxelems= TABLE_INCREMENT; initlist= (int *)malloc(maxelems *sizeof(int)); // Loop getting a new literal value from the list while (1) { // Check we can add the next value, then parse and add it if (nelems != -1 && i == maxelems) fatal("Too many values in initialisation list"); initlist[i++]= parse_literal(type); // Increase the list size if the original size was // not set and we have hit the end of the current list if (nelems == -1 && i == maxelems) { maxelems += TABLE_INCREMENT; initlist= (int *)realloc(initlist, maxelems *sizeof(int)); } // Leave when we hit the right curly bracket if (Token.token == T_RBRACE) { scan(&Token); break; } // Next token must be a comma, then comma(); } // Zero any unused elements in the initlist. // Attach the list to the symbol table entry for (j=i; j < sym->nelems; j++) initlist[j]=0; if (i > nelems) nelems = i; sym->initlist= initlist; } // Set the size of the array and the number of elements sym->nelems= nelems; sym->size= sym->nelems * typesize(type, ctype); // Generate any global space if (class == C_GLOBAL || class == C_STATIC) genglobsym(sym); return (sym); } // Given a pointer to the new function being declared and // a possibly NULL pointer to the function's previous declaration, // parse a list of parameters and cross-check them against the // previous declaration. Return the count of parameters static int param_declaration_list(struct symtable *oldfuncsym, struct symtable *newfuncsym) { int type, paramcnt = 0; struct symtable *ctype; struct symtable *protoptr = NULL; struct ASTnode *unused; // Get the pointer to the first prototype parameter if (oldfuncsym != NULL) protoptr = oldfuncsym->member; // Loop getting any parameters while (Token.token != T_RPAREN) { // If the first token is 'void' if (Token.token == T_VOID) { // Peek at the next token. If a ')', the function // has no parameters, so leave the loop. 
scan(&Peektoken); if (Peektoken.token == T_RPAREN) { // Move the Peektoken into the Token paramcnt= 0; scan(&Token); break; } } // Get the type of the next parameter type = declaration_list(&ctype, C_PARAM, T_COMMA, T_RPAREN, &unused); if (type == -1) fatal("Bad type in parameter list"); // Ensure the type of this parameter matches the prototype if (protoptr != NULL) { if (type != protoptr->type) fatald("Type doesn't match prototype for parameter", paramcnt + 1); protoptr = protoptr->next; } paramcnt++; // Stop when we hit the right parenthesis if (Token.token == T_RPAREN) break; // We need a comma as separator comma(); } if (oldfuncsym != NULL && paramcnt != oldfuncsym->nelems) fatals("Parameter count mismatch for function", oldfuncsym->name); // Return the count of parameters return (paramcnt); } // // function_declaration: type identifier '(' parameter_list ')' ; // | type identifier '(' parameter_list ')' compound_statement ; // // Parse the declaration of function. static struct symtable *function_declaration(char *funcname, int type, struct symtable *ctype, int class) { struct ASTnode *tree, *finalstmt; struct symtable *oldfuncsym, *newfuncsym = NULL; int endlabel, paramcnt; // Text has the identifier's name. If this exists and is a // function, get the id. Otherwise, set oldfuncsym to NULL. if ((oldfuncsym = findsymbol(funcname)) != NULL) if (oldfuncsym->stype != S_FUNCTION) oldfuncsym = NULL; // If this is a new function declaration, get a // label-id for the end label, and add the function // to the symbol table, if (oldfuncsym == NULL) { endlabel = genlabel(); // Assumtion: functions only return scalar types, so NULL below newfuncsym = addglob(funcname, type, NULL, S_FUNCTION, class, 0, endlabel); } // Scan in the '(', any parameters and the ')'. 
// Pass in any existing function prototype pointer lparen(); paramcnt = param_declaration_list(oldfuncsym, newfuncsym); rparen(); // If this is a new function declaration, update the // function symbol entry with the number of parameters. // Also copy the parameter list into the function's node. if (newfuncsym) { newfuncsym->nelems = paramcnt; newfuncsym->member = Parmhead; oldfuncsym = newfuncsym; } // Clear out the parameter list Parmhead = Parmtail = NULL; // Declaration ends in a semicolon, only a prototype. if (Token.token == T_SEMI) return (oldfuncsym); // This is not just a prototype. // Set the Functionid global to the function's symbol pointer Functionid = oldfuncsym; // Get the AST tree for the compound statement and mark // that we have parsed no loops or switches yet Looplevel = 0; Switchlevel = 0; lbrace(); tree = compound_statement(0); rbrace(); // If the function type isn't P_VOID .. if (type != P_VOID) { // Error if no statements in the function if (tree == NULL) fatal("No statements in function with non-void type"); // Check that the last AST operation in the // compound statement was a return statement finalstmt = (tree->op == A_GLUE) ? tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); } // Build the A_FUNCTION node which has the function's symbol pointer // and the compound statement sub-tree tree = mkastunary(A_FUNCTION, type, tree, oldfuncsym, endlabel); // Do optimisations on the AST tree tree= optimise(tree); // Dump the AST tree if requested if (O_dumpAST) { dumpAST(tree, NOLABEL, 0); fprintf(stdout, "\n\n"); } // Generate the assembly code for it genAST(tree, NOLABEL, NOLABEL, NOLABEL, 0); // Now free the symbols associated // with this function freeloclsyms(); return (oldfuncsym); } // Parse composite type declarations: structs or unions. // Either find an existing struct/union declaration, or build // a struct/union symbol table entry and return its pointer. 
static struct symtable *composite_declaration(int type) { struct symtable *ctype = NULL; struct symtable *m; struct ASTnode *unused; int offset; int t; // Skip the struct/union keyword scan(&Token); // See if there is a following struct/union name if (Token.token == T_IDENT) { // Find any matching composite type if (type == P_STRUCT) ctype = findstruct(Text); else ctype = findunion(Text); scan(&Token); } // If the next token isn't an LBRACE , this is // the usage of an existing struct/union type. // Return the pointer to the type. if (Token.token != T_LBRACE) { if (ctype == NULL) fatals("unknown struct/union type", Text); return (ctype); } // Ensure this struct/union type hasn't been // previously defined if (ctype) fatals("previously defined struct/union", Text); // Build the composite type and skip the left brace if (type == P_STRUCT) ctype = addstruct(Text); else ctype = addunion(Text); scan(&Token); // Scan in the list of members while (1) { // Get the next member. m is used as a dummy t= declaration_list(&m, C_MEMBER, T_SEMI, T_RBRACE, &unused); if (t== -1) fatal("Bad type in member list"); if (Token.token == T_SEMI) scan(&Token); if (Token.token == T_RBRACE) break; } // Attach to the struct type's node rbrace(); if (Membhead==NULL) fatals("No members in struct", ctype->name); ctype->member = Membhead; Membhead = Membtail = NULL; // Set the offset of the initial member // and find the first free byte after it m = ctype->member; m->st_posn = 0; offset = typesize(m->type, m->ctype); // Set the position of each successive member in the composite type // Unions are easy. 
For structs, align the member and find the next free byte for (m = m->next; m != NULL; m = m->next) { // Set the offset for this member if (type == P_STRUCT) m->st_posn = genalign(m->type, offset, 1); else m->st_posn = 0; // Get the offset of the next free byte after this member offset += typesize(m->type, m->ctype); } // Set the overall size of the composite type ctype->size = offset; return (ctype); } // Parse an enum declaration static void enum_declaration(void) { struct symtable *etype = NULL; char *name= NULL; int intval = 0; // Skip the enum keyword. scan(&Token); // If there's a following enum type name, get a // pointer to any existing enum type node. if (Token.token == T_IDENT) { etype = findenumtype(Text); name = strdup(Text); // As it gets tromped soon scan(&Token); } // If the next token isn't a LBRACE, check // that we have an enum type name, then return if (Token.token != T_LBRACE) { if (etype == NULL) fatals("undeclared enum type:", name); return; } // We do have an LBRACE. Skip it scan(&Token); // If we have an enum type name, ensure that it // hasn't been declared before. if (etype != NULL) fatals("enum type redeclared:", etype->name); else // Build an enum type node for this identifier etype = addenum(name, C_ENUMTYPE, 0); // Loop to get all the enum values while (1) { // Ensure we have an identifier // Copy it in case there's an int literal coming up ident(); name = strdup(Text); // Ensure this enum value hasn't been declared before etype = findenumval(name); if (etype != NULL) fatals("enum value redeclared:", Text); // If the next token is an '=', skip it and // get the following int literal if (Token.token == T_ASSIGN) { scan(&Token); if (Token.token != T_INTLIT) fatal("Expected int literal after '='"); intval = Token.intvalue; scan(&Token); } // Build an enum value node for this identifier. // Increment the value for the next enum identifier. 
etype = addenum(name, C_ENUMVAL, intval++); // Bail out on a right curly bracket, else get a comma if (Token.token == T_RBRACE) break; comma(); } scan(&Token); // Skip over the right curly bracket } // Parse a typedef declaration and return the type // and ctype that it represents static int typedef_declaration(struct symtable **ctype) { int type, class = 0; // Skip the typedef keyword. scan(&Token); // Get the actual type following the keyword type = parse_type(ctype, &class); if (class != 0) fatal("Can't have extern in a typedef declaration"); // See if the typedef identifier already exists if (findtypedef(Text) != NULL) fatals("redefinition of typedef", Text); // Get any following '*' tokens type = parse_stars(type); // It doesn't exist so add it to the typedef list addtypedef(Text, type, *ctype); scan(&Token); return (type); } // Given a typedef name, return the type it represents static int type_of_typedef(char *name, struct symtable **ctype) { struct symtable *t; // Look up the typedef in the list t = findtypedef(name); if (t == NULL) fatals("unknown type", name); scan(&Token); *ctype = t->ctype; return (t->type); } // Parse the declaration of a variable or function. // The type and any following '*'s have been scanned, and we // have the identifier in the Token variable. // The class argument is the variable's class. // Return a pointer to the symbol's entry in the symbol table static struct symtable *symbol_declaration(int type, struct symtable *ctype, int class, struct ASTnode **tree) { struct symtable *sym = NULL; char *varname = strdup(Text); // Ensure that we have an identifier. // We copied it above so we can scan more tokens in, e.g. // an assignment expression for a local variable. 
ident(); // Deal with function declarations if (Token.token == T_LPAREN) { return (function_declaration(varname, type, ctype, class)); } // See if this array or scalar variable has already been declared switch (class) { case C_EXTERN: case C_STATIC: case C_GLOBAL: if (findglob(varname) != NULL) fatals("Duplicate global variable declaration", varname); case C_LOCAL: case C_PARAM: if (findlocl(varname) != NULL) fatals("Duplicate local variable declaration", varname); case C_MEMBER: if (findmember(varname) != NULL) fatals("Duplicate struct/union member declaration", varname); } // Add the array or scalar variable to the symbol table if (Token.token == T_LBRACKET) sym = array_declaration(varname, type, ctype, class); else sym = scalar_declaration(varname, type, ctype, class, tree); return (sym); } // Parse a list of symbols where there is an initial type. // Return the type of the symbols. et1 and et2 are end tokens. int declaration_list(struct symtable **ctype, int class, int et1, int et2, struct ASTnode **gluetree) { int inittype, type; struct symtable *sym; struct ASTnode *tree; *gluetree= NULL; // Get the initial type. 
If -1, it was // a composite type definition, return this if ((inittype = parse_type(ctype, &class)) == -1) return (inittype); // Now parse the list of symbols while (1) { // See if this symbol is a pointer type = parse_stars(inittype); // Parse this symbol sym = symbol_declaration(type, *ctype, class, &tree); // We parsed a function, there is no list so leave if (sym->stype == S_FUNCTION) { if (class != C_GLOBAL && class != C_STATIC) fatal("Function definition not at global level"); return (type); } // Glue any AST tree from a local declaration // to build a sequence of assignments to perform if (*gluetree== NULL) *gluetree= tree; else *gluetree = mkastnode(A_GLUE, P_NONE, *gluetree, NULL, tree, NULL, 0); // We are at the end of the list, leave if (Token.token == et1 || Token.token == et2) return (type); // Otherwise, we need a comma as separator comma(); } } // Parse one or more global declarations, either // variables, functions or structs void global_declarations(void) { struct symtable *ctype; struct ASTnode *unused; while (Token.token != T_EOF) { declaration_list(&ctype, C_GLOBAL, T_SEMI, T_EOF, &unused); // Skip any semicolons and right curly brackets if (Token.token == T_SEMI) scan(&Token); } } ================================================ FILE: 50_Mop_up_pt1/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c void reject_token(struct token *t); int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue); struct ASTnode *mkastleaf(int op, int type, struct symtable *sym, int intvalue); struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, struct symtable *sym, int intvalue); void dumpAST(struct ASTnode *n, int label, int parentASTop); // gen.c int genlabel(void); int genAST(struct ASTnode *n, int iflabel, int 
looptoplabel, int loopendlabel, int parentASTop); void genpreamble(); void genpostamble(); void genfreeregs(int keepreg); void genglobsym(struct symtable *node); int genglobstr(char *strvalue); int genprimsize(int type); int genalign(int type, int offset, int direction); void genreturn(int reg, int id); // cg.c int cgprimsize(int type); int cgalign(int type, int offset, int direction); void cgtextseg(); void cgdataseg(); int alloc_register(void); void freeall_registers(int keepreg); void cgpreamble(); void cgpostamble(); void cgfuncpreamble(struct symtable *sym); void cgfuncpostamble(struct symtable *sym); int cgloadint(int value, int type); int cgloadglob(struct symtable *sym, int op); int cgloadlocal(struct symtable *sym, int op); int cgloadglobstr(int label); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdiv(int r1, int r2); int cgshlconst(int r, int val); int cgcall(struct symtable *sym, int numargs); void cgcopyarg(int r, int argposn); int cgstorglob(int r, struct symtable *sym); int cgstorlocal(int r, struct symtable *sym); void cgglobsym(struct symtable *node); void cgglobstr(int l, char *strvalue); int cgcompare_and_set(int ASTop, int r1, int r2); int cgcompare_and_jump(int ASTop, int r1, int r2, int label); void cglabel(int l); void cgjump(int l); int cgwiden(int r, int oldtype, int newtype); void cgreturn(int reg, struct symtable *sym); int cgaddress(struct symtable *sym); int cgderef(int r, int type); int cgstorderef(int r1, int r2, int type); int cgnegate(int r); int cginvert(int r); int cglognot(int r); int cglogor(int r1, int r2); int cglogand(int r1, int r2); int cgboolean(int r, int op, int label); int cgand(int r1, int r2); int cgor(int r1, int r2); int cgxor(int r1, int r2); int cgshl(int r1, int r2); int cgshr(int r1, int r2); void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel); void cgmove(int r1, int r2); // expr.c struct ASTnode *expression_list(int 
endtoken); struct ASTnode *binexpr(int ptp); // stmt.c struct ASTnode *compound_statement(int inswitch); // misc.c void match(int t, char *what); void semi(void); void lbrace(void); void rbrace(void); void lparen(void); void rparen(void); void ident(void); void comma(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node); struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn); struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn); struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int nelems); struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype); struct symtable *addstruct(char *name); struct symtable *addunion(char *name); struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int nelems); struct symtable *addenum(char *name, int class, int value); struct symtable *addtypedef(char *name, int type, struct symtable *ctype); struct symtable *findglob(char *s); struct symtable *findlocl(char *s); struct symtable *findsymbol(char *s); struct symtable *findmember(char *s); struct symtable *findstruct(char *s); struct symtable *findunion(char *s); struct symtable *findenumtype(char *s); struct symtable *findenumval(char *s); struct symtable *findtypedef(char *s); void clear_symtable(void); void freeloclsyms(void); void freestaticsyms(void); void dumptable(struct symtable *head, char *name, int indent); void dumpsymtables(void); // decl.c int parse_type(struct symtable **ctype, int *class); int parse_stars(int type); int parse_cast(void); int declaration_list(struct symtable **ctype, int class, int et1, int et2, struct ASTnode **gluetree); void global_declarations(void); // types.c int inttype(int type); int 
ptrtype(int type); int pointer_to(int type); int value_at(int type); int typesize(int type, struct symtable *ctype); struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op); // opt.c struct ASTnode *optimise(struct ASTnode *n); ================================================ FILE: 50_Mop_up_pt1/defs.h ================================================ #include #include #include #include // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 enum { TEXTLEN = 512 // Length of identifiers in input }; // Commands and default filenames #define AOUT "a.out" #ifdef __NASM__ #define ASCMD "nasm -f elf64 -o " #define LDCMD "cc -no-pie -fno-plt -Wall -o " #else #define ASCMD "as -o " #define LDCMD "cc -o " #endif #define CPPCMD "cpp -nostdinc -isystem " // Token types enum { T_EOF, // Binary operators T_ASSIGN, T_ASPLUS, T_ASMINUS, T_ASSTAR, T_ASSLASH, T_QUESTION, T_LOGOR, T_LOGAND, T_OR, T_XOR, T_AMPER, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, T_LSHIFT, T_RSHIFT, T_PLUS, T_MINUS, T_STAR, T_SLASH, // Other operators T_INC, T_DEC, T_INVERT, T_LOGNOT, // Type keywords T_VOID, T_CHAR, T_INT, T_LONG, // Other keywords T_IF, T_ELSE, T_WHILE, T_FOR, T_RETURN, T_STRUCT, T_UNION, T_ENUM, T_TYPEDEF, T_EXTERN, T_BREAK, T_CONTINUE, T_SWITCH, T_CASE, T_DEFAULT, T_SIZEOF, T_STATIC, // Structural tokens T_INTLIT, T_STRLIT, T_SEMI, T_IDENT, T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, T_LBRACKET, T_RBRACKET, T_COMMA, T_DOT, T_ARROW, T_COLON }; // Token structure struct token { int token; // Token type, from the enum list above char *tokstr; // String version of the token int intvalue; // For T_INTLIT, the integer value }; // AST node types. 
The first few line up // with the related tokens enum { A_ASSIGN = 1, A_ASPLUS, A_ASMINUS, A_ASSTAR, A_ASSLASH, A_TERNARY, A_LOGOR, A_LOGAND, A_OR, A_XOR, A_AND, A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_LSHIFT, A_RSHIFT, A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_INTLIT, A_STRLIT, A_IDENT, A_GLUE, A_IF, A_WHILE, A_FUNCTION, A_WIDEN, A_RETURN, A_FUNCCALL, A_DEREF, A_ADDR, A_SCALE, A_PREINC, A_PREDEC, A_POSTINC, A_POSTDEC, A_NEGATE, A_INVERT, A_LOGNOT, A_TOBOOL, A_BREAK, A_CONTINUE, A_SWITCH, A_CASE, A_DEFAULT, A_CAST }; // Primitive types. The bottom 4 bits is an integer // value that represents the level of indirection, // e.g. 0= no pointer, 1= pointer, 2= pointer pointer etc. enum { P_NONE, P_VOID = 16, P_CHAR = 32, P_INT = 48, P_LONG = 64, P_STRUCT=80, P_UNION=96 }; // Structural types enum { S_VARIABLE, S_FUNCTION, S_ARRAY }; // Storage classes enum { C_GLOBAL = 1, // Globally visible symbol C_LOCAL, // Locally visible symbol C_PARAM, // Locally visible function parameter C_EXTERN, // External globally visible symbol C_STATIC, // Static symbol, visible in one file C_STRUCT, // A struct C_UNION, // A union C_MEMBER, // Member of a struct or union C_ENUMTYPE, // A named enumeration type C_ENUMVAL, // A named enumeration value C_TYPEDEF // A named typedef }; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol struct symtable *ctype; // If struct/union, ptr to that type int stype; // Structural type for the symbol int class; // Storage class for the symbol int size; // Total size in bytes of this symbol int nelems; // Functions: # params. 
Arrays: # elements #define st_endlabel st_posn // For functions, the end label int st_posn; // For locals, the negative offset // from the stack base pointer int *initlist; // List of initial values struct symtable *next; // Next symbol in one list struct symtable *member; // First member of a function, struct, }; // union or enum // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates int rvalue; // True if the node is an rvalue struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; struct symtable *sym; // For many AST nodes, the pointer to // the symbol in the symbol table #define a_intvalue a_size // For A_INTLIT, the integer value int a_size; // For A_SCALE, the size to scale by }; enum { NOREG = -1, // Use NOREG when the AST generation // functions have no register to return NOLABEL = 0 // Use NOLABEL when we have no label to // pass to genAST() }; ================================================ FILE: 50_Mop_up_pt1/expr.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of expressions // Copyright (c) 2019 Warren Toomey, GPL3 // expression_list: // | expression // | expression ',' expression_list // ; // Parse a list of zero or more comma-separated expressions and // return an AST composed of A_GLUE nodes with the left-hand child // being the sub-tree of previous expressions (or NULL) and the right-hand // child being the next expression. Each A_GLUE node will have size field // set to the number of expressions in the tree at this point. 
If no // expressions are parsed, NULL is returned struct ASTnode *expression_list(int endtoken) { struct ASTnode *tree = NULL; struct ASTnode *child = NULL; int exprcount = 0; // Loop until the end token while (Token.token != endtoken) { // Parse the next expression and increment the expression count child = binexpr(0); exprcount++; // Build an A_GLUE AST node with the previous tree as the left child // and the new expression as the right child. Store the expression count. tree = mkastnode(A_GLUE, P_NONE, tree, NULL, child, NULL, exprcount); // Stop when we reach the end token if (Token.token == endtoken) break; // Must have a ',' at this point match(T_COMMA, ","); } // Return the tree of expressions return (tree); } // Parse a function call and return its AST static struct ASTnode *funccall(void) { struct ASTnode *tree; struct symtable *funcptr; // Check that the identifier has been defined as a function, // then make a leaf node for it. if ((funcptr = findsymbol(Text)) == NULL || funcptr->stype != S_FUNCTION) { fatals("Undeclared function", Text); } // Get the '(' lparen(); // Parse the argument expression list tree = expression_list(T_RPAREN); // XXX Check type of each argument against the function's prototype // Build the function call AST node. Store the // function's return type as this node's type. 
// Also record the function's symbol-id tree = mkastunary(A_FUNCCALL, funcptr->type, tree, funcptr, 0); // Get the ')' rparen(); return (tree); } // Parse the index into an array and return an AST tree for it static struct ASTnode *array_access(void) { struct ASTnode *left, *right; struct symtable *aryptr; // Check that the identifier has been defined as an array // then make a leaf node for it that points at the base if ((aryptr = findsymbol(Text)) == NULL || aryptr->stype != S_ARRAY) { fatals("Undeclared array", Text); } left = mkastleaf(A_ADDR, aryptr->type, aryptr, 0); // Get the '[' scan(&Token); // Parse the following expression right = binexpr(0); // Get the ']' match(T_RBRACKET, "]"); // Ensure that this is of int type if (!inttype(right->type)) fatal("Array index is not of integer type"); // Scale the index by the size of the element's type right = modify_type(right, left->type, A_ADD); // Return an AST tree where the array's base has the offset // added to it, and dereference the element. Still an lvalue // at this point. left = mkastnode(A_ADD, aryptr->type, left, NULL, right, NULL, 0); left = mkastunary(A_DEREF, value_at(left->type), left, NULL, 0); return (left); } // Parse the member reference of a struct or union // and return an AST tree for it. If withpointer is true, // the access is through a pointer to the member. static struct ASTnode *member_access(int withpointer) { struct ASTnode *left, *right; struct symtable *compvar; struct symtable *typeptr; struct symtable *m; // Check that the identifier has been declared as a // struct/union or a struct/union pointer if ((compvar = findsymbol(Text)) == NULL) fatals("Undeclared variable", Text); if (withpointer && compvar->type != pointer_to(P_STRUCT) && compvar->type != pointer_to(P_UNION)) fatals("Undeclared variable", Text); if (!withpointer && compvar->type != P_STRUCT && compvar->type != P_UNION) fatals("Undeclared variable", Text); // If a pointer to a struct/union, get the pointer's value. 
// Otherwise, make a leaf node that points at the base // Either way, it's an rvalue if (withpointer) { left = mkastleaf(A_IDENT, pointer_to(compvar->type), compvar, 0); } else left = mkastleaf(A_ADDR, compvar->type, compvar, 0); left->rvalue = 1; // Get the details of the composite type typeptr = compvar->ctype; // Skip the '.' or '->' token and get the member's name scan(&Token); ident(); // Find the matching member's name in the type // Die if we can't find it for (m = typeptr->member; m != NULL; m = m->next) if (!strcmp(m->name, Text)) break; if (m == NULL) fatals("No member found in struct/union: ", Text); // Build an A_INTLIT node with the offset right = mkastleaf(A_INTLIT, P_INT, NULL, m->st_posn); // Add the member's offset to the base of the struct/union // and dereference it. Still an lvalue at this point left = mkastnode(A_ADD, pointer_to(m->type), left, NULL, right, NULL, 0); left = mkastunary(A_DEREF, m->type, left, NULL, 0); return (left); } // Parse a postfix expression and return // an AST node representing it. The // identifier is already in Text. static struct ASTnode *postfix(void) { struct ASTnode *n; struct symtable *varptr; struct symtable *enumptr; // If the identifier matches an enum value, // return an A_INTLIT node if ((enumptr = findenumval(Text)) != NULL) { scan(&Token); return (mkastleaf(A_INTLIT, P_INT, NULL, enumptr->st_posn)); } // Scan in the next token to see if we have a postfix expression scan(&Token); // Function call if (Token.token == T_LPAREN) return (funccall()); // An array reference if (Token.token == T_LBRACKET) return (array_access()); // Access into a struct or union if (Token.token == T_DOT) return (member_access(0)); if (Token.token == T_ARROW) return (member_access(1)); // A variable or name of an array. Check that it exists. 
if ((varptr = findsymbol(Text)) == NULL || (varptr->stype != S_VARIABLE && varptr->stype != S_ARRAY)) { fatals("Unknown variable", Text); } switch (Token.token) { // Post-increment: skip over the token case T_INC: scan(&Token); n = mkastleaf(A_POSTINC, varptr->type, varptr, 0); break; // Post-decrement: skip over the token case T_DEC: scan(&Token); n = mkastleaf(A_POSTDEC, varptr->type, varptr, 0); break; // Just a variable reference default: n = mkastleaf(A_IDENT, varptr->type, varptr, 0); } return (n); } // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; int id; int type = 0; int size, class; struct symtable *ctype; switch (Token.token) { case T_STATIC: case T_EXTERN: fatal("Compiler doesn't support static or extern local declarations"); case T_SIZEOF: // Skip the T_SIZEOF and ensure we have a left parenthesis scan(&Token); if (Token.token != T_LPAREN) fatal("Left parenthesis expected after sizeof"); scan(&Token); // Get the type inside the parentheses type = parse_stars(parse_type(&ctype, &class)); // Get the type's size size = typesize(type, ctype); rparen(); // Return a leaf node int literal with the size return (mkastleaf(A_INTLIT, P_INT, NULL, size)); case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. // Make it a P_CHAR if it's within the P_CHAR range if (Token.intvalue >= 0 && Token.intvalue < 256) n = mkastleaf(A_INTLIT, P_CHAR, NULL, Token.intvalue); else n = mkastleaf(A_INTLIT, P_INT, NULL, Token.intvalue); break; case T_STRLIT: // For a STRLIT token, generate the assembly for it. // Then make a leaf AST node for it. id is the string's label. id = genglobstr(Text); n = mkastleaf(A_STRLIT, pointer_to(P_CHAR), NULL, id); break; case T_IDENT: return (postfix()); case T_LPAREN: // Beginning of a parenthesised expression, skip the '('. 
scan(&Token); // If the token after is a type identifier, this is a cast expression switch (Token.token) { case T_IDENT: // We have to see if the identifier matches a typedef. // If not, treat it as an expression. if (findtypedef(Text) == NULL) { n = binexpr(0); break; } case T_VOID: case T_CHAR: case T_INT: case T_LONG: case T_STRUCT: case T_UNION: case T_ENUM: // Get the type inside the parentheses type = parse_cast(); // Skip the closing ')' and then parse the following expression rparen(); default: n = binexpr(0); // Scan in the expression } // We now have at least an expression in n, and possibly a non-zero type in type // if there was a cast. Skip the closing ')' if there was no cast. if (type == 0) rparen(); else // Otherwise, make a unary AST node for the cast n = mkastunary(A_CAST, type, n, NULL, 0); return (n); default: fatals("Expecting a primary expression, got token", Token.tokstr); } // Scan in the next token and return the leaf node scan(&Token); return (n); } // Convert a binary operator token into a binary AST operation. // We rely on a 1:1 mapping from token to AST operation static int binastop(int tokentype) { if (tokentype > T_EOF && tokentype <= T_SLASH) return (tokentype); fatald("Syntax error, token", tokentype); return (0); // Keep -Wall happy } // Return true if a token is right-associative, // false otherwise. static int rightassoc(int tokentype) { if (tokentype >= T_ASSIGN && tokentype <= T_ASSLASH) return (1); return (0); } // Operator precedence for each token. Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 10, // T_EOF, T_ASSIGN, T_ASPLUS, 10, 10, 10, // T_ASMINUS, T_ASSTAR, T_ASSLASH, 15, // T_QUESTION, 20, 30, // T_LOGOR, T_LOGAND 40, 50, 60, // T_OR, T_XOR, T_AMPER 70, 70, // T_EQ, T_NE 80, 80, 80, 80, // T_LT, T_GT, T_LE, T_GE 90, 90, // T_LSHIFT, T_RSHIFT 100, 100, // T_PLUS, T_MINUS 110, 110 // T_STAR, T_SLASH }; // Check that we have a binary operator and // return its precedence. 
static int op_precedence(int tokentype) { int prec; if (tokentype > T_SLASH) fatald("Token with no precedence in op_precedence:", tokentype); prec = OpPrec[tokentype]; if (prec == 0) fatald("Syntax error, token", tokentype); return (prec); } // prefix_expression: primary // | '*' prefix_expression // | '&' prefix_expression // | '-' prefix_expression // | '++' prefix_expression // | '--' prefix_expression // ; // Parse a prefix expression and return // a sub-tree representing it. struct ASTnode *prefix(void) { struct ASTnode *tree; switch (Token.token) { case T_AMPER: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Ensure that it's an identifier if (tree->op != A_IDENT) fatal("& operator must be followed by an identifier"); // Now change the operator to A_ADDR and the type to // a pointer to the original type tree->op = A_ADDR; tree->type = pointer_to(tree->type); break; case T_STAR: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's either another deref or an // identifier if (tree->op != A_IDENT && tree->op != A_DEREF) fatal("* operator must be followed by an identifier or *"); // Prepend an A_DEREF operation to the tree tree = mkastunary(A_DEREF, value_at(tree->type), tree, NULL, 0); break; case T_MINUS: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_NEGATE operation to the tree and // make the child an rvalue. Because chars are unsigned, // also widen this to int so that it's signed tree->rvalue = 1; tree = modify_type(tree, P_INT, 0); tree = mkastunary(A_NEGATE, tree->type, tree, NULL, 0); break; case T_INVERT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_INVERT operation to the tree and // make the child an rvalue. 
tree->rvalue = 1; tree = mkastunary(A_INVERT, tree->type, tree, NULL, 0); break; case T_LOGNOT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_LOGNOT operation to the tree and // make the child an rvalue. tree->rvalue = 1; tree = mkastunary(A_LOGNOT, tree->type, tree, NULL, 0); break; case T_INC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("++ operator must be followed by an identifier"); // Prepend an A_PREINC operation to the tree tree = mkastunary(A_PREINC, tree->type, tree, NULL, 0); break; case T_DEC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("-- operator must be followed by an identifier"); // Prepend an A_PREDEC operation to the tree tree = mkastunary(A_PREDEC, tree->type, tree, NULL, 0); break; default: tree = primary(); } return (tree); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; struct ASTnode *ltemp, *rtemp; int ASTop; int tokentype; // Get the tree on the left. // Fetch the next token at the same time. 
left = prefix(); // If we hit one of several terminating tokens, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON || tokentype == T_RBRACE) { left->rvalue = 1; return (left); } // While the precedence of this token is more than that of the // previous token precedence, or it's right associative and // equal to the previous token's precedence while ((op_precedence(tokentype) > ptp) || (rightassoc(tokentype) && op_precedence(tokentype) == ptp)) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Determine the operation to be performed on the sub-trees ASTop = binastop(tokentype); switch (ASTop) { case A_TERNARY: // Ensure we have a ':' token, scan in the expression after it match(T_COLON, ":"); ltemp= binexpr(0); // Build and return the AST for this statement. Use the middle // expression's type as the return type. XXX We should also // consider the third expression's type. return (mkastnode(A_TERNARY, right->type, left, right, ltemp, NULL, 0)); case A_ASSIGN: // Assignment // Make the right tree into an rvalue right->rvalue = 1; // Ensure the right's type matches the left right = modify_type(right, left->type, 0); if (right == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree. However, switch // left and right around, so that the right expression's // code will be generated before the left expression ltemp = left; left = right; right = ltemp; break; default: // We are not doing a ternary or assignment, so both trees should // be rvalues. Convert both trees into rvalue if they are lvalue trees left->rvalue = 1; right->rvalue = 1; // Ensure the two types are compatible by trying // to modify each tree to match the other's type. 
ltemp = modify_type(left, right->type, ASTop); rtemp = modify_type(right, left->type, ASTop); if (ltemp == NULL && rtemp == NULL) fatal("Incompatible types in binary expression"); if (ltemp != NULL) left = ltemp; if (rtemp != NULL) right = rtemp; } // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. left = mkastnode(binastop(tokentype), left->type, left, NULL, right, NULL, 0); // Update the details of the current token. // If we hit a terminating token, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON || tokentype == T_RBRACE) { left->rvalue = 1; return (left); } } // Return the tree we have when the precedence // is the same or lower left->rvalue = 1; return (left); } ================================================ FILE: 50_Mop_up_pt1/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number static int labelid = 1; int genlabel(void) { return (labelid++); } // Generate the code for an IF statement // and an optional ELSE clause. static int genIF(struct ASTnode *n, int looptoplabel, int loopendlabel) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. // When there is no ELSE clause, Lfalse _is_ // the ending label! Lfalse = genlabel(); if (n->right) Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. 
genAST(n->left, Lfalse, NOLABEL, NOLABEL, n->op); genfreeregs(NOREG); // Generate the true compound statement genAST(n->mid, NOLABEL, looptoplabel, loopendlabel, n->op); genfreeregs(NOREG); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); genfreeregs(NOREG); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = genlabel(); Lend = genlabel(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. genAST(n->left, Lend, Lstart, Lend, n->op); genfreeregs(NOREG); // Generate the compound statement for the body genAST(n->right, NOLABEL, Lstart, Lend, n->op); genfreeregs(NOREG); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Generate the code for a SWITCH statement static int genSWITCH(struct ASTnode *n) { int *caseval, *caselabel; int Ljumptop, Lend; int i, reg, defaultlabel = 0, casecount = 0; struct ASTnode *c; // Create arrays for the case values and associated labels. // Ensure that we have at least one position in each array. caseval = (int *) malloc((n->a_intvalue + 1) * sizeof(int)); caselabel = (int *) malloc((n->a_intvalue + 1) * sizeof(int)); // Generate labels for the top of the jump table, and the // end of the switch statement. Set a default label for // the end of the switch, in case we don't have a default. 
Ljumptop = genlabel(); Lend = genlabel(); defaultlabel = Lend; // Output the code to calculate the switch condition reg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, 0); cgjump(Ljumptop); genfreeregs(reg); // Walk the right-child linked list to // generate the code for each case for (i = 0, c = n->right; c != NULL; i++, c = c->right) { // Get a label for this case. Store it // and the case value in the arrays. // Record if it is the default case. caselabel[i] = genlabel(); caseval[i] = c->a_intvalue; cglabel(caselabel[i]); if (c->op == A_DEFAULT) defaultlabel = caselabel[i]; else casecount++; // Generate the case code. Pass in the end label for the breaks. // If case has no body, we will fall into the following body. if (c->left) genAST(c->left, NOLABEL, NOLABEL, Lend, 0); genfreeregs(NOREG); } // Ensure the last case jumps past the switch table cgjump(Lend); // Now output the switch table and the end label. cgswitch(reg, casecount, Ljumptop, caselabel, caseval, defaultlabel); cglabel(Lend); return (NOREG); } // Generate the code to copy the arguments of a // function call to its parameters, then call the // function itself. Return the register that holds // the function's return value. static int gen_funccall(struct ASTnode *n) { struct ASTnode *gluetree = n->left; int reg; int numargs = 0; // If there is a list of arguments, walk this list // from the last argument (right-hand child) to the // first while (gluetree) { // Calculate the expression's value reg = genAST(gluetree->right, NOLABEL, NOLABEL, NOLABEL, gluetree->op); // Copy this into the n'th function parameter: size is 1, 2, 3, ... 
cgcopyarg(reg, gluetree->a_size); // Keep the first (highest) number of arguments if (numargs == 0) numargs = gluetree->a_size; genfreeregs(NOREG); gluetree = gluetree->left; } // Call the function, clean up the stack (based on numargs), // and return its result return (cgcall(n->sym, numargs)); } // Generate code for a ternary expression static int gen_ternary(struct ASTnode *n) { int Lfalse, Lend; int reg, expreg; // Generate two labels: one for the // false expression, and one for the // end of the overall expression Lfalse = genlabel(); Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. genAST(n->left, Lfalse, NOLABEL, NOLABEL, n->op); genfreeregs(NOREG); // Get a register to hold the result of the two expressions reg = alloc_register(); // Generate the true expression and the false label. // Move the expression result into the known register. expreg = genAST(n->mid, NOLABEL, NOLABEL, NOLABEL, n->op); cgmove(expreg, reg); // Don't free the register holding the result, though! genfreeregs(reg); cgjump(Lend); cglabel(Lfalse); // Generate the false expression and the end label. // Move the expression result into the known register. expreg = genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); cgmove(expreg, reg); // Don't free the register holding the result, though! genfreeregs(reg); cglabel(Lend); return (reg); } // Given an AST, an optional label, and the AST op // of the parent, generate assembly code recursively. // Return the register id with the tree's final value. 
int genAST(struct ASTnode *n, int iflabel, int looptoplabel, int loopendlabel, int parentASTop) { int leftreg, rightreg; // We have some specific AST node handling at the top // so that we don't evaluate the child sub-trees immediately switch (n->op) { case A_IF: return (genIF(n, looptoplabel, loopendlabel)); case A_WHILE: return (genWHILE(n)); case A_SWITCH: return (genSWITCH(n)); case A_FUNCCALL: return (gen_funccall(n)); case A_TERNARY: return (gen_ternary(n)); case A_GLUE: // Do each child statement, and free the // registers after each child if (n->left != NULL) genAST(n->left, iflabel, looptoplabel, loopendlabel, n->op); genfreeregs(NOREG); if (n->right != NULL) genAST(n->right, iflabel, looptoplabel, loopendlabel, n->op); genfreeregs(NOREG); return (NOREG); case A_FUNCTION: // Generate the function's preamble before the code // in the child sub-tree cgfuncpreamble(n->sym); genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op); cgfuncpostamble(n->sym); return (NOREG); } // General AST node handling below // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op); if (n->right) rightreg = genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdiv(leftreg, rightreg)); case A_AND: return (cgand(leftreg, rightreg)); case A_OR: return (cgor(leftreg, rightreg)); case A_XOR: return (cgxor(leftreg, rightreg)); case A_LSHIFT: return (cgshl(leftreg, rightreg)); case A_RSHIFT: return (cgshr(leftreg, rightreg)); case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: // If the parent AST node is an A_IF, A_WHILE or A_TERNARY, // generate a compare followed by a jump. Otherwise, compare // registers and set one to 1 or 0 based on the comparison. 
if (parentASTop == A_IF || parentASTop == A_WHILE || parentASTop == A_TERNARY) return (cgcompare_and_jump(n->op, leftreg, rightreg, iflabel)); else return (cgcompare_and_set(n->op, leftreg, rightreg)); case A_INTLIT: return (cgloadint(n->a_intvalue, n->type)); case A_STRLIT: return (cgloadglobstr(n->a_intvalue)); case A_IDENT: // Load our value if we are an rvalue // or we are being dereferenced if (n->rvalue || parentASTop == A_DEREF) { if (n->sym->class == C_GLOBAL || n->sym->class == C_STATIC) { return (cgloadglob(n->sym, n->op)); } else { return (cgloadlocal(n->sym, n->op)); } } else return (NOREG); case A_ASPLUS: case A_ASMINUS: case A_ASSTAR: case A_ASSLASH: case A_ASSIGN: // For the '+=' and friends operators, generate suitable code // and get the register with the result. Then take the left child, // make it the right child so that we can fall into the assignment code. switch (n->op) { case A_ASPLUS: leftreg = cgadd(leftreg, rightreg); n->right = n->left; break; case A_ASMINUS: leftreg = cgsub(leftreg, rightreg); n->right = n->left; break; case A_ASSTAR: leftreg = cgmul(leftreg, rightreg); n->right = n->left; break; case A_ASSLASH: leftreg = cgdiv(leftreg, rightreg); n->right = n->left; break; } // Now into the assignment code // Are we assigning to an identifier or through a pointer? 
switch (n->right->op) { case A_IDENT: if (n->right->sym->class == C_GLOBAL || n->right->sym->class == C_STATIC) return (cgstorglob(leftreg, n->right->sym)); else return (cgstorlocal(leftreg, n->right->sym)); case A_DEREF: return (cgstorderef(leftreg, rightreg, n->right->type)); default: fatald("Can't A_ASSIGN in genAST(), op", n->op); } case A_WIDEN: // Widen the child's type to the parent's type return (cgwiden(leftreg, n->left->type, n->type)); case A_RETURN: cgreturn(leftreg, Functionid); return (NOREG); case A_ADDR: return (cgaddress(n->sym)); case A_DEREF: // If we are an rvalue, dereference to get the value we point at, // otherwise leave it for A_ASSIGN to store through the pointer if (n->rvalue) return (cgderef(leftreg, n->left->type)); else return (leftreg); case A_SCALE: // Small optimisation: use shift if the // scale value is a known power of two switch (n->a_size) { case 2: return (cgshlconst(leftreg, 1)); case 4: return (cgshlconst(leftreg, 2)); case 8: return (cgshlconst(leftreg, 3)); default: // Load a register with the size and // multiply the leftreg by this size rightreg = cgloadint(n->a_size, P_INT); return (cgmul(leftreg, rightreg)); } case A_POSTINC: case A_POSTDEC: // Load and decrement the variable's value into a register // and post increment/decrement it if (n->sym->class == C_GLOBAL || n->sym->class == C_STATIC) return (cgloadglob(n->sym, n->op)); else return (cgloadlocal(n->sym, n->op)); case A_PREINC: case A_PREDEC: // Load and decrement the variable's value into a register // and pre increment/decrement it if (n->left->sym->class == C_GLOBAL || n->left->sym->class == C_STATIC) return (cgloadglob(n->left->sym, n->op)); else return (cgloadlocal(n->left->sym, n->op)); case A_NEGATE: return (cgnegate(leftreg)); case A_INVERT: return (cginvert(leftreg)); case A_LOGNOT: return (cglognot(leftreg)); case A_LOGOR: return (cglogor(leftreg, rightreg)); case A_LOGAND: return (cglogand(leftreg, rightreg)); case A_TOBOOL: // If the parent AST node 
is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, set the register // to 0 or 1 based on it's zeroeness or non-zeroeness return (cgboolean(leftreg, parentASTop, iflabel)); case A_BREAK: cgjump(loopendlabel); return (NOREG); case A_CONTINUE: cgjump(looptoplabel); return (NOREG); case A_CAST: return (leftreg); // Not much to do default: fatald("Unknown AST operator", n->op); } return (NOREG); // Keep -Wall happy } void genpreamble() { cgpreamble(); } void genpostamble() { cgpostamble(); } void genfreeregs(int keepreg) { freeall_registers(keepreg); } void genglobsym(struct symtable *node) { cgglobsym(node); } int genglobstr(char *strvalue) { int l = genlabel(); cgglobstr(l, strvalue); return (l); } int genprimsize(int type) { return (cgprimsize(type)); } int genalign(int type, int offset, int direction) { return (cgalign(type, offset, direction)); } ================================================ FILE: 50_Mop_up_pt1/include/ctype.h ================================================ #ifndef _CTYPE_H_ # define _CTYPE_H_ int isalnum(int c); int isalpha(int c); int iscntrl(int c); int isdigit(int c); int isgraph(int c); int islower(int c); int isprint(int c); int ispunct(int c); int isspace(int c); int isupper(int c); int isxdigit(int c); int isascii(int c); int isblank(int c); #endif // _CTYPE_H_ ================================================ FILE: 50_Mop_up_pt1/include/errno.h ================================================ #ifndef _ERRNO_H_ # define _ERRNO_H_ #endif // _ERRNO_H_ ================================================ FILE: 50_Mop_up_pt1/include/fcntl.h ================================================ #ifndef _FCNTL_H_ # define _FCNTL_H_ #define O_RDONLY 00 #define O_WRONLY 01 #define O_RDWR 02 int open(char *pathname, int flags); #endif // _FCNTL_H_ ================================================ FILE: 50_Mop_up_pt1/include/stddef.h ================================================ #ifndef _STDDEF_H_ # define _STDDEF_H_ #ifndef 
NULL # define NULL (void *)0 #endif typedef long size_t; #endif //_STDDEF_H_ ================================================ FILE: 50_Mop_up_pt1/include/stdio.h ================================================ #ifndef _STDIO_H_ # define _STDIO_H_ #include #ifndef NULL # define NULL (void *)0 #endif // This FILE definition will do for now typedef char * FILE; FILE *fopen(char *pathname, char *mode); size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream); size_t fwrite(void *ptr, size_t size, size_t nmemb, FILE *stream); int fclose(FILE *stream); int printf(char *format); int fprintf(FILE *stream, char *format); int fputc(int c, FILE *stream); int fputs(char *s, FILE *stream); int putc(int c, FILE *stream); int putchar(int c); int puts(char *s); extern FILE *stdin; extern FILE *stdout; extern FILE *stderr; #endif // _STDIO_H_ ================================================ FILE: 50_Mop_up_pt1/include/stdlib.h ================================================ #ifndef _STDLIB_H_ # define _STDLIB_H_ void exit(int status); void _Exit(int status); void *malloc(int size); void free(void *ptr); void *calloc(int nmemb, int size); void *realloc(void *ptr, int size); #endif // _STDLIB_H_ ================================================ FILE: 50_Mop_up_pt1/include/string.h ================================================ #ifndef _STRING_H_ # define _STRING_H_ char *strdup(char *s); char *strchr(char *s, int c); char *strrchr(char *s, int c); #endif // _STRING_H_ ================================================ FILE: 50_Mop_up_pt1/include/unistd.h ================================================ #ifndef _UNISTD_H_ # define _UNISTD_H_ void _exit(int status); int unlink(char *pathname); #endif // _UNISTD_H_ ================================================ FILE: 50_Mop_up_pt1/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include #include // Compiler setup and top-level 
execution // Copyright (c) 2019 Warren Toomey, GPL3 // Given a string with a '.' and at least a 1-character suffix // after the '.', change the suffix to be the given character. // Return the new string or NULL if the original string could // not be modified char *alter_suffix(char *str, char suffix) { char *posn; char *newstr; // Clone the string if ((newstr = strdup(str)) == NULL) return (NULL); // Find the '.' if ((posn = strrchr(newstr, '.')) == NULL) return (NULL); // Ensure there is a suffix posn++; if (*posn == '\0') return (NULL); // Change the suffix and NUL-terminate the string *posn = suffix; posn++; *posn = '\0'; return (newstr); } // Given an input filename, compile that file // down to assembly code. Return the new file's name static char *do_compile(char *filename) { char cmd[TEXTLEN]; // Change the input file's suffix to .s Outfilename = alter_suffix(filename, 's'); if (Outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .c on the end\n", filename); exit(1); } // Generate the pre-processor command snprintf(cmd, TEXTLEN, "%s %s %s", CPPCMD, INCDIR, filename); // Open up the pre-processor pipe if ((Infile = popen(cmd, "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", filename, strerror(errno)); exit(1); } Infilename = filename; // Create the output file if ((Outfile = fopen(Outfilename, "w")) == NULL) { fprintf(stderr, "Unable to create %s: %s\n", Outfilename, strerror(errno)); exit(1); } Line = 1; // Reset the scanner Putback = '\n'; clear_symtable(); // Clear the symbol table if (O_verbose) printf("compiling %s\n", filename); scan(&Token); // Get the first token from the input Peektoken.token= 0; // and set there is no lookahead token genpreamble(); // Output the preamble global_declarations(); // Parse the global declarations genpostamble(); // Output the postamble fclose(Outfile); // Close the output file // Dump the symbol table if requested if (O_dumpsym) { printf("Symbols for %s\n", filename); dumpsymtables(); 
fprintf(stdout, "\n\n"); } freestaticsyms(); // Free any static symbols in the file return (Outfilename); } // Given an input filename, assemble that file // down to object code. Return the object filename char *do_assemble(char *filename) { char cmd[TEXTLEN]; int err; char *outfilename = alter_suffix(filename, 'o'); if (outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .s on the end\n", filename); exit(1); } // Build the assembly command and run it snprintf(cmd, TEXTLEN, "%s %s %s", ASCMD, outfilename, filename); if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Assembly of %s failed\n", filename); exit(1); } return (outfilename); } // Given a list of object files and an output filename, // link all of the object filenames together. void do_link(char *outfilename, char *objlist[]) { int cnt, size = TEXTLEN; char cmd[TEXTLEN], *cptr; int err; // Start with the linker command and the output file cptr = cmd; cnt = snprintf(cptr, size, "%s %s ", LDCMD, outfilename); cptr += cnt; size -= cnt; // Now append each object file while (*objlist != NULL) { cnt = snprintf(cptr, size, "%s ", *objlist); cptr += cnt; size -= cnt; objlist++; } if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Linking failed\n"); exit(1); } } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s [-vcSTM] [-o outfile] file [file ...]\n", prog); fprintf(stderr, " -v give verbose output of the compilation stages\n"); fprintf(stderr, " -c generate object files but don't link them\n"); fprintf(stderr, " -S generate assembly files but don't link them\n"); fprintf(stderr, " -T dump the AST trees for each input file\n"); fprintf(stderr, " -M dump the symbol table for each input file\n"); fprintf(stderr, " -o outfile, produce the outfile executable file\n"); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. 
Open up the input // file and call scanfile() to scan the tokens in it. enum { MAXOBJ = 100 }; int main(int argc, char *argv[]) { char *outfilename = AOUT; char *asmfile, *objfile; char *objlist[MAXOBJ]; int i, objcnt = 0; // Initialise our variables O_dumpAST = 0; O_dumpsym = 0; O_keepasm = 0; O_assemble = 0; O_verbose = 0; O_dolink = 1; // Scan for command-line options for (i = 1; i < argc; i++) { // No leading '-', stop scanning for options if (*argv[i] != '-') break; // For each option in this argument for (int j = 1; (*argv[i] == '-') && argv[i][j]; j++) { switch (argv[i][j]) { case 'o': outfilename = argv[++i]; // Save & skip to next argument break; case 'T': O_dumpAST = 1; break; case 'M': O_dumpsym = 1; break; case 'c': O_assemble = 1; O_keepasm = 0; O_dolink = 0; break; case 'S': O_keepasm = 1; O_assemble = 0; O_dolink = 0; break; case 'v': O_verbose = 1; break; default: usage(argv[0]); } } } // Ensure we have at lease one input file argument if (i >= argc) usage(argv[0]); // Work on each input file in turn while (i < argc) { asmfile = do_compile(argv[i]); // Compile the source file if (O_dolink || O_assemble) { objfile = do_assemble(asmfile); // Assemble it to object forma if (objcnt == (MAXOBJ - 2)) { fprintf(stderr, "Too many object files for the compiler to handle\n"); exit(1); } objlist[objcnt++] = objfile; // Add the object file's name objlist[objcnt] = NULL; // to the list of object files } if (!O_keepasm) // Remove the assembly file if unlink(asmfile); // we don't need to keep it i++; } // Now link all the object files together if (O_dolink) { do_link(outfilename, objlist); // If we don't need to keep the object // files, then remove them if (!O_assemble) { for (i = 0; objlist[i] != NULL; i++) unlink(objlist[i]); } } return (0); } ================================================ FILE: 50_Mop_up_pt1/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" #include #include // Miscellaneous 
functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current token is t, // and fetch the next token. Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Match a comma and fetch the next token void comma(void) { match(T_COMMA, "comma"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d of %s\n", s, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d of %s\n", s1, s2, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d of %s\n", s, d, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d of %s\n", s, c, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } ================================================ FILE: 50_Mop_up_pt1/opt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST Tree Optimisation Code // Copyright (c) 2019 Warren Toomey, GPL3 // Fold an AST tree with a binary operator // and two A_INTLIT children. Return either // the original tree or a new leaf node. 
static struct ASTnode *fold2(struct ASTnode *n) { int val, leftval, rightval; // Get the values from each child leftval = n->left->a_intvalue; rightval = n->right->a_intvalue; // Perform some of the binary operations. // For any AST op we can't do, return // the original tree. switch (n->op) { case A_ADD: val = leftval + rightval; break; case A_SUBTRACT: val = leftval - rightval; break; case A_MULTIPLY: val = leftval * rightval; break; case A_DIVIDE: // Don't try to divide by zero. if (rightval == 0) return (n); val = leftval / rightval; break; default: return (n); } // Return a leaf node with the new value return (mkastleaf(A_INTLIT, n->type, NULL, val)); } // Fold an AST tree with a unary operator // and one INTLIT children. Return either // the original tree or a new leaf node. static struct ASTnode *fold1(struct ASTnode *n) { int val; // Get the child value. Do the // operation if recognised. // Return the new leaf node. val = n->left->a_intvalue; switch (n->op) { case A_WIDEN: break; case A_INVERT: val = ~val; break; case A_LOGNOT: val = !val; break; default: return (n); } // Return a leaf node with the new value return (mkastleaf(A_INTLIT, n->type, NULL, val)); } // Attempt to do constant folding on // the AST tree with the root node n static struct ASTnode *fold(struct ASTnode *n) { if (n == NULL) return (NULL); // Fold on the left child, then // do the same on the right child n->left = fold(n->left); n->right = fold(n->right); // If both children are A_INTLITs, do a fold2() if (n->left && n->left->op == A_INTLIT) { if (n->right && n->right->op == A_INTLIT) n = fold2(n); else // If only the left is A_INTLIT, do a fold1() n = fold1(n); } // Return the possibly modified tree return (n); } // Optimise an AST tree by // constant folding in all sub-trees struct ASTnode *optimise(struct ASTnode *n) { n = fold(n); return (n); } ================================================ FILE: 50_Mop_up_pt1/scan.c ================================================ #include 
"defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { char *p; p = strchr(s, c); return (p ? p - s : -1); } // Get the next character from the input file. static int next(void) { int c, l; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return (c); } c = fgetc(Infile); // Read from input file while (c == '#') { // We've hit a pre-processor statement scan(&Token); // Get the line number into l if (Token.token != T_INTLIT) fatals("Expecting pre-processor line number, got:", Text); l = Token.intvalue; scan(&Token); // Get the filename in Text if (Token.token != T_STRLIT) fatals("Expecting pre-processor file name, got:", Text); if (Text[0] != '<') { // If this is a real filename if (strcmp(Text, Infilename)) // and not the one we have now Infilename = strdup(Text); // save it. Then update the line num Line = l; } while ((c = fgetc(Infile)) != '\n'); // Skip to the end of the line c = fgetc(Infile); // and get the next character } if ('\n' == c) Line++; // Increment line count return (c); } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. 
static int skip(void) {
  int c;

  c = next();
  while (' ' == c || '\t' == c || '\n' == c || '\r' == c || '\f' == c) {
    c = next();
  }
  return (c);
}

// Read in a hexadecimal constant from the input.
// Called after the "\x" of an escape sequence has been consumed.
// Returns the value, which must fit in 0..255.
static int hexchar(void) {
  int c, h, n = 0, f = 0;

  // Loop getting characters
  while (isxdigit(c = next())) {
    // Convert from char to int value
    h = chrpos("0123456789abcdef", tolower(c));

    // Add to running hex value
    n = n * 16 + h;

    // Check the range on every digit, so that an arbitrarily
    // long run of hex digits cannot overflow n first
    if (n > 255)
      fatal("value out of range after '\\x'");
    f = 1;
  }

  // We hit a non-hex character, put it back
  putback(c);

  // Flag tells us we never saw any hex characters
  if (!f)
    fatal("missing digits after '\\x'");
  return n;
}

// Return the next character from a character
// or string literal. Backslash escapes are decoded,
// so the caller cannot distinguish an escaped character
// from a literal one by looking at the return value.
static int scanch(void) {
  int i, c, c2;

  // Get the next input character and interpret
  // metacharacters that start with a backslash
  c = next();
  if (c == '\\') {
    switch (c = next()) {
      case 'a':
	return ('\a');
      case 'b':
	return ('\b');
      case 'f':
	return ('\f');
      case 'n':
	return ('\n');
      case 'r':
	return ('\r');
      case 't':
	return ('\t');
      case 'v':
	return ('\v');
      case '\\':
	return ('\\');
      case '"':
	return ('"');
      case '\'':
	return ('\'');

	// Deal with octal constants by reading in
	// characters until we hit a non-octal digit.
	// Build up the octal value in c2 and count
	// # digits in i. Permit only 3 octal digits.
      case '0':
      case '1':
      case '2':
      case '3':
      case '4':
      case '5':
      case '6':
      case '7':
	for (i = c2 = 0; isdigit(c) && c < '8'; c = next()) {
	  if (++i > 3)
	    break;
	  c2 = c2 * 8 + (c - '0');
	}

	putback(c);		// Put back the first non-octal char
	return (c2);
      case 'x':
	return hexchar();
      default:
	fatalc("unknown escape sequence", c);
    }
  }
  return (c);			// Just an ordinary old character!
}

// Scan and return an integer literal
// value from the input file.
static int scanint(int c) { int k, val = 0, radix = 10; // Assume the radix is 10, but if it starts with 0 if (c == '0') { // and the next character is 'x', it's radix 16 if ((c = next()) == 'x') { radix = 16; c = next(); } else // Otherwise, it's radix 8 radix = 8; } // Convert each character into an int value while ((k = chrpos("0123456789abcdef", tolower(c))) >= 0) { if (k >= radix) fatalc("invalid digit in integer literal", c); val = val * radix + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan in a string literal from the input file, // and store it in buf[]. Return the length of // the string. static int scanstr(char *buf) { int i, c; // Loop while we have enough buffer space for (i = 0; i < TEXTLEN - 1; i++) { // Get the next char and append to buf // Return when we hit the ending double quote if ((c = scanch()) == '"') { buf[i] = 0; return (i); } buf[i] = c; } // Ran out of buf[] space fatal("String literal too long"); return (0); } // Scan an identifier from the input file and // store it in buf[]. Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { fatal("Identifier too long"); } else if (i < lim - 1) { buf[i++] = c; } c = next(); } // We hit a non-valid character, put it back. // NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. 
static int keyword(char *s) { switch (*s) { case 'b': if (!strcmp(s, "break")) return (T_BREAK); break; case 'c': if (!strcmp(s, "case")) return (T_CASE); if (!strcmp(s, "char")) return (T_CHAR); if (!strcmp(s, "continue")) return (T_CONTINUE); break; case 'd': if (!strcmp(s, "default")) return (T_DEFAULT); break; case 'e': if (!strcmp(s, "else")) return (T_ELSE); if (!strcmp(s, "enum")) return (T_ENUM); if (!strcmp(s, "extern")) return (T_EXTERN); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'l': if (!strcmp(s, "long")) return (T_LONG); break; case 'r': if (!strcmp(s, "return")) return (T_RETURN); break; case 's': if (!strcmp(s, "sizeof")) return (T_SIZEOF); if (!strcmp(s, "static")) return (T_STATIC); if (!strcmp(s, "struct")) return (T_STRUCT); if (!strcmp(s, "switch")) return (T_SWITCH); break; case 't': if (!strcmp(s, "typedef")) return (T_TYPEDEF); break; case 'u': if (!strcmp(s, "union")) return (T_UNION); break; case 'v': if (!strcmp(s, "void")) return (T_VOID); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; } return (0); } // List of token strings, for debugging purposes char *Tstring[] = { "EOF", "=", "+=", "-=", "*=", "/=", "?", "||", "&&", "|", "^", "&", "==", "!=", ",", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", "default", "sizeof", "static", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":" }; // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. 
int scan(struct token *t) { int c, tokentype; // If we have a lookahead token, return this token if (Peektoken.token != 0) { t->token = Peektoken.token; t->tokstr = Peektoken.tokstr; t->intvalue = Peektoken.intvalue; Peektoken.token = 0; return (1); } // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': if ((c = next()) == '+') { t->token = T_INC; } else if (c == '=') { t->token = T_ASPLUS; } else { putback(c); t->token = T_PLUS; } break; case '-': if ((c = next()) == '-') { t->token = T_DEC; } else if (c == '>') { t->token = T_ARROW; } else if (c == '=') { t->token = T_ASMINUS; } else if (isdigit(c)) { // Negative int literal t->intvalue = -scanint(c); t->token = T_INTLIT; } else { putback(c); t->token = T_MINUS; } break; case '*': if ((c = next()) == '=') { t->token = T_ASSTAR; } else { putback(c); t->token = T_STAR; } break; case '/': if ((c = next()) == '=') { t->token = T_ASSLASH; } else { putback(c); t->token = T_SLASH; } break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case '[': t->token = T_LBRACKET; break; case ']': t->token = T_RBRACKET; break; case '~': t->token = T_INVERT; break; case '^': t->token = T_XOR; break; case ',': t->token = T_COMMA; break; case '.': t->token = T_DOT; break; case ':': t->token = T_COLON; break; case '?': t->token = T_QUESTION; break; case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { putback(c); t->token = T_LOGNOT; } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else if (c == '<') { t->token = T_LSHIFT; } else { putback(c); t->token = T_LT; } break; case '>': if ((c = next()) == '=') { t->token = T_GE; } else if (c == '>') { t->token = T_RSHIFT; } else { 
putback(c); t->token = T_GT; } break; case '&': if ((c = next()) == '&') { t->token = T_LOGAND; } else { putback(c); t->token = T_AMPER; } break; case '|': if ((c = next()) == '|') { t->token = T_LOGOR; } else { putback(c); t->token = T_OR; } break; case '\'': // If it's a quote, scan in the // literal character value and // the trailing quote t->intvalue = scanch(); t->token = T_INTLIT; if (next() != '\'') fatal("Expected '\\'' at end of char literal"); break; case '"': // Scan in a literal string scanstr(Text); t->token = T_STRLIT; break; default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if ((tokentype = keyword(Text)) != 0) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token t->tokstr = Tstring[t->token]; return (1); } ================================================ FILE: 50_Mop_up_pt1/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // Prototypes static struct ASTnode *single_statement(void); // compound_statement: // empty, i.e. 
no statement // | statement // | statement statements // ; // // statement: declaration // | expression_statement // | function_call // | if_statement // | while_statement // | for_statement // | return_statement // ; // if_statement: if_head // | if_head 'else' statement // ; // // if_head: 'if' '(' true_false_expression ')' statement ; // // Parse an IF statement including any // optional ELSE clause and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); rparen(); // Get the AST for the statement trueAST = single_statement(); // If we have an 'else', skip it // and get the AST for the statement if (Token.token == T_ELSE) { scan(&Token); falseAST = single_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, P_NONE, condAST, trueAST, falseAST, NULL, 0)); } // while_statement: 'while' '(' true_false_expression ')' statement ; // // Parse a WHILE statement and return its AST static struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); rparen(); // Get the AST for the statement. 
// Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Build and return the AST for this statement return (mkastnode(A_WHILE, P_NONE, condAST, NULL, bodyAST, NULL, 0)); } // for_statement: 'for' '(' expression_list ';' // true_false_expression ';' // expression_list ')' statement ; // // Parse a FOR statement and return its AST static struct ASTnode *for_statement(void) { struct ASTnode *condAST, *bodyAST; struct ASTnode *preopAST, *postopAST; struct ASTnode *tree; // Ensure we have 'for' '(' match(T_FOR, "for"); lparen(); // Get the pre_op expression and the ';' preopAST = expression_list(T_SEMI); semi(); // Get the condition and the ';'. // Force a non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); semi(); // Get the post_op expression and the ')' postopAST = expression_list(T_RPAREN); rparen(); // Get the statement which is the body // Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Glue the statement and the postop tree tree = mkastnode(A_GLUE, P_NONE, bodyAST, NULL, postopAST, NULL, 0); // Make a WHILE loop with the condition and this new body tree = mkastnode(A_WHILE, P_NONE, condAST, NULL, tree, NULL, 0); // And glue the preop tree to the A_WHILE tree return (mkastnode(A_GLUE, P_NONE, preopAST, NULL, tree, NULL, 0)); } // return_statement: 'return' '(' expression ')' ; // // Parse a return statement and return its AST static struct ASTnode *return_statement(void) { struct ASTnode *tree; // Can't return a value if function returns P_VOID if (Functionid->type == P_VOID) fatal("Can't return from a void function"); // Ensure we have 'return' '(' match(T_RETURN, "return"); lparen(); // Parse the following expression tree = binexpr(0); // Ensure this is compatible with the function's type tree = modify_type(tree, 
Functionid->type, 0); if (tree == NULL) fatal("Incompatible type to return"); // Add on the A_RETURN node tree = mkastunary(A_RETURN, P_NONE, tree, NULL, 0); // Get the ')' and ';' rparen(); semi(); return (tree); } // break_statement: 'break' ; // // Parse a break statement and return its AST static struct ASTnode *break_statement(void) { if (Looplevel == 0 && Switchlevel == 0) fatal("no loop or switch to break out from"); scan(&Token); semi(); return (mkastleaf(A_BREAK, 0, NULL, 0)); } // continue_statement: 'continue' ; // // Parse a continue statement and return its AST static struct ASTnode *continue_statement(void) { if (Looplevel == 0) fatal("no loop to continue to"); scan(&Token); semi(); return (mkastleaf(A_CONTINUE, 0, NULL, 0)); } // Parse a switch statement and return its AST static struct ASTnode *switch_statement(void) { struct ASTnode *left, *body, *n, *c; struct ASTnode *casetree= NULL, *casetail; int inloop=1, casecount=0; int seendefault=0; int ASTop, casevalue; // Skip the 'switch' and '(' scan(&Token); lparen(); // Get the switch expression, the ')' and the '{' left= binexpr(0); rparen(); lbrace(); // Ensure that this is of int type if (!inttype(left->type)) fatal("Switch expression is not of integer type"); // Build an A_SWITCH subtree with the expression as // the child n= mkastunary(A_SWITCH, 0, left, NULL, 0); // Now parse the cases Switchlevel++; while (inloop) { switch(Token.token) { // Leave the loop when we hit a '}' case T_RBRACE: if (casecount==0) fatal("No cases in switch"); inloop=0; break; case T_CASE: case T_DEFAULT: // Ensure this isn't after a previous 'default' if (seendefault) fatal("case or default after existing default"); // Set the AST operation. 
Scan the case value if required if (Token.token==T_DEFAULT) { ASTop= A_DEFAULT; seendefault= 1; scan(&Token); } else { ASTop= A_CASE; scan(&Token); left= binexpr(0); // Ensure the case value is an integer literal if (left->op != A_INTLIT) fatal("Expecting integer literal for case value"); casevalue= left->a_intvalue; // Walk the list of existing case values to ensure // that there isn't a duplicate case value for (c= casetree; c != NULL; c= c -> right) if (casevalue == c->a_intvalue) fatal("Duplicate case value"); } // Scan the ':' and increment the casecount match(T_COLON, ":"); casecount++; // If the next token is a T_CASE, the existing case will fall // into the next case. Otherwise, parse the case body. if (Token.token == T_CASE) body= NULL; else body= compound_statement(1); // Build a sub-tree with any compound statement as the left child // and link it in to the growing A_CASE tree if (casetree==NULL) { casetree= casetail= mkastunary(ASTop, 0, body, NULL, casevalue); } else { casetail->right= mkastunary(ASTop, 0, body, NULL, casevalue); casetail= casetail->right; } break; default: fatals("Unexpected token in switch", Token.tokstr); } } Switchlevel--; // We have a sub-tree with the cases and any default. Put the // case count into the A_SWITCH node and attach the case tree. n->a_intvalue= casecount; n->right= casetree; rbrace(); return(n); } // Parse a single statement and return its AST. static struct ASTnode *single_statement(void) { struct ASTnode *stmt; struct symtable *ctype; switch (Token.token) { case T_LBRACE: // We have a '{', so this is a compound statement lbrace(); stmt = compound_statement(0); rbrace(); return(stmt); case T_IDENT: // We have to see if the identifier matches a typedef. // If not, treat it as an expression. // Otherwise, fall down to the parse_type() call. 
if (findtypedef(Text) == NULL) { stmt= binexpr(0); semi(); return(stmt); } case T_CHAR: case T_INT: case T_LONG: case T_STRUCT: case T_UNION: case T_ENUM: case T_TYPEDEF: // The beginning of a variable declaration list. declaration_list(&ctype, C_LOCAL, T_SEMI, T_EOF, &stmt); semi(); return (stmt); // Any assignments from the declarations case T_IF: return (if_statement()); case T_WHILE: return (while_statement()); case T_FOR: return (for_statement()); case T_RETURN: return (return_statement()); case T_BREAK: return (break_statement()); case T_CONTINUE: return (continue_statement()); case T_SWITCH: return (switch_statement()); default: // For now, see if this is an expression. // This catches assignment statements. stmt= binexpr(0); semi(); return(stmt); } return (NULL); // Keep -Wall happy } // Parse a compound statement // and return its AST. If inswitch is true, // we look for a '}', 'case' or 'default' token // to end the parsing. Otherwise, look for // just a '}' to end the parsing. 
struct ASTnode *compound_statement(int inswitch) { struct ASTnode *left = NULL; struct ASTnode *tree; while (1) { // Parse a single statement tree = single_statement(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, P_NONE, left, NULL, tree, NULL, 0); } // Leave if we've hit the end token if (Token.token == T_RBRACE) return(left); if (inswitch && (Token.token == T_CASE || Token.token == T_DEFAULT)) return(left); } } ================================================ FILE: 50_Mop_up_pt1/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 // Append a node to the singly-linked list pointed to by head or tail void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node) { // Check for valid pointers if (head == NULL || tail == NULL || node == NULL) fatal("Either head, tail or node is NULL in appendsym"); // Append to the list if (*tail) { (*tail)->next = node; *tail = node; } else *head = *tail = node; node->next = NULL; } // Create a symbol node to be added to a symbol table list. // Set up the node's: // + type: char, int etc. // + ctype: composite type pointer for struct/union // + structural type: var, function, array etc. 
// + size: number of elements, or endlabel: end label for a function // + posn: Position information for local symbols // Return a pointer to the new node struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn) { // Get a new node struct symtable *node = (struct symtable *) malloc(sizeof(struct symtable)); if (node == NULL) fatal("Unable to malloc a symbol table node in newsym"); // Fill in the values if (name == NULL) node->name = NULL; else node->name = strdup(name); node->type = type; node->ctype = ctype; node->stype = stype; node->class = class; node->nelems = nelems; // For pointers and integer types, set the size // of the symbol. structs and union declarations // manually set this up themselves. if (ptrtype(type) || inttype(type)) node->size = nelems * typesize(type, ctype); node->st_posn = posn; node->next = NULL; node->member = NULL; node->initlist = NULL; return (node); } // Add a symbol to the global symbol list struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn) { struct symtable *sym = newsym(name, type, ctype, stype, class, nelems, posn); // For structs and unions, copy the size from the type node if (type == P_STRUCT || type == P_UNION) sym->size = ctype->size; appendsym(&Globhead, &Globtail, sym); return (sym); } // Add a symbol to the local symbol list struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int nelems) { struct symtable *sym = newsym(name, type, ctype, stype, C_LOCAL, nelems, 0); // For structs and unions, copy the size from the type node if (type == P_STRUCT || type == P_UNION) sym->size = ctype->size; appendsym(&Loclhead, &Locltail, sym); return (sym); } // Add a symbol to the parameter list struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype) { struct symtable *sym = newsym(name, type, ctype, stype, C_PARAM, 1, 0); appendsym(&Parmhead, &Parmtail, sym); 
return (sym); } // Add a symbol to the temporary member list struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int nelems) { struct symtable *sym = newsym(name, type, ctype, stype, C_MEMBER, nelems, 0); // For structs and unions, copy the size from the type node if (type == P_STRUCT || type == P_UNION) sym->size = ctype->size; appendsym(&Membhead, &Membtail, sym); return (sym); } // Add a struct to the struct list struct symtable *addstruct(char *name) { struct symtable *sym = newsym(name, P_STRUCT, NULL, 0, C_STRUCT, 0, 0); appendsym(&Structhead, &Structtail, sym); return (sym); } // Add a struct to the union list struct symtable *addunion(char *name) { struct symtable *sym = newsym(name, P_UNION, NULL, 0, C_UNION, 0, 0); appendsym(&Unionhead, &Uniontail, sym); return (sym); } // Add an enum type or value to the enum list. // Class is C_ENUMTYPE or C_ENUMVAL. // Use posn to store the int value. struct symtable *addenum(char *name, int class, int value) { struct symtable *sym = newsym(name, P_INT, NULL, 0, class, 0, value); appendsym(&Enumhead, &Enumtail, sym); return (sym); } // Add a typedef to the typedef list struct symtable *addtypedef(char *name, int type, struct symtable *ctype) { struct symtable *sym = newsym(name, type, ctype, 0, C_TYPEDEF, 0, 0); appendsym(&Typehead, &Typetail, sym); return (sym); } // Search for a symbol in a specific list. // Return a pointer to the found node or NULL if not found. // If class is not zero, also match on the given class static struct symtable *findsyminlist(char *s, struct symtable *list, int class) { for (; list != NULL; list = list->next) if ((list->name != NULL) && !strcmp(s, list->name)) if (class == 0 || class == list->class) return (list); return (NULL); } // Determine if the symbol s is in the global symbol table. // Return a pointer to the found node or NULL if not found. 
struct symtable *findglob(char *s) { return (findsyminlist(s, Globhead, 0)); } // Determine if the symbol s is in the local symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findlocl(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } return (findsyminlist(s, Loclhead, 0)); } // Determine if the symbol s is in the symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findsymbol(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } // Otherwise, try the local and global symbol lists node = findsyminlist(s, Loclhead, 0); if (node) return (node); return (findsyminlist(s, Globhead, 0)); } // Find a member in the member list // Return a pointer to the found node or NULL if not found. struct symtable *findmember(char *s) { return (findsyminlist(s, Membhead, 0)); } // Find a struct in the struct list // Return a pointer to the found node or NULL if not found. struct symtable *findstruct(char *s) { return (findsyminlist(s, Structhead, 0)); } // Find a struct in the union list // Return a pointer to the found node or NULL if not found. struct symtable *findunion(char *s) { return (findsyminlist(s, Unionhead, 0)); } // Find an enum type in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumtype(char *s) { return (findsyminlist(s, Enumhead, C_ENUMTYPE)); } // Find an enum value in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumval(char *s) { return (findsyminlist(s, Enumhead, C_ENUMVAL)); } // Find a type in the tyedef list // Return a pointer to the found node or NULL if not found. 
struct symtable *findtypedef(char *s) { return (findsyminlist(s, Typehead, 0)); } // Reset the contents of the symbol table void clear_symtable(void) { Globhead = Globtail = NULL; Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Membhead = Membtail = NULL; Structhead = Structtail = NULL; Unionhead = Uniontail = NULL; Enumhead = Enumtail = NULL; Typehead = Typetail = NULL; } // Clear all the entries in the local symbol table void freeloclsyms(void) { Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Functionid = NULL; } // Remove all static symbols from the global symbol table void freestaticsyms(void) { // g points at current node, prev at the previous one struct symtable *g, *prev = NULL; // Walk the global table looking for static entries for (g = Globhead; g != NULL; g = g->next) { if (g->class == C_STATIC) { // If there's a previous node, rearrange the prev pointer // to skip over the current node. If not, g is the head, // so do the same to Globhead if (prev != NULL) prev->next = g->next; else Globhead->next = g->next; // If g is the tail, point Globtail at the previous node // (if there is one), or Globhead if (g == Globtail) { if (prev != NULL) Globtail = prev; else Globtail = Globhead; } } } // Point prev at g before we move up to the next node prev = g; } // Dump a single symbol static void dumpsym(struct symtable *sym, int indent) { int i; for (i = 0; i < indent; i++) printf(" "); switch (sym->type & (~0xf)) { case P_VOID: printf("void "); break; case P_CHAR: printf("char "); break; case P_INT: printf("int "); break; case P_LONG: printf("long "); break; case P_STRUCT: if (sym->ctype != NULL) printf("struct %s ", sym->ctype->name); else printf("struct %s ", sym->name); break; case P_UNION: if (sym->ctype != NULL) printf("union %s ", sym->ctype->name); else printf("union %s ", sym->name); break; default: printf("unknown type "); } for (i = 0; i < (sym->type & 0xf); i++) printf("*"); printf("%s", sym->name); switch (sym->stype) { case 
S_VARIABLE: break; case S_FUNCTION: printf("()"); break; case S_ARRAY: printf("[]"); break; default: printf(" unknown stype"); } switch (sym->class) { case C_GLOBAL: printf(": global"); break; case C_LOCAL: printf(": local"); break; case C_PARAM: printf(": param"); break; case C_EXTERN: printf(": extern"); break; case C_STATIC: printf(": static"); break; case C_STRUCT: printf(": struct"); break; case C_UNION: printf(": union"); break; case C_MEMBER: printf(": member"); break; case C_ENUMTYPE: printf(": enumtype"); break; case C_ENUMVAL: printf(": enumval"); break; case C_TYPEDEF: printf(": typedef"); break; default: printf(": unknown class"); } switch (sym->stype) { case S_VARIABLE: if (sym->class == C_ENUMVAL) printf(", value %d\n", sym->st_posn); else printf(", size %d\n", sym->size); break; case S_FUNCTION: printf(", %d params\n", sym->nelems); break; case S_ARRAY: printf(", %d elems, size %d\n", sym->nelems, sym->size); break; } switch (sym->type & (~0xf)) { case P_STRUCT: case P_UNION: dumptable(sym->member, NULL, 4); } switch (sym->stype) { case S_FUNCTION: dumptable(sym->member, NULL, 4); } } // Dump one symbol table void dumptable(struct symtable *head, char *name, int indent) { struct symtable *sym; if (head != NULL && name != NULL) printf("%s\n--------\n", name); for (sym = head; sym != NULL; sym = sym->next) dumpsym(sym, indent); } void dumpsymtables(void) { dumptable(Globhead, "Global", 0); printf("\n"); dumptable(Enumhead, "Enums", 0); printf("\n"); dumptable(Typehead, "Typedefs", 0); } ================================================ FILE: 50_Mop_up_pt1/tests/err.input031.c ================================================ Expecting a primary expression, got token:+ on line 5 of input031.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input032.c ================================================ Unknown variable:cow on line 4 of input032.c ================================================ FILE: 
50_Mop_up_pt1/tests/err.input033.c ================================================ Incompatible type to return on line 4 of input033.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input034.c ================================================ For now, declaration of non-global arrays is not implemented on line 4 of input034.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input035.c ================================================ Duplicate local variable declaration:a on line 4 of input035.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input036.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input036.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input037.c ================================================ Expected:comma on line 3 of input037.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input038.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input038.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input039.c ================================================ No statements in function with non-void type on line 4 of input039.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input040.c ================================================ No return for function with non-void type on line 4 of input040.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input041.c ================================================ Can't return from a void function on line 3 of input041.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input042.c ================================================ Undeclared function:fred on line 3 of input042.c 
================================================ FILE: 50_Mop_up_pt1/tests/err.input043.c ================================================ Undeclared array:b on line 3 of input043.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input044.c ================================================ Unknown variable:z on line 3 of input044.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input045.c ================================================ & operator must be followed by an identifier on line 3 of input045.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input046.c ================================================ * operator must be followed by an identifier or * on line 3 of input046.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input047.c ================================================ ++ operator must be followed by an identifier on line 3 of input047.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input048.c ================================================ -- operator must be followed by an identifier on line 3 of input048.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input049.c ================================================ Incompatible expression in assignment on line 6 of input049.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input050.c ================================================ Incompatible types in binary expression on line 6 of input050.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input051.c ================================================ Expected '\'' at end of char literal on line 4 of input051.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input052.c ================================================ Unrecognised character:$ on line 5 of input052.c 
================================================ FILE: 50_Mop_up_pt1/tests/err.input056.c ================================================ unknown struct/union type:var1 on line 2 of input056.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input057.c ================================================ previously defined struct/union:fred on line 2 of input057.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input059.c ================================================ Undeclared variable:y on line 3 of input059.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input060.c ================================================ Undeclared variable:x on line 3 of input060.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input061.c ================================================ Undeclared variable:x on line 3 of input061.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input064.c ================================================ undeclared enum type::fred on line 1 of input064.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input065.c ================================================ enum type redeclared::fred on line 2 of input065.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input066.c ================================================ enum value redeclared::z on line 2 of input066.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input068.c ================================================ redefinition of typedef:FOO on line 2 of input068.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input069.c ================================================ unknown type:FLOO on line 2 of input069.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input072.c 
================================================ no loop or switch to break out from on line 1 of input072.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input073.c ================================================ no loop to continue to on line 1 of input073.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input075.c ================================================ Unexpected token in switch:if on line 4 of input075.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input076.c ================================================ No cases in switch on line 3 of input076.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input077.c ================================================ case or default after existing default on line 6 of input077.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input078.c ================================================ case or default after existing default on line 6 of input078.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input079.c ================================================ Duplicate case value on line 6 of input079.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input085.c ================================================ Bad type in parameter list on line 1 of input085.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input086.c ================================================ Function definition not at global level on line 3 of input086.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input087.c ================================================ Bad type in member list on line 4 of input087.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input092.c ================================================ Type mismatch: literal 
vs. variable on line 1 of input092.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input093.c ================================================ Unknown variable:fred on line 1 of input093.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input094.c ================================================ Type mismatch: literal vs. variable on line 1 of input094.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input095.c ================================================ Variable can not be initialised:x on line 1 of input095.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input096.c ================================================ Array size is illegal:0 on line 1 of input096.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input097.c ================================================ For now, declaration of non-global arrays is not implemented on line 2 of input097.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input098.c ================================================ Too many values in initialisation list on line 1 of input098.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input102.c ================================================ Cannot cast to a struct, union or void type on line 3 of input102.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input103.c ================================================ Cannot cast to a struct, union or void type on line 3 of input103.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input104.c ================================================ Cannot cast to a struct, union or void type on line 2 of input104.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input105.c ================================================ 
Incompatible expression in assignment on line 4 of input105.c ================================================ FILE: 50_Mop_up_pt1/tests/err.input118.c ================================================ Compiler doesn't support static or extern local declarations on line 2 of input118.c ================================================ FILE: 50_Mop_up_pt1/tests/input001.c ================================================ int printf(char *fmt); void main() { printf("%d\n", 12 * 3); printf("%d\n", 18 - 2 * 4); printf("%d\n", 1 + 2 + 9 - 5/2 + 3*5); } ================================================ FILE: 50_Mop_up_pt1/tests/input002.c ================================================ int printf(char *fmt); void main() { int fred; int jim; fred= 5; jim= 12; printf("%d\n", fred + jim); } ================================================ FILE: 50_Mop_up_pt1/tests/input003.c ================================================ int printf(char *fmt); void main() { int x; x= 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); } ================================================ FILE: 50_Mop_up_pt1/tests/input004.c ================================================ int printf(char *fmt); void main() { int x; x= 7 < 9; printf("%d\n", x); x= 7 <= 9; printf("%d\n", x); x= 7 != 9; printf("%d\n", x); x= 7 == 7; printf("%d\n", x); x= 7 >= 7; printf("%d\n", x); x= 7 <= 7; printf("%d\n", x); x= 9 > 7; printf("%d\n", x); x= 9 >= 7; printf("%d\n", x); x= 9 != 7; printf("%d\n", x); } ================================================ FILE: 50_Mop_up_pt1/tests/input005.c ================================================ int printf(char *fmt); void main() { int i; int j; i=6; j=12; if (i < j) { printf("%d\n", i); } else { printf("%d\n", j); } } ================================================ FILE: 50_Mop_up_pt1/tests/input006.c ================================================ int printf(char *fmt); void main() { int 
i; i=1; while (i <= 10) { printf("%d\n", i); i= i + 1; } } ================================================ FILE: 50_Mop_up_pt1/tests/input007.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 50_Mop_up_pt1/tests/input008.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 50_Mop_up_pt1/tests/input009.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { printf("%d\n", 2 * b - a); } } ================================================ FILE: 50_Mop_up_pt1/tests/input010.c ================================================ int printf(char *fmt); void main() { int i; char j; j= 20; printf("%d\n", j); i= 10; printf("%d\n", i); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 2; j= j + 1) { printf("%d\n", j); } } ================================================ FILE: 50_Mop_up_pt1/tests/input011.c ================================================ int printf(char *fmt); int main() { int i; char j; long k; i= 10; printf("%d\n", i); j= 20; printf("%d\n", j); k= 30; printf("%d\n", k); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 4; j= j + 1) { printf("%d\n", j); } for (k= 1; k <= 5; k= k + 1) { printf("%d\n", k); } return(i); printf("%d\n", 12345); return(3); } ================================================ FILE: 50_Mop_up_pt1/tests/input012.c ================================================ int printf(char *fmt); int fred() { return(5); } void main() { int x; x= fred(2); printf("%d\n", x); } ================================================ FILE: 50_Mop_up_pt1/tests/input013.c 
================================================ int printf(char *fmt); int fred() { return(56); } void main() { int dummy; int result; dummy= printf("%d\n", 23); result= fred(10); dummy= printf("%d\n", result); } ================================================ FILE: 50_Mop_up_pt1/tests/input014.c ================================================ int printf(char *fmt); int fred() { return(20); } int main() { int result; printf("%d\n", 10); result= fred(15); printf("%d\n", result); printf("%d\n", fred(15)+10); return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input015.c ================================================ int printf(char *fmt); int main() { char a; char *b; char c; int d; int *e; int f; a= 18; printf("%d\n", a); b= &a; c= *b; printf("%d\n", c); d= 12; printf("%d\n", d); e= &d; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input016.c ================================================ int printf(char *fmt); int c; int d; int *e; int f; int main() { c= 12; d=18; printf("%d\n", c); e= &c + 1; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input017.c ================================================ int printf(char *fmt); int main() { char a; char *b; int d; int *e; b= &a; *b= 19; printf("%d\n", a); e= &d; *e= 12; printf("%d\n", d); return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input018.c ================================================ int printf(char *fmt); int main() { int a; int b; a= b= 34; printf("%d\n", a); printf("%d\n", b); return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input018a.c ================================================ int printf(char *fmt); int a; int *b; char c; char *d; int main() { b= &a; *b= 15; printf("%d\n", a); d= &c; *d= 16; printf("%d\n", c); return(0); } 
================================================ FILE: 50_Mop_up_pt1/tests/input019.c ================================================ int printf(char *fmt); int a; int b; int c; int d; int e; int main() { a= 2; b= 4; c= 3; d= 2; e= (a+b) * (c+d); printf("%d\n", e); return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input020.c ================================================ int printf(char *fmt); int a; int b[25]; int main() { b[3]= 12; a= b[3]; printf("%d\n", a); return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input021.c ================================================ int printf(char *fmt); char c; char *str; int main() { c= '\n'; printf("%d\n", c); for (str= "Hello world\n"; *str != 0; str= str + 1) { printf("%c", *str); } return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input022.c ================================================ int printf(char *fmt); char a; char b; char c; int d; int e; int f; long g; long h; long i; int main() { b= 5; c= 7; a= b + c++; printf("%d\n", a); e= 5; f= 7; d= e + f++; printf("%d\n", d); h= 5; i= 7; g= h + i++; printf("%d\n", g); a= b-- + c; printf("%d\n", a); d= e-- + f; printf("%d\n", d); g= h-- + i; printf("%d\n", g); a= ++b + c; printf("%d\n", a); d= ++e + f; printf("%d\n", d); g= ++h + i; printf("%d\n", g); a= b * --c; printf("%d\n", a); d= e * --f; printf("%d\n", d); g= h * --i; printf("%d\n", g); return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input023.c ================================================ int printf(char *fmt); char *str; int x; int main() { x= -23; printf("%d\n", x); printf("%d\n", -10 * -10); x= 1; x= ~x; printf("%d\n", x); x= 2 > 5; printf("%d\n", x); x= !x; printf("%d\n", x); x= !x; printf("%d\n", x); x= 13; if (x) { printf("%d\n", 13); } x= 0; if (!x) { printf("%d\n", 14); } for (str= "Hello world\n"; *str; str++) { printf("%c", *str); } return(0); } 
================================================ FILE: 50_Mop_up_pt1/tests/input024.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { a= 42; b= 19; printf("%d\n", a & b); printf("%d\n", a | b); printf("%d\n", a ^ b); printf("%d\n", 1 << 3); printf("%d\n", 63 >> 3); return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input025.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { char z; int y; int x; x= 10; y= 20; z= 30; printf("%d\n", x); printf("%d\n", y); printf("%d\n", z); a= 5; b= 15; c= 25; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input026.c ================================================ int printf(char *fmt); int main(int a, char b, long c, int d, int e, int f, int g, int h) { int i; int j; int k; a= 13; printf("%d\n", a); b= 23; printf("%d\n", b); c= 34; printf("%d\n", c); d= 44; printf("%d\n", d); e= 54; printf("%d\n", e); f= 64; printf("%d\n", f); g= 74; printf("%d\n", g); h= 84; printf("%d\n", h); i= 94; printf("%d\n", i); j= 95; printf("%d\n", j); k= 96; printf("%d\n", k); return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input027.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int param5(int a, int b, int c, int d, int e) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param2(int a, int b) { int c; int d; int e; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int 
param0() { int a; int b; int c; int d; int e; a= 1; b= 2; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int main() { param8(1,2,3,4,5,6,7,8); param5(1,2,3,4,5); param2(1,2); param0(); return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input028.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input029.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h); int fred(int a, int b, int c); int main(); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input030.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input030.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } 
================================================ FILE: 50_Mop_up_pt1/tests/input031.c ================================================ int printf(char *fmt); int main() { int x; x= 2 + + 3 - * / ; } ================================================ FILE: 50_Mop_up_pt1/tests/input032.c ================================================ int printf(char *fmt); int main() { pizza cow llama sausage; } ================================================ FILE: 50_Mop_up_pt1/tests/input033.c ================================================ int printf(char *fmt); int main() { char *z; return(z); } ================================================ FILE: 50_Mop_up_pt1/tests/input034.c ================================================ int printf(char *fmt); int main() { int a[12]; return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input035.c ================================================ int printf(char *fmt); int fred(int a, int b) { int a; return(a); } ================================================ FILE: 50_Mop_up_pt1/tests/input036.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c); ================================================ FILE: 50_Mop_up_pt1/tests/input037.c ================================================ int printf(char *fmt); int fred(int a, char b +, int z); ================================================ FILE: 50_Mop_up_pt1/tests/input038.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c, int g); ================================================ FILE: 50_Mop_up_pt1/tests/input039.c ================================================ int printf(char *fmt); int main() { int a; } ================================================ FILE: 50_Mop_up_pt1/tests/input040.c ================================================ int printf(char *fmt); int main() { int a; a= 5; } 
================================================ FILE: 50_Mop_up_pt1/tests/input041.c ================================================ int printf(char *fmt); void fred() { return(5); } ================================================ FILE: 50_Mop_up_pt1/tests/input042.c ================================================ int printf(char *fmt); int main() { fred(5); } ================================================ FILE: 50_Mop_up_pt1/tests/input043.c ================================================ int printf(char *fmt); int main() { int a; a= b[4]; } ================================================ FILE: 50_Mop_up_pt1/tests/input044.c ================================================ int printf(char *fmt); int main() { int a; a= z; } ================================================ FILE: 50_Mop_up_pt1/tests/input045.c ================================================ int printf(char *fmt); int main() { int a; a= &5; } ================================================ FILE: 50_Mop_up_pt1/tests/input046.c ================================================ int printf(char *fmt); int main() { int a; a= *5; } ================================================ FILE: 50_Mop_up_pt1/tests/input047.c ================================================ int printf(char *fmt); int main() { int a; a= ++5; } ================================================ FILE: 50_Mop_up_pt1/tests/input048.c ================================================ int printf(char *fmt); int main() { int a; a= --5; } ================================================ FILE: 50_Mop_up_pt1/tests/input049.c ================================================ int printf(char *fmt); int main() { int x; char y; y= x; } ================================================ FILE: 50_Mop_up_pt1/tests/input050.c ================================================ int printf(char *fmt); int main() { char *a; char *b; a= a + b; } ================================================ FILE: 50_Mop_up_pt1/tests/input051.c 
================================================ int printf(char *fmt); int main() { char a; a= 'fred'; } ================================================ FILE: 50_Mop_up_pt1/tests/input052.c ================================================ int printf(char *fmt); int main() { int a; a= $5.00; } ================================================ FILE: 50_Mop_up_pt1/tests/input053.c ================================================ int printf(char *fmt); int main() { printf("Hello world, %d\n", 23); return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input054.c ================================================ int printf(char *fmt); int main() { int i; for (i=0; i < 20; i++) { printf("Hello world, %d\n", i); } return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input055.c ================================================ int printf(char *fmt); int main(int argc, char **argv) { int i; char *argument; printf("Hello world\n"); for (i=0; i < argc; i++) { argument= *argv; argv= argv + 1; printf("Argument %d is %s\n", i, argument); } return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input056.c ================================================ struct foo { int x; }; struct mary var1; ================================================ FILE: 50_Mop_up_pt1/tests/input057.c ================================================ struct fred { int x; } ; struct fred { char y; } ; ================================================ FILE: 50_Mop_up_pt1/tests/input058.c ================================================ int printf(char *fmt); struct fred { int x; char y; long z; }; struct fred var2; struct fred *varptr; int main() { long result; var2.x= 12; printf("%d\n", var2.x); var2.y= 'c'; printf("%d\n", var2.y); var2.z= 4005; printf("%d\n", var2.z); result= var2.x + var2.y + var2.z; printf("%d\n", result); varptr= &var2; result= varptr->x + varptr->y + varptr->z; printf("%d\n", result); 
return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input059.c ================================================ int main() { int x; x= y.foo; } ================================================ FILE: 50_Mop_up_pt1/tests/input060.c ================================================ int main() { int x; x= x.foo; } ================================================ FILE: 50_Mop_up_pt1/tests/input061.c ================================================ int main() { int x; x= x->foo; } ================================================ FILE: 50_Mop_up_pt1/tests/input062.c ================================================ int printf(char *fmt); union fred { char w; int x; int y; long z; }; union fred var1; union fred *varptr; int main() { var1.x= 65; printf("%d\n", var1.x); var1.x= 66; printf("%d\n", var1.x); printf("%d\n", var1.y); printf("The next two depend on the endian of the platform\n"); printf("%d\n", var1.w); printf("%d\n", var1.z); varptr= &var1; varptr->x= 67; printf("%d\n", varptr->x); printf("%d\n", varptr->y); return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input063.c ================================================ int printf(char *fmt); enum fred { apple=1, banana, carrot, pear=10, peach, mango, papaya }; enum jane { aple=1, bnana, crrot, par=10, pech, mago, paaya }; enum fred var1; enum jane var2; enum fred var3; int main() { var1= carrot + pear + mango; printf("%d\n", var1); return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input064.c ================================================ enum fred var3; ================================================ FILE: 50_Mop_up_pt1/tests/input065.c ================================================ enum fred { x, y, z }; enum fred { a, b }; ================================================ FILE: 50_Mop_up_pt1/tests/input066.c ================================================ enum fred { x, y, z }; enum mary { a, b, z }; 
================================================ FILE: 50_Mop_up_pt1/tests/input067.c ================================================ int printf(char *fmt); typedef int FOO; FOO var1; struct bar { int x; int y} ; typedef struct bar BAR; BAR var2; int main() { var1= 5; printf("%d\n", var1); var2.x= 7; var2.y= 10; printf("%d\n", var2.x + var2.y); return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input068.c ================================================ typedef int FOO; typedef char FOO; ================================================ FILE: 50_Mop_up_pt1/tests/input069.c ================================================ typedef int FOO; FLOO y; ================================================ FILE: 50_Mop_up_pt1/tests/input070.c ================================================ #include typedef int FOO; int main() { FOO x; x= 56; printf("%d\n", x); return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input071.c ================================================ #include int main() { int x; x = 0; while (x < 100) { if (x == 5) { x = x + 2; continue; } printf("%d\n", x); if (x == 14) { break; } x = x + 1; } printf("Done\n"); return (0); } ================================================ FILE: 50_Mop_up_pt1/tests/input072.c ================================================ int main() { break; } ================================================ FILE: 50_Mop_up_pt1/tests/input073.c ================================================ int main() { continue; } ================================================ FILE: 50_Mop_up_pt1/tests/input074.c ================================================ #include int main() { int x; int y; y= 0; for (x=0; x < 5; x++) { switch(x) { case 1: { y= 5; break; } case 2: { y= 7; break; } case 3: { y= 9; } default: { y= 100; } } printf("%d\n", y); } return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input075.c 
================================================ int main() { int x; switch(x) { if (x<5); } } ================================================ FILE: 50_Mop_up_pt1/tests/input076.c ================================================ int main() { int x; switch(x) { } } ================================================ FILE: 50_Mop_up_pt1/tests/input077.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } case 4: { x= 2; } } } ================================================ FILE: 50_Mop_up_pt1/tests/input078.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } default: { x= 2; } } } ================================================ FILE: 50_Mop_up_pt1/tests/input079.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } case 2: { x= 2; } case 1: { x= 2; } default: { x= 2; } } } ================================================ FILE: 50_Mop_up_pt1/tests/input080.c ================================================ #include int x; int y; int main() { for (x=0, y=1; x < 6; x++, y=y+2) { printf("%d %d\n", x, y); } return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input081.c ================================================ #include int x; int y; int main() { x= 0; y=1; for (;x<5;) { printf("%d %d\n", x, y); x=x+1; y=y+2; } return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input082.c ================================================ #include int main() { int x; x= 10; // Dangling else test if (x > 5) if (x > 15) printf("x > 15\n"); else printf("15 >= x > 5\n"); else printf("5 >= x\n"); return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input083.c ================================================ #include int main() { int x; // Dangling else test. 
// We should not print anything for x<= 5 for (x=0; x < 12; x++) if (x > 5) if (x > 10) printf("10 < %2d\n", x); else printf(" 5 < %2d <= 10\n", x); return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input084.c ================================================ #include int main() { int x, y; x=2; y=3; printf("%d %d\n", x, y); char a, *b; a= 'f'; b= &a; printf("%c %c\n", a, *b); return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input085.c ================================================ int main(struct foo { int x; }; , int a) { return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input086.c ================================================ int main() { int fred() { return(5); } int x; x=2; return(x); } ================================================ FILE: 50_Mop_up_pt1/tests/input087.c ================================================ struct foo { int x; int y; struct blah { int g; }; }; ================================================ FILE: 50_Mop_up_pt1/tests/input088.c ================================================ #include struct foo { int x; int y; } fred, mary; struct foo james; int main() { fred.x= 5; mary.y= 6; printf("%d %d\n", fred.x, mary.y); return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input089.c ================================================ #include int x= 23; char y= 'H'; char *z= "Hello world"; int main() { printf("%d %c %s\n", x, y, z); return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input090.c ================================================ #include int a = 23, b = 100; char y = 'H', *z = "Hello world"; int main() { printf("%d %d %c %s\n", a, b, y, z); return (0); } ================================================ FILE: 50_Mop_up_pt1/tests/input091.c ================================================ #include int fred[] = { 1, 2, 3, 4, 5 }; int jim[10] = { 
1, 2, 3, 4, 5 }; int main() { int i; for (i=0; i < 5; i++) printf("%d\n", fred[i]); for (i=0; i < 10; i++) printf("%d\n", jim[i]); return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input092.c ================================================ char x= 3000; ================================================ FILE: 50_Mop_up_pt1/tests/input093.c ================================================ char x= fred; ================================================ FILE: 50_Mop_up_pt1/tests/input094.c ================================================ char *s= 54; ================================================ FILE: 50_Mop_up_pt1/tests/input095.c ================================================ int fred(int x=2) { return(x); } ================================================ FILE: 50_Mop_up_pt1/tests/input096.c ================================================ int fred[0]; ================================================ FILE: 50_Mop_up_pt1/tests/input097.c ================================================ int main() { int x[45]; return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input098.c ================================================ int fred[3]= { 1, 2, 3, 4, 5 }; ================================================ FILE: 50_Mop_up_pt1/tests/input099.c ================================================ #include // List of token strings, for debugging purposes. 
// As yet, we can't store a NULL into the list char *Tstring[] = { "EOF", "=", "||", "&&", "|", "^", "&", "==", "!=", ",", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", "default", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":", "" }; int main() { int i; char *str; i=0; while (1) { str= Tstring[i]; if (*str == 0) break; printf("%s\n", str); i++; } return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input100.c ================================================ #include int main() { int x= 3, y=14; int z= 2 * x + y; char *str= "Hello world"; printf("%s %d %d\n", str, x+y, z); return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input101.c ================================================ #include int main() { int x= 65535; char y; char *str; y= (char )x; printf("0x%x\n", y); str= (char *)0; printf("0x%lx\n", (long)str); return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input102.c ================================================ int main() { struct foo { int p; }; int y= (struct foo) x; } ================================================ FILE: 50_Mop_up_pt1/tests/input103.c ================================================ int main() { union foo { int p; }; int y= (union foo) x; } ================================================ FILE: 50_Mop_up_pt1/tests/input104.c ================================================ int main() { int y= (void) x; } ================================================ FILE: 50_Mop_up_pt1/tests/input105.c ================================================ int main() { int x; char *y; y= (char) x; } ================================================ FILE: 50_Mop_up_pt1/tests/input106.c 
================================================ #include char *y= (char *)0; int main() { printf("0x%lx\n", (long)y); return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input107.c ================================================ #include char *y[] = { "fish", "cow", NULL }; char *z= NULL; int main() { int i; char *ptr; for (i=0; i < 3; i++) { ptr= y[i]; if (ptr != (char *)0) printf("%s\n", y[i]); else printf("NULL\n"); } return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input108.c ================================================ int main() { char *str= (void *)0; return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input109.c ================================================ #include int main() { int x= 17 - 1; printf("%d\n", x); return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input110.c ================================================ #include int x; int y; int main() { x= 3; y= 15; y += x; printf("%d\n", y); x= 3; y= 15; y -= x; printf("%d\n", y); x= 3; y= 15; y *= x; printf("%d\n", y); x= 3; y= 15; y /= x; printf("%d\n", y); return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input111.c ================================================ #include int main() { int x= 2000 + 3 + 4 * 5 + 6; printf("%d\n", x); return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input112.c ================================================ #include char* y = NULL; int x= 10 + 6; int fred [ 2 + 3 ]; int main() { fred[2]= x; printf("%d\n", fred[2]); return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input113.c ================================================ #include void fred(void); void fred(void) { printf("fred says hello\n"); } int main(void) { fred(); return(0); } ================================================ FILE: 
50_Mop_up_pt1/tests/input114.c ================================================ #include int main() { int x= 0x4a; printf("%c\n", x); return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input115.c ================================================ #include struct foo { int x; char y; long z; }; typedef struct foo blah; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol struct symtable *ctype; // If struct/union, ptr to that type int stype; // Structural type for the symbol int class; // Storage class for the symbol int size; // Total size in bytes of this symbol int nelems; // Functions: # params. Arrays: # elements #define st_endlabel st_posn // For functions, the end label int st_posn; // For locals, the negative offset // from the stack base pointer int *initlist; // List of initial values struct symtable *next; // Next symbol in one list struct symtable *member; // First member of a function, struct, }; // union or enum // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates int rvalue; // True if the node is an rvalue struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; struct symtable *sym; // For many AST nodes, the pointer to // the symbol in the symbol table #define a_intvalue a_size // For A_INTLIT, the integer value int a_size; // For A_SCALE, the size to scale by }; int main() { printf("%ld\n", sizeof(char)); printf("%ld\n", sizeof(int)); printf("%ld\n", sizeof(long)); printf("%ld\n", sizeof(char *)); printf("%ld\n", sizeof(blah)); printf("%ld\n", sizeof(struct symtable)); printf("%ld\n", sizeof(struct ASTnode)); return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input116.c ================================================ #include static int counter=0; static int 
fred(void) { return(counter++); } int main(void) { int i; for (i=0; i < 5; i++) printf("%d\n", fred()); return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input117.c ================================================ #include static char *fred(void) { return("Hello"); } int main(void) { printf("%s\n", fred()); return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input118.c ================================================ int main(void) { static int x; } ================================================ FILE: 50_Mop_up_pt1/tests/input119.c ================================================ #include int x; int y= 3; int main() { x= y != 3 ? 6 : 8; printf("%d\n", x); x= (y == 3) ? 6 : 8; printf("%d\n", x); return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input120.c ================================================ #include int x; int y= 3; int main() { for (y= 0; y < 10; y++) { x= y > 4 ? y + 2 : y + 9; printf("%d\n", x); } return(0); } ================================================ FILE: 50_Mop_up_pt1/tests/input121.c ================================================ #include int x; int y= 3; int main() { for (y= 0; y < 10; y++) { x= (y < 4) ? y + 2 : (y > 7) ? 
#!/bin/sh
# Generate the reference result file for every test input that
# does not have one yet.
#
# For each input*c file with neither an out.* nor an err.* file:
#   - compile it with our compiler, capturing stderr in err.<input>
#   - if nothing was written to stderr, discard the err file, build the
#     input with the system compiler and record the program's stdout
#     in out.<input>

# Build and install our compiler when the binary is missing
[ -f ../cwj ] || (cd ..; make install)

for i in input*c
do
  # Only inputs with no recorded result of either kind are processed
  if [ ! -f "out.$i" ] && [ ! -f "err.$i" ]
  then
    ../cwj -o out $i 2> "err.$i"

    # A non-empty err file is kept as the expected error message.
    # An empty one means our compiler accepted the input.
    if [ -s "err.$i" ]
    then
      :
    else
      rm -f "err.$i"
      cc -o out $i
      ./out > "out.$i"
    fi
  fi

  # Remove build products before moving on to the next input
  rm -f out out.s
done
================================================ FILE: 50_Mop_up_pt1/tests/out.input013.c ================================================ 23 56 ================================================ FILE: 50_Mop_up_pt1/tests/out.input014.c ================================================ 10 20 30 ================================================ FILE: 50_Mop_up_pt1/tests/out.input015.c ================================================ 18 18 12 12 ================================================ FILE: 50_Mop_up_pt1/tests/out.input016.c ================================================ 12 18 ================================================ FILE: 50_Mop_up_pt1/tests/out.input017.c ================================================ 19 12 ================================================ FILE: 50_Mop_up_pt1/tests/out.input018.c ================================================ 34 34 ================================================ FILE: 50_Mop_up_pt1/tests/out.input018a.c ================================================ 15 16 ================================================ FILE: 50_Mop_up_pt1/tests/out.input019.c ================================================ 30 ================================================ FILE: 50_Mop_up_pt1/tests/out.input020.c ================================================ 12 ================================================ FILE: 50_Mop_up_pt1/tests/out.input021.c ================================================ 10 Hello world ================================================ FILE: 50_Mop_up_pt1/tests/out.input022.c ================================================ 12 12 12 13 13 13 13 13 13 35 35 35 ================================================ FILE: 50_Mop_up_pt1/tests/out.input023.c ================================================ -23 100 -2 0 1 0 13 14 Hello world ================================================ FILE: 50_Mop_up_pt1/tests/out.input024.c ================================================ 2 59 57 8 7 
================================================ FILE: 50_Mop_up_pt1/tests/out.input025.c ================================================ 10 20 30 5 15 25 ================================================ FILE: 50_Mop_up_pt1/tests/out.input026.c ================================================ 13 23 34 44 54 64 74 84 94 95 96 ================================================ FILE: 50_Mop_up_pt1/tests/out.input027.c ================================================ 1 2 3 4 5 6 7 8 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 ================================================ FILE: 50_Mop_up_pt1/tests/out.input028.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 50_Mop_up_pt1/tests/out.input029.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 50_Mop_up_pt1/tests/out.input030.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input030.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 50_Mop_up_pt1/tests/out.input053.c ================================================ Hello world, 23 ================================================ FILE: 50_Mop_up_pt1/tests/out.input054.c ================================================ Hello world, 0 Hello world, 1 Hello world, 2 Hello world, 3 Hello world, 4 Hello world, 5 Hello world, 6 Hello world, 7 Hello world, 8 Hello world, 9 Hello world, 10 Hello world, 11 Hello world, 12 Hello world, 13 Hello world, 14 Hello world, 15 Hello world, 16 Hello world, 17 Hello world, 18 Hello world, 19 ================================================ FILE: 
50_Mop_up_pt1/tests/out.input055.c ================================================ Hello world Argument 0 is ./out ================================================ FILE: 50_Mop_up_pt1/tests/out.input058.c ================================================ 12 99 4005 4116 4116 ================================================ FILE: 50_Mop_up_pt1/tests/out.input062.c ================================================ 65 66 66 The next two depend on the endian of the platform 66 66 67 67 ================================================ FILE: 50_Mop_up_pt1/tests/out.input063.c ================================================ 25 ================================================ FILE: 50_Mop_up_pt1/tests/out.input067.c ================================================ 5 17 ================================================ FILE: 50_Mop_up_pt1/tests/out.input070.c ================================================ 56 ================================================ FILE: 50_Mop_up_pt1/tests/out.input071.c ================================================ 0 1 2 3 4 7 8 9 10 11 12 13 14 Done ================================================ FILE: 50_Mop_up_pt1/tests/out.input074.c ================================================ 100 5 7 100 100 ================================================ FILE: 50_Mop_up_pt1/tests/out.input080.c ================================================ 0 1 1 3 2 5 3 7 4 9 5 11 ================================================ FILE: 50_Mop_up_pt1/tests/out.input081.c ================================================ 0 1 1 3 2 5 3 7 4 9 ================================================ FILE: 50_Mop_up_pt1/tests/out.input082.c ================================================ 15 >= x > 5 ================================================ FILE: 50_Mop_up_pt1/tests/out.input083.c ================================================ 5 < 6 <= 10 5 < 7 <= 10 5 < 8 <= 10 5 < 9 <= 10 5 < 10 <= 10 10 < 11 ================================================ FILE: 
50_Mop_up_pt1/tests/out.input084.c ================================================ 2 3 f f ================================================ FILE: 50_Mop_up_pt1/tests/out.input088.c ================================================ 5 6 ================================================ FILE: 50_Mop_up_pt1/tests/out.input089.c ================================================ 23 H Hello world ================================================ FILE: 50_Mop_up_pt1/tests/out.input090.c ================================================ 23 100 H Hello world ================================================ FILE: 50_Mop_up_pt1/tests/out.input091.c ================================================ 1 2 3 4 5 1 2 3 4 5 0 0 0 0 0 ================================================ FILE: 50_Mop_up_pt1/tests/out.input099.c ================================================ EOF = || && | ^ & == != , > <= >= << >> + - * / ++ -- ~ ! void char int long if else while for return struct union enum typedef extern break continue switch case default intlit strlit ; identifier { } ( ) [ ] , . 
-> : ================================================ FILE: 50_Mop_up_pt1/tests/out.input100.c ================================================ Hello world 17 20 ================================================ FILE: 50_Mop_up_pt1/tests/out.input101.c ================================================ 0xff 0x0 ================================================ FILE: 50_Mop_up_pt1/tests/out.input106.c ================================================ 0x0 ================================================ FILE: 50_Mop_up_pt1/tests/out.input107.c ================================================ fish cow NULL ================================================ FILE: 50_Mop_up_pt1/tests/out.input108.c ================================================ ================================================ FILE: 50_Mop_up_pt1/tests/out.input109.c ================================================ 16 ================================================ FILE: 50_Mop_up_pt1/tests/out.input110.c ================================================ 18 12 45 5 ================================================ FILE: 50_Mop_up_pt1/tests/out.input111.c ================================================ 2029 ================================================ FILE: 50_Mop_up_pt1/tests/out.input112.c ================================================ 16 ================================================ FILE: 50_Mop_up_pt1/tests/out.input113.c ================================================ fred says hello ================================================ FILE: 50_Mop_up_pt1/tests/out.input114.c ================================================ J ================================================ FILE: 50_Mop_up_pt1/tests/out.input115.c ================================================ 1 4 8 8 13 64 48 ================================================ FILE: 50_Mop_up_pt1/tests/out.input116.c ================================================ 0 1 2 3 4 ================================================ FILE: 
#!/bin/sh
# Run each test and compare against known good output.
#
# For every input*c file:
#   - if out.<input> exists, compile the input with our compiler, run
#     the resulting program and compare its stdout with out.<input>
#   - otherwise, if err.<input> exists, compile the input and compare
#     the compiler's stderr with err.<input>
#   - otherwise report that the test cannot be run
# Differences are shown with "diff -c".

# Build our compiler if needed
if [ ! -f ../cwj ]
then (cd ..; make install)
fi

# Try to use each input source file
for i in input*c
do
  if [ -f "out.$i" ]
  then
    # Output file: compile the source, run it and capture the
    # output, and compare it against the known-good output.
    # printf is used because "echo -n" is not portable under /bin/sh.
    printf '%s' "$i"
    ../cwj -o out "$i"
    ./out > "trial.$i"

    # Compare this against the correct output. Any non-zero status
    # from cmp is a failure: 1 means the files differ and >1 means
    # cmp itself could not compare them.
    if cmp -s "out.$i" "trial.$i"
    then echo ": OK"
    else
      echo ": failed"
      diff -c "out.$i" "trial.$i"
      echo
    fi

  elif [ -f "err.$i" ]
  then
    # Error file: compile the source and capture the error
    # messages. Compare against the known-bad output using the
    # same mechanism as before.
    printf '%s' "$i"
    ../cwj "$i" 2> "trial.$i"

    if cmp -s "err.$i" "trial.$i"
    then echo ": OK"
    else
      echo ": failed"
      diff -c "err.$i" "trial.$i"
      echo
    fi

  else
    # We can't do anything if there's no file to test against
    echo "Can't run test on $i, no output file!"
  fi

  rm -f out out.s "trial.$i"
done
Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../compn $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.o out.s "trial.$i" done ================================================ FILE: 50_Mop_up_pt1/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->type = type; n->left = left; n->mid = mid; n->right = right; n->sym = sym; n->a_intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, struct symtable *sym, int intvalue) { return (mkastnode(op, type, NULL, NULL, NULL, sym, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, struct symtable *sym, int intvalue) { return (mkastnode(op, type, left, NULL, NULL, sym, intvalue)); } // Generate and return a new label number // just for AST dumping purposes static int dumpid = 1; static int gendumplabel(void) { return (dumpid++); } // Given an AST tree, print it out and follow the // traversal of the tree that genAST() follows void dumpAST(struct ASTnode *n, int label, int level) { int Lfalse, Lstart, Lend; switch (n->op) { case A_IF: Lfalse = gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_IF"); if (n->right) { Lend = gendumplabel(); fprintf(stdout, ", end L%d", Lend); } fprintf(stdout, "\n"); dumpAST(n->left, Lfalse, level + 2); 
dumpAST(n->mid, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); return; case A_WHILE: Lstart = gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_WHILE, start L%d\n", Lstart); Lend = gendumplabel(); dumpAST(n->left, Lend, level + 2); dumpAST(n->right, NOLABEL, level + 2); return; } // Reset level to -2 for A_GLUE if (n->op == A_GLUE) level = -2; // General AST node handling if (n->left) dumpAST(n->left, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); for (int i = 0; i < level; i++) fprintf(stdout, " "); switch (n->op) { case A_GLUE: fprintf(stdout, "\n\n"); return; case A_FUNCTION: fprintf(stdout, "A_FUNCTION %s\n", n->sym->name); return; case A_ADD: fprintf(stdout, "A_ADD\n"); return; case A_SUBTRACT: fprintf(stdout, "A_SUBTRACT\n"); return; case A_MULTIPLY: fprintf(stdout, "A_MULTIPLY\n"); return; case A_DIVIDE: fprintf(stdout, "A_DIVIDE\n"); return; case A_EQ: fprintf(stdout, "A_EQ\n"); return; case A_NE: fprintf(stdout, "A_NE\n"); return; case A_LT: fprintf(stdout, "A_LE\n"); return; case A_GT: fprintf(stdout, "A_GT\n"); return; case A_LE: fprintf(stdout, "A_LE\n"); return; case A_GE: fprintf(stdout, "A_GE\n"); return; case A_INTLIT: fprintf(stdout, "A_INTLIT %d\n", n->a_intvalue); return; case A_STRLIT: fprintf(stdout, "A_STRLIT rval label L%d\n", n->a_intvalue); return; case A_IDENT: if (n->rvalue) fprintf(stdout, "A_IDENT rval %s\n", n->sym->name); else fprintf(stdout, "A_IDENT %s\n", n->sym->name); return; case A_ASSIGN: fprintf(stdout, "A_ASSIGN\n"); return; case A_WIDEN: fprintf(stdout, "A_WIDEN\n"); return; case A_RETURN: fprintf(stdout, "A_RETURN\n"); return; case A_FUNCCALL: fprintf(stdout, "A_FUNCCALL %s\n", n->sym->name); return; case A_ADDR: fprintf(stdout, "A_ADDR %s\n", n->sym->name); return; case A_DEREF: if (n->rvalue) fprintf(stdout, "A_DEREF rval\n"); else fprintf(stdout, "A_DEREF\n"); return; case A_SCALE: fprintf(stdout, "A_SCALE %d\n", n->a_size); 
return; case A_PREINC: fprintf(stdout, "A_PREINC %s\n", n->sym->name); return; case A_PREDEC: fprintf(stdout, "A_PREDEC %s\n", n->sym->name); return; case A_POSTINC: fprintf(stdout, "A_POSTINC\n"); return; case A_POSTDEC: fprintf(stdout, "A_POSTDEC\n"); return; case A_NEGATE: fprintf(stdout, "A_NEGATE\n"); return; case A_BREAK: fprintf(stdout, "A_BREAK\n"); return; case A_CONTINUE: fprintf(stdout, "A_CONTINUE\n"); return; case A_CASE: fprintf(stdout, "A_CASE %d\n", n->a_intvalue); return; case A_DEFAULT: fprintf(stdout, "A_DEFAULT\n"); return; case A_SWITCH: fprintf(stdout, "A_SWITCH\n"); return; case A_CAST: fprintf(stdout, "A_CAST %d\n", n->type); return; case A_ASPLUS: fprintf(stdout, "A_ASPLUS\n"); return; case A_ASMINUS: fprintf(stdout, "A_ASMINUS\n"); return; case A_ASSTAR: fprintf(stdout, "A_ASSTAR\n"); return; case A_ASSLASH: fprintf(stdout, "A_ASSLASH\n"); return; default: fatald("Unknown dumpAST operator", n->op); } } ================================================ FILE: 50_Mop_up_pt1/types.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Types and type handling // Copyright (c) 2019 Warren Toomey, GPL3 // Return true if a type is an int type // of any size, false otherwise int inttype(int type) { return (((type & 0xf) == 0) && (type >= P_CHAR && type <= P_LONG)); } // Return true if a type is of pointer type int ptrtype(int type) { return ((type & 0xf) != 0); } // Given a primitive type, return // the type which is a pointer to it int pointer_to(int type) { if ((type & 0xf) == 0xf) fatald("Unrecognised in pointer_to: type", type); return (type + 1); } // Given a primitive pointer type, return // the type which it points to int value_at(int type) { if ((type & 0xf) == 0x0) fatald("Unrecognised in value_at: type", type); return (type - 1); } // Given a type and a composite type pointer, return // the size of this type in bytes int typesize(int type, struct symtable *ctype) { if (type == P_STRUCT 
|| type == P_UNION) return (ctype->size); return (genprimsize(type)); } // Given an AST tree and a type which we want it to become, // possibly modify the tree by widening or scaling so that // it is compatible with this type. Return the original tree // if no changes occurred, a modified tree, or NULL if the // tree is not compatible with the given type. // If this will be part of a binary operation, the AST op is not zero. struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op) { int ltype; int lsize, rsize; ltype = tree->type; // XXX No idea on these yet if (ltype == P_STRUCT || ltype == P_UNION) fatal("Don't know how to do this yet"); if (rtype == P_STRUCT || rtype == P_UNION) fatal("Don't know how to do this yet"); // Compare scalar int types if (inttype(ltype) && inttype(rtype)) { // Both types same, nothing to do if (ltype == rtype) return (tree); // Get the sizes for each type lsize = typesize(ltype, NULL); rsize = typesize(rtype, NULL); // Tree's size is too big if (lsize > rsize) return (NULL); // Widen to the right if (rsize > lsize) return (mkastunary(A_WIDEN, rtype, tree, NULL, 0)); } // For pointers if (ptrtype(ltype) && ptrtype(rtype)) { // We can compare them if (op >= A_EQ && op <= A_GE) return(tree); // A comparison of the same type for a non-binary operation is OK, // or when the left tree is of `void *` type. 
if (op == 0 && (ltype == rtype || ltype == pointer_to(P_VOID))) return (tree); } // We can scale only on A_ADD or A_SUBTRACT operation if (op == A_ADD || op == A_SUBTRACT) { // Left is int type, right is pointer type and the size // of the original type is >1: scale the left if (inttype(ltype) && ptrtype(rtype)) { rsize = genprimsize(value_at(rtype)); if (rsize > 1) return (mkastunary(A_SCALE, rtype, tree, NULL, rsize)); else return (tree); // Size 1, no need to scale } } // If we get here, the types are not compatible return (NULL); } ================================================ FILE: 51_Arrays_pt2/Makefile ================================================ # Define the location of the include directory # and the location to install the compiler binary INCDIR=/tmp/include BINDIR=/tmp HSRCS= data.h decl.h defs.h SRCS= cg.c decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c ARMSRCS= cg_arm.c decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c cwj: $(SRCS) $(HSRCS) cc -o cwj -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCS) compn: $(SRCN) $(HSRCS) cc -D__NASM__ -o compn -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCN) cwjarm: $(ARMSRCS) $(HSRCS) cc -o cwjarm -g -Wall $(ARMSRCS) cp cwjarm cwj install: cwj mkdir -p $(INCDIR) rsync -a include/. $(INCDIR) cp cwj $(BINDIR) chmod +x $(BINDIR)/cwj installn: compn mkdir -p $(INCDIR) rsync -a include/. 
$(INCDIR) cp compn $(BINDIR) chmod +x $(BINDIR)/compn clean: rm -f cwj cwjarm compn *.o *.s out a.out test: install tests/runtests (cd tests; chmod +x runtests; ./runtests) armtest: cwjarm tests/runtests (cd tests; chmod +x runtests; ./runtests) testn: installn tests/runtestsn (cd tests; chmod +x runtestsn; ./runtestsn) ================================================ FILE: 51_Arrays_pt2/Readme.md ================================================ # Part 51: Arrays, part 2 In the last part of our compiler writing journey, I realised that I had implemented arrays not exactly right. In this part of our compiler writing journey, I'll try to rectify things. To start with, I stepped back and thought a bit about arrays and pointers. I realised that an array is similar to a pointer except: 1. You can't use the unadorned array identifier as an lvalue. 2. The size of an array is the size of all of its elements. The size of a pointer does not include the elements of any array that it points to. 3. The address of an array (e.g. `&ary`) doesn't mean anything useful, unlike the address of a pointer (e.g. `&ptr`). As an example of the first point above, consider: ```c int ary[5]; int *ptr; int main() { ptr= ary; // OK, put base address of ary into ptr ary= ptr; // Bad, can't change ary's base address ``` And, for those C purists out there, yes I know that point 3 isn't entirely true. But I'm not going to use `&ary` anywhere, so I can get our compiler to reject it, and that means I won't need to implement this functionality! So, exactly what do we need to change? + allow a scalar or an array identifier before a '[' token + allow an unadorned array identifier but mark it as an rvalue + add some more errors when we try to do bad things with arrays That's about it. I've made these changes to the compiler. I hope that they cover all the array issues, but it's likely that I've overlooked something else. If so, we'll revisit again. 
## Changes to `postfix()` In the last part, I put in a "band-aid" fix to `postfix()` in `expr.c`, but it's time to go back and fix it properly. We need to allow unadorned array identifiers but mark them as rvalues. Here are the changes: ```c static struct ASTnode *postfix(void) { ... int rvalue=0; ... // An identifier, check that it exists. For arrays, set rvalue to 1. if ((varptr = findsymbol(Text)) == NULL) fatals("Unknown variable", Text); switch(varptr->stype) { case S_VARIABLE: break; case S_ARRAY: rvalue= 1; break; default: fatals("Identifier not a scalar or array variable", Text); } switch (Token.token) { // Post-increment: skip over the token. Also same for post-decrement case T_INC: if (rvalue == 1) fatals("Cannot ++ on rvalue", Text); ... // Just a variable reference. Ensure any arrays // cannot be treated as lvalues. default: if (varptr->stype == S_ARRAY) { n = mkastleaf(A_ADDR, varptr->type, varptr, 0); n->rvalue = rvalue; } else n = mkastleaf(A_IDENT, varptr->type, varptr, 0); } return (n); } ``` Now either scalar or array variables can be used unadorned, but arrays can't be lvalues. Also, arrays can't be pre- or post-incremented. We either load the address of the array base, or load the value in the scalar variable. ## Changes to `array_access()` Now we need to modify `array_access()` in `expr.c` to allow pointers to be used with '[' ']' indexing. Here are the changes: ```c static struct ASTnode *array_access(void) { struct ASTnode *left, *right; struct symtable *aryptr; // Check that the identifier has been defined as an array or a pointer. 
if ((aryptr = findsymbol(Text)) == NULL) fatals("Undeclared variable", Text); if (aryptr->stype != S_ARRAY && (aryptr->stype == S_VARIABLE && !ptrtype(aryptr->type))) fatals("Not an array or pointer", Text); // Make a leaf node for it that points at the base of // the array, or loads the pointer's value as an rvalue if (aryptr->stype == S_ARRAY) left = mkastleaf(A_ADDR, aryptr->type, aryptr, 0); else { left = mkastleaf(A_IDENT, aryptr->type, aryptr, 0); left->rvalue= 1; } ... } ``` We now check that the symbol exists and is either an array or a scalar variable of pointer type. Once this is OK, we either load the address of the array base, or load the value in the pointer variable. ## Testing the Code Changes I won't go through all the tests; instead I'll summarise them: + `tests/input124.c` checks that `ary++` can't be done on an array. + `tests/input125.c` checks that we can assign `ptr= ary` and then access the array through the pointer. + `tests/input126.c` checks that we can't do `&ary`. + `tests/input127.c` calls a function with `fred(ary)` and ensures that we can receive it as a pointer parameter. ## Conclusion and What's Next Well, I was worried that I'd have to rewrite a whole pile of code to get arrays to work correctly. As it stood, the code was nearly right but just needed some more tweaking to cover all the functionality that we needed. In the next part of our compiler writing journey, we will go back to mopping up. 
[Next step](../52_Pointers_pt2/Readme.md) ================================================ FILE: 51_Arrays_pt2/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\t.text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\t.data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch (type) { case P_CHAR: return (offset); case P_INT: case P_LONG: break; default: if (!ptrtype(type)) fatald("Bad type in cg_align:", type); } // Here we have an int or a long. Align it on a 4-byte offset // I put the generic code here so it can be reused elsewhere. alignment = 4; offset = (offset + direction * (alignment - 1)) & ~(alignment - 1); return (offset); } // Position of next local variable relative to stack base pointer. 
// We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. static int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. // We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "%r10", "%r11", "%r12", "%r13", "%r9", "%r8", "%rcx", "%rdx", "%rsi", "%rdi" }; static char *breglist[] = { "%r10b", "%r11b", "%r12b", "%r13b", "%r9b", "%r8b", "%cl", "%dl", "%sil", "%dil" }; static char *dreglist[] = { "%r10d", "%r11d", "%r12d", "%r13d", "%r9d", "%r8d", "%ecx", "%edx", "%esi", "%edi" }; // Set all registers as available. // But if reg is positive, don't free that one. void freeall_registers(int keepreg) { int i; for (i = 0; i < NUMFREEREGS; i++) if (i != keepreg) freereg[i] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. int alloc_register(void) { int i; for (i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(NOREG); cgtextseg(); fprintf(Outfile, "# internal switch(expr) routine\n" "# %%rsi = switch table, %%rax = expr\n" "# from SubC: http://www.t3x.org/subc/\n" "\n" "switch:\n" " pushq %%rsi\n" " movq %%rdx, %%rsi\n" " movq %%rax, %%rbx\n" " cld\n" " lodsq\n" " movq %%rax, %%rcx\n" "next:\n" " lodsq\n" " movq %%rax, %%rdx\n" " lodsq\n" " cmpq %%rdx, %%rbx\n" " jnz no\n" " popq %%rsi\n" " jmp *%%rax\n" "no:\n" " loop next\n" " lodsq\n" " popq %%rsi\n" " jmp *%%rax\n" "\n"); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(struct symtable *sym) { char *name = sym->name; struct symtable *parm, *locvar; int cnt; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the %rsp and %rsp if (sym->class == C_GLOBAL) fprintf(Outfile, "\t.globl\t%s\n" "\t.type\t%s, @function\n", name, name); fprintf(Outfile, "%s:\n" "\tpushq\t%%rbp\n" "\tmovq\t%%rsp, %%rbp\n", name); // Copy any in-register parameters to the stack, up to six of them // The remaining parameters are already on the stack for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) { if (cnt > 6) { parm->st_posn = paramOffset; paramOffset += 8; } else { parm->st_posn = newlocaloffset(parm->type); cgstorlocal(paramReg--, parm); } } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. 
for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) { locvar->st_posn = newlocaloffset(locvar->type); } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\taddq\t$%d,%%rsp\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->st_endlabel); fprintf(Outfile, "\taddq\t$%d,%%rsp\n", stackOffset); fputs("\tpopq %rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadglob(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name); } else // Print out the code to initialise it switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovzbq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name); if (op == 
A_PREDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovslq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadlocal(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->st_posn); fprintf(Outfile, "\tmovq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->st_posn); } else switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->st_posn); fprintf(Outfile, "\tmovzbq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->st_posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->st_posn); fprintf(Outfile, "\tmovslq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->st_posn); break; default: fatald("Bad type in cgloadlocal:", sym->type); } return (r); } // Given the label number of a 
global string, // load its address into a new register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tleaq\tL%d(%%rip), %s\n", label, reglist[r]); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tandq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\torq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txorq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshlq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshrq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { 
fprintf(Outfile, "\tnegq\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnotq\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); return (r); } // Logically OR two registers and return a // register with the result, 1 or 0 int cglogor(int r1, int r2) { // Generate two labels int Ltrue = genlabel(); int Lend = genlabel(); // Test r1 and jump to true label if true fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r1], reglist[r1]); fprintf(Outfile, "\tjne\tL%d\n", Ltrue); // Test r2 and jump to true label if true fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r2], reglist[r2]); fprintf(Outfile, "\tjne\tL%d\n", Ltrue); // Didn't jump, so result is false fprintf(Outfile, "\tmovq\t$0, %s\n", reglist[r1]); fprintf(Outfile, "\tjmp\tL%d\n", Lend); // Someone jumped to the true label, so result is true cglabel(Ltrue); fprintf(Outfile, "\tmovq\t$1, %s\n", reglist[r1]); cglabel(Lend); free_register(r2); return(r1); } // Logically AND two registers and return a // register with the result, 1 or 0 int cglogand(int r1, int r2) { // Generate two labels int Lfalse = genlabel(); int Lend = genlabel(); // Test r1 and jump to false label if not true fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r1], reglist[r1]); fprintf(Outfile, "\tje\tL%d\n", Lfalse); // Test r2 and jump to false label if not true fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r2], reglist[r2]); fprintf(Outfile, "\tje\tL%d\n", Lfalse); // Didn't jump, so result is true fprintf(Outfile, "\tmovq\t$1, %s\n", reglist[r1]); fprintf(Outfile, "\tjmp\tL%d\n", Lend); // Someone jumped to the false label, so result is false cglabel(Lfalse); fprintf(Outfile, "\tmovq\t$0, %s\n", reglist[r1]); cglabel(Lend); free_register(r2); return(r1); } // Convert an integer 
value to a boolean value. Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s@PLT\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\taddq\t$%d, %%rsp\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpushq\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmovq\t%s, %s\n", reglist[r], reglist[FIRSTPARAMREG - argposn + 1]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsalq\t$%d, %s\n", val, reglist[r]); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %s(%%rip)\n", reglist[r], sym->name); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %s(%%rip)\n", breglist[r], sym->name); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %s(%%rip)\n", dreglist[r], sym->name); break; default: fatald("Bad type in cgstorglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %d(%%rbp)\n", reglist[r], sym->st_posn); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %d(%%rbp)\n", breglist[r], sym->st_posn); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %d(%%rbp)\n", dreglist[r], sym->st_posn); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size, type; int initvalue; int i; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the variable (or its elements if an array) // and the type of the variable if (node->stype == S_ARRAY) { size = typesize(value_at(node->type), node->ctype); type = value_at(node->type); } else { size = node->size; type = node->type; } // Generate the global identity and the label cgdataseg(); if (node->class == C_GLOBAL) fprintf(Outfile, 
"\t.globl\t%s\n", node->name); fprintf(Outfile, "%s:\n", node->name); // Output space for one or more elements for (i = 0; i < node->nelems; i++) { // Get any initial value initvalue = 0; if (node->initlist != NULL) initvalue = node->initlist[i]; // Generate the space for this type switch (size) { case 1: fprintf(Outfile, "\t.byte\t%d\n", initvalue); break; case 4: fprintf(Outfile, "\t.long\t%d\n", initvalue); break; case 8: // Generate the pointer to a string literal. Treat a zero value // as actually zero, not the label L0 if (node->initlist != NULL && type == pointer_to(P_CHAR) && initvalue != 0) fprintf(Outfile, "\t.quad\tL%d\n", initvalue); else fprintf(Outfile, "\t.quad\t%d\n", initvalue); break; default: for (int i = 0; i < size; i++) fprintf(Outfile, "\t.byte\t0\n"); } } } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\t.byte\t%d\n", *cptr); } fprintf(Outfile, "\t.byte\t0\n"); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(NOREG); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Deal with pointers here as we can't put them in // the switch statement if (ptrtype(sym->type)) fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); else { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzbl\t%s, %%eax\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %%eax\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } } cgjump(sym->st_endlabel); } // Generate code to load the 
address of an // identifier into a variable. Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL || sym->class == C_STATIC) fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", sym->name, reglist[r]); else fprintf(Outfile, "\tleaq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzbq\t(%s), %s\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovslq\t(%s), %s\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { // Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmovb\t%s, (%s)\n", breglist[r1], reglist[r2]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } // Generate a switch jump table and the code to // load the registers and call the switch() code void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; // Get a label for the switch table label = genlabel(); cglabel(label); // Heuristic. If we have no cases, create one case // which points to the default case if (casecount == 0) { caseval[0] = 0; caselabel[0] = defaultlabel; casecount = 1; } // Generate the switch jump table. 
fprintf(Outfile, "\t.quad\t%d\n", casecount); for (i = 0; i < casecount; i++) fprintf(Outfile, "\t.quad\t%d, L%d\n", caseval[i], caselabel[i]); fprintf(Outfile, "\t.quad\tL%d\n", defaultlabel); // Load the specific registers cglabel(toplabel); fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); fprintf(Outfile, "\tleaq\tL%d(%%rip), %%rdx\n", label); fprintf(Outfile, "\tjmp\tswitch\n"); } // Move value between registers void cgmove(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s, %s\n", reglist[r1], reglist[r2]); } ================================================ FILE: 51_Arrays_pt2/cg_arm.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for ARMv6 on Raspberry Pi // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. static int freereg[4]; static char *reglist[4] = { "r4", "r5", "r6", "r7" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // We have to store large integer literal values in memory. // Keep a list of them which will be output in the postamble #define MAXINTS 1024 int Intlist[MAXINTS]; static int Intslot = 0; // Determine the offset of a large integer // literal from the .L3 label. If the integer // isn't in the list, add it. 
static void set_int_offset(int val) { int offset = -1; // See if it is already there for (int i = 0; i < Intslot; i++) { if (Intlist[i] == val) { offset = 4 * i; break; } } // Not in the list, so add it if (offset == -1) { offset = 4 * Intslot; if (Intslot == MAXINTS) fatal("Out of int slots in set_int_offset()"); Intlist[Intslot++] = val; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L3+%d\n", offset); } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Print out the assembly postamble void cgpostamble() { // Print out the global variables fprintf(Outfile, ".L2:\n"); for (int i = 0; i < Globs; i++) { if (Symtable[i].stype == S_VARIABLE) fprintf(Outfile, "\t.word %s\n", Symtable[i].name); } // Print out the integer literals fprintf(Outfile, ".L3:\n"); for (int i = 0; i < Intslot; i++) { fprintf(Outfile, "\t.word %d\n", Intlist[i]); } } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, \%%function\n" "%s:\n" "\tpush\t{fp, lr}\n" "\tadd\tfp, sp, #4\n" "\tsub\tsp, sp, #8\n" "\tstr\tr0, [fp, #-8]\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Symtable[id].endlabel); fputs("\tsub\tsp, fp, #4\n" "\tpop\t{fp, pc}\n" "\t.align\t2\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); // If the literal value is small, do it with one instruction if (value <= 1000) fprintf(Outfile, "\tmov\t%s, #%d\n", reglist[r], value); else { set_int_offset(value); fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); } return (r); } // Determine the offset of a variable from the .L2 // label. Yes, this is inefficient code. static void set_var_offset(int id) { int offset = 0; // Walk the symbol table up to id. 
// Find S_VARIABLEs and add on 4 until // we get to our variable for (int i = 0; i < id; i++) { if (Symtable[i].stype == S_VARIABLE) offset += 4; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L2+%d\n", offset); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tldrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s, %s\n", reglist[r1], reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\tmul\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { // To do a divide: r0 holds the dividend, r1 holds the divisor. // The quotient is in r0. 
fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r1]); fprintf(Outfile, "\tmov\tr1, %s\n", reglist[r2]); fprintf(Outfile, "\tbl\t__aeabi_idiv\n"); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r1]); free_register(r2); return (r1); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]); fprintf(Outfile, "\tbl\t%s\n", Symtable[id].name); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r]); return (r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tlsl\t%s, %s, #%d\n", reglist[r], reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tstr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgstorglob:", Symtable[id].type); } return (r); } // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { if (ptrtype(type)) return (4); switch (type) { case P_CHAR: return (1); case P_INT: case P_LONG: return (4); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Symtable[id].type); fprintf(Outfile, "\t.data\n" "\t.globl\t%s\n", Symtable[id].name); switch (typesize) { case 1: fprintf(Outfile, "%s:\t.byte\t0\n", Symtable[id].name); break; case 4: fprintf(Outfile, "%s:\t.long\t0\n", Symtable[id].name); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "moveq", "movne", "movlt", "movgt", "movle", "movge" }; // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "movne", "moveq", "movge", "movle", "movgt", "movlt" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #1\n", cmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #0\n", invcmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\tuxtb\t%s, %s\n", reglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tb\tL%d\n", l); } // List of inverted branch instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *brlist[] = { "bne", "beq", "bge", "ble", "bgt", "blt" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", brlist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[reg]); cgjump(Symtable[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. Return a new register int cgaddress(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); fprintf(Outfile, "\tmov\t%s, r3\n", reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tldrb\t%s, [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [%s]\n", reglist[r1], reglist[r2]); break; case P_INT: case P_LONG: fprintf(Outfile, "\tstr\t%s, [%s]\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 51_Arrays_pt2/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { no_seg, 
text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\tsection .text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\tsection .data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch (type) { case P_CHAR: return (offset); case P_INT: case P_LONG: break; default: if (!ptrtype(type)) fatald("Bad type in cg_align:", type); } // Here we have an int or a long. Align it on a 4-byte offset // I put the generic code here so it can be reused elsewhere. alignment = 4; offset = (offset + direction * (alignment - 1)) & ~(alignment - 1); return (offset); } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. 
// We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "r10", "r11", "r12", "r13", "r9", "r8", "rcx", "rdx", "rsi", "rdi" }; static char *breglist[] = { "r10b", "r11b", "r12b", "r13b", "r9b", "r8b", "cl", "dl", "sil", "dil" }; static char *dreglist[] = { "r10d", "r11d", "r12d", "r13d", "r9d", "r8d", "ecx", "edx", "esi", "edi" }; // Set all registers as available // But if reg is positive, don't free that one. void freeall_registers(int keepreg) { int i; for (i = 0; i < NUMFREEREGS; i++) if (i != keepreg) freereg[i] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. int alloc_register(void) { int i; for (i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(NOREG); fputs("\textern\tprintint\n", Outfile); fputs("\textern\tprintchar\n", Outfile); fputs("\textern\topen\n", Outfile); fputs("\textern\tclose\n", Outfile); fputs("\textern\tread\n", Outfile); fputs("\textern\twrite\n", Outfile); fputs("\textern\tprintf\n", Outfile); cgtextseg(); fprintf(Outfile, "; internal switch(expr) routine\n" "; rsi = switch table, rax = expr\n" "; from SubC: http://www.t3x.org/subc/\n" "\n" "switch:\n" " push rsi\n" " mov rsi, rdx\n" " mov rbx, rax\n" " cld\n" " lodsq\n" " mov rcx, rax\n" "next:\n" " lodsq\n" " mov rdx, rax\n" " lodsq\n" " cmp rbx, rdx\n" " jnz no\n" " pop rsi\n" " jmp rax\n" "no:\n" " loop next\n" " lodsq\n" " pop rsi\n" " jmp rax\n" "\n"); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(struct symtable *sym) { char *name = sym->name; struct symtable *parm, *locvar; int cnt; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the rsp and rbp if (sym->class == C_GLOBAL) fprintf(Outfile, "\tglobal\t%s\n", name); fprintf(Outfile, "%s:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n", name); // Copy any in-register parameters to the stack, up to six of them // The remaining parameters are already on the stack for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) { if (cnt > 6) { parm->st_posn = paramOffset; paramOffset += 8; } else { parm->st_posn = newlocaloffset(parm->type); cgstorlocal(paramReg--, parm); } } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. 
for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) { locvar->st_posn = newlocaloffset(locvar->type); } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\tadd\trsp, %d\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->st_endlabel); fprintf(Outfile, "\tadd\trsp, %d\n", stackOffset); fputs("\tpop rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadglob(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); } else // Print out the code to initialise it switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_PREDEC) 
fprintf(Outfile, "\tdec\tdword [%s]\n", sym->name); fprintf(Outfile, "\tmovsx\t%s, word [%s]\n", dreglist[r], sym->name); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword [%s]\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadlocal(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->st_posn); fprintf(Outfile, "\tmov\t%s, [rbp+%d]\n", reglist[r], sym->st_posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->st_posn); } else switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->st_posn); fprintf(Outfile, "\tmovzx\t%s, byte [rbp+%d]\n", reglist[r], sym->st_posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->st_posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", sym->st_posn); fprintf(Outfile, "\tmovsx\t%s, dword [rbp+%d]\n", reglist[r], sym->st_posn); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", 
sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", sym->st_posn); break; default: fatald("Bad type in cgloadlocal:", sym->type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, L%d\n", reglist[r], label); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tand\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\tor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshl\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, 
"\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshr\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tneg\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnot\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r], breglist[r]); return (r); } // Logically OR two registers and return a // register with the result, 1 or 0 int cglogor(int r1, int r2) { // Generate two labels int Ltrue = genlabel(); int Lend = genlabel(); // Test r1 and jump to true label if true fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r1], reglist[r1]); fprintf(Outfile, "\tjne\tL%d\n", Ltrue); // Test r2 and jump to true label if true fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r2], reglist[r2]); fprintf(Outfile, "\tjne\tL%d\n", Ltrue); // Didn't jump, so result is false fprintf(Outfile, "\tmov\t%s, 0\n", reglist[r1]); fprintf(Outfile, "\tjmp\tL%d\n", Lend); // Someone jumped to the true label, so result is true cglabel(Ltrue); fprintf(Outfile, "\tmov\t%s, 1\n", reglist[r1]); cglabel(Lend); free_register(r2); return(r1); } // Logically AND two registers and return a // register with the result, 1 or 0 int cglogand(int r1, int r2) { // Generate two labels int Lfalse = genlabel(); int Lend = genlabel(); // Test r1 and jump to false label if not true fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r1], reglist[r1]); fprintf(Outfile, "\tje\tL%d\n", Lfalse); // Test r2 and jump to false label if not true fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r2], reglist[r2]); fprintf(Outfile, "\tje\tL%d\n", Lfalse); // Didn't jump, so result is true fprintf(Outfile, "\tmov\t%s, 1\n", reglist[r1]); fprintf(Outfile, "\tjmp\tL%d\n", Lend); // Someone jumped to the false label, 
so result is false cglabel(Lfalse); fprintf(Outfile, "\tmov\t%s, 0\n", reglist[r1]); cglabel(Lend); free_register(r2); return(r1); } // Convert an integer value to a boolean value. Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, byte %s\n", reglist[r], breglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\tadd\trsp, %d\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmov\t%s, rax\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpush\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmov\t%s, %s\n", reglist[FIRSTPARAMREG - argposn + 1], reglist[r]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsal\t%s, %d\n", reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, dreglist[r]); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\tqword\t[rbp+%d], %s\n", sym->st_posn, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\tbyte\t[rbp+%d], %s\n", sym->st_posn, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\tdword\t[rbp+%d], %s\n", sym->st_posn, dreglist[r]); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size, type; int initvalue; int i; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the variable (or its elements if an array) // and the type of the variable if (node->stype == S_ARRAY) { size = typesize(value_at(node->type), node->ctype); type = value_at(node->type); } else { size = node->size; type = node->type; } // Generate the global identity and the label cgdataseg(); if (node->class == C_GLOBAL) fprintf(Outfile, "\tglobal\t%s\n", 
node->name); fprintf(Outfile, "%s:\n", node->name); // Output space for one or more elements for (i = 0; i < node->nelems; i++) { // Get any initial value initvalue = 0; if (node->initlist != NULL) initvalue = node->initlist[i]; // Generate the space for this type // original version switch(size) { case 1: fprintf(Outfile, "\tdb\t%d\n", initvalue); break; case 4: fprintf(Outfile, "\tdd\t%d\n", initvalue); break; case 8: // Generate the pointer to a string literal. Treat a zero value // as actually zero, not the label L0 if (node->initlist != NULL && type == pointer_to(P_CHAR) && initvalue != 0) fprintf(Outfile, "\tdq\tL%d\n", initvalue); else fprintf(Outfile, "\tdq\t%d\n", initvalue); break; default: for (int i = 0; i < size; i++) fprintf(Outfile, "\tdb\t0\n"); /* compact version using times instead of loop fprintf(Outfile, "\ttimes\t%d\tdb\t0\n", size); */ } } } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\tdb\t%d\n", *cptr); } fprintf(Outfile, "\tdb\t0\n"); /* put string in readable format on a single line // probably went overboard with error checking int comma = 0, quote = 0, start = 1; fprintf(Outfile, "\tdb\t"); for (cptr=strvalue; *cptr; cptr++) { if ( ! isprint(*cptr) ) if (comma || start) { fprintf(Outfile, "%d, ", *cptr); start = 0; comma = 1; } else if (quote) { fprintf(Outfile, "\', %d, ", *cptr); comma = 1; quote = 0; } else { fprintf(Outfile, "%d, ", *cptr); comma = 1; quote = 0; } else if (start || comma) { fprintf(Outfile, "\'%c", *cptr); start = comma = 0; quote = 1; } else { fprintf(Outfile, "%c", *cptr); comma = 0; quote = 1; } } if (comma || start) fprintf(Outfile, "0\n"); else fprintf(Outfile, "\', 0\n"); */ } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(NOREG); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Deal with pointers here as we can't put them in // the switch statement if (ptrtype(sym->type)) fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); else { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzx\teax, %s\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmov\teax, %s\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } } cgjump(sym->st_endlabel); } // Generate code to load the address of an 
// identifier into a variable. Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL || sym->class == C_STATIC) fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r], sym->name); else fprintf(Outfile, "\tlea\t%s, [rbp+%d]\n", reglist[r], sym->st_posn); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovsx\t%s, dword [%s]\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { //Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmov\t[%s], byte %s\n", reglist[r2], breglist[r1]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } // Generate a switch jump table and the code to // load the registers and call the switch() code void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; // Get a label for the switch table label = genlabel(); cglabel(label); // Heuristic. If we have no cases, create one case // which points to the default case if (casecount == 0) { caseval[0] = 0; caselabel[0] = defaultlabel; casecount = 1; } // Generate the switch jump table. 
fprintf(Outfile, "\tdq\t%d\n", casecount); for (i = 0; i < casecount; i++) fprintf(Outfile, "\tdq\t%d, L%d\n", caseval[i], caselabel[i]); fprintf(Outfile, "\tdq\tL%d\n", defaultlabel); // Load the specific registers cglabel(toplabel); fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); fprintf(Outfile, "\tmov\trdx, L%d\n", label); fprintf(Outfile, "\tjmp\tswitch\n"); } // Move value between registers void cgmove(int r1, int r2) { fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r2], reglist[r1]); } ================================================ FILE: 51_Arrays_pt2/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Putback; // Character put back by scanner extern_ struct symtable *Functionid; // Symbol ptr of the current function extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ char *Infilename; // Name of file we are parsing extern_ char *Outfilename; // Name of file we opened as Outfile extern_ struct token Token; // Last token scanned extern_ struct token Peektoken; // A look-ahead token extern_ char Text[TEXTLEN + 1]; // Last identifier scanned extern_ int Looplevel; // Depth of nested loops extern_ int Switchlevel; // Depth of nested switches // Symbol table lists extern_ struct symtable *Globhead, *Globtail; // Global variables and functions extern_ struct symtable *Loclhead, *Locltail; // Local variables extern_ struct symtable *Parmhead, *Parmtail; // Local parameters extern_ struct symtable *Membhead, *Membtail; // Temp list of struct/union members extern_ struct symtable *Structhead, *Structtail; // List of struct types extern_ struct symtable *Unionhead, *Uniontail; // List of union types extern_ struct symtable *Enumhead, *Enumtail; // List of enum types and values extern_ struct symtable *Typehead, *Typetail; // List of typedefs // Command-line flags extern_ int 
O_dumpAST; // If true, dump the AST trees extern_ int O_dumpsym; // If true, dump the symbol table extern_ int O_keepasm; // If true, keep any assembly files extern_ int O_assemble; // If true, assemble the assembly files extern_ int O_dolink; // If true, link the object files extern_ int O_verbose; // If true, print info on compilation stages ================================================ FILE: 51_Arrays_pt2/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 static struct symtable *composite_declaration(int type); static int typedef_declaration(struct symtable **ctype); static int type_of_typedef(char *name, struct symtable **ctype); static void enum_declaration(void); // Parse the current token and return a primitive type enum value, // a pointer to any composite type and possibly modify // the class of the type. int parse_type(struct symtable **ctype, int *class) { int type, exstatic = 1; // See if the class has been changed to extern or static while (exstatic) { switch (Token.token) { case T_EXTERN: if (*class == C_STATIC) fatal("Illegal to have extern and static at the same time"); *class = C_EXTERN; scan(&Token); break; case T_STATIC: if (*class == C_LOCAL) fatal("Compiler doesn't support static local declarations"); if (*class == C_EXTERN) fatal("Illegal to have extern and static at the same time"); *class = C_STATIC; scan(&Token); break; default: exstatic = 0; } } // Now work on the actual type keyword switch (Token.token) { case T_VOID: type = P_VOID; scan(&Token); break; case T_CHAR: type = P_CHAR; scan(&Token); break; case T_INT: type = P_INT; scan(&Token); break; case T_LONG: type = P_LONG; scan(&Token); break; // For the following, if we have a ';' after the // parsing then there is no type, so return -1. 
// Example: struct x {int y; int z}; case T_STRUCT: type = P_STRUCT; *ctype = composite_declaration(P_STRUCT); if (Token.token == T_SEMI) type = -1; break; case T_UNION: type = P_UNION; *ctype = composite_declaration(P_UNION); if (Token.token == T_SEMI) type = -1; break; case T_ENUM: type = P_INT; // Enums are really ints enum_declaration(); if (Token.token == T_SEMI) type = -1; break; case T_TYPEDEF: type = typedef_declaration(ctype); if (Token.token == T_SEMI) type = -1; break; case T_IDENT: type = type_of_typedef(Text, ctype); break; default: fatals("Illegal type, token", Token.tokstr); } return (type); } // Given a type parsed by parse_type(), scan in any following // '*' tokens and return the new type int parse_stars(int type) { while (1) { if (Token.token != T_STAR) break; type = pointer_to(type); scan(&Token); } return (type); } // Parse a type which appears inside a cast int parse_cast(void) { int type, class=0; struct symtable *ctype; // Get the type inside the parentheses type= parse_stars(parse_type(&ctype, &class)); // Do some error checking. I'm sure more can be done if (type == P_STRUCT || type == P_UNION || type == P_VOID) fatal("Cannot cast to a struct, union or void type"); return(type); } // Given a type, parse an expression of literals and ensure // that the type of this expression matches the given type. // Parse any type cast that precedes the expression. // If an integer literal, return this value. // If a string literal, return the label number of the string. 
int parse_literal(int type) { struct ASTnode *tree; // Parse the expression and optimise the resulting AST tree tree= optimise(binexpr(0)); // If there's a cast, get the child and // mark it as having the type from the cast if (tree->op == A_CAST) { tree->left->type= tree->type; tree= tree->left; } // The tree must now have an integer or string literal if (tree->op != A_INTLIT && tree->op != A_STRLIT) fatal("Cannot initialise globals with a general expression"); // If the type is char * and if (type == pointer_to(P_CHAR)) { // We have a string literal, return the label number if (tree->op == A_STRLIT) return(tree->a_intvalue); // We have a zero int literal, so that's a NULL if (tree->op == A_INTLIT && tree->a_intvalue==0) return(0); } // We only get here with an integer literal. The input type // is an integer type and is wide enough to hold the literal value if (inttype(type) && typesize(type, NULL) >= typesize(tree->type, NULL)) return(tree->a_intvalue); fatal("Type mismatch: literal vs. variable"); return(0); // Keep -Wall happy } // Given the type, name and class of a scalar variable, // parse any initialisation value and allocate storage for it. // Return the variable's symbol table entry. 
static struct symtable *scalar_declaration(char *varname, int type, struct symtable *ctype, int class, struct ASTnode **tree) { struct symtable *sym=NULL; struct ASTnode *varnode, *exprnode; *tree= NULL; // Add this as a known scalar switch (class) { case C_STATIC: case C_EXTERN: case C_GLOBAL: sym= addglob(varname, type, ctype, S_VARIABLE, class, 1, 0); break; case C_LOCAL: sym= addlocl(varname, type, ctype, S_VARIABLE, 1); break; case C_PARAM: sym= addparm(varname, type, ctype, S_VARIABLE); break; case C_MEMBER: sym= addmemb(varname, type, ctype, S_VARIABLE, 1); break; } // The variable is being initialised if (Token.token == T_ASSIGN) { // Only possible for a global or local if (class != C_GLOBAL && class != C_LOCAL && class != C_STATIC) fatals("Variable can not be initialised", varname); scan(&Token); // Globals must be assigned a literal value if (class == C_GLOBAL || class == C_STATIC) { // Create one initial value for the variable and // parse this value sym->initlist= (int *)malloc(sizeof(int)); sym->initlist[0]= parse_literal(type); } if (class == C_LOCAL) { // Make an A_IDENT AST node with the variable varnode = mkastleaf(A_IDENT, sym->type, sym, 0); // Get the expression for the assignment, make into a rvalue exprnode = binexpr(0); exprnode->rvalue = 1; // Ensure the expression's type matches the variable exprnode = modify_type(exprnode, varnode->type, 0); if (exprnode == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree *tree = mkastnode(A_ASSIGN, exprnode->type, exprnode, NULL, varnode, NULL, 0); } } // Generate any global space if (class == C_GLOBAL || class == C_STATIC) genglobsym(sym); return (sym); } // Given the type, name and class of an variable, parse // the size of the array, if any. Then parse any initialisation // value and allocate storage for it. // Return the variable's symbol table entry. 
static struct symtable *array_declaration(char *varname, int type, struct symtable *ctype, int class) { struct symtable *sym; // New symbol table entry int nelems= -1; // Assume the number of elements won't be given int maxelems; // The maximum number of elements in the init list int *initlist; // The list of initial elements int i=0, j; // Skip past the '[' scan(&Token); // See we have an array size if (Token.token != T_RBRACKET) { nelems= parse_literal(P_INT); if (nelems <= 0) fatald("Array size is illegal", nelems); } // Ensure we have a following ']' match(T_RBRACKET, "]"); // Add this as a known array. We treat the // array as a pointer to its elements' type switch (class) { case C_STATIC: case C_EXTERN: case C_GLOBAL: sym = addglob(varname, pointer_to(type), ctype, S_ARRAY, class, 0, 0); break; default: fatal("For now, declaration of non-global arrays is not implemented"); } // Array initialisation if (Token.token == T_ASSIGN) { if (class != C_GLOBAL && class != C_STATIC) fatals("Variable can not be initialised", varname); scan(&Token); // Get the following left curly bracket match(T_LBRACE, "{"); #define TABLE_INCREMENT 10 // If the array already has nelems, allocate that many elements // in the list. Otherwise, start with TABLE_INCREMENT. 
if (nelems != -1) maxelems= nelems; else maxelems= TABLE_INCREMENT; initlist= (int *)malloc(maxelems *sizeof(int)); // Loop getting a new literal value from the list while (1) { // Check we can add the next value, then parse and add it if (nelems != -1 && i == maxelems) fatal("Too many values in initialisation list"); initlist[i++]= parse_literal(type); // Increase the list size if the original size was // not set and we have hit the end of the current list if (nelems == -1 && i == maxelems) { maxelems += TABLE_INCREMENT; initlist= (int *)realloc(initlist, maxelems *sizeof(int)); } // Leave when we hit the right curly bracket if (Token.token == T_RBRACE) { scan(&Token); break; } // Next token must be a comma, then comma(); } // Zero any unused elements in the initlist. // Attach the list to the symbol table entry for (j=i; j < sym->nelems; j++) initlist[j]=0; if (i > nelems) nelems = i; sym->initlist= initlist; } // Set the size of the array and the number of elements sym->nelems= nelems; sym->size= sym->nelems * typesize(type, ctype); // Generate any global space if (class == C_GLOBAL || class == C_STATIC) genglobsym(sym); return (sym); } // Given a pointer to the new function being declared and // a possibly NULL pointer to the function's previous declaration, // parse a list of parameters and cross-check them against the // previous declaration. Return the count of parameters static int param_declaration_list(struct symtable *oldfuncsym, struct symtable *newfuncsym) { int type, paramcnt = 0; struct symtable *ctype; struct symtable *protoptr = NULL; struct ASTnode *unused; // Get the pointer to the first prototype parameter if (oldfuncsym != NULL) protoptr = oldfuncsym->member; // Loop getting any parameters while (Token.token != T_RPAREN) { // If the first token is 'void' if (Token.token == T_VOID) { // Peek at the next token. If a ')', the function // has no parameters, so leave the loop. 
scan(&Peektoken); if (Peektoken.token == T_RPAREN) { // Move the Peektoken into the Token paramcnt= 0; scan(&Token); break; } } // Get the type of the next parameter type = declaration_list(&ctype, C_PARAM, T_COMMA, T_RPAREN, &unused); if (type == -1) fatal("Bad type in parameter list"); // Ensure the type of this parameter matches the prototype if (protoptr != NULL) { if (type != protoptr->type) fatald("Type doesn't match prototype for parameter", paramcnt + 1); protoptr = protoptr->next; } paramcnt++; // Stop when we hit the right parenthesis if (Token.token == T_RPAREN) break; // We need a comma as separator comma(); } if (oldfuncsym != NULL && paramcnt != oldfuncsym->nelems) fatals("Parameter count mismatch for function", oldfuncsym->name); // Return the count of parameters return (paramcnt); } // // function_declaration: type identifier '(' parameter_list ')' ; // | type identifier '(' parameter_list ')' compound_statement ; // // Parse the declaration of function. static struct symtable *function_declaration(char *funcname, int type, struct symtable *ctype, int class) { struct ASTnode *tree, *finalstmt; struct symtable *oldfuncsym, *newfuncsym = NULL; int endlabel, paramcnt; // Text has the identifier's name. If this exists and is a // function, get the id. Otherwise, set oldfuncsym to NULL. if ((oldfuncsym = findsymbol(funcname)) != NULL) if (oldfuncsym->stype != S_FUNCTION) oldfuncsym = NULL; // If this is a new function declaration, get a // label-id for the end label, and add the function // to the symbol table, if (oldfuncsym == NULL) { endlabel = genlabel(); // Assumtion: functions only return scalar types, so NULL below newfuncsym = addglob(funcname, type, NULL, S_FUNCTION, class, 0, endlabel); } // Scan in the '(', any parameters and the ')'. 
// Pass in any existing function prototype pointer lparen(); paramcnt = param_declaration_list(oldfuncsym, newfuncsym); rparen(); // If this is a new function declaration, update the // function symbol entry with the number of parameters. // Also copy the parameter list into the function's node. if (newfuncsym) { newfuncsym->nelems = paramcnt; newfuncsym->member = Parmhead; oldfuncsym = newfuncsym; } // Clear out the parameter list Parmhead = Parmtail = NULL; // Declaration ends in a semicolon, only a prototype. if (Token.token == T_SEMI) return (oldfuncsym); // This is not just a prototype. // Set the Functionid global to the function's symbol pointer Functionid = oldfuncsym; // Get the AST tree for the compound statement and mark // that we have parsed no loops or switches yet Looplevel = 0; Switchlevel = 0; lbrace(); tree = compound_statement(0); rbrace(); // If the function type isn't P_VOID .. if (type != P_VOID) { // Error if no statements in the function if (tree == NULL) fatal("No statements in function with non-void type"); // Check that the last AST operation in the // compound statement was a return statement finalstmt = (tree->op == A_GLUE) ? tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); } // Build the A_FUNCTION node which has the function's symbol pointer // and the compound statement sub-tree tree = mkastunary(A_FUNCTION, type, tree, oldfuncsym, endlabel); // Do optimisations on the AST tree tree= optimise(tree); // Dump the AST tree if requested if (O_dumpAST) { dumpAST(tree, NOLABEL, 0); fprintf(stdout, "\n\n"); } // Generate the assembly code for it genAST(tree, NOLABEL, NOLABEL, NOLABEL, 0); // Now free the symbols associated // with this function freeloclsyms(); return (oldfuncsym); } // Parse composite type declarations: structs or unions. // Either find an existing struct/union declaration, or build // a struct/union symbol table entry and return its pointer. 
static struct symtable *composite_declaration(int type) { struct symtable *ctype = NULL; struct symtable *m; struct ASTnode *unused; int offset; int t; // Skip the struct/union keyword scan(&Token); // See if there is a following struct/union name if (Token.token == T_IDENT) { // Find any matching composite type if (type == P_STRUCT) ctype = findstruct(Text); else ctype = findunion(Text); scan(&Token); } // If the next token isn't an LBRACE , this is // the usage of an existing struct/union type. // Return the pointer to the type. if (Token.token != T_LBRACE) { if (ctype == NULL) fatals("unknown struct/union type", Text); return (ctype); } // Ensure this struct/union type hasn't been // previously defined if (ctype) fatals("previously defined struct/union", Text); // Build the composite type and skip the left brace if (type == P_STRUCT) ctype = addstruct(Text); else ctype = addunion(Text); scan(&Token); // Scan in the list of members while (1) { // Get the next member. m is used as a dummy t= declaration_list(&m, C_MEMBER, T_SEMI, T_RBRACE, &unused); if (t== -1) fatal("Bad type in member list"); if (Token.token == T_SEMI) scan(&Token); if (Token.token == T_RBRACE) break; } // Attach to the struct type's node rbrace(); if (Membhead==NULL) fatals("No members in struct", ctype->name); ctype->member = Membhead; Membhead = Membtail = NULL; // Set the offset of the initial member // and find the first free byte after it m = ctype->member; m->st_posn = 0; offset = typesize(m->type, m->ctype); // Set the position of each successive member in the composite type // Unions are easy. 
For structs, align the member and find the next free byte for (m = m->next; m != NULL; m = m->next) { // Set the offset for this member if (type == P_STRUCT) m->st_posn = genalign(m->type, offset, 1); else m->st_posn = 0; // Get the offset of the next free byte after this member offset += typesize(m->type, m->ctype); } // Set the overall size of the composite type ctype->size = offset; return (ctype); } // Parse an enum declaration static void enum_declaration(void) { struct symtable *etype = NULL; char *name= NULL; int intval = 0; // Skip the enum keyword. scan(&Token); // If there's a following enum type name, get a // pointer to any existing enum type node. if (Token.token == T_IDENT) { etype = findenumtype(Text); name = strdup(Text); // As it gets tromped soon scan(&Token); } // If the next token isn't a LBRACE, check // that we have an enum type name, then return if (Token.token != T_LBRACE) { if (etype == NULL) fatals("undeclared enum type:", name); return; } // We do have an LBRACE. Skip it scan(&Token); // If we have an enum type name, ensure that it // hasn't been declared before. if (etype != NULL) fatals("enum type redeclared:", etype->name); else // Build an enum type node for this identifier etype = addenum(name, C_ENUMTYPE, 0); // Loop to get all the enum values while (1) { // Ensure we have an identifier // Copy it in case there's an int literal coming up ident(); name = strdup(Text); // Ensure this enum value hasn't been declared before etype = findenumval(name); if (etype != NULL) fatals("enum value redeclared:", Text); // If the next token is an '=', skip it and // get the following int literal if (Token.token == T_ASSIGN) { scan(&Token); if (Token.token != T_INTLIT) fatal("Expected int literal after '='"); intval = Token.intvalue; scan(&Token); } // Build an enum value node for this identifier. // Increment the value for the next enum identifier. 
etype = addenum(name, C_ENUMVAL, intval++); // Bail out on a right curly bracket, else get a comma if (Token.token == T_RBRACE) break; comma(); } scan(&Token); // Skip over the right curly bracket } // Parse a typedef declaration and return the type // and ctype that it represents static int typedef_declaration(struct symtable **ctype) { int type, class = 0; // Skip the typedef keyword. scan(&Token); // Get the actual type following the keyword type = parse_type(ctype, &class); if (class != 0) fatal("Can't have extern in a typedef declaration"); // See if the typedef identifier already exists if (findtypedef(Text) != NULL) fatals("redefinition of typedef", Text); // Get any following '*' tokens type = parse_stars(type); // It doesn't exist so add it to the typedef list addtypedef(Text, type, *ctype); scan(&Token); return (type); } // Given a typedef name, return the type it represents static int type_of_typedef(char *name, struct symtable **ctype) { struct symtable *t; // Look up the typedef in the list t = findtypedef(name); if (t == NULL) fatals("unknown type", name); scan(&Token); *ctype = t->ctype; return (t->type); } // Parse the declaration of a variable or function. // The type and any following '*'s have been scanned, and we // have the identifier in the Token variable. // The class argument is the variable's class. // Return a pointer to the symbol's entry in the symbol table static struct symtable *symbol_declaration(int type, struct symtable *ctype, int class, struct ASTnode **tree) { struct symtable *sym = NULL; char *varname = strdup(Text); // Ensure that we have an identifier. // We copied it above so we can scan more tokens in, e.g. // an assignment expression for a local variable. 
ident(); // Deal with function declarations if (Token.token == T_LPAREN) { return (function_declaration(varname, type, ctype, class)); } // See if this array or scalar variable has already been declared switch (class) { case C_EXTERN: case C_STATIC: case C_GLOBAL: if (findglob(varname) != NULL) fatals("Duplicate global variable declaration", varname); case C_LOCAL: case C_PARAM: if (findlocl(varname) != NULL) fatals("Duplicate local variable declaration", varname); case C_MEMBER: if (findmember(varname) != NULL) fatals("Duplicate struct/union member declaration", varname); } // Add the array or scalar variable to the symbol table if (Token.token == T_LBRACKET) sym = array_declaration(varname, type, ctype, class); else sym = scalar_declaration(varname, type, ctype, class, tree); return (sym); } // Parse a list of symbols where there is an initial type. // Return the type of the symbols. et1 and et2 are end tokens. int declaration_list(struct symtable **ctype, int class, int et1, int et2, struct ASTnode **gluetree) { int inittype, type; struct symtable *sym; struct ASTnode *tree; *gluetree= NULL; // Get the initial type. 
If -1, it was // a composite type definition, return this if ((inittype = parse_type(ctype, &class)) == -1) return (inittype); // Now parse the list of symbols while (1) { // See if this symbol is a pointer type = parse_stars(inittype); // Parse this symbol sym = symbol_declaration(type, *ctype, class, &tree); // We parsed a function, there is no list so leave if (sym->stype == S_FUNCTION) { if (class != C_GLOBAL && class != C_STATIC) fatal("Function definition not at global level"); return (type); } // Glue any AST tree from a local declaration // to build a sequence of assignments to perform if (*gluetree== NULL) *gluetree= tree; else *gluetree = mkastnode(A_GLUE, P_NONE, *gluetree, NULL, tree, NULL, 0); // We are at the end of the list, leave if (Token.token == et1 || Token.token == et2) return (type); // Otherwise, we need a comma as separator comma(); } } // Parse one or more global declarations, either // variables, functions or structs void global_declarations(void) { struct symtable *ctype; struct ASTnode *unused; while (Token.token != T_EOF) { declaration_list(&ctype, C_GLOBAL, T_SEMI, T_EOF, &unused); // Skip any semicolons and right curly brackets if (Token.token == T_SEMI) scan(&Token); } } ================================================ FILE: 51_Arrays_pt2/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c void reject_token(struct token *t); int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue); struct ASTnode *mkastleaf(int op, int type, struct symtable *sym, int intvalue); struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, struct symtable *sym, int intvalue); void dumpAST(struct ASTnode *n, int label, int parentASTop); // gen.c int genlabel(void); int genAST(struct ASTnode *n, int iflabel, int 
looptoplabel, int loopendlabel, int parentASTop); void genpreamble(); void genpostamble(); void genfreeregs(int keepreg); void genglobsym(struct symtable *node); int genglobstr(char *strvalue); int genprimsize(int type); int genalign(int type, int offset, int direction); void genreturn(int reg, int id); // cg.c int cgprimsize(int type); int cgalign(int type, int offset, int direction); void cgtextseg(); void cgdataseg(); int alloc_register(void); void freeall_registers(int keepreg); void cgpreamble(); void cgpostamble(); void cgfuncpreamble(struct symtable *sym); void cgfuncpostamble(struct symtable *sym); int cgloadint(int value, int type); int cgloadglob(struct symtable *sym, int op); int cgloadlocal(struct symtable *sym, int op); int cgloadglobstr(int label); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdiv(int r1, int r2); int cgshlconst(int r, int val); int cgcall(struct symtable *sym, int numargs); void cgcopyarg(int r, int argposn); int cgstorglob(int r, struct symtable *sym); int cgstorlocal(int r, struct symtable *sym); void cgglobsym(struct symtable *node); void cgglobstr(int l, char *strvalue); int cgcompare_and_set(int ASTop, int r1, int r2); int cgcompare_and_jump(int ASTop, int r1, int r2, int label); void cglabel(int l); void cgjump(int l); int cgwiden(int r, int oldtype, int newtype); void cgreturn(int reg, struct symtable *sym); int cgaddress(struct symtable *sym); int cgderef(int r, int type); int cgstorderef(int r1, int r2, int type); int cgnegate(int r); int cginvert(int r); int cglognot(int r); int cglogor(int r1, int r2); int cglogand(int r1, int r2); int cgboolean(int r, int op, int label); int cgand(int r1, int r2); int cgor(int r1, int r2); int cgxor(int r1, int r2); int cgshl(int r1, int r2); int cgshr(int r1, int r2); void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel); void cgmove(int r1, int r2); // expr.c struct ASTnode *expression_list(int 
endtoken); struct ASTnode *binexpr(int ptp); // stmt.c struct ASTnode *compound_statement(int inswitch); // misc.c void match(int t, char *what); void semi(void); void lbrace(void); void rbrace(void); void lparen(void); void rparen(void); void ident(void); void comma(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node); struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn); struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn); struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int nelems); struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype); struct symtable *addstruct(char *name); struct symtable *addunion(char *name); struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int nelems); struct symtable *addenum(char *name, int class, int value); struct symtable *addtypedef(char *name, int type, struct symtable *ctype); struct symtable *findglob(char *s); struct symtable *findlocl(char *s); struct symtable *findsymbol(char *s); struct symtable *findmember(char *s); struct symtable *findstruct(char *s); struct symtable *findunion(char *s); struct symtable *findenumtype(char *s); struct symtable *findenumval(char *s); struct symtable *findtypedef(char *s); void clear_symtable(void); void freeloclsyms(void); void freestaticsyms(void); void dumptable(struct symtable *head, char *name, int indent); void dumpsymtables(void); // decl.c int parse_type(struct symtable **ctype, int *class); int parse_stars(int type); int parse_cast(void); int declaration_list(struct symtable **ctype, int class, int et1, int et2, struct ASTnode **gluetree); void global_declarations(void); // types.c int inttype(int type); int 
ptrtype(int type); int pointer_to(int type); int value_at(int type); int typesize(int type, struct symtable *ctype); struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op); // opt.c struct ASTnode *optimise(struct ASTnode *n); ================================================ FILE: 51_Arrays_pt2/defs.h ================================================ #include #include #include #include // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 enum { TEXTLEN = 512 // Length of identifiers in input }; // Commands and default filenames #define AOUT "a.out" #ifdef __NASM__ #define ASCMD "nasm -f elf64 -w-ptr -o " #define LDCMD "cc -no-pie -fno-plt -Wall -o " #else #define ASCMD "as -o " #define LDCMD "cc -o " #endif #define CPPCMD "cpp -nostdinc -isystem " // Token types enum { T_EOF, // Binary operators T_ASSIGN, T_ASPLUS, T_ASMINUS, T_ASSTAR, T_ASSLASH, T_QUESTION, T_LOGOR, T_LOGAND, T_OR, T_XOR, T_AMPER, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, T_LSHIFT, T_RSHIFT, T_PLUS, T_MINUS, T_STAR, T_SLASH, // Other operators T_INC, T_DEC, T_INVERT, T_LOGNOT, // Type keywords T_VOID, T_CHAR, T_INT, T_LONG, // Other keywords T_IF, T_ELSE, T_WHILE, T_FOR, T_RETURN, T_STRUCT, T_UNION, T_ENUM, T_TYPEDEF, T_EXTERN, T_BREAK, T_CONTINUE, T_SWITCH, T_CASE, T_DEFAULT, T_SIZEOF, T_STATIC, // Structural tokens T_INTLIT, T_STRLIT, T_SEMI, T_IDENT, T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, T_LBRACKET, T_RBRACKET, T_COMMA, T_DOT, T_ARROW, T_COLON }; // Token structure struct token { int token; // Token type, from the enum list above char *tokstr; // String version of the token int intvalue; // For T_INTLIT, the integer value }; // AST node types. 
The first few line up // with the related tokens enum { A_ASSIGN = 1, A_ASPLUS, A_ASMINUS, A_ASSTAR, A_ASSLASH, A_TERNARY, A_LOGOR, A_LOGAND, A_OR, A_XOR, A_AND, A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_LSHIFT, A_RSHIFT, A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_INTLIT, A_STRLIT, A_IDENT, A_GLUE, A_IF, A_WHILE, A_FUNCTION, A_WIDEN, A_RETURN, A_FUNCCALL, A_DEREF, A_ADDR, A_SCALE, A_PREINC, A_PREDEC, A_POSTINC, A_POSTDEC, A_NEGATE, A_INVERT, A_LOGNOT, A_TOBOOL, A_BREAK, A_CONTINUE, A_SWITCH, A_CASE, A_DEFAULT, A_CAST }; // Primitive types. The bottom 4 bits is an integer // value that represents the level of indirection, // e.g. 0= no pointer, 1= pointer, 2= pointer pointer etc. enum { P_NONE, P_VOID = 16, P_CHAR = 32, P_INT = 48, P_LONG = 64, P_STRUCT=80, P_UNION=96 }; // Structural types enum { S_VARIABLE, S_FUNCTION, S_ARRAY }; // Storage classes enum { C_GLOBAL = 1, // Globally visible symbol C_LOCAL, // Locally visible symbol C_PARAM, // Locally visible function parameter C_EXTERN, // External globally visible symbol C_STATIC, // Static symbol, visible in one file C_STRUCT, // A struct C_UNION, // A union C_MEMBER, // Member of a struct or union C_ENUMTYPE, // A named enumeration type C_ENUMVAL, // A named enumeration value C_TYPEDEF // A named typedef }; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol struct symtable *ctype; // If struct/union, ptr to that type int stype; // Structural type for the symbol int class; // Storage class for the symbol int size; // Total size in bytes of this symbol int nelems; // Functions: # params. 
Arrays: # elements #define st_endlabel st_posn // For functions, the end label int st_posn; // For locals, the negative offset // from the stack base pointer int *initlist; // List of initial values struct symtable *next; // Next symbol in one list struct symtable *member; // First member of a function, struct, }; // union or enum // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates int rvalue; // True if the node is an rvalue struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; struct symtable *sym; // For many AST nodes, the pointer to // the symbol in the symbol table #define a_intvalue a_size // For A_INTLIT, the integer value int a_size; // For A_SCALE, the size to scale by }; enum { NOREG = -1, // Use NOREG when the AST generation // functions have no register to return NOLABEL = 0 // Use NOLABEL when we have no label to // pass to genAST() }; ================================================ FILE: 51_Arrays_pt2/expr.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of expressions // Copyright (c) 2019 Warren Toomey, GPL3 // expression_list: // | expression // | expression ',' expression_list // ; // Parse a list of zero or more comma-separated expressions and // return an AST composed of A_GLUE nodes with the left-hand child // being the sub-tree of previous expressions (or NULL) and the right-hand // child being the next expression. Each A_GLUE node will have size field // set to the number of expressions in the tree at this point. 
If no // expressions are parsed, NULL is returned struct ASTnode *expression_list(int endtoken) { struct ASTnode *tree = NULL; struct ASTnode *child = NULL; int exprcount = 0; // Loop until the end token while (Token.token != endtoken) { // Parse the next expression and increment the expression count child = binexpr(0); exprcount++; // Build an A_GLUE AST node with the previous tree as the left child // and the new expression as the right child. Store the expression count. tree = mkastnode(A_GLUE, P_NONE, tree, NULL, child, NULL, exprcount); // Stop when we reach the end token if (Token.token == endtoken) break; // Must have a ',' at this point match(T_COMMA, ","); } // Return the tree of expressions return (tree); } // Parse a function call and return its AST static struct ASTnode *funccall(void) { struct ASTnode *tree; struct symtable *funcptr; // Check that the identifier has been defined as a function, // then make a leaf node for it. if ((funcptr = findsymbol(Text)) == NULL || funcptr->stype != S_FUNCTION) { fatals("Undeclared function", Text); } // Get the '(' lparen(); // Parse the argument expression list tree = expression_list(T_RPAREN); // XXX Check type of each argument against the function's prototype // Build the function call AST node. Store the // function's return type as this node's type. // Also record the function's symbol-id tree = mkastunary(A_FUNCCALL, funcptr->type, tree, funcptr, 0); // Get the ')' rparen(); return (tree); } // Parse the index into an array and return an AST tree for it static struct ASTnode *array_access(void) { struct ASTnode *left, *right; struct symtable *aryptr; // Check that the identifier has been defined as an array or a pointer. 
if ((aryptr = findsymbol(Text)) == NULL) fatals("Undeclared variable", Text); if (aryptr->stype != S_ARRAY && (aryptr->stype == S_VARIABLE && !ptrtype(aryptr->type))) fatals("Not an array or pointer", Text); // Make a leaf node for it that points at the base of // the array, or loads the pointer's value as an rvalue if (aryptr->stype == S_ARRAY) left = mkastleaf(A_ADDR, aryptr->type, aryptr, 0); else { left = mkastleaf(A_IDENT, aryptr->type, aryptr, 0); left->rvalue= 1; } // Get the '[' scan(&Token); // Parse the following expression right = binexpr(0); // Get the ']' match(T_RBRACKET, "]"); // Ensure that this is of int type if (!inttype(right->type)) fatal("Array index is not of integer type"); // Scale the index by the size of the element's type right = modify_type(right, left->type, A_ADD); // Return an AST tree where the array's base has the offset // added to it, and dereference the element. Still an lvalue // at this point. left = mkastnode(A_ADD, aryptr->type, left, NULL, right, NULL, 0); left = mkastunary(A_DEREF, value_at(left->type), left, NULL, 0); return (left); } // Parse the member reference of a struct or union // and return an AST tree for it. If withpointer is true, // the access is through a pointer to the member. static struct ASTnode *member_access(int withpointer) { struct ASTnode *left, *right; struct symtable *compvar; struct symtable *typeptr; struct symtable *m; // Check that the identifier has been declared as a // struct/union or a struct/union pointer if ((compvar = findsymbol(Text)) == NULL) fatals("Undeclared variable", Text); if (withpointer && compvar->type != pointer_to(P_STRUCT) && compvar->type != pointer_to(P_UNION)) fatals("Undeclared variable", Text); if (!withpointer && compvar->type != P_STRUCT && compvar->type != P_UNION) fatals("Undeclared variable", Text); // If a pointer to a struct/union, get the pointer's value. 
// Otherwise, make a leaf node that points at the base // Either way, it's an rvalue if (withpointer) { left = mkastleaf(A_IDENT, pointer_to(compvar->type), compvar, 0); } else left = mkastleaf(A_ADDR, compvar->type, compvar, 0); left->rvalue = 1; // Get the details of the composite type typeptr = compvar->ctype; // Skip the '.' or '->' token and get the member's name scan(&Token); ident(); // Find the matching member's name in the type // Die if we can't find it for (m = typeptr->member; m != NULL; m = m->next) if (!strcmp(m->name, Text)) break; if (m == NULL) fatals("No member found in struct/union: ", Text); // Build an A_INTLIT node with the offset right = mkastleaf(A_INTLIT, P_INT, NULL, m->st_posn); // Add the member's offset to the base of the struct/union // and dereference it. Still an lvalue at this point left = mkastnode(A_ADD, pointer_to(m->type), left, NULL, right, NULL, 0); left = mkastunary(A_DEREF, m->type, left, NULL, 0); return (left); } // Parse a postfix expression and return // an AST node representing it. The // identifier is already in Text. static struct ASTnode *postfix(void) { struct ASTnode *n; struct symtable *varptr; struct symtable *enumptr; int rvalue=0; // If the identifier matches an enum value, // return an A_INTLIT node if ((enumptr = findenumval(Text)) != NULL) { scan(&Token); return (mkastleaf(A_INTLIT, P_INT, NULL, enumptr->st_posn)); } // Scan in the next token to see if we have a postfix expression scan(&Token); // Function call if (Token.token == T_LPAREN) return (funccall()); // An array reference if (Token.token == T_LBRACKET) return (array_access()); // Access into a struct or union if (Token.token == T_DOT) return (member_access(0)); if (Token.token == T_ARROW) return (member_access(1)); // An identifier, check that it exists. For arrays, set rvalue to 1. 
if ((varptr = findsymbol(Text)) == NULL) fatals("Unknown variable", Text); switch(varptr->stype) { case S_VARIABLE: break; case S_ARRAY: rvalue= 1; break; default: fatals("Identifier not a scalar or array variable", Text); } switch (Token.token) { // Post-increment: skip over the token case T_INC: if (rvalue == 1) fatals("Cannot ++ on rvalue", Text); scan(&Token); n = mkastleaf(A_POSTINC, varptr->type, varptr, 0); break; // Post-decrement: skip over the token case T_DEC: if (rvalue == 1) fatals("Cannot -- on rvalue", Text); scan(&Token); n = mkastleaf(A_POSTDEC, varptr->type, varptr, 0); break; // Just a variable reference. Ensure any arrays // cannot be treated as lvalues. default: if (varptr->stype == S_ARRAY) { n = mkastleaf(A_ADDR, varptr->type, varptr, 0); n->rvalue = rvalue; } else n = mkastleaf(A_IDENT, varptr->type, varptr, 0); } return (n); } // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; int id; int type = 0; int size, class; struct symtable *ctype; switch (Token.token) { case T_STATIC: case T_EXTERN: fatal("Compiler doesn't support static or extern local declarations"); case T_SIZEOF: // Skip the T_SIZEOF and ensure we have a left parenthesis scan(&Token); if (Token.token != T_LPAREN) fatal("Left parenthesis expected after sizeof"); scan(&Token); // Get the type inside the parentheses type = parse_stars(parse_type(&ctype, &class)); // Get the type's size size = typesize(type, ctype); rparen(); // Return a leaf node int literal with the size return (mkastleaf(A_INTLIT, P_INT, NULL, size)); case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. // Make it a P_CHAR if it's within the P_CHAR range if (Token.intvalue >= 0 && Token.intvalue < 256) n = mkastleaf(A_INTLIT, P_CHAR, NULL, Token.intvalue); else n = mkastleaf(A_INTLIT, P_INT, NULL, Token.intvalue); break; case T_STRLIT: // For a STRLIT token, generate the assembly for it. // Then make a leaf AST node for it. 
id is the string's label. id = genglobstr(Text); n = mkastleaf(A_STRLIT, pointer_to(P_CHAR), NULL, id); break; case T_IDENT: return (postfix()); case T_LPAREN: // Beginning of a parenthesised expression, skip the '('. scan(&Token); // If the token after is a type identifier, this is a cast expression switch (Token.token) { case T_IDENT: // We have to see if the identifier matches a typedef. // If not, treat it as an expression. if (findtypedef(Text) == NULL) { n = binexpr(0); break; } case T_VOID: case T_CHAR: case T_INT: case T_LONG: case T_STRUCT: case T_UNION: case T_ENUM: // Get the type inside the parentheses type = parse_cast(); // Skip the closing ')' and then parse the following expression rparen(); default: n = binexpr(0); // Scan in the expression } // We now have at least an expression in n, and possibly a non-zero type in type // if there was a cast. Skip the closing ')' if there was no cast. if (type == 0) rparen(); else // Otherwise, make a unary AST node for the cast n = mkastunary(A_CAST, type, n, NULL, 0); return (n); default: fatals("Expecting a primary expression, got token", Token.tokstr); } // Scan in the next token and return the leaf node scan(&Token); return (n); } // Convert a binary operator token into a binary AST operation. // We rely on a 1:1 mapping from token to AST operation static int binastop(int tokentype) { if (tokentype > T_EOF && tokentype <= T_SLASH) return (tokentype); fatald("Syntax error, token", tokentype); return (0); // Keep -Wall happy } // Return true if a token is right-associative, // false otherwise. static int rightassoc(int tokentype) { if (tokentype >= T_ASSIGN && tokentype <= T_ASSLASH) return (1); return (0); } // Operator precedence for each token. 
Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 10, // T_EOF, T_ASSIGN, T_ASPLUS, 10, 10, 10, // T_ASMINUS, T_ASSTAR, T_ASSLASH, 15, // T_QUESTION, 20, 30, // T_LOGOR, T_LOGAND 40, 50, 60, // T_OR, T_XOR, T_AMPER 70, 70, // T_EQ, T_NE 80, 80, 80, 80, // T_LT, T_GT, T_LE, T_GE 90, 90, // T_LSHIFT, T_RSHIFT 100, 100, // T_PLUS, T_MINUS 110, 110 // T_STAR, T_SLASH }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec; if (tokentype > T_SLASH) fatald("Token with no precedence in op_precedence:", tokentype); prec = OpPrec[tokentype]; if (prec == 0) fatald("Syntax error, token", tokentype); return (prec); } // prefix_expression: primary // | '*' prefix_expression // | '&' prefix_expression // | '-' prefix_expression // | '++' prefix_expression // | '--' prefix_expression // ; // Parse a prefix expression and return // a sub-tree representing it. struct ASTnode *prefix(void) { struct ASTnode *tree; switch (Token.token) { case T_AMPER: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Ensure that it's an identifier if (tree->op != A_IDENT) fatal("& operator must be followed by an identifier"); // Prevent '&' being performed on an array if (tree->sym->stype == S_ARRAY) fatal("& operator cannot be performed on an array"); // Now change the operator to A_ADDR and the type to // a pointer to the original type tree->op = A_ADDR; tree->type = pointer_to(tree->type); break; case T_STAR: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's either another deref or an // identifier if (tree->op != A_IDENT && tree->op != A_DEREF) fatal("* operator must be followed by an identifier or *"); // Prepend an A_DEREF operation to the tree tree = mkastunary(A_DEREF, value_at(tree->type), tree, NULL, 0); break; case T_MINUS: // Get the next token and parse it 
// recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_NEGATE operation to the tree and // make the child an rvalue. Because chars are unsigned, // also widen this to int so that it's signed tree->rvalue = 1; tree = modify_type(tree, P_INT, 0); tree = mkastunary(A_NEGATE, tree->type, tree, NULL, 0); break; case T_INVERT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_INVERT operation to the tree and // make the child an rvalue. tree->rvalue = 1; tree = mkastunary(A_INVERT, tree->type, tree, NULL, 0); break; case T_LOGNOT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_LOGNOT operation to the tree and // make the child an rvalue. tree->rvalue = 1; tree = mkastunary(A_LOGNOT, tree->type, tree, NULL, 0); break; case T_INC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("++ operator must be followed by an identifier"); // Prepend an A_PREINC operation to the tree tree = mkastunary(A_PREINC, tree->type, tree, NULL, 0); break; case T_DEC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("-- operator must be followed by an identifier"); // Prepend an A_PREDEC operation to the tree tree = mkastunary(A_PREDEC, tree->type, tree, NULL, 0); break; default: tree = primary(); } return (tree); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; struct ASTnode *ltemp, *rtemp; int ASTop; int tokentype; // Get the tree on the left. // Fetch the next token at the same time. 
left = prefix(); // If we hit one of several terminating tokens, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON || tokentype == T_RBRACE) { left->rvalue = 1; return (left); } // While the precedence of this token is more than that of the // previous token precedence, or it's right associative and // equal to the previous token's precedence while ((op_precedence(tokentype) > ptp) || (rightassoc(tokentype) && op_precedence(tokentype) == ptp)) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Determine the operation to be performed on the sub-trees ASTop = binastop(tokentype); switch (ASTop) { case A_TERNARY: // Ensure we have a ':' token, scan in the expression after it match(T_COLON, ":"); ltemp= binexpr(0); // Build and return the AST for this statement. Use the middle // expression's type as the return type. XXX We should also // consider the third expression's type. return (mkastnode(A_TERNARY, right->type, left, right, ltemp, NULL, 0)); case A_ASSIGN: // Assignment // Make the right tree into an rvalue right->rvalue = 1; // Ensure the right's type matches the left right = modify_type(right, left->type, 0); if (right == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree. However, switch // left and right around, so that the right expression's // code will be generated before the left expression ltemp = left; left = right; right = ltemp; break; default: // We are not doing a ternary or assignment, so both trees should // be rvalues. Convert both trees into rvalue if they are lvalue trees left->rvalue = 1; right->rvalue = 1; // Ensure the two types are compatible by trying // to modify each tree to match the other's type. 
ltemp = modify_type(left, right->type, ASTop); rtemp = modify_type(right, left->type, ASTop); if (ltemp == NULL && rtemp == NULL) fatal("Incompatible types in binary expression"); if (ltemp != NULL) left = ltemp; if (rtemp != NULL) right = rtemp; } // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. left = mkastnode(binastop(tokentype), left->type, left, NULL, right, NULL, 0); // Update the details of the current token. // If we hit a terminating token, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON || tokentype == T_RBRACE) { left->rvalue = 1; return (left); } } // Return the tree we have when the precedence // is the same or lower left->rvalue = 1; return (left); } ================================================ FILE: 51_Arrays_pt2/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number static int labelid = 1; int genlabel(void) { return (labelid++); } // Generate the code for an IF statement // and an optional ELSE clause. static int genIF(struct ASTnode *n, int looptoplabel, int loopendlabel) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. // When there is no ELSE clause, Lfalse _is_ // the ending label! Lfalse = genlabel(); if (n->right) Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. 
genAST(n->left, Lfalse, NOLABEL, NOLABEL, n->op); genfreeregs(NOREG); // Generate the true compound statement genAST(n->mid, NOLABEL, looptoplabel, loopendlabel, n->op); genfreeregs(NOREG); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); genfreeregs(NOREG); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = genlabel(); Lend = genlabel(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. genAST(n->left, Lend, Lstart, Lend, n->op); genfreeregs(NOREG); // Generate the compound statement for the body genAST(n->right, NOLABEL, Lstart, Lend, n->op); genfreeregs(NOREG); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Generate the code for a SWITCH statement static int genSWITCH(struct ASTnode *n) { int *caseval, *caselabel; int Ljumptop, Lend; int i, reg, defaultlabel = 0, casecount = 0; struct ASTnode *c; // Create arrays for the case values and associated labels. // Ensure that we have at least one position in each array. caseval = (int *) malloc((n->a_intvalue + 1) * sizeof(int)); caselabel = (int *) malloc((n->a_intvalue + 1) * sizeof(int)); // Generate labels for the top of the jump table, and the // end of the switch statement. Set a default label for // the end of the switch, in case we don't have a default. 
Ljumptop = genlabel(); Lend = genlabel(); defaultlabel = Lend; // Output the code to calculate the switch condition reg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, 0); cgjump(Ljumptop); genfreeregs(reg); // Walk the right-child linked list to // generate the code for each case for (i = 0, c = n->right; c != NULL; i++, c = c->right) { // Get a label for this case. Store it // and the case value in the arrays. // Record if it is the default case. caselabel[i] = genlabel(); caseval[i] = c->a_intvalue; cglabel(caselabel[i]); if (c->op == A_DEFAULT) defaultlabel = caselabel[i]; else casecount++; // Generate the case code. Pass in the end label for the breaks. // If case has no body, we will fall into the following body. if (c->left) genAST(c->left, NOLABEL, NOLABEL, Lend, 0); genfreeregs(NOREG); } // Ensure the last case jumps past the switch table cgjump(Lend); // Now output the switch table and the end label. cgswitch(reg, casecount, Ljumptop, caselabel, caseval, defaultlabel); cglabel(Lend); return (NOREG); } // Generate the code to copy the arguments of a // function call to its parameters, then call the // function itself. Return the register that holds // the function's return value. static int gen_funccall(struct ASTnode *n) { struct ASTnode *gluetree = n->left; int reg; int numargs = 0; // If there is a list of arguments, walk this list // from the last argument (right-hand child) to the // first while (gluetree) { // Calculate the expression's value reg = genAST(gluetree->right, NOLABEL, NOLABEL, NOLABEL, gluetree->op); // Copy this into the n'th function parameter: size is 1, 2, 3, ... 
cgcopyarg(reg, gluetree->a_size); // Keep the first (highest) number of arguments if (numargs == 0) numargs = gluetree->a_size; genfreeregs(NOREG); gluetree = gluetree->left; } // Call the function, clean up the stack (based on numargs), // and return its result return (cgcall(n->sym, numargs)); } // Generate code for a ternary expression static int gen_ternary(struct ASTnode *n) { int Lfalse, Lend; int reg, expreg; // Generate two labels: one for the // false expression, and one for the // end of the overall expression Lfalse = genlabel(); Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. genAST(n->left, Lfalse, NOLABEL, NOLABEL, n->op); genfreeregs(NOREG); // Get a register to hold the result of the two expressions reg = alloc_register(); // Generate the true expression and the false label. // Move the expression result into the known register. expreg = genAST(n->mid, NOLABEL, NOLABEL, NOLABEL, n->op); cgmove(expreg, reg); // Don't free the register holding the result, though! genfreeregs(reg); cgjump(Lend); cglabel(Lfalse); // Generate the false expression and the end label. // Move the expression result into the known register. expreg = genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); cgmove(expreg, reg); // Don't free the register holding the result, though! genfreeregs(reg); cglabel(Lend); return (reg); } // Given an AST, an optional label, and the AST op // of the parent, generate assembly code recursively. // Return the register id with the tree's final value. 
int genAST(struct ASTnode *n, int iflabel, int looptoplabel, int loopendlabel, int parentASTop) { int leftreg, rightreg; // We have some specific AST node handling at the top // so that we don't evaluate the child sub-trees immediately switch (n->op) { case A_IF: return (genIF(n, looptoplabel, loopendlabel)); case A_WHILE: return (genWHILE(n)); case A_SWITCH: return (genSWITCH(n)); case A_FUNCCALL: return (gen_funccall(n)); case A_TERNARY: return (gen_ternary(n)); case A_GLUE: // Do each child statement, and free the // registers after each child if (n->left != NULL) genAST(n->left, iflabel, looptoplabel, loopendlabel, n->op); genfreeregs(NOREG); if (n->right != NULL) genAST(n->right, iflabel, looptoplabel, loopendlabel, n->op); genfreeregs(NOREG); return (NOREG); case A_FUNCTION: // Generate the function's preamble before the code // in the child sub-tree cgfuncpreamble(n->sym); genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op); cgfuncpostamble(n->sym); return (NOREG); } // General AST node handling below // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op); if (n->right) rightreg = genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdiv(leftreg, rightreg)); case A_AND: return (cgand(leftreg, rightreg)); case A_OR: return (cgor(leftreg, rightreg)); case A_XOR: return (cgxor(leftreg, rightreg)); case A_LSHIFT: return (cgshl(leftreg, rightreg)); case A_RSHIFT: return (cgshr(leftreg, rightreg)); case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: // If the parent AST node is an A_IF, A_WHILE or A_TERNARY, // generate a compare followed by a jump. Otherwise, compare // registers and set one to 1 or 0 based on the comparison. 
if (parentASTop == A_IF || parentASTop == A_WHILE || parentASTop == A_TERNARY) return (cgcompare_and_jump(n->op, leftreg, rightreg, iflabel)); else return (cgcompare_and_set(n->op, leftreg, rightreg)); case A_INTLIT: return (cgloadint(n->a_intvalue, n->type)); case A_STRLIT: return (cgloadglobstr(n->a_intvalue)); case A_IDENT: // Load our value if we are an rvalue // or we are being dereferenced if (n->rvalue || parentASTop == A_DEREF) { if (n->sym->class == C_GLOBAL || n->sym->class == C_STATIC) { return (cgloadglob(n->sym, n->op)); } else { return (cgloadlocal(n->sym, n->op)); } } else return (NOREG); case A_ASPLUS: case A_ASMINUS: case A_ASSTAR: case A_ASSLASH: case A_ASSIGN: // For the '+=' and friends operators, generate suitable code // and get the register with the result. Then take the left child, // make it the right child so that we can fall into the assignment code. switch (n->op) { case A_ASPLUS: leftreg = cgadd(leftreg, rightreg); n->right = n->left; break; case A_ASMINUS: leftreg = cgsub(leftreg, rightreg); n->right = n->left; break; case A_ASSTAR: leftreg = cgmul(leftreg, rightreg); n->right = n->left; break; case A_ASSLASH: leftreg = cgdiv(leftreg, rightreg); n->right = n->left; break; } // Now into the assignment code // Are we assigning to an identifier or through a pointer? 
switch (n->right->op) { case A_IDENT: if (n->right->sym->class == C_GLOBAL || n->right->sym->class == C_STATIC) return (cgstorglob(leftreg, n->right->sym)); else return (cgstorlocal(leftreg, n->right->sym)); case A_DEREF: return (cgstorderef(leftreg, rightreg, n->right->type)); default: fatald("Can't A_ASSIGN in genAST(), op", n->op); } case A_WIDEN: // Widen the child's type to the parent's type return (cgwiden(leftreg, n->left->type, n->type)); case A_RETURN: cgreturn(leftreg, Functionid); return (NOREG); case A_ADDR: return (cgaddress(n->sym)); case A_DEREF: // If we are an rvalue, dereference to get the value we point at, // otherwise leave it for A_ASSIGN to store through the pointer if (n->rvalue) return (cgderef(leftreg, n->left->type)); else return (leftreg); case A_SCALE: // Small optimisation: use shift if the // scale value is a known power of two switch (n->a_size) { case 2: return (cgshlconst(leftreg, 1)); case 4: return (cgshlconst(leftreg, 2)); case 8: return (cgshlconst(leftreg, 3)); default: // Load a register with the size and // multiply the leftreg by this size rightreg = cgloadint(n->a_size, P_INT); return (cgmul(leftreg, rightreg)); } case A_POSTINC: case A_POSTDEC: // Load and decrement the variable's value into a register // and post increment/decrement it if (n->sym->class == C_GLOBAL || n->sym->class == C_STATIC) return (cgloadglob(n->sym, n->op)); else return (cgloadlocal(n->sym, n->op)); case A_PREINC: case A_PREDEC: // Load and decrement the variable's value into a register // and pre increment/decrement it if (n->left->sym->class == C_GLOBAL || n->left->sym->class == C_STATIC) return (cgloadglob(n->left->sym, n->op)); else return (cgloadlocal(n->left->sym, n->op)); case A_NEGATE: return (cgnegate(leftreg)); case A_INVERT: return (cginvert(leftreg)); case A_LOGNOT: return (cglognot(leftreg)); case A_LOGOR: return (cglogor(leftreg, rightreg)); case A_LOGAND: return (cglogand(leftreg, rightreg)); case A_TOBOOL: // If the parent AST node 
is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, set the register // to 0 or 1 based on it's zeroeness or non-zeroeness return (cgboolean(leftreg, parentASTop, iflabel)); case A_BREAK: cgjump(loopendlabel); return (NOREG); case A_CONTINUE: cgjump(looptoplabel); return (NOREG); case A_CAST: return (leftreg); // Not much to do default: fatald("Unknown AST operator", n->op); } return (NOREG); // Keep -Wall happy } void genpreamble() { cgpreamble(); } void genpostamble() { cgpostamble(); } void genfreeregs(int keepreg) { freeall_registers(keepreg); } void genglobsym(struct symtable *node) { cgglobsym(node); } int genglobstr(char *strvalue) { int l = genlabel(); cgglobstr(l, strvalue); return (l); } int genprimsize(int type) { return (cgprimsize(type)); } int genalign(int type, int offset, int direction) { return (cgalign(type, offset, direction)); } ================================================ FILE: 51_Arrays_pt2/include/ctype.h ================================================ #ifndef _CTYPE_H_ # define _CTYPE_H_ int isalnum(int c); int isalpha(int c); int iscntrl(int c); int isdigit(int c); int isgraph(int c); int islower(int c); int isprint(int c); int ispunct(int c); int isspace(int c); int isupper(int c); int isxdigit(int c); int isascii(int c); int isblank(int c); #endif // _CTYPE_H_ ================================================ FILE: 51_Arrays_pt2/include/errno.h ================================================ #ifndef _ERRNO_H_ # define _ERRNO_H_ #endif // _ERRNO_H_ ================================================ FILE: 51_Arrays_pt2/include/fcntl.h ================================================ #ifndef _FCNTL_H_ # define _FCNTL_H_ #define O_RDONLY 00 #define O_WRONLY 01 #define O_RDWR 02 int open(char *pathname, int flags); #endif // _FCNTL_H_ ================================================ FILE: 51_Arrays_pt2/include/stddef.h ================================================ #ifndef _STDDEF_H_ # define _STDDEF_H_ #ifndef 
NULL # define NULL (void *)0 #endif typedef long size_t; #endif //_STDDEF_H_ ================================================ FILE: 51_Arrays_pt2/include/stdio.h ================================================ #ifndef _STDIO_H_ # define _STDIO_H_ #include #ifndef NULL # define NULL (void *)0 #endif // This FILE definition will do for now typedef char * FILE; FILE *fopen(char *pathname, char *mode); size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream); size_t fwrite(void *ptr, size_t size, size_t nmemb, FILE *stream); int fclose(FILE *stream); int printf(char *format); int fprintf(FILE *stream, char *format); int fputc(int c, FILE *stream); int fputs(char *s, FILE *stream); int putc(int c, FILE *stream); int putchar(int c); int puts(char *s); extern FILE *stdin; extern FILE *stdout; extern FILE *stderr; #endif // _STDIO_H_ ================================================ FILE: 51_Arrays_pt2/include/stdlib.h ================================================ #ifndef _STDLIB_H_ # define _STDLIB_H_ void exit(int status); void _Exit(int status); void *malloc(int size); void free(void *ptr); void *calloc(int nmemb, int size); void *realloc(void *ptr, int size); #endif // _STDLIB_H_ ================================================ FILE: 51_Arrays_pt2/include/string.h ================================================ #ifndef _STRING_H_ # define _STRING_H_ char *strdup(char *s); char *strchr(char *s, int c); char *strrchr(char *s, int c); #endif // _STRING_H_ ================================================ FILE: 51_Arrays_pt2/include/unistd.h ================================================ #ifndef _UNISTD_H_ # define _UNISTD_H_ void _exit(int status); int unlink(char *pathname); #endif // _UNISTD_H_ ================================================ FILE: 51_Arrays_pt2/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include #include // Compiler setup and top-level 
execution // Copyright (c) 2019 Warren Toomey, GPL3 // Given a string with a '.' and at least a 1-character suffix // after the '.', change the suffix to be the given character. // Return the new string or NULL if the original string could // not be modified char *alter_suffix(char *str, char suffix) { char *posn; char *newstr; // Clone the string if ((newstr = strdup(str)) == NULL) return (NULL); // Find the '.' if ((posn = strrchr(newstr, '.')) == NULL) return (NULL); // Ensure there is a suffix posn++; if (*posn == '\0') return (NULL); // Change the suffix and NUL-terminate the string *posn = suffix; posn++; *posn = '\0'; return (newstr); } // Given an input filename, compile that file // down to assembly code. Return the new file's name static char *do_compile(char *filename) { char cmd[TEXTLEN]; // Change the input file's suffix to .s Outfilename = alter_suffix(filename, 's'); if (Outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .c on the end\n", filename); exit(1); } // Generate the pre-processor command snprintf(cmd, TEXTLEN, "%s %s %s", CPPCMD, INCDIR, filename); // Open up the pre-processor pipe if ((Infile = popen(cmd, "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", filename, strerror(errno)); exit(1); } Infilename = filename; // Create the output file if ((Outfile = fopen(Outfilename, "w")) == NULL) { fprintf(stderr, "Unable to create %s: %s\n", Outfilename, strerror(errno)); exit(1); } Line = 1; // Reset the scanner Putback = '\n'; clear_symtable(); // Clear the symbol table if (O_verbose) printf("compiling %s\n", filename); scan(&Token); // Get the first token from the input Peektoken.token= 0; // and set there is no lookahead token genpreamble(); // Output the preamble global_declarations(); // Parse the global declarations genpostamble(); // Output the postamble fclose(Outfile); // Close the output file // Dump the symbol table if requested if (O_dumpsym) { printf("Symbols for %s\n", filename); dumpsymtables(); 
fprintf(stdout, "\n\n"); } freestaticsyms(); // Free any static symbols in the file return (Outfilename); } // Given an input filename, assemble that file // down to object code. Return the object filename char *do_assemble(char *filename) { char cmd[TEXTLEN]; int err; char *outfilename = alter_suffix(filename, 'o'); if (outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .s on the end\n", filename); exit(1); } // Build the assembly command and run it snprintf(cmd, TEXTLEN, "%s %s %s", ASCMD, outfilename, filename); if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Assembly of %s failed\n", filename); exit(1); } return (outfilename); } // Given a list of object files and an output filename, // link all of the object filenames together. void do_link(char *outfilename, char *objlist[]) { int cnt, size = TEXTLEN; char cmd[TEXTLEN], *cptr; int err; // Start with the linker command and the output file cptr = cmd; cnt = snprintf(cptr, size, "%s %s ", LDCMD, outfilename); cptr += cnt; size -= cnt; // Now append each object file while (*objlist != NULL) { cnt = snprintf(cptr, size, "%s ", *objlist); cptr += cnt; size -= cnt; objlist++; } if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Linking failed\n"); exit(1); } } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s [-vcSTM] [-o outfile] file [file ...]\n", prog); fprintf(stderr, " -v give verbose output of the compilation stages\n"); fprintf(stderr, " -c generate object files but don't link them\n"); fprintf(stderr, " -S generate assembly files but don't link them\n"); fprintf(stderr, " -T dump the AST trees for each input file\n"); fprintf(stderr, " -M dump the symbol table for each input file\n"); fprintf(stderr, " -o outfile, produce the outfile executable file\n"); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. 
Open up the input // file and call scanfile() to scan the tokens in it. enum { MAXOBJ = 100 }; int main(int argc, char *argv[]) { char *outfilename = AOUT; char *asmfile, *objfile; char *objlist[MAXOBJ]; int i, objcnt = 0; // Initialise our variables O_dumpAST = 0; O_dumpsym = 0; O_keepasm = 0; O_assemble = 0; O_verbose = 0; O_dolink = 1; // Scan for command-line options for (i = 1; i < argc; i++) { // No leading '-', stop scanning for options if (*argv[i] != '-') break; // For each option in this argument for (int j = 1; (*argv[i] == '-') && argv[i][j]; j++) { switch (argv[i][j]) { case 'o': outfilename = argv[++i]; // Save & skip to next argument break; case 'T': O_dumpAST = 1; break; case 'M': O_dumpsym = 1; break; case 'c': O_assemble = 1; O_keepasm = 0; O_dolink = 0; break; case 'S': O_keepasm = 1; O_assemble = 0; O_dolink = 0; break; case 'v': O_verbose = 1; break; default: usage(argv[0]); } } } // Ensure we have at lease one input file argument if (i >= argc) usage(argv[0]); // Work on each input file in turn while (i < argc) { asmfile = do_compile(argv[i]); // Compile the source file if (O_dolink || O_assemble) { objfile = do_assemble(asmfile); // Assemble it to object forma if (objcnt == (MAXOBJ - 2)) { fprintf(stderr, "Too many object files for the compiler to handle\n"); exit(1); } objlist[objcnt++] = objfile; // Add the object file's name objlist[objcnt] = NULL; // to the list of object files } if (!O_keepasm) // Remove the assembly file if unlink(asmfile); // we don't need to keep it i++; } // Now link all the object files together if (O_dolink) { do_link(outfilename, objlist); // If we don't need to keep the object // files, then remove them if (!O_assemble) { for (i = 0; objlist[i] != NULL; i++) unlink(objlist[i]); } } return (0); } ================================================ FILE: 51_Arrays_pt2/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" #include #include // Miscellaneous 
functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current token is t, // and fetch the next token. Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Match a comma and fetch the next token void comma(void) { match(T_COMMA, "comma"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d of %s\n", s, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d of %s\n", s1, s2, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d of %s\n", s, d, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d of %s\n", s, c, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } ================================================ FILE: 51_Arrays_pt2/opt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST Tree Optimisation Code // Copyright (c) 2019 Warren Toomey, GPL3 // Fold an AST tree with a binary operator // and two A_INTLIT children. Return either // the original tree or a new leaf node. 
static struct ASTnode *fold2(struct ASTnode *n) { int val, leftval, rightval; // Get the values from each child leftval = n->left->a_intvalue; rightval = n->right->a_intvalue; // Perform some of the binary operations. // For any AST op we can't do, return // the original tree. switch (n->op) { case A_ADD: val = leftval + rightval; break; case A_SUBTRACT: val = leftval - rightval; break; case A_MULTIPLY: val = leftval * rightval; break; case A_DIVIDE: // Don't try to divide by zero. if (rightval == 0) return (n); val = leftval / rightval; break; default: return (n); } // Return a leaf node with the new value return (mkastleaf(A_INTLIT, n->type, NULL, val)); } // Fold an AST tree with a unary operator // and one INTLIT children. Return either // the original tree or a new leaf node. static struct ASTnode *fold1(struct ASTnode *n) { int val; // Get the child value. Do the // operation if recognised. // Return the new leaf node. val = n->left->a_intvalue; switch (n->op) { case A_WIDEN: break; case A_INVERT: val = ~val; break; case A_LOGNOT: val = !val; break; default: return (n); } // Return a leaf node with the new value return (mkastleaf(A_INTLIT, n->type, NULL, val)); } // Attempt to do constant folding on // the AST tree with the root node n static struct ASTnode *fold(struct ASTnode *n) { if (n == NULL) return (NULL); // Fold on the left child, then // do the same on the right child n->left = fold(n->left); n->right = fold(n->right); // If both children are A_INTLITs, do a fold2() if (n->left && n->left->op == A_INTLIT) { if (n->right && n->right->op == A_INTLIT) n = fold2(n); else // If only the left is A_INTLIT, do a fold1() n = fold1(n); } // Return the possibly modified tree return (n); } // Optimise an AST tree by // constant folding in all sub-trees struct ASTnode *optimise(struct ASTnode *n) { n = fold(n); return (n); } ================================================ FILE: 51_Arrays_pt2/scan.c ================================================ #include 
"defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { char *p; p = strchr(s, c); return (p ? p - s : -1); } // Get the next character from the input file. static int next(void) { int c, l; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return (c); } c = fgetc(Infile); // Read from input file while (c == '#') { // We've hit a pre-processor statement scan(&Token); // Get the line number into l if (Token.token != T_INTLIT) fatals("Expecting pre-processor line number, got:", Text); l = Token.intvalue; scan(&Token); // Get the filename in Text if (Token.token != T_STRLIT) fatals("Expecting pre-processor file name, got:", Text); if (Text[0] != '<') { // If this is a real filename if (strcmp(Text, Infilename)) // and not the one we have now Infilename = strdup(Text); // save it. Then update the line num Line = l; } while ((c = fgetc(Infile)) != '\n'); // Skip to the end of the line c = fgetc(Infile); // and get the next character } if ('\n' == c) Line++; // Increment line count return (c); } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. 
static int skip(void) {
  int ch;

  // Keep reading until the character is not white space
  do {
    ch = next();
  } while (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' || ch == '\f');

  return (ch);
}

// Read the hexadecimal digits that follow "\x" in a character or
// string literal and return their value. It is a fatal error if
// there are no digits or if the value is above 255.
static int hexchar(void) {
  int ch, value = 0, seen = 0;

  // Accumulate hex digits, most significant first
  for (ch = next(); isxdigit(ch); ch = next()) {
    value = value * 16 + chrpos("0123456789abcdef", tolower(ch));
    seen = 1;
  }

  // The character that ended the run isn't ours
  putback(ch);

  if (!seen)
    fatal("missing digits after '\\x'");
  if (value > 255)
    fatal("value out of range after '\\x'");

  return value;
}

// Return the next character from a character or string literal,
// translating any escape sequence that starts with a backslash.
static int scanch(void) {
  int ndigits, ch, octal;

  // Anything but a backslash is just an ordinary old character
  ch = next();
  if (ch != '\\')
    return (ch);

  ch = next();
  switch (ch) {
    // Single-letter escapes
    case 'a':  return ('\a');
    case 'b':  return ('\b');
    case 'f':  return ('\f');
    case 'n':  return ('\n');
    case 'r':  return ('\r');
    case 't':  return ('\t');
    case 'v':  return ('\v');
    case '\\': return ('\\');
    case '"':  return ('"');
    case '\'': return ('\'');

    // Octal escape: gather octal digits into 'octal', counting them
    // in 'ndigits'. No more than three digits are used.
    case '0': case '1': case '2': case '3':
    case '4': case '5': case '6': case '7':
      ndigits = 0;
      octal = 0;
      while (isdigit(ch) && ch < '8') {
        if (++ndigits > 3)
          break;
        octal = octal * 8 + (ch - '0');
        ch = next();
      }
      putback(ch);              // Put back the first unused char
      return (octal);

    // Hexadecimal escape
    case 'x':
      return hexchar();

    default:
      fatalc("unknown escape sequence", ch);
  }

  return (ch);
}

// Scan and return an integer literal
// value from the input file.
static int scanint(int c) { int k, val = 0, radix = 10; // Assume the radix is 10, but if it starts with 0 if (c == '0') { // and the next character is 'x', it's radix 16 if ((c = next()) == 'x') { radix = 16; c = next(); } else // Otherwise, it's radix 8 radix = 8; } // Convert each character into an int value while ((k = chrpos("0123456789abcdef", tolower(c))) >= 0) { if (k >= radix) fatalc("invalid digit in integer literal", c); val = val * radix + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan in a string literal from the input file, // and store it in buf[]. Return the length of // the string. static int scanstr(char *buf) { int i, c; // Loop while we have enough buffer space for (i = 0; i < TEXTLEN - 1; i++) { // Get the next char and append to buf // Return when we hit the ending double quote if ((c = scanch()) == '"') { buf[i] = 0; return (i); } buf[i] = c; } // Ran out of buf[] space fatal("String literal too long"); return (0); } // Scan an identifier from the input file and // store it in buf[]. Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { fatal("Identifier too long"); } else if (i < lim - 1) { buf[i++] = c; } c = next(); } // We hit a non-valid character, put it back. // NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. 
static int keyword(char *s) { switch (*s) { case 'b': if (!strcmp(s, "break")) return (T_BREAK); break; case 'c': if (!strcmp(s, "case")) return (T_CASE); if (!strcmp(s, "char")) return (T_CHAR); if (!strcmp(s, "continue")) return (T_CONTINUE); break; case 'd': if (!strcmp(s, "default")) return (T_DEFAULT); break; case 'e': if (!strcmp(s, "else")) return (T_ELSE); if (!strcmp(s, "enum")) return (T_ENUM); if (!strcmp(s, "extern")) return (T_EXTERN); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'l': if (!strcmp(s, "long")) return (T_LONG); break; case 'r': if (!strcmp(s, "return")) return (T_RETURN); break; case 's': if (!strcmp(s, "sizeof")) return (T_SIZEOF); if (!strcmp(s, "static")) return (T_STATIC); if (!strcmp(s, "struct")) return (T_STRUCT); if (!strcmp(s, "switch")) return (T_SWITCH); break; case 't': if (!strcmp(s, "typedef")) return (T_TYPEDEF); break; case 'u': if (!strcmp(s, "union")) return (T_UNION); break; case 'v': if (!strcmp(s, "void")) return (T_VOID); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; } return (0); } // List of token strings, for debugging purposes char *Tstring[] = { "EOF", "=", "+=", "-=", "*=", "/=", "?", "||", "&&", "|", "^", "&", "==", "!=", ",", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", "default", "sizeof", "static", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":" }; // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. 
int scan(struct token *t) { int c, tokentype; // If we have a lookahead token, return this token if (Peektoken.token != 0) { t->token = Peektoken.token; t->tokstr = Peektoken.tokstr; t->intvalue = Peektoken.intvalue; Peektoken.token = 0; return (1); } // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': if ((c = next()) == '+') { t->token = T_INC; } else if (c == '=') { t->token = T_ASPLUS; } else { putback(c); t->token = T_PLUS; } break; case '-': if ((c = next()) == '-') { t->token = T_DEC; } else if (c == '>') { t->token = T_ARROW; } else if (c == '=') { t->token = T_ASMINUS; } else if (isdigit(c)) { // Negative int literal t->intvalue = -scanint(c); t->token = T_INTLIT; } else { putback(c); t->token = T_MINUS; } break; case '*': if ((c = next()) == '=') { t->token = T_ASSTAR; } else { putback(c); t->token = T_STAR; } break; case '/': if ((c = next()) == '=') { t->token = T_ASSLASH; } else { putback(c); t->token = T_SLASH; } break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case '[': t->token = T_LBRACKET; break; case ']': t->token = T_RBRACKET; break; case '~': t->token = T_INVERT; break; case '^': t->token = T_XOR; break; case ',': t->token = T_COMMA; break; case '.': t->token = T_DOT; break; case ':': t->token = T_COLON; break; case '?': t->token = T_QUESTION; break; case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { putback(c); t->token = T_LOGNOT; } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else if (c == '<') { t->token = T_LSHIFT; } else { putback(c); t->token = T_LT; } break; case '>': if ((c = next()) == '=') { t->token = T_GE; } else if (c == '>') { t->token = T_RSHIFT; } else { 
putback(c); t->token = T_GT; } break; case '&': if ((c = next()) == '&') { t->token = T_LOGAND; } else { putback(c); t->token = T_AMPER; } break; case '|': if ((c = next()) == '|') { t->token = T_LOGOR; } else { putback(c); t->token = T_OR; } break; case '\'': // If it's a quote, scan in the // literal character value and // the trailing quote t->intvalue = scanch(); t->token = T_INTLIT; if (next() != '\'') fatal("Expected '\\'' at end of char literal"); break; case '"': // Scan in a literal string scanstr(Text); t->token = T_STRLIT; break; default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if ((tokentype = keyword(Text)) != 0) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token t->tokstr = Tstring[t->token]; return (1); } ================================================ FILE: 51_Arrays_pt2/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // Prototypes static struct ASTnode *single_statement(void); // compound_statement: // empty, i.e. 
no statement // | statement // | statement statements // ; // // statement: declaration // | expression_statement // | function_call // | if_statement // | while_statement // | for_statement // | return_statement // ; // if_statement: if_head // | if_head 'else' statement // ; // // if_head: 'if' '(' true_false_expression ')' statement ; // // Parse an IF statement including any // optional ELSE clause and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); rparen(); // Get the AST for the statement trueAST = single_statement(); // If we have an 'else', skip it // and get the AST for the statement if (Token.token == T_ELSE) { scan(&Token); falseAST = single_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, P_NONE, condAST, trueAST, falseAST, NULL, 0)); } // while_statement: 'while' '(' true_false_expression ')' statement ; // // Parse a WHILE statement and return its AST static struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); rparen(); // Get the AST for the statement. 
// Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Build and return the AST for this statement return (mkastnode(A_WHILE, P_NONE, condAST, NULL, bodyAST, NULL, 0)); } // for_statement: 'for' '(' expression_list ';' // true_false_expression ';' // expression_list ')' statement ; // // Parse a FOR statement and return its AST static struct ASTnode *for_statement(void) { struct ASTnode *condAST, *bodyAST; struct ASTnode *preopAST, *postopAST; struct ASTnode *tree; // Ensure we have 'for' '(' match(T_FOR, "for"); lparen(); // Get the pre_op expression and the ';' preopAST = expression_list(T_SEMI); semi(); // Get the condition and the ';'. // Force a non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST, NULL, 0); semi(); // Get the post_op expression and the ')' postopAST = expression_list(T_RPAREN); rparen(); // Get the statement which is the body // Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Glue the statement and the postop tree tree = mkastnode(A_GLUE, P_NONE, bodyAST, NULL, postopAST, NULL, 0); // Make a WHILE loop with the condition and this new body tree = mkastnode(A_WHILE, P_NONE, condAST, NULL, tree, NULL, 0); // And glue the preop tree to the A_WHILE tree return (mkastnode(A_GLUE, P_NONE, preopAST, NULL, tree, NULL, 0)); } // return_statement: 'return' '(' expression ')' ; // // Parse a return statement and return its AST static struct ASTnode *return_statement(void) { struct ASTnode *tree; // Can't return a value if function returns P_VOID if (Functionid->type == P_VOID) fatal("Can't return from a void function"); // Ensure we have 'return' '(' match(T_RETURN, "return"); lparen(); // Parse the following expression tree = binexpr(0); // Ensure this is compatible with the function's type tree = modify_type(tree, 
Functionid->type, 0); if (tree == NULL) fatal("Incompatible type to return"); // Add on the A_RETURN node tree = mkastunary(A_RETURN, P_NONE, tree, NULL, 0); // Get the ')' and ';' rparen(); semi(); return (tree); } // break_statement: 'break' ; // // Parse a break statement and return its AST static struct ASTnode *break_statement(void) { if (Looplevel == 0 && Switchlevel == 0) fatal("no loop or switch to break out from"); scan(&Token); semi(); return (mkastleaf(A_BREAK, 0, NULL, 0)); } // continue_statement: 'continue' ; // // Parse a continue statement and return its AST static struct ASTnode *continue_statement(void) { if (Looplevel == 0) fatal("no loop to continue to"); scan(&Token); semi(); return (mkastleaf(A_CONTINUE, 0, NULL, 0)); } // Parse a switch statement and return its AST static struct ASTnode *switch_statement(void) { struct ASTnode *left, *body, *n, *c; struct ASTnode *casetree= NULL, *casetail; int inloop=1, casecount=0; int seendefault=0; int ASTop, casevalue; // Skip the 'switch' and '(' scan(&Token); lparen(); // Get the switch expression, the ')' and the '{' left= binexpr(0); rparen(); lbrace(); // Ensure that this is of int type if (!inttype(left->type)) fatal("Switch expression is not of integer type"); // Build an A_SWITCH subtree with the expression as // the child n= mkastunary(A_SWITCH, 0, left, NULL, 0); // Now parse the cases Switchlevel++; while (inloop) { switch(Token.token) { // Leave the loop when we hit a '}' case T_RBRACE: if (casecount==0) fatal("No cases in switch"); inloop=0; break; case T_CASE: case T_DEFAULT: // Ensure this isn't after a previous 'default' if (seendefault) fatal("case or default after existing default"); // Set the AST operation. 
Scan the case value if required if (Token.token==T_DEFAULT) { ASTop= A_DEFAULT; seendefault= 1; scan(&Token); } else { ASTop= A_CASE; scan(&Token); left= binexpr(0); // Ensure the case value is an integer literal if (left->op != A_INTLIT) fatal("Expecting integer literal for case value"); casevalue= left->a_intvalue; // Walk the list of existing case values to ensure // that there isn't a duplicate case value for (c= casetree; c != NULL; c= c -> right) if (casevalue == c->a_intvalue) fatal("Duplicate case value"); } // Scan the ':' and increment the casecount match(T_COLON, ":"); casecount++; // If the next token is a T_CASE, the existing case will fall // into the next case. Otherwise, parse the case body. if (Token.token == T_CASE) body= NULL; else body= compound_statement(1); // Build a sub-tree with any compound statement as the left child // and link it in to the growing A_CASE tree if (casetree==NULL) { casetree= casetail= mkastunary(ASTop, 0, body, NULL, casevalue); } else { casetail->right= mkastunary(ASTop, 0, body, NULL, casevalue); casetail= casetail->right; } break; default: fatals("Unexpected token in switch", Token.tokstr); } } Switchlevel--; // We have a sub-tree with the cases and any default. Put the // case count into the A_SWITCH node and attach the case tree. n->a_intvalue= casecount; n->right= casetree; rbrace(); return(n); } // Parse a single statement and return its AST. static struct ASTnode *single_statement(void) { struct ASTnode *stmt; struct symtable *ctype; switch (Token.token) { case T_LBRACE: // We have a '{', so this is a compound statement lbrace(); stmt = compound_statement(0); rbrace(); return(stmt); case T_IDENT: // We have to see if the identifier matches a typedef. // If not, treat it as an expression. // Otherwise, fall down to the parse_type() call. 
if (findtypedef(Text) == NULL) { stmt= binexpr(0); semi(); return(stmt); } case T_CHAR: case T_INT: case T_LONG: case T_STRUCT: case T_UNION: case T_ENUM: case T_TYPEDEF: // The beginning of a variable declaration list. declaration_list(&ctype, C_LOCAL, T_SEMI, T_EOF, &stmt); semi(); return (stmt); // Any assignments from the declarations case T_IF: return (if_statement()); case T_WHILE: return (while_statement()); case T_FOR: return (for_statement()); case T_RETURN: return (return_statement()); case T_BREAK: return (break_statement()); case T_CONTINUE: return (continue_statement()); case T_SWITCH: return (switch_statement()); default: // For now, see if this is an expression. // This catches assignment statements. stmt= binexpr(0); semi(); return(stmt); } return (NULL); // Keep -Wall happy } // Parse a compound statement // and return its AST. If inswitch is true, // we look for a '}', 'case' or 'default' token // to end the parsing. Otherwise, look for // just a '}' to end the parsing. 
struct ASTnode *compound_statement(int inswitch) { struct ASTnode *left = NULL; struct ASTnode *tree; while (1) { // Parse a single statement tree = single_statement(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, P_NONE, left, NULL, tree, NULL, 0); } // Leave if we've hit the end token if (Token.token == T_RBRACE) return(left); if (inswitch && (Token.token == T_CASE || Token.token == T_DEFAULT)) return(left); } } ================================================ FILE: 51_Arrays_pt2/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 // Append a node to the singly-linked list pointed to by head or tail void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node) { // Check for valid pointers if (head == NULL || tail == NULL || node == NULL) fatal("Either head, tail or node is NULL in appendsym"); // Append to the list if (*tail) { (*tail)->next = node; *tail = node; } else *head = *tail = node; node->next = NULL; } // Create a symbol node to be added to a symbol table list. // Set up the node's: // + type: char, int etc. // + ctype: composite type pointer for struct/union // + structural type: var, function, array etc. 
// + size: number of elements, or endlabel: end label for a function // + posn: Position information for local symbols // Return a pointer to the new node struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn) { // Get a new node struct symtable *node = (struct symtable *) malloc(sizeof(struct symtable)); if (node == NULL) fatal("Unable to malloc a symbol table node in newsym"); // Fill in the values if (name == NULL) node->name = NULL; else node->name = strdup(name); node->type = type; node->ctype = ctype; node->stype = stype; node->class = class; node->nelems = nelems; // For pointers and integer types, set the size // of the symbol. structs and union declarations // manually set this up themselves. if (ptrtype(type) || inttype(type)) node->size = nelems * typesize(type, ctype); node->st_posn = posn; node->next = NULL; node->member = NULL; node->initlist = NULL; return (node); } // Add a symbol to the global symbol list struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn) { struct symtable *sym = newsym(name, type, ctype, stype, class, nelems, posn); // For structs and unions, copy the size from the type node if (type == P_STRUCT || type == P_UNION) sym->size = ctype->size; appendsym(&Globhead, &Globtail, sym); return (sym); } // Add a symbol to the local symbol list struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int nelems) { struct symtable *sym = newsym(name, type, ctype, stype, C_LOCAL, nelems, 0); // For structs and unions, copy the size from the type node if (type == P_STRUCT || type == P_UNION) sym->size = ctype->size; appendsym(&Loclhead, &Locltail, sym); return (sym); } // Add a symbol to the parameter list struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype) { struct symtable *sym = newsym(name, type, ctype, stype, C_PARAM, 1, 0); appendsym(&Parmhead, &Parmtail, sym); 
return (sym); } // Add a symbol to the temporary member list struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int nelems) { struct symtable *sym = newsym(name, type, ctype, stype, C_MEMBER, nelems, 0); // For structs and unions, copy the size from the type node if (type == P_STRUCT || type == P_UNION) sym->size = ctype->size; appendsym(&Membhead, &Membtail, sym); return (sym); } // Add a struct to the struct list struct symtable *addstruct(char *name) { struct symtable *sym = newsym(name, P_STRUCT, NULL, 0, C_STRUCT, 0, 0); appendsym(&Structhead, &Structtail, sym); return (sym); } // Add a struct to the union list struct symtable *addunion(char *name) { struct symtable *sym = newsym(name, P_UNION, NULL, 0, C_UNION, 0, 0); appendsym(&Unionhead, &Uniontail, sym); return (sym); } // Add an enum type or value to the enum list. // Class is C_ENUMTYPE or C_ENUMVAL. // Use posn to store the int value. struct symtable *addenum(char *name, int class, int value) { struct symtable *sym = newsym(name, P_INT, NULL, 0, class, 0, value); appendsym(&Enumhead, &Enumtail, sym); return (sym); } // Add a typedef to the typedef list struct symtable *addtypedef(char *name, int type, struct symtable *ctype) { struct symtable *sym = newsym(name, type, ctype, 0, C_TYPEDEF, 0, 0); appendsym(&Typehead, &Typetail, sym); return (sym); } // Search for a symbol in a specific list. // Return a pointer to the found node or NULL if not found. // If class is not zero, also match on the given class static struct symtable *findsyminlist(char *s, struct symtable *list, int class) { for (; list != NULL; list = list->next) if ((list->name != NULL) && !strcmp(s, list->name)) if (class == 0 || class == list->class) return (list); return (NULL); } // Determine if the symbol s is in the global symbol table. // Return a pointer to the found node or NULL if not found. 
struct symtable *findglob(char *s) { return (findsyminlist(s, Globhead, 0)); } // Determine if the symbol s is in the local symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findlocl(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } return (findsyminlist(s, Loclhead, 0)); } // Determine if the symbol s is in the symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findsymbol(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } // Otherwise, try the local and global symbol lists node = findsyminlist(s, Loclhead, 0); if (node) return (node); return (findsyminlist(s, Globhead, 0)); } // Find a member in the member list // Return a pointer to the found node or NULL if not found. struct symtable *findmember(char *s) { return (findsyminlist(s, Membhead, 0)); } // Find a struct in the struct list // Return a pointer to the found node or NULL if not found. struct symtable *findstruct(char *s) { return (findsyminlist(s, Structhead, 0)); } // Find a struct in the union list // Return a pointer to the found node or NULL if not found. struct symtable *findunion(char *s) { return (findsyminlist(s, Unionhead, 0)); } // Find an enum type in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumtype(char *s) { return (findsyminlist(s, Enumhead, C_ENUMTYPE)); } // Find an enum value in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumval(char *s) { return (findsyminlist(s, Enumhead, C_ENUMVAL)); } // Find a type in the tyedef list // Return a pointer to the found node or NULL if not found. 
struct symtable *findtypedef(char *s) { return (findsyminlist(s, Typehead, 0)); } // Reset the contents of the symbol table void clear_symtable(void) { Globhead = Globtail = NULL; Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Membhead = Membtail = NULL; Structhead = Structtail = NULL; Unionhead = Uniontail = NULL; Enumhead = Enumtail = NULL; Typehead = Typetail = NULL; } // Clear all the entries in the local symbol table void freeloclsyms(void) { Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Functionid = NULL; } // Remove all static symbols from the global symbol table void freestaticsyms(void) { // g points at current node, prev at the previous one struct symtable *g, *prev = NULL; // Walk the global table looking for static entries for (g = Globhead; g != NULL; g = g->next) { if (g->class == C_STATIC) { // If there's a previous node, rearrange the prev pointer // to skip over the current node. If not, g is the head, // so do the same to Globhead if (prev != NULL) prev->next = g->next; else Globhead->next = g->next; // If g is the tail, point Globtail at the previous node // (if there is one), or Globhead if (g == Globtail) { if (prev != NULL) Globtail = prev; else Globtail = Globhead; } } } // Point prev at g before we move up to the next node prev = g; } // Dump a single symbol static void dumpsym(struct symtable *sym, int indent) { int i; for (i = 0; i < indent; i++) printf(" "); switch (sym->type & (~0xf)) { case P_VOID: printf("void "); break; case P_CHAR: printf("char "); break; case P_INT: printf("int "); break; case P_LONG: printf("long "); break; case P_STRUCT: if (sym->ctype != NULL) printf("struct %s ", sym->ctype->name); else printf("struct %s ", sym->name); break; case P_UNION: if (sym->ctype != NULL) printf("union %s ", sym->ctype->name); else printf("union %s ", sym->name); break; default: printf("unknown type "); } for (i = 0; i < (sym->type & 0xf); i++) printf("*"); printf("%s", sym->name); switch (sym->stype) { case 
S_VARIABLE: break; case S_FUNCTION: printf("()"); break; case S_ARRAY: printf("[]"); break; default: printf(" unknown stype"); } switch (sym->class) { case C_GLOBAL: printf(": global"); break; case C_LOCAL: printf(": local"); break; case C_PARAM: printf(": param"); break; case C_EXTERN: printf(": extern"); break; case C_STATIC: printf(": static"); break; case C_STRUCT: printf(": struct"); break; case C_UNION: printf(": union"); break; case C_MEMBER: printf(": member"); break; case C_ENUMTYPE: printf(": enumtype"); break; case C_ENUMVAL: printf(": enumval"); break; case C_TYPEDEF: printf(": typedef"); break; default: printf(": unknown class"); } switch (sym->stype) { case S_VARIABLE: if (sym->class == C_ENUMVAL) printf(", value %d\n", sym->st_posn); else printf(", size %d\n", sym->size); break; case S_FUNCTION: printf(", %d params\n", sym->nelems); break; case S_ARRAY: printf(", %d elems, size %d\n", sym->nelems, sym->size); break; } switch (sym->type & (~0xf)) { case P_STRUCT: case P_UNION: dumptable(sym->member, NULL, 4); } switch (sym->stype) { case S_FUNCTION: dumptable(sym->member, NULL, 4); } } // Dump one symbol table void dumptable(struct symtable *head, char *name, int indent) { struct symtable *sym; if (head != NULL && name != NULL) printf("%s\n--------\n", name); for (sym = head; sym != NULL; sym = sym->next) dumpsym(sym, indent); } void dumpsymtables(void) { dumptable(Globhead, "Global", 0); printf("\n"); dumptable(Enumhead, "Enums", 0); printf("\n"); dumptable(Typehead, "Typedefs", 0); } ================================================ FILE: 51_Arrays_pt2/tests/err.input031.c ================================================ Expecting a primary expression, got token:+ on line 5 of input031.c ================================================ FILE: 51_Arrays_pt2/tests/err.input032.c ================================================ Unknown variable:cow on line 4 of input032.c ================================================ FILE: 
51_Arrays_pt2/tests/err.input033.c ================================================ Incompatible type to return on line 4 of input033.c ================================================ FILE: 51_Arrays_pt2/tests/err.input034.c ================================================ For now, declaration of non-global arrays is not implemented on line 4 of input034.c ================================================ FILE: 51_Arrays_pt2/tests/err.input035.c ================================================ Duplicate local variable declaration:a on line 4 of input035.c ================================================ FILE: 51_Arrays_pt2/tests/err.input036.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input036.c ================================================ FILE: 51_Arrays_pt2/tests/err.input037.c ================================================ Expected:comma on line 3 of input037.c ================================================ FILE: 51_Arrays_pt2/tests/err.input038.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input038.c ================================================ FILE: 51_Arrays_pt2/tests/err.input039.c ================================================ No statements in function with non-void type on line 4 of input039.c ================================================ FILE: 51_Arrays_pt2/tests/err.input040.c ================================================ No return for function with non-void type on line 4 of input040.c ================================================ FILE: 51_Arrays_pt2/tests/err.input041.c ================================================ Can't return from a void function on line 3 of input041.c ================================================ FILE: 51_Arrays_pt2/tests/err.input042.c ================================================ Undeclared function:fred on line 3 of input042.c 
================================================ FILE: 51_Arrays_pt2/tests/err.input043.c ================================================ Undeclared variable:b on line 3 of input043.c ================================================ FILE: 51_Arrays_pt2/tests/err.input044.c ================================================ Unknown variable:z on line 3 of input044.c ================================================ FILE: 51_Arrays_pt2/tests/err.input045.c ================================================ & operator must be followed by an identifier on line 3 of input045.c ================================================ FILE: 51_Arrays_pt2/tests/err.input046.c ================================================ * operator must be followed by an identifier or * on line 3 of input046.c ================================================ FILE: 51_Arrays_pt2/tests/err.input047.c ================================================ ++ operator must be followed by an identifier on line 3 of input047.c ================================================ FILE: 51_Arrays_pt2/tests/err.input048.c ================================================ -- operator must be followed by an identifier on line 3 of input048.c ================================================ FILE: 51_Arrays_pt2/tests/err.input049.c ================================================ Incompatible expression in assignment on line 6 of input049.c ================================================ FILE: 51_Arrays_pt2/tests/err.input050.c ================================================ Incompatible types in binary expression on line 6 of input050.c ================================================ FILE: 51_Arrays_pt2/tests/err.input051.c ================================================ Expected '\'' at end of char literal on line 4 of input051.c ================================================ FILE: 51_Arrays_pt2/tests/err.input052.c ================================================ Unrecognised character:$ on line 5 of input052.c 
================================================ FILE: 51_Arrays_pt2/tests/err.input056.c ================================================ unknown struct/union type:var1 on line 2 of input056.c ================================================ FILE: 51_Arrays_pt2/tests/err.input057.c ================================================ previously defined struct/union:fred on line 2 of input057.c ================================================ FILE: 51_Arrays_pt2/tests/err.input059.c ================================================ Undeclared variable:y on line 3 of input059.c ================================================ FILE: 51_Arrays_pt2/tests/err.input060.c ================================================ Undeclared variable:x on line 3 of input060.c ================================================ FILE: 51_Arrays_pt2/tests/err.input061.c ================================================ Undeclared variable:x on line 3 of input061.c ================================================ FILE: 51_Arrays_pt2/tests/err.input064.c ================================================ undeclared enum type::fred on line 1 of input064.c ================================================ FILE: 51_Arrays_pt2/tests/err.input065.c ================================================ enum type redeclared::fred on line 2 of input065.c ================================================ FILE: 51_Arrays_pt2/tests/err.input066.c ================================================ enum value redeclared::z on line 2 of input066.c ================================================ FILE: 51_Arrays_pt2/tests/err.input068.c ================================================ redefinition of typedef:FOO on line 2 of input068.c ================================================ FILE: 51_Arrays_pt2/tests/err.input069.c ================================================ unknown type:FLOO on line 2 of input069.c ================================================ FILE: 51_Arrays_pt2/tests/err.input072.c 
================================================ no loop or switch to break out from on line 1 of input072.c ================================================ FILE: 51_Arrays_pt2/tests/err.input073.c ================================================ no loop to continue to on line 1 of input073.c ================================================ FILE: 51_Arrays_pt2/tests/err.input075.c ================================================ Unexpected token in switch:if on line 4 of input075.c ================================================ FILE: 51_Arrays_pt2/tests/err.input076.c ================================================ No cases in switch on line 3 of input076.c ================================================ FILE: 51_Arrays_pt2/tests/err.input077.c ================================================ case or default after existing default on line 6 of input077.c ================================================ FILE: 51_Arrays_pt2/tests/err.input078.c ================================================ case or default after existing default on line 6 of input078.c ================================================ FILE: 51_Arrays_pt2/tests/err.input079.c ================================================ Duplicate case value on line 6 of input079.c ================================================ FILE: 51_Arrays_pt2/tests/err.input085.c ================================================ Bad type in parameter list on line 1 of input085.c ================================================ FILE: 51_Arrays_pt2/tests/err.input086.c ================================================ Function definition not at global level on line 3 of input086.c ================================================ FILE: 51_Arrays_pt2/tests/err.input087.c ================================================ Bad type in member list on line 4 of input087.c ================================================ FILE: 51_Arrays_pt2/tests/err.input092.c ================================================ Type mismatch: literal 
vs. variable on line 1 of input092.c ================================================ FILE: 51_Arrays_pt2/tests/err.input093.c ================================================ Unknown variable:fred on line 1 of input093.c ================================================ FILE: 51_Arrays_pt2/tests/err.input094.c ================================================ Type mismatch: literal vs. variable on line 1 of input094.c ================================================ FILE: 51_Arrays_pt2/tests/err.input095.c ================================================ Variable can not be initialised:x on line 1 of input095.c ================================================ FILE: 51_Arrays_pt2/tests/err.input096.c ================================================ Array size is illegal:0 on line 1 of input096.c ================================================ FILE: 51_Arrays_pt2/tests/err.input097.c ================================================ For now, declaration of non-global arrays is not implemented on line 2 of input097.c ================================================ FILE: 51_Arrays_pt2/tests/err.input098.c ================================================ Too many values in initialisation list on line 1 of input098.c ================================================ FILE: 51_Arrays_pt2/tests/err.input102.c ================================================ Cannot cast to a struct, union or void type on line 3 of input102.c ================================================ FILE: 51_Arrays_pt2/tests/err.input103.c ================================================ Cannot cast to a struct, union or void type on line 3 of input103.c ================================================ FILE: 51_Arrays_pt2/tests/err.input104.c ================================================ Cannot cast to a struct, union or void type on line 2 of input104.c ================================================ FILE: 51_Arrays_pt2/tests/err.input105.c ================================================ 
Incompatible expression in assignment on line 4 of input105.c ================================================ FILE: 51_Arrays_pt2/tests/err.input118.c ================================================ Compiler doesn't support static or extern local declarations on line 2 of input118.c ================================================ FILE: 51_Arrays_pt2/tests/err.input124.c ================================================ Cannot ++ on rvalue:ary on line 6 of input124.c ================================================ FILE: 51_Arrays_pt2/tests/err.input126.c ================================================ Unknown variable:ptr on line 7 of input126.c ================================================ FILE: 51_Arrays_pt2/tests/input001.c ================================================ int printf(char *fmt); void main() { printf("%d\n", 12 * 3); printf("%d\n", 18 - 2 * 4); printf("%d\n", 1 + 2 + 9 - 5/2 + 3*5); } ================================================ FILE: 51_Arrays_pt2/tests/input002.c ================================================ int printf(char *fmt); void main() { int fred; int jim; fred= 5; jim= 12; printf("%d\n", fred + jim); } ================================================ FILE: 51_Arrays_pt2/tests/input003.c ================================================ int printf(char *fmt); void main() { int x; x= 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); } ================================================ FILE: 51_Arrays_pt2/tests/input004.c ================================================ int printf(char *fmt); void main() { int x; x= 7 < 9; printf("%d\n", x); x= 7 <= 9; printf("%d\n", x); x= 7 != 9; printf("%d\n", x); x= 7 == 7; printf("%d\n", x); x= 7 >= 7; printf("%d\n", x); x= 7 <= 7; printf("%d\n", x); x= 9 > 7; printf("%d\n", x); x= 9 >= 7; printf("%d\n", x); x= 9 != 7; printf("%d\n", x); } ================================================ FILE: 
51_Arrays_pt2/tests/input005.c ================================================ int printf(char *fmt); void main() { int i; int j; i=6; j=12; if (i < j) { printf("%d\n", i); } else { printf("%d\n", j); } } ================================================ FILE: 51_Arrays_pt2/tests/input006.c ================================================ int printf(char *fmt); void main() { int i; i=1; while (i <= 10) { printf("%d\n", i); i= i + 1; } } ================================================ FILE: 51_Arrays_pt2/tests/input007.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 51_Arrays_pt2/tests/input008.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 51_Arrays_pt2/tests/input009.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { printf("%d\n", 2 * b - a); } } ================================================ FILE: 51_Arrays_pt2/tests/input010.c ================================================ int printf(char *fmt); void main() { int i; char j; j= 20; printf("%d\n", j); i= 10; printf("%d\n", i); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 2; j= j + 1) { printf("%d\n", j); } } ================================================ FILE: 51_Arrays_pt2/tests/input011.c ================================================ int printf(char *fmt); int main() { int i; char j; long k; i= 10; printf("%d\n", i); j= 20; printf("%d\n", j); k= 30; printf("%d\n", k); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 4; j= j + 1) { printf("%d\n", j); } for (k= 1; k <= 5; k= k + 1) { 
printf("%d\n", k); } return(i); printf("%d\n", 12345); return(3); } ================================================ FILE: 51_Arrays_pt2/tests/input012.c ================================================ int printf(char *fmt); int fred() { return(5); } void main() { int x; x= fred(2); printf("%d\n", x); } ================================================ FILE: 51_Arrays_pt2/tests/input013.c ================================================ int printf(char *fmt); int fred() { return(56); } void main() { int dummy; int result; dummy= printf("%d\n", 23); result= fred(10); dummy= printf("%d\n", result); } ================================================ FILE: 51_Arrays_pt2/tests/input014.c ================================================ int printf(char *fmt); int fred() { return(20); } int main() { int result; printf("%d\n", 10); result= fred(15); printf("%d\n", result); printf("%d\n", fred(15)+10); return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input015.c ================================================ int printf(char *fmt); int main() { char a; char *b; char c; int d; int *e; int f; a= 18; printf("%d\n", a); b= &a; c= *b; printf("%d\n", c); d= 12; printf("%d\n", d); e= &d; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input016.c ================================================ int printf(char *fmt); int c; int d; int *e; int f; int main() { c= 12; d=18; printf("%d\n", c); e= &c + 1; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input017.c ================================================ int printf(char *fmt); int main() { char a; char *b; int d; int *e; b= &a; *b= 19; printf("%d\n", a); e= &d; *e= 12; printf("%d\n", d); return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input018.c ================================================ int printf(char *fmt); int 
main() { int a; int b; a= b= 34; printf("%d\n", a); printf("%d\n", b); return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input018a.c ================================================ int printf(char *fmt); int a; int *b; char c; char *d; int main() { b= &a; *b= 15; printf("%d\n", a); d= &c; *d= 16; printf("%d\n", c); return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input019.c ================================================ int printf(char *fmt); int a; int b; int c; int d; int e; int main() { a= 2; b= 4; c= 3; d= 2; e= (a+b) * (c+d); printf("%d\n", e); return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input020.c ================================================ int printf(char *fmt); int a; int b[25]; int main() { b[3]= 12; a= b[3]; printf("%d\n", a); return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input021.c ================================================ int printf(char *fmt); char c; char *str; int main() { c= '\n'; printf("%d\n", c); for (str= "Hello world\n"; *str != 0; str= str + 1) { printf("%c", *str); } return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input022.c ================================================ int printf(char *fmt); char a; char b; char c; int d; int e; int f; long g; long h; long i; int main() { b= 5; c= 7; a= b + c++; printf("%d\n", a); e= 5; f= 7; d= e + f++; printf("%d\n", d); h= 5; i= 7; g= h + i++; printf("%d\n", g); a= b-- + c; printf("%d\n", a); d= e-- + f; printf("%d\n", d); g= h-- + i; printf("%d\n", g); a= ++b + c; printf("%d\n", a); d= ++e + f; printf("%d\n", d); g= ++h + i; printf("%d\n", g); a= b * --c; printf("%d\n", a); d= e * --f; printf("%d\n", d); g= h * --i; printf("%d\n", g); return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input023.c ================================================ int 
printf(char *fmt); char *str; int x; int main() { x= -23; printf("%d\n", x); printf("%d\n", -10 * -10); x= 1; x= ~x; printf("%d\n", x); x= 2 > 5; printf("%d\n", x); x= !x; printf("%d\n", x); x= !x; printf("%d\n", x); x= 13; if (x) { printf("%d\n", 13); } x= 0; if (!x) { printf("%d\n", 14); } for (str= "Hello world\n"; *str; str++) { printf("%c", *str); } return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input024.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { a= 42; b= 19; printf("%d\n", a & b); printf("%d\n", a | b); printf("%d\n", a ^ b); printf("%d\n", 1 << 3); printf("%d\n", 63 >> 3); return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input025.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { char z; int y; int x; x= 10; y= 20; z= 30; printf("%d\n", x); printf("%d\n", y); printf("%d\n", z); a= 5; b= 15; c= 25; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input026.c ================================================ int printf(char *fmt); int main(int a, char b, long c, int d, int e, int f, int g, int h) { int i; int j; int k; a= 13; printf("%d\n", a); b= 23; printf("%d\n", b); c= 34; printf("%d\n", c); d= 44; printf("%d\n", d); e= 54; printf("%d\n", e); f= 64; printf("%d\n", f); g= 74; printf("%d\n", g); h= 84; printf("%d\n", h); i= 94; printf("%d\n", i); j= 95; printf("%d\n", j); k= 96; printf("%d\n", k); return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input027.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); 
printf("%d\n", h); return(0); } int param5(int a, int b, int c, int d, int e) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param2(int a, int b) { int c; int d; int e; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param0() { int a; int b; int c; int d; int e; a= 1; b= 2; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int main() { param8(1,2,3,4,5,6,7,8); param5(1,2,3,4,5); param2(1,2); param0(); return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input028.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input029.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h); int fred(int a, int b, int c); int main(); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input030.c ================================================ int printf(char 
*fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input030.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 51_Arrays_pt2/tests/input031.c ================================================ int printf(char *fmt); int main() { int x; x= 2 + + 3 - * / ; } ================================================ FILE: 51_Arrays_pt2/tests/input032.c ================================================ int printf(char *fmt); int main() { pizza cow llama sausage; } ================================================ FILE: 51_Arrays_pt2/tests/input033.c ================================================ int printf(char *fmt); int main() { char *z; return(z); } ================================================ FILE: 51_Arrays_pt2/tests/input034.c ================================================ int printf(char *fmt); int main() { int a[12]; return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input035.c ================================================ int printf(char *fmt); int fred(int a, int b) { int a; return(a); } ================================================ FILE: 51_Arrays_pt2/tests/input036.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c); ================================================ FILE: 51_Arrays_pt2/tests/input037.c ================================================ int printf(char *fmt); int fred(int a, char b +, int z); ================================================ FILE: 51_Arrays_pt2/tests/input038.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c, int g); 
================================================ FILE: 51_Arrays_pt2/tests/input039.c ================================================ int printf(char *fmt); int main() { int a; } ================================================ FILE: 51_Arrays_pt2/tests/input040.c ================================================ int printf(char *fmt); int main() { int a; a= 5; } ================================================ FILE: 51_Arrays_pt2/tests/input041.c ================================================ int printf(char *fmt); void fred() { return(5); } ================================================ FILE: 51_Arrays_pt2/tests/input042.c ================================================ int printf(char *fmt); int main() { fred(5); } ================================================ FILE: 51_Arrays_pt2/tests/input043.c ================================================ int printf(char *fmt); int main() { int a; a= b[4]; } ================================================ FILE: 51_Arrays_pt2/tests/input044.c ================================================ int printf(char *fmt); int main() { int a; a= z; } ================================================ FILE: 51_Arrays_pt2/tests/input045.c ================================================ int printf(char *fmt); int main() { int a; a= &5; } ================================================ FILE: 51_Arrays_pt2/tests/input046.c ================================================ int printf(char *fmt); int main() { int a; a= *5; } ================================================ FILE: 51_Arrays_pt2/tests/input047.c ================================================ int printf(char *fmt); int main() { int a; a= ++5; } ================================================ FILE: 51_Arrays_pt2/tests/input048.c ================================================ int printf(char *fmt); int main() { int a; a= --5; } ================================================ FILE: 51_Arrays_pt2/tests/input049.c ================================================ int 
printf(char *fmt); int main() { int x; char y; y= x; } ================================================ FILE: 51_Arrays_pt2/tests/input050.c ================================================ int printf(char *fmt); int main() { char *a; char *b; a= a + b; } ================================================ FILE: 51_Arrays_pt2/tests/input051.c ================================================ int printf(char *fmt); int main() { char a; a= 'fred'; } ================================================ FILE: 51_Arrays_pt2/tests/input052.c ================================================ int printf(char *fmt); int main() { int a; a= $5.00; } ================================================ FILE: 51_Arrays_pt2/tests/input053.c ================================================ int printf(char *fmt); int main() { printf("Hello world, %d\n", 23); return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input054.c ================================================ int printf(char *fmt); int main() { int i; for (i=0; i < 20; i++) { printf("Hello world, %d\n", i); } return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input055.c ================================================ int printf(char *fmt); int main(int argc, char **argv) { int i; char *argument; printf("Hello world\n"); for (i=0; i < argc; i++) { argument= *argv; argv= argv + 1; printf("Argument %d is %s\n", i, argument); } return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input056.c ================================================ struct foo { int x; }; struct mary var1; ================================================ FILE: 51_Arrays_pt2/tests/input057.c ================================================ struct fred { int x; } ; struct fred { char y; } ; ================================================ FILE: 51_Arrays_pt2/tests/input058.c ================================================ int printf(char *fmt); struct fred { 
int x; char y; long z; }; struct fred var2; struct fred *varptr; int main() { long result; var2.x= 12; printf("%d\n", var2.x); var2.y= 'c'; printf("%d\n", var2.y); var2.z= 4005; printf("%d\n", var2.z); result= var2.x + var2.y + var2.z; printf("%d\n", result); varptr= &var2; result= varptr->x + varptr->y + varptr->z; printf("%d\n", result); return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input059.c ================================================ int main() { int x; x= y.foo; } ================================================ FILE: 51_Arrays_pt2/tests/input060.c ================================================ int main() { int x; x= x.foo; } ================================================ FILE: 51_Arrays_pt2/tests/input061.c ================================================ int main() { int x; x= x->foo; } ================================================ FILE: 51_Arrays_pt2/tests/input062.c ================================================ int printf(char *fmt); union fred { char w; int x; int y; long z; }; union fred var1; union fred *varptr; int main() { var1.x= 65; printf("%d\n", var1.x); var1.x= 66; printf("%d\n", var1.x); printf("%d\n", var1.y); printf("The next two depend on the endian of the platform\n"); printf("%d\n", var1.w); printf("%d\n", var1.z); varptr= &var1; varptr->x= 67; printf("%d\n", varptr->x); printf("%d\n", varptr->y); return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input063.c ================================================ int printf(char *fmt); enum fred { apple=1, banana, carrot, pear=10, peach, mango, papaya }; enum jane { aple=1, bnana, crrot, par=10, pech, mago, paaya }; enum fred var1; enum jane var2; enum fred var3; int main() { var1= carrot + pear + mango; printf("%d\n", var1); return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input064.c ================================================ enum fred var3; 
================================================ FILE: 51_Arrays_pt2/tests/input065.c ================================================ enum fred { x, y, z }; enum fred { a, b }; ================================================ FILE: 51_Arrays_pt2/tests/input066.c ================================================ enum fred { x, y, z }; enum mary { a, b, z }; ================================================ FILE: 51_Arrays_pt2/tests/input067.c ================================================ int printf(char *fmt); typedef int FOO; FOO var1; struct bar { int x; int y} ; typedef struct bar BAR; BAR var2; int main() { var1= 5; printf("%d\n", var1); var2.x= 7; var2.y= 10; printf("%d\n", var2.x + var2.y); return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input068.c ================================================ typedef int FOO; typedef char FOO; ================================================ FILE: 51_Arrays_pt2/tests/input069.c ================================================ typedef int FOO; FLOO y; ================================================ FILE: 51_Arrays_pt2/tests/input070.c ================================================ #include typedef int FOO; int main() { FOO x; x= 56; printf("%d\n", x); return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input071.c ================================================ #include int main() { int x; x = 0; while (x < 100) { if (x == 5) { x = x + 2; continue; } printf("%d\n", x); if (x == 14) { break; } x = x + 1; } printf("Done\n"); return (0); } ================================================ FILE: 51_Arrays_pt2/tests/input072.c ================================================ int main() { break; } ================================================ FILE: 51_Arrays_pt2/tests/input073.c ================================================ int main() { continue; } ================================================ FILE: 51_Arrays_pt2/tests/input074.c 
================================================ #include int main() { int x; int y; y= 0; for (x=0; x < 5; x++) { switch(x) { case 1: { y= 5; break; } case 2: { y= 7; break; } case 3: { y= 9; } default: { y= 100; } } printf("%d\n", y); } return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input075.c ================================================ int main() { int x; switch(x) { if (x<5); } } ================================================ FILE: 51_Arrays_pt2/tests/input076.c ================================================ int main() { int x; switch(x) { } } ================================================ FILE: 51_Arrays_pt2/tests/input077.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } case 4: { x= 2; } } } ================================================ FILE: 51_Arrays_pt2/tests/input078.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } default: { x= 2; } } } ================================================ FILE: 51_Arrays_pt2/tests/input079.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } case 2: { x= 2; } case 1: { x= 2; } default: { x= 2; } } } ================================================ FILE: 51_Arrays_pt2/tests/input080.c ================================================ #include int x; int y; int main() { for (x=0, y=1; x < 6; x++, y=y+2) { printf("%d %d\n", x, y); } return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input081.c ================================================ #include int x; int y; int main() { x= 0; y=1; for (;x<5;) { printf("%d %d\n", x, y); x=x+1; y=y+2; } return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input082.c ================================================ #include int main() { int x; x= 10; // Dangling else test if (x > 
5) if (x > 15) printf("x > 15\n"); else printf("15 >= x > 5\n"); else printf("5 >= x\n"); return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input083.c ================================================ #include int main() { int x; // Dangling else test. // We should not print anything for x<= 5 for (x=0; x < 12; x++) if (x > 5) if (x > 10) printf("10 < %2d\n", x); else printf(" 5 < %2d <= 10\n", x); return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input084.c ================================================ #include int main() { int x, y; x=2; y=3; printf("%d %d\n", x, y); char a, *b; a= 'f'; b= &a; printf("%c %c\n", a, *b); return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input085.c ================================================ int main(struct foo { int x; }; , int a) { return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input086.c ================================================ int main() { int fred() { return(5); } int x; x=2; return(x); } ================================================ FILE: 51_Arrays_pt2/tests/input087.c ================================================ struct foo { int x; int y; struct blah { int g; }; }; ================================================ FILE: 51_Arrays_pt2/tests/input088.c ================================================ #include struct foo { int x; int y; } fred, mary; struct foo james; int main() { fred.x= 5; mary.y= 6; printf("%d %d\n", fred.x, mary.y); return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input089.c ================================================ #include int x= 23; char y= 'H'; char *z= "Hello world"; int main() { printf("%d %c %s\n", x, y, z); return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input090.c ================================================ #include int a = 23, b = 100; 
char y = 'H', *z = "Hello world"; int main() { printf("%d %d %c %s\n", a, b, y, z); return (0); } ================================================ FILE: 51_Arrays_pt2/tests/input091.c ================================================ #include int fred[] = { 1, 2, 3, 4, 5 }; int jim[10] = { 1, 2, 3, 4, 5 }; int main() { int i; for (i=0; i < 5; i++) printf("%d\n", fred[i]); for (i=0; i < 10; i++) printf("%d\n", jim[i]); return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input092.c ================================================ char x= 3000; ================================================ FILE: 51_Arrays_pt2/tests/input093.c ================================================ char x= fred; ================================================ FILE: 51_Arrays_pt2/tests/input094.c ================================================ char *s= 54; ================================================ FILE: 51_Arrays_pt2/tests/input095.c ================================================ int fred(int x=2) { return(x); } ================================================ FILE: 51_Arrays_pt2/tests/input096.c ================================================ int fred[0]; ================================================ FILE: 51_Arrays_pt2/tests/input097.c ================================================ int main() { int x[45]; return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input098.c ================================================ int fred[3]= { 1, 2, 3, 4, 5 }; ================================================ FILE: 51_Arrays_pt2/tests/input099.c ================================================ #include // List of token strings, for debugging purposes. 
// As yet, we can't store a NULL into the list char *Tstring[] = { "EOF", "=", "||", "&&", "|", "^", "&", "==", "!=", ",", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", "default", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":", "" }; int main() { int i; char *str; i=0; while (1) { str= Tstring[i]; if (*str == 0) break; printf("%s\n", str); i++; } return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input100.c ================================================ #include int main() { int x= 3, y=14; int z= 2 * x + y; char *str= "Hello world"; printf("%s %d %d\n", str, x+y, z); return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input101.c ================================================ #include int main() { int x= 65535; char y; char *str; y= (char )x; printf("0x%x\n", y); str= (char *)0; printf("0x%lx\n", (long)str); return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input102.c ================================================ int main() { struct foo { int p; }; int y= (struct foo) x; } ================================================ FILE: 51_Arrays_pt2/tests/input103.c ================================================ int main() { union foo { int p; }; int y= (union foo) x; } ================================================ FILE: 51_Arrays_pt2/tests/input104.c ================================================ int main() { int y= (void) x; } ================================================ FILE: 51_Arrays_pt2/tests/input105.c ================================================ int main() { int x; char *y; y= (char) x; } ================================================ FILE: 51_Arrays_pt2/tests/input106.c 
================================================ #include char *y= (char *)0; int main() { printf("0x%lx\n", (long)y); return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input107.c ================================================ #include char *y[] = { "fish", "cow", NULL }; char *z= NULL; int main() { int i; char *ptr; for (i=0; i < 3; i++) { ptr= y[i]; if (ptr != (char *)0) printf("%s\n", y[i]); else printf("NULL\n"); } return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input108.c ================================================ int main() { char *str= (void *)0; return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input109.c ================================================ #include int main() { int x= 17 - 1; printf("%d\n", x); return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input110.c ================================================ #include int x; int y; int main() { x= 3; y= 15; y += x; printf("%d\n", y); x= 3; y= 15; y -= x; printf("%d\n", y); x= 3; y= 15; y *= x; printf("%d\n", y); x= 3; y= 15; y /= x; printf("%d\n", y); return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input111.c ================================================ #include int main() { int x= 2000 + 3 + 4 * 5 + 6; printf("%d\n", x); return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input112.c ================================================ #include char* y = NULL; int x= 10 + 6; int fred [ 2 + 3 ]; int main() { fred[2]= x; printf("%d\n", fred[2]); return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input113.c ================================================ #include void fred(void); void fred(void) { printf("fred says hello\n"); } int main(void) { fred(); return(0); } ================================================ FILE: 
51_Arrays_pt2/tests/input114.c ================================================ #include int main() { int x= 0x4a; printf("%c\n", x); return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input115.c ================================================ #include struct foo { int x; char y; long z; }; typedef struct foo blah; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol struct symtable *ctype; // If struct/union, ptr to that type int stype; // Structural type for the symbol int class; // Storage class for the symbol int size; // Total size in bytes of this symbol int nelems; // Functions: # params. Arrays: # elements #define st_endlabel st_posn // For functions, the end label int st_posn; // For locals, the negative offset // from the stack base pointer int *initlist; // List of initial values struct symtable *next; // Next symbol in one list struct symtable *member; // First member of a function, struct, }; // union or enum // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates int rvalue; // True if the node is an rvalue struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; struct symtable *sym; // For many AST nodes, the pointer to // the symbol in the symbol table #define a_intvalue a_size // For A_INTLIT, the integer value int a_size; // For A_SCALE, the size to scale by }; int main() { printf("%ld\n", sizeof(char)); printf("%ld\n", sizeof(int)); printf("%ld\n", sizeof(long)); printf("%ld\n", sizeof(char *)); printf("%ld\n", sizeof(blah)); printf("%ld\n", sizeof(struct symtable)); printf("%ld\n", sizeof(struct ASTnode)); return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input116.c ================================================ #include static int counter=0; static int 
fred(void) { return(counter++); } int main(void) { int i; for (i=0; i < 5; i++) printf("%d\n", fred()); return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input117.c ================================================ #include static char *fred(void) { return("Hello"); } int main(void) { printf("%s\n", fred()); return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input118.c ================================================ int main(void) { static int x; } ================================================ FILE: 51_Arrays_pt2/tests/input119.c ================================================ #include int x; int y= 3; int main() { x= y != 3 ? 6 : 8; printf("%d\n", x); x= (y == 3) ? 6 : 8; printf("%d\n", x); return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input120.c ================================================ #include int x; int y= 3; int main() { for (y= 0; y < 10; y++) { x= y > 4 ? y + 2 : y + 9; printf("%d\n", x); } return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input121.c ================================================ #include int x; int y= 3; int main() { for (y= 0; y < 10; y++) { x= (y < 4) ? y + 2 : (y > 7) ? 
1000 : y + 9; printf("%d\n", x); } return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input122.c ================================================ #include int x, y, z1, z2; int main() { for (x= 0; x <= 1; x++) { for (y= 0; y <= 1; y++) { z1= x || y; z2= x && y; printf("x %d, y %d, x || y %d, x && y %d\n", x, y, z1, z2); } } //z= x || y; return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input123.c ================================================ #include int main() { int x; for (x=0; x < 20; x++) switch(x) { case 2: case 3: case 5: case 7: case 11: printf("%2d infant prime\n", x); break; case 13: case 17: case 19: printf("%2d teen prime\n", x); break; case 0: case 1: case 4: case 6: case 8: case 9: case 10: case 12: printf("%2d infant composite\n", x); break; default: printf("%2d teen composite\n", x); break; } return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input124.c ================================================ #include int ary[5]; int main() { ary++; return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input125.c ================================================ #include int ary[5]; int *ptr; int x; int main() { ary[3]= 2008; ptr= ary; // Load ary's address into ptr x= ary[3]; printf("%d\n", x); x= ptr[3]; printf("%d\n", x); // Treat ptr as an array return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input126.c ================================================ #include int ary[5]; int main() { ary[3]= 2008; ptr= &ary; return(0); } ================================================ FILE: 51_Arrays_pt2/tests/input127.c ================================================ #include int ary[5]; void fred(int *ptr) { // Receive a pointer printf("%d\n", ptr[3]); } int main() { ary[3]= 2008; printf("%d\n", ary[3]); fred(ary); // Pass ary as a pointer return(0); } 
================================================ FILE: 51_Arrays_pt2/tests/mktests ================================================
#!/bin/sh
# Make the output files for each test.
#
# For every input*c file that has neither an out.<file> nor an err.<file>
# yet, compile it with our compiler (../cwj). If the compiler wrote anything
# to stderr, keep those messages as err.<file>. Otherwise rebuild the
# program with the system cc, run it, and save its stdout as out.<file>.

# Build our compiler if needed.
# Use && so that make is not run in the wrong directory if cd fails.
if [ ! -f ../cwj ]
then (cd .. && make install)
fi

for i in input*c
do
  # If nothing matches, the shell leaves the pattern as a literal word;
  # skip it instead of feeding a non-existent file to the compiler
  [ -f "$i" ] || continue

  if [ ! -f "out.$i" ] && [ ! -f "err.$i" ]
  then
    ../cwj -o out "$i" 2> "err.$i"

    # If the err file is empty
    if [ ! -s "err.$i" ]
    then
      rm -f "err.$i"
      # NOTE(review): if cc fails here, ./out is still the binary that
      # ../cwj produced above -- confirm that is acceptable as the
      # "known good" output
      cc -o out "$i"
      ./out > "out.$i"
    fi
  fi
  rm -f out out.s
done
================================================ FILE: 51_Arrays_pt2/tests/out.input001.c ================================================ 36 10 25 ================================================ FILE: 51_Arrays_pt2/tests/out.input002.c ================================================ 17 ================================================ FILE: 51_Arrays_pt2/tests/out.input003.c ================================================ 1 2 3 4 5 ================================================ FILE: 51_Arrays_pt2/tests/out.input004.c ================================================ 1 1 1 1 1 1 1 1 1 ================================================ FILE: 51_Arrays_pt2/tests/out.input005.c ================================================ 6 ================================================ FILE: 51_Arrays_pt2/tests/out.input006.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 51_Arrays_pt2/tests/out.input007.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 51_Arrays_pt2/tests/out.input008.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 51_Arrays_pt2/tests/out.input009.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 51_Arrays_pt2/tests/out.input010.c ================================================ 20 10
1 2 3 4 5 253 254 255 0 1 ================================================ FILE: 51_Arrays_pt2/tests/out.input011.c ================================================ 10 20 30 1 2 3 4 5 253 254 255 0 1 2 3 1 2 3 4 5 ================================================ FILE: 51_Arrays_pt2/tests/out.input012.c ================================================ 5 ================================================ FILE: 51_Arrays_pt2/tests/out.input013.c ================================================ 23 56 ================================================ FILE: 51_Arrays_pt2/tests/out.input014.c ================================================ 10 20 30 ================================================ FILE: 51_Arrays_pt2/tests/out.input015.c ================================================ 18 18 12 12 ================================================ FILE: 51_Arrays_pt2/tests/out.input016.c ================================================ 12 18 ================================================ FILE: 51_Arrays_pt2/tests/out.input017.c ================================================ 19 12 ================================================ FILE: 51_Arrays_pt2/tests/out.input018.c ================================================ 34 34 ================================================ FILE: 51_Arrays_pt2/tests/out.input018a.c ================================================ 15 16 ================================================ FILE: 51_Arrays_pt2/tests/out.input019.c ================================================ 30 ================================================ FILE: 51_Arrays_pt2/tests/out.input020.c ================================================ 12 ================================================ FILE: 51_Arrays_pt2/tests/out.input021.c ================================================ 10 Hello world ================================================ FILE: 51_Arrays_pt2/tests/out.input022.c ================================================ 12 12 12 13 13 13 13 13 13 35 35 35 
================================================ FILE: 51_Arrays_pt2/tests/out.input023.c ================================================ -23 100 -2 0 1 0 13 14 Hello world ================================================ FILE: 51_Arrays_pt2/tests/out.input024.c ================================================ 2 59 57 8 7 ================================================ FILE: 51_Arrays_pt2/tests/out.input025.c ================================================ 10 20 30 5 15 25 ================================================ FILE: 51_Arrays_pt2/tests/out.input026.c ================================================ 13 23 34 44 54 64 74 84 94 95 96 ================================================ FILE: 51_Arrays_pt2/tests/out.input027.c ================================================ 1 2 3 4 5 6 7 8 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 ================================================ FILE: 51_Arrays_pt2/tests/out.input028.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 51_Arrays_pt2/tests/out.input029.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 51_Arrays_pt2/tests/out.input030.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input030.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 51_Arrays_pt2/tests/out.input053.c ================================================ Hello world, 23 ================================================ FILE: 51_Arrays_pt2/tests/out.input054.c ================================================ Hello world, 0 Hello world, 1 Hello world, 2 
Hello world, 3 Hello world, 4 Hello world, 5 Hello world, 6 Hello world, 7 Hello world, 8 Hello world, 9 Hello world, 10 Hello world, 11 Hello world, 12 Hello world, 13 Hello world, 14 Hello world, 15 Hello world, 16 Hello world, 17 Hello world, 18 Hello world, 19 ================================================ FILE: 51_Arrays_pt2/tests/out.input055.c ================================================ Hello world Argument 0 is ./out ================================================ FILE: 51_Arrays_pt2/tests/out.input058.c ================================================ 12 99 4005 4116 4116 ================================================ FILE: 51_Arrays_pt2/tests/out.input062.c ================================================ 65 66 66 The next two depend on the endian of the platform 66 66 67 67 ================================================ FILE: 51_Arrays_pt2/tests/out.input063.c ================================================ 25 ================================================ FILE: 51_Arrays_pt2/tests/out.input067.c ================================================ 5 17 ================================================ FILE: 51_Arrays_pt2/tests/out.input070.c ================================================ 56 ================================================ FILE: 51_Arrays_pt2/tests/out.input071.c ================================================ 0 1 2 3 4 7 8 9 10 11 12 13 14 Done ================================================ FILE: 51_Arrays_pt2/tests/out.input074.c ================================================ 100 5 7 100 100 ================================================ FILE: 51_Arrays_pt2/tests/out.input080.c ================================================ 0 1 1 3 2 5 3 7 4 9 5 11 ================================================ FILE: 51_Arrays_pt2/tests/out.input081.c ================================================ 0 1 1 3 2 5 3 7 4 9 ================================================ FILE: 51_Arrays_pt2/tests/out.input082.c 
================================================ 15 >= x > 5 ================================================ FILE: 51_Arrays_pt2/tests/out.input083.c ================================================ 5 < 6 <= 10 5 < 7 <= 10 5 < 8 <= 10 5 < 9 <= 10 5 < 10 <= 10 10 < 11 ================================================ FILE: 51_Arrays_pt2/tests/out.input084.c ================================================ 2 3 f f ================================================ FILE: 51_Arrays_pt2/tests/out.input088.c ================================================ 5 6 ================================================ FILE: 51_Arrays_pt2/tests/out.input089.c ================================================ 23 H Hello world ================================================ FILE: 51_Arrays_pt2/tests/out.input090.c ================================================ 23 100 H Hello world ================================================ FILE: 51_Arrays_pt2/tests/out.input091.c ================================================ 1 2 3 4 5 1 2 3 4 5 0 0 0 0 0 ================================================ FILE: 51_Arrays_pt2/tests/out.input099.c ================================================ EOF = || && | ^ & == != , > <= >= << >> + - * / ++ -- ~ ! void char int long if else while for return struct union enum typedef extern break continue switch case default intlit strlit ; identifier { } ( ) [ ] , . 
-> : ================================================ FILE: 51_Arrays_pt2/tests/out.input100.c ================================================ Hello world 17 20 ================================================ FILE: 51_Arrays_pt2/tests/out.input101.c ================================================ 0xff 0x0 ================================================ FILE: 51_Arrays_pt2/tests/out.input106.c ================================================ 0x0 ================================================ FILE: 51_Arrays_pt2/tests/out.input107.c ================================================ fish cow NULL ================================================ FILE: 51_Arrays_pt2/tests/out.input108.c ================================================ ================================================ FILE: 51_Arrays_pt2/tests/out.input109.c ================================================ 16 ================================================ FILE: 51_Arrays_pt2/tests/out.input110.c ================================================ 18 12 45 5 ================================================ FILE: 51_Arrays_pt2/tests/out.input111.c ================================================ 2029 ================================================ FILE: 51_Arrays_pt2/tests/out.input112.c ================================================ 16 ================================================ FILE: 51_Arrays_pt2/tests/out.input113.c ================================================ fred says hello ================================================ FILE: 51_Arrays_pt2/tests/out.input114.c ================================================ J ================================================ FILE: 51_Arrays_pt2/tests/out.input115.c ================================================ 1 4 8 8 13 64 48 ================================================ FILE: 51_Arrays_pt2/tests/out.input116.c ================================================ 0 1 2 3 4 ================================================ FILE: 
51_Arrays_pt2/tests/out.input117.c ================================================ Hello ================================================ FILE: 51_Arrays_pt2/tests/out.input119.c ================================================ 8 6 ================================================ FILE: 51_Arrays_pt2/tests/out.input120.c ================================================ 9 10 11 12 13 7 8 9 10 11 ================================================ FILE: 51_Arrays_pt2/tests/out.input121.c ================================================ 2 3 4 5 13 14 15 16 1000 1000 ================================================ FILE: 51_Arrays_pt2/tests/out.input122.c ================================================ x 0, y 0, x || y 0, x && y 0 x 0, y 1, x || y 1, x && y 0 x 1, y 0, x || y 1, x && y 0 x 1, y 1, x || y 1, x && y 1 ================================================ FILE: 51_Arrays_pt2/tests/out.input123.c ================================================ 0 infant composite 1 infant composite 2 infant prime 3 infant prime 4 infant composite 5 infant prime 6 infant composite 7 infant prime 8 infant composite 9 infant composite 10 infant composite 11 infant prime 12 infant composite 13 teen prime 14 teen composite 15 teen composite 16 teen composite 17 teen prime 18 teen composite 19 teen prime ================================================ FILE: 51_Arrays_pt2/tests/out.input125.c ================================================ 2008 2008 ================================================ FILE: 51_Arrays_pt2/tests/out.input127.c ================================================ 2008 2008 ================================================ FILE: 51_Arrays_pt2/tests/runtests ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! 
-f ../cwj ] then (cd ..; make install) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" # Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../cwj -o out $i ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../cwj $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.s "trial.$i" done ================================================ FILE: 51_Arrays_pt2/tests/runtestsn ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! -f ../compn ] then (cd ..; make install) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" # Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../compn -o out $i ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" 
-eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../compn $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.o out.s "trial.$i" done ================================================ FILE: 51_Arrays_pt2/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->type = type; n->left = left; n->mid = mid; n->right = right; n->sym = sym; n->a_intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, struct symtable *sym, int intvalue) { return (mkastnode(op, type, NULL, NULL, NULL, sym, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, int type, struct ASTnode *left, struct symtable *sym, int intvalue) { return (mkastnode(op, type, left, NULL, NULL, sym, intvalue)); } // Generate and return a new label number // just for AST dumping purposes static int dumpid = 1; static int gendumplabel(void) { return (dumpid++); } // Given an AST tree, print it out and follow the // traversal of the tree that genAST() follows void dumpAST(struct ASTnode *n, int label, int level) { int Lfalse, Lstart, Lend; switch (n->op) { case A_IF: Lfalse = 
gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_IF"); if (n->right) { Lend = gendumplabel(); fprintf(stdout, ", end L%d", Lend); } fprintf(stdout, "\n"); dumpAST(n->left, Lfalse, level + 2); dumpAST(n->mid, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); return; case A_WHILE: Lstart = gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_WHILE, start L%d\n", Lstart); Lend = gendumplabel(); dumpAST(n->left, Lend, level + 2); dumpAST(n->right, NOLABEL, level + 2); return; } // Reset level to -2 for A_GLUE if (n->op == A_GLUE) level = -2; // General AST node handling if (n->left) dumpAST(n->left, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); for (int i = 0; i < level; i++) fprintf(stdout, " "); switch (n->op) { case A_GLUE: fprintf(stdout, "\n\n"); return; case A_FUNCTION: fprintf(stdout, "A_FUNCTION %s\n", n->sym->name); return; case A_ADD: fprintf(stdout, "A_ADD\n"); return; case A_SUBTRACT: fprintf(stdout, "A_SUBTRACT\n"); return; case A_MULTIPLY: fprintf(stdout, "A_MULTIPLY\n"); return; case A_DIVIDE: fprintf(stdout, "A_DIVIDE\n"); return; case A_EQ: fprintf(stdout, "A_EQ\n"); return; case A_NE: fprintf(stdout, "A_NE\n"); return; case A_LT: fprintf(stdout, "A_LE\n"); return; case A_GT: fprintf(stdout, "A_GT\n"); return; case A_LE: fprintf(stdout, "A_LE\n"); return; case A_GE: fprintf(stdout, "A_GE\n"); return; case A_INTLIT: fprintf(stdout, "A_INTLIT %d\n", n->a_intvalue); return; case A_STRLIT: fprintf(stdout, "A_STRLIT rval label L%d\n", n->a_intvalue); return; case A_IDENT: if (n->rvalue) fprintf(stdout, "A_IDENT rval %s\n", n->sym->name); else fprintf(stdout, "A_IDENT %s\n", n->sym->name); return; case A_ASSIGN: fprintf(stdout, "A_ASSIGN\n"); return; case A_WIDEN: fprintf(stdout, "A_WIDEN\n"); return; case A_RETURN: fprintf(stdout, "A_RETURN\n"); return; case A_FUNCCALL: fprintf(stdout, "A_FUNCCALL %s\n", n->sym->name); return; case 
A_ADDR: fprintf(stdout, "A_ADDR %s\n", n->sym->name); return; case A_DEREF: if (n->rvalue) fprintf(stdout, "A_DEREF rval\n"); else fprintf(stdout, "A_DEREF\n"); return; case A_SCALE: fprintf(stdout, "A_SCALE %d\n", n->a_size); return; case A_PREINC: fprintf(stdout, "A_PREINC %s\n", n->sym->name); return; case A_PREDEC: fprintf(stdout, "A_PREDEC %s\n", n->sym->name); return; case A_POSTINC: fprintf(stdout, "A_POSTINC\n"); return; case A_POSTDEC: fprintf(stdout, "A_POSTDEC\n"); return; case A_NEGATE: fprintf(stdout, "A_NEGATE\n"); return; case A_BREAK: fprintf(stdout, "A_BREAK\n"); return; case A_CONTINUE: fprintf(stdout, "A_CONTINUE\n"); return; case A_CASE: fprintf(stdout, "A_CASE %d\n", n->a_intvalue); return; case A_DEFAULT: fprintf(stdout, "A_DEFAULT\n"); return; case A_SWITCH: fprintf(stdout, "A_SWITCH\n"); return; case A_CAST: fprintf(stdout, "A_CAST %d\n", n->type); return; case A_ASPLUS: fprintf(stdout, "A_ASPLUS\n"); return; case A_ASMINUS: fprintf(stdout, "A_ASMINUS\n"); return; case A_ASSTAR: fprintf(stdout, "A_ASSTAR\n"); return; case A_ASSLASH: fprintf(stdout, "A_ASSLASH\n"); return; default: fatald("Unknown dumpAST operator", n->op); } } ================================================ FILE: 51_Arrays_pt2/types.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Types and type handling // Copyright (c) 2019 Warren Toomey, GPL3 // Return true if a type is an int type // of any size, false otherwise int inttype(int type) { return (((type & 0xf) == 0) && (type >= P_CHAR && type <= P_LONG)); } // Return true if a type is of pointer type int ptrtype(int type) { return ((type & 0xf) != 0); } // Given a primitive type, return // the type which is a pointer to it int pointer_to(int type) { if ((type & 0xf) == 0xf) fatald("Unrecognised in pointer_to: type", type); return (type + 1); } // Given a primitive pointer type, return // the type which it points to int value_at(int type) { if ((type & 0xf) == 0x0) 
fatald("Unrecognised in value_at: type", type); return (type - 1); } // Given a type and a composite type pointer, return // the size of this type in bytes int typesize(int type, struct symtable *ctype) { if (type == P_STRUCT || type == P_UNION) return (ctype->size); return (genprimsize(type)); } // Given an AST tree and a type which we want it to become, // possibly modify the tree by widening or scaling so that // it is compatible with this type. Return the original tree // if no changes occurred, a modified tree, or NULL if the // tree is not compatible with the given type. // If this will be part of a binary operation, the AST op is not zero. struct ASTnode *modify_type(struct ASTnode *tree, int rtype, int op) { int ltype; int lsize, rsize; ltype = tree->type; // XXX No idea on these yet if (ltype == P_STRUCT || ltype == P_UNION) fatal("Don't know how to do this yet"); if (rtype == P_STRUCT || rtype == P_UNION) fatal("Don't know how to do this yet"); // Compare scalar int types if (inttype(ltype) && inttype(rtype)) { // Both types same, nothing to do if (ltype == rtype) return (tree); // Get the sizes for each type lsize = typesize(ltype, NULL); rsize = typesize(rtype, NULL); // Tree's size is too big if (lsize > rsize) return (NULL); // Widen to the right if (rsize > lsize) return (mkastunary(A_WIDEN, rtype, tree, NULL, 0)); } // For pointers if (ptrtype(ltype) && ptrtype(rtype)) { // We can compare them if (op >= A_EQ && op <= A_GE) return(tree); // A comparison of the same type for a non-binary operation is OK, // or when the left tree is of `void *` type. 
if (op == 0 && (ltype == rtype || ltype == pointer_to(P_VOID))) return (tree); } // We can scale only on A_ADD or A_SUBTRACT operation if (op == A_ADD || op == A_SUBTRACT) { // Left is int type, right is pointer type and the size // of the original type is >1: scale the left if (inttype(ltype) && ptrtype(rtype)) { rsize = genprimsize(value_at(rtype)); if (rsize > 1) return (mkastunary(A_SCALE, rtype, tree, NULL, rsize)); else return (tree); // Size 1, no need to scale } } // If we get here, the types are not compatible return (NULL); } ================================================ FILE: 52_Pointers_pt2/Makefile ================================================ # Define the location of the include directory # and the location to install the compiler binary INCDIR=/tmp/include BINDIR=/tmp HSRCS= data.h decl.h defs.h SRCS= cg.c decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c ARMSRCS= cg_arm.c decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c cwj: $(SRCS) $(HSRCS) cc -o cwj -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCS) compn: $(SRCN) $(HSRCS) cc -D__NASM__ -o compn -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCN) cwjarm: $(ARMSRCS) $(HSRCS) cc -o cwjarm -g -Wall $(ARMSRCS) cp cwjarm cwj install: cwj mkdir -p $(INCDIR) rsync -a include/. $(INCDIR) cp cwj $(BINDIR) chmod +x $(BINDIR)/cwj installn: compn mkdir -p $(INCDIR) rsync -a include/. 
$(INCDIR) cp compn $(BINDIR) chmod +x $(BINDIR)/compn clean: rm -f cwj cwjarm compn *.o *.s out a.out test: install tests/runtests (cd tests; chmod +x runtests; ./runtests) armtest: cwjarm tests/runtests (cd tests; chmod +x runtests; ./runtests) testn: installn tests/runtestsn (cd tests; chmod +x runtestsn; ./runtestsn) ================================================ FILE: 52_Pointers_pt2/Readme.md ================================================ # Part 52: Pointers, part 2 In this part of our compiler writing journey, I started with a pointer issue that I needed to fix, and I ended up restructuring about half of `expr.c` and changing the API to another quarter of the functions in the compiler. So this is a big step in terms of number of lines touched, but not a big step in terms of fixes or improvements. ## The Problem We'll start with the problem that caused all of this. When running the compiler's source code through itself, I realised that I couldn't parse a chain of pointers, e.g. something like the expression: ```c ptr->next->next->next ``` The reason for this is that `primary()` is called and gets the value of the identifier at the beginning of the expression. If it sees a following postfix operator, it then calls `postfix()` to deal with it. `postfix()` deals with, for example, one `->` operator and returns. And that's it. There is no loop to follow a chain of `->` operators. Even worse, `primary()` looks for a single identifier. This means that it won't parse the following, either: ```c ptrarray[4]->next OR unionvar.member->next ``` because neither of these is a single identifier before the `->` operator. ## How Did This Happen? This happened because of the rapid prototyping nature of our development. I only add functionality one small step at a time, and I don't usually look too far ahead in terms of future needs. So, now and then, we have to undo what has been written to make it more general and flexible. ## How to Fix It?
If we look at the [BNF Grammar for C](https://www.lysator.liu.se/c/ANSI-C-grammar-y.html), we see this: ``` primary_expression : IDENTIFIER | CONSTANT | STRING_LITERAL | '(' expression ')' ; postfix_expression : primary_expression | postfix_expression '[' expression ']' | postfix_expression '(' ')' | postfix_expression '(' argument_expression_list ')' | postfix_expression '.' IDENTIFIER | postfix_expression '->' IDENTIFIER | postfix_expression '++' | postfix_expression '--' ; ``` In other words, we have things backwards. `postfix` should call `primary()` to get an AST node that represents the identifier. Then, we can loop looking for any postfix tokens, parse them and add new AST parent nodes on to the identifier node that we received back from `primary()`. It all sounds nice and simple except for one thing. The current `primary()` doesn't build an AST node; it only parses the identifier and leaves it in `Text`. It's the job of `postfix()` to build the AST node or AST tree for the identifier plus any postfix operations. At the same time, the AST node structure in `defs.h` only knows about the primitive type: ```c // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates int rvalue; // NOTE: no ctype ... }; ``` The reason for this is that we only recently added structs and unions. This, and the fact that `postfix()` did most of the parsing work meant that we haven't needed to store a pointer to the struct or union symbol for an identifier which is a struct or union. So, to fix things, we need to: 1. Add in a `ctype` pointer to `struct ASTnode` so that the full type is stored in each AST node. 2. Find and fix all the functions that build AST nodes, and all the calls to these function, so that the `ctype` of a node is stored. 3. Move `primary()` up near the top of `expr.c` and get it to build an AST node. 4. 
Get `postfix()` to call `primary()` to get the unadorned AST node for an identifier (A_IDENT). 5. Get `postfix()` to loop while there are postfix operators to process. That's a lot and, as the AST node calls are sprinkled everywhere, every single source file in the compiler will need to be touched. Sigh. ## Changes to the AST Node Functions I'm not going to bore you with all the details, but we can start with the change to the AST node structure in `defs.h`, and the main function in `tree.c` that builds an AST node: ```c // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates struct symtable *ctype; // If struct/union, ptr to that type ... }; // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct symtable *ctype, ...) { ... // Copy in the field values and return it n->op = op; n->type = type; n->ctype = ctype; ... } ``` There are also changes to `mkastleaf()` and `mkastunary()`: they now receive a `ctype` and call `mkastnode()` with this argument. In the compiler there are about 40 calls to these three functions, so I'm not going to go through each and every one. For most of them, there is a primitive `type` and ` ctype` pointer available. Some calls set the AST node type to P_INT and thus the `ctype` is NULL. Some calls set the AST node type to P_NONE and, again, the `ctype` is NULL. ## Changes to `modify_type()` The `modify_type()` is used to determine if an AST node's type is compatible with another type and, if necessary, to widen the node to match the other type. It calls `mkastunary()` and thus we also need to provide it with a `ctype` argument. I've done this and, as a consequence, the six calls to `modify_type()` have had to be modified to pass in the `ctype` of the type which we are comparing the AST node against. 
## Changes to `expr.c` Now we get to the meat of the changes, the restructuring of `primary()` and `postfix()`. I've already outlined what we have to do above. As with much of what we've done, there are a few wrinkles along the way to iron out. ## Changes to `postfix()` `postfix()` actually looks much cleaner now: ```c // Parse a postfix expression and return // an AST node representing it. The // identifier is already in Text. static struct ASTnode *postfix(void) { struct ASTnode *n; // Get the primary expression n = primary(); // Loop until there are no more postfix operators while (1) { switch (Token.token) { ... default: return (n); } } ``` We now call `primary()` to get an identifier or a constant. Then we loop applying any postfix operators to the AST node we received from `primary()`. We call out to helper functions like `array_access()` and `member_access()` for `[..]`, `.` and `->` operators. We do post-increment and post-decrement here. Now that there is a loop, we have to check that we don't try to do these operations more than once. We also check that the AST we received from `primary()` is an lvalue and not an rvalue, as we need an address in memory to increment or decrement. ## A New Function, `paren_expression()` I realised that the new `primary()` function was getting a bit too big, so I split some of its code off into a new function, `paren_expression()`. This parses expressions that are enclosed in `(..)`: casts and ordinary parenthesised expressions. The code is nearly identical to the old code, so I won't go into it here. It returns an AST node with the tree that represents either a cast expression or a parenthesised expression. ## Changes to `primary()` This is where the biggest change has occurred. Firstly, here are the tokens it looks for: + 'static', 'extern' which it complains about, because we can only be parsing expressions in a local context. + 'sizeof()' + integer and string literals + identifiers: these could be known types (e.g.
'int'), names of enums, names of typedefs, function names, array names and/or scalar variable names. This section is the biggest part of `primary()` and, on reflection, perhaps I should make this into its own function. + `(..)` which is where `paren_expression()` gets called. Looking at the code, `primary()` now builds AST nodes for each of the above to return to `postfix()`. This used to be done in `postfix()` but I now do it in `primary()`. ## Changes to `member_access()` With the previous `member_access()`, the global `Text` variable still held the identifier, and `member_access()` built the AST node to represent the struct/union identifier. In the current `member_access()`, we receive the AST node for the struct/union identifier, and this could be an array element or a member of another struct/union. So the code is different in that we don't build the leaf AST node for the original identifier anymore. We still build AST nodes to add on the offset from the base and dereference the pointer to the member. One other difference is this code: ```c // Check that the left AST tree is a struct or union. // If so, change it from an A_IDENT to an A_ADDR so that // we get the base address, not the value at this address. if (!withpointer) { if (left->type == P_STRUCT || left->type == P_UNION) left->op = A_ADDR; else fatal("Expression is not a struct/union"); } ``` Consider the expression `foo.bar`. `foo` is the name of a struct, for example, and `bar` is a member of that struct. In `primary()` we will have created an A_IDENT AST node for `foo`, because we can't tell if this is a scalar variable (e.g. `int foo`) or a structure (e.g. `struct fred foo`). Now that we know it's a struct or a union, we need the base address of the struct and not the value at the base address. So, the code converts the A_IDENT AST node operation into an A_ADDR operation on the identifier.
## Testing the Code I think I spent about two hours running through our hundred plus regression tests, finding things I'd missed and fixing them up. It certainly felt good to get through all the tests again. `tests/input128.c` now checks that we can follow a chain of pointers, which was the whole point of this exercise: ```c struct foo { int val; struct foo *next; }; struct foo head, mid, tail; int main() { struct foo *ptr; tail.val= 20; tail.next= NULL; mid.val= 15; mid.next= &tail; head.val= 10; head.next= &mid; ptr= &head; printf("%d %d\n", head.val, ptr->val); printf("%d %d\n", mid.val, ptr->next->val); printf("%d %d\n", tail.val, ptr->next->next->val); return(0); } ``` And `tests/input129.c` checks that we can't post-increment twice in a row. ## One Other Change: `Linestart` There is one more change that I made to the compiler as part of our effort to get it to self-compile. The scanner was looking for a '#' token. When it saw this token, it assumed that we had hit a C pre-processor line and it parsed this line. Unfortunately, I hadn't tied the scanner down to looking in the first column of each line. So, when our compiler hit this source code line: ```c while (c == '#') { ``` it got upset that the ')' '{' were not a C pre-processor line. We now have a `Linestart` variable which flags if the scanner is at the first column of a new line or not. The main function which is modified is `next()` in `scan.c`. I think the changes are a bit ugly but they work; I should come back sometime and see if I can clean this up a bit. Anyway, we only expect C pre-processor lines when we see a '#' in column 1. ## Conclusion and What's Next In the next part of our compiler writing journey, I'll go back to feeding the compiler source code to itself, see what errors pop up and choose one or more to fix.
[Next step](../53_Mop_up_pt2/Readme.md) ================================================ FILE: 52_Pointers_pt2/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\t.text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\t.data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch (type) { case P_CHAR: return (offset); case P_INT: case P_LONG: break; default: if (!ptrtype(type)) fatald("Bad type in cg_align:", type); } // Here we have an int or a long. Align it on a 4-byte offset // I put the generic code here so it can be reused elsewhere. alignment = 4; offset = (offset + direction * (alignment - 1)) & ~(alignment - 1); return (offset); } // Position of next local variable relative to stack base pointer. 
// We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. static int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. // We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "%r10", "%r11", "%r12", "%r13", "%r9", "%r8", "%rcx", "%rdx", "%rsi", "%rdi" }; static char *breglist[] = { "%r10b", "%r11b", "%r12b", "%r13b", "%r9b", "%r8b", "%cl", "%dl", "%sil", "%dil" }; static char *dreglist[] = { "%r10d", "%r11d", "%r12d", "%r13d", "%r9d", "%r8d", "%ecx", "%edx", "%esi", "%edi" }; // Set all registers as available. // But if reg is positive, don't free that one. void freeall_registers(int keepreg) { int i; for (i = 0; i < NUMFREEREGS; i++) if (i != keepreg) freereg[i] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. int alloc_register(void) { int i; for (i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(NOREG); cgtextseg(); fprintf(Outfile, "# internal switch(expr) routine\n" "# %%rsi = switch table, %%rax = expr\n" "# from SubC: http://www.t3x.org/subc/\n" "\n" "switch:\n" " pushq %%rsi\n" " movq %%rdx, %%rsi\n" " movq %%rax, %%rbx\n" " cld\n" " lodsq\n" " movq %%rax, %%rcx\n" "next:\n" " lodsq\n" " movq %%rax, %%rdx\n" " lodsq\n" " cmpq %%rdx, %%rbx\n" " jnz no\n" " popq %%rsi\n" " jmp *%%rax\n" "no:\n" " loop next\n" " lodsq\n" " popq %%rsi\n" " jmp *%%rax\n" "\n"); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(struct symtable *sym) { char *name = sym->name; struct symtable *parm, *locvar; int cnt; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the %rsp and %rsp if (sym->class == C_GLOBAL) fprintf(Outfile, "\t.globl\t%s\n" "\t.type\t%s, @function\n", name, name); fprintf(Outfile, "%s:\n" "\tpushq\t%%rbp\n" "\tmovq\t%%rsp, %%rbp\n", name); // Copy any in-register parameters to the stack, up to six of them // The remaining parameters are already on the stack for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) { if (cnt > 6) { parm->st_posn = paramOffset; paramOffset += 8; } else { parm->st_posn = newlocaloffset(parm->type); cgstorlocal(paramReg--, parm); } } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. 
for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) { locvar->st_posn = newlocaloffset(locvar->type); } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\taddq\t$%d,%%rsp\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->st_endlabel); fprintf(Outfile, "\taddq\t$%d,%%rsp\n", stackOffset); fputs("\tpopq %rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadglob(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name); } else // Print out the code to initialise it switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovzbq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name); if (op == 
A_PREDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovslq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadlocal(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->st_posn); fprintf(Outfile, "\tmovq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->st_posn); } else switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->st_posn); fprintf(Outfile, "\tmovzbq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->st_posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->st_posn); fprintf(Outfile, "\tmovslq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->st_posn); break; default: fatald("Bad type in cgloadlocal:", sym->type); } return (r); } // Given the label number of a 
global string, // load its address into a new register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tleaq\tL%d(%%rip), %s\n", label, reglist[r]); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tandq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\torq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txorq\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshlq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshrq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { 
fprintf(Outfile, "\tnegq\t%s\n", reglist[r]);
  return (r);
}

// Invert a register's value (bitwise NOT, in place)
int cginvert(int r) {
  fprintf(Outfile, "\tnotq\t%s\n", reglist[r]);
  return (r);
}

// Logically negate a register's value:
// the register ends up 1 if it was zero, otherwise 0
int cglognot(int r) {
  // test sets the flags from r; sete writes 1 to the byte register
  // when r was zero; movzbq widens that byte back to 64 bits
  fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]);
  fprintf(Outfile, "\tsete\t%s\n", breglist[r]);
  fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]);
  return (r);
}

// Logically OR two registers and return a
// register with the result, 1 or 0.
// Both operands are already in registers when this is called;
// the result is left in r1 and r2 is freed.
int cglogor(int r1, int r2) {
  // Generate two labels
  int Ltrue = genlabel();
  int Lend = genlabel();

  // Test r1 and jump to true label if true
  fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r1], reglist[r1]);
  fprintf(Outfile, "\tjne\tL%d\n", Ltrue);

  // Test r2 and jump to true label if true
  fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r2], reglist[r2]);
  fprintf(Outfile, "\tjne\tL%d\n", Ltrue);

  // Didn't jump, so result is false
  fprintf(Outfile, "\tmovq\t$0, %s\n", reglist[r1]);
  fprintf(Outfile, "\tjmp\tL%d\n", Lend);

  // Someone jumped to the true label, so result is true
  cglabel(Ltrue);
  fprintf(Outfile, "\tmovq\t$1, %s\n", reglist[r1]);
  cglabel(Lend);
  free_register(r2);
  return (r1);
}

// Logically AND two registers and return a
// register with the result, 1 or 0.
// Both operands are already in registers when this is called;
// the result is left in r1 and r2 is freed.
int cglogand(int r1, int r2) {
  // Generate two labels
  int Lfalse = genlabel();
  int Lend = genlabel();

  // Test r1 and jump to false label if not true
  fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r1], reglist[r1]);
  fprintf(Outfile, "\tje\tL%d\n", Lfalse);

  // Test r2 and jump to false label if not true
  fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r2], reglist[r2]);
  fprintf(Outfile, "\tje\tL%d\n", Lfalse);

  // Didn't jump, so result is true
  fprintf(Outfile, "\tmovq\t$1, %s\n", reglist[r1]);
  fprintf(Outfile, "\tjmp\tL%d\n", Lend);

  // Someone jumped to the false label, so result is false
  cglabel(Lfalse);
  fprintf(Outfile, "\tmovq\t$0, %s\n", reglist[r1]);
  cglabel(Lend);
  free_register(r2);
  return (r1);
}

// Convert an integer
// value to a boolean value. Jump if
// it's an IF or WHILE operation
//
// r:     register holding the value to test
// op:    the parent AST operation; A_IF and A_WHILE select the jump form
// label: label to jump to when the value is zero (jump form only)
int cgboolean(int r, int op, int label) {
  fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]);
  if (op == A_IF || op == A_WHILE)
    // Condition is false (zero): skip to the given label
    fprintf(Outfile, "\tje\tL%d\n", label);
  else {
    // Otherwise normalise the register to exactly 0 or 1
    fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]);
    fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]);
  }
  return (r);
}

// Call a function with the given symbol id
// Pop off any arguments pushed on the stack
// Return the register with the result
//
// sym:     symbol of the function to call (its name is the call target)
// numargs: number of arguments; those beyond the sixth were pushed
//          on the stack (see cgcopyarg) and are removed here
int cgcall(struct symtable *sym, int numargs) {
  // Get a new register
  int outr = alloc_register();
  // Call the function
  fprintf(Outfile, "\tcall\t%s@PLT\n", sym->name);
  // Remove any arguments pushed on the stack
  // (8 bytes for each argument after the sixth)
  if (numargs > 6)
    fprintf(Outfile, "\taddq\t$%d, %%rsp\n", 8 * (numargs - 6));
  // and copy the return value into our register
  fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]);
  return (outr);
}

// Given a register with an argument value,
// copy this argument into the argposn'th
// parameter in preparation for a future function
// call. Note that argposn is 1, 2, 3, 4, ..., never zero.
void cgcopyarg(int r, int argposn) {

  // If this is above the sixth argument, simply push the
  // register on the stack.
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpushq\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmovq\t%s, %s\n", reglist[r], reglist[FIRSTPARAMREG - argposn + 1]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsalq\t$%d, %s\n", val, reglist[r]); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %s(%%rip)\n", reglist[r], sym->name); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %s(%%rip)\n", breglist[r], sym->name); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %s(%%rip)\n", dreglist[r], sym->name); break; default: fatald("Bad type in cgstorglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %d(%%rbp)\n", reglist[r], sym->st_posn); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %d(%%rbp)\n", breglist[r], sym->st_posn); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %d(%%rbp)\n", dreglist[r], sym->st_posn); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size, type; int initvalue; int i; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the variable (or its elements if an array) // and the type of the variable if (node->stype == S_ARRAY) { size = typesize(value_at(node->type), node->ctype); type = value_at(node->type); } else { size = node->size; type = node->type; } // Generate the global identity and the label cgdataseg(); if (node->class == C_GLOBAL) fprintf(Outfile, 
"\t.globl\t%s\n", node->name); fprintf(Outfile, "%s:\n", node->name); // Output space for one or more elements for (i = 0; i < node->nelems; i++) { // Get any initial value initvalue = 0; if (node->initlist != NULL) initvalue = node->initlist[i]; // Generate the space for this type switch (size) { case 1: fprintf(Outfile, "\t.byte\t%d\n", initvalue); break; case 4: fprintf(Outfile, "\t.long\t%d\n", initvalue); break; case 8: // Generate the pointer to a string literal. Treat a zero value // as actually zero, not the label L0 if (node->initlist != NULL && type == pointer_to(P_CHAR) && initvalue != 0) fprintf(Outfile, "\t.quad\tL%d\n", initvalue); else fprintf(Outfile, "\t.quad\t%d\n", initvalue); break; default: for (int i = 0; i < size; i++) fprintf(Outfile, "\t.byte\t0\n"); } } } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\t.byte\t%d\n", *cptr); } fprintf(Outfile, "\t.byte\t0\n"); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(NOREG); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Deal with pointers here as we can't put them in // the switch statement if (ptrtype(sym->type)) fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); else { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzbl\t%s, %%eax\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %%eax\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } } cgjump(sym->st_endlabel); } // Generate code to load the 
address of an // identifier into a variable. Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL || sym->class == C_STATIC) fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", sym->name, reglist[r]); else fprintf(Outfile, "\tleaq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzbq\t(%s), %s\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovslq\t(%s), %s\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { // Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmovb\t%s, (%s)\n", breglist[r1], reglist[r2]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } // Generate a switch jump table and the code to // load the registers and call the switch() code void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; // Get a label for the switch table label = genlabel(); cglabel(label); // Heuristic. If we have no cases, create one case // which points to the default case if (casecount == 0) { caseval[0] = 0; caselabel[0] = defaultlabel; casecount = 1; } // Generate the switch jump table. 
fprintf(Outfile, "\t.quad\t%d\n", casecount);
  // One (value, label) pair per case, then the default label last
  for (i = 0; i < casecount; i++)
    fprintf(Outfile, "\t.quad\t%d, L%d\n", caseval[i], caselabel[i]);
  fprintf(Outfile, "\t.quad\tL%d\n", defaultlabel);

  // Load the specific registers:
  // %rax gets the switch value, %rdx the address of the table,
  // then control passes to the `switch` routine
  cglabel(toplabel);
  fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]);
  fprintf(Outfile, "\tleaq\tL%d(%%rip), %%rdx\n", label);
  fprintf(Outfile, "\tjmp\tswitch\n");
}

// Move value between registers (r1 is the source, r2 the destination)
void cgmove(int r1, int r2) {
  fprintf(Outfile, "\tmovq\t%s, %s\n", reglist[r1], reglist[r2]);
}


================================================
FILE: 52_Pointers_pt2/cg_arm.c
================================================
#include "defs.h"
#include "data.h"
#include "decl.h"

// Code generator for ARMv6 on Raspberry Pi
// Copyright (c) 2019 Warren Toomey, GPL3


// List of available registers and their names.
// freereg[i] is 1 when reglist[i] is free, 0 when it is allocated.
static int freereg[4];
static char *reglist[4] = { "r4", "r5", "r6", "r7" };

// Set all registers as available
void freeall_registers(void) {
  freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1;
}

// Allocate a free register. Return the number of
// the register. Die if no available registers.
static int alloc_register(void) {
  // The lowest-numbered free register is handed out
  for (int i = 0; i < 4; i++) {
    if (freereg[i]) {
      freereg[i] = 0;
      return (i);
    }
  }
  fatal("Out of registers");
  return (NOREG);		// Keep -Wall happy
}

// Return a register to the list of available registers.
// Check to see if it's not already there.
static void free_register(int reg) {
  if (freereg[reg] != 0)
    fatald("Error trying to free register", reg);
  freereg[reg] = 1;
}

// We have to store large integer literal values in memory.
// Keep a list of them which will be output in the postamble
#define MAXINTS 1024
int Intlist[MAXINTS];
static int Intslot = 0;		// Number of entries used in Intlist[]

// Determine the offset of a large integer
// literal from the .L3 label. If the integer
// isn't in the list, add it.
static void set_int_offset(int val) { int offset = -1; // See if it is already there for (int i = 0; i < Intslot; i++) { if (Intlist[i] == val) { offset = 4 * i; break; } } // Not in the list, so add it if (offset == -1) { offset = 4 * Intslot; if (Intslot == MAXINTS) fatal("Out of int slots in set_int_offset()"); Intlist[Intslot++] = val; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L3+%d\n", offset); } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Print out the assembly postamble void cgpostamble() { // Print out the global variables fprintf(Outfile, ".L2:\n"); for (int i = 0; i < Globs; i++) { if (Symtable[i].stype == S_VARIABLE) fprintf(Outfile, "\t.word %s\n", Symtable[i].name); } // Print out the integer literals fprintf(Outfile, ".L3:\n"); for (int i = 0; i < Intslot; i++) { fprintf(Outfile, "\t.word %d\n", Intlist[i]); } } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, \%%function\n" "%s:\n" "\tpush\t{fp, lr}\n" "\tadd\tfp, sp, #4\n" "\tsub\tsp, sp, #8\n" "\tstr\tr0, [fp, #-8]\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Symtable[id].endlabel); fputs("\tsub\tsp, fp, #4\n" "\tpop\t{fp, pc}\n" "\t.align\t2\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); // If the literal value is small, do it with one instruction if (value <= 1000) fprintf(Outfile, "\tmov\t%s, #%d\n", reglist[r], value); else { set_int_offset(value); fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); } return (r); } // Determine the offset of a variable from the .L2 // label. Yes, this is inefficient code. static void set_var_offset(int id) { int offset = 0; // Walk the symbol table up to id. 
// Find S_VARIABLEs and add on 4 until // we get to our variable for (int i = 0; i < id; i++) { if (Symtable[i].stype == S_VARIABLE) offset += 4; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L2+%d\n", offset); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tldrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s, %s\n", reglist[r1], reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\tmul\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { // To do a divide: r0 holds the dividend, r1 holds the divisor. // The quotient is in r0. 
fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r1]);
  fprintf(Outfile, "\tmov\tr1, %s\n", reglist[r2]);
  // The division itself is done by calling __aeabi_idiv
  fprintf(Outfile, "\tbl\t__aeabi_idiv\n");
  fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r1]);
  free_register(r2);
  return (r1);
}

// Call a function with one argument from the given register
// Return the register with the result
// (the argument goes out in r0 and the result comes back in r0;
// the same allocator register r is reused for the result)
int cgcall(int r, int id) {
  fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]);
  fprintf(Outfile, "\tbl\t%s\n", Symtable[id].name);
  fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r]);
  return (r);
}

// Shift a register left by a constant
int cgshlconst(int r, int val) {
  fprintf(Outfile, "\tlsl\t%s, %s, #%d\n", reglist[r], reglist[r], val);
  return (r);
}

// Store a register's value into a variable.
// Returns r, which stays allocated.
int cgstorglob(int r, int id) {

  // Get the offset to the variable
  set_var_offset(id);

  // Store through r3: a byte store for chars, a word store for
  // every other supported type
  switch (Symtable[id].type) {
    case P_CHAR:
      fprintf(Outfile, "\tstrb\t%s, [r3]\n", reglist[r]);
      break;
    case P_INT:
    case P_LONG:
    case P_CHARPTR:
    case P_INTPTR:
    case P_LONGPTR:
      fprintf(Outfile, "\tstr\t%s, [r3]\n", reglist[r]);
      break;
    default:
      fatald("Bad type in cgstorglob:", Symtable[id].type);
  }
  return (r);
}

// Given a P_XXX type value, return the
// size of a primitive type in bytes.
int cgprimsize(int type) { if (ptrtype(type)) return (4); switch (type) { case P_CHAR: return (1); case P_INT: case P_LONG: return (4); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Symtable[id].type); fprintf(Outfile, "\t.data\n" "\t.globl\t%s\n", Symtable[id].name); switch (typesize) { case 1: fprintf(Outfile, "%s:\t.byte\t0\n", Symtable[id].name); break; case 4: fprintf(Outfile, "%s:\t.long\t0\n", Symtable[id].name); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "moveq", "movne", "movlt", "movgt", "movle", "movge" }; // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "movne", "moveq", "movge", "movle", "movgt", "movlt" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #1\n", cmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #0\n", invcmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\tuxtb\t%s, %s\n", reglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tb\tL%d\n", l); } // List of inverted branch instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *brlist[] = { "bne", "beq", "bge", "ble", "bgt", "blt" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", brlist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[reg]); cgjump(Symtable[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. Return a new register int cgaddress(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); fprintf(Outfile, "\tmov\t%s, r3\n", reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tldrb\t%s, [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [%s]\n", reglist[r1], reglist[r2]); break; case P_INT: case P_LONG: fprintf(Outfile, "\tstr\t%s, [%s]\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 52_Pointers_pt2/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { 
no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\tsection .text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\tsection .data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch (type) { case P_CHAR: return (offset); case P_INT: case P_LONG: break; default: if (!ptrtype(type)) fatald("Bad type in cg_align:", type); } // Here we have an int or a long. Align it on a 4-byte offset // I put the generic code here so it can be reused elsewhere. alignment = 4; offset = (offset + direction * (alignment - 1)) & ~(alignment - 1); return (offset); } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. 
// We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "r10", "r11", "r12", "r13", "r9", "r8", "rcx", "rdx", "rsi", "rdi" }; static char *breglist[] = { "r10b", "r11b", "r12b", "r13b", "r9b", "r8b", "cl", "dl", "sil", "dil" }; static char *dreglist[] = { "r10d", "r11d", "r12d", "r13d", "r9d", "r8d", "ecx", "edx", "esi", "edi" }; // Set all registers as available // But if reg is positive, don't free that one. void freeall_registers(int keepreg) { int i; for (i = 0; i < NUMFREEREGS; i++) if (i != keepreg) freereg[i] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. int alloc_register(void) { int i; for (i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) {
  // A register that is already marked free must not be freed again
  if (freereg[reg] != 0)
    fatald("Error trying to free register", reg);
  freereg[reg] = 1;
}

// Print out the assembly preamble:
// the extern declarations, then the internal `switch` routine
void cgpreamble() {
  freeall_registers(NOREG);
  fputs("\textern\tprintint\n", Outfile);
  fputs("\textern\tprintchar\n", Outfile);
  fputs("\textern\topen\n", Outfile);
  fputs("\textern\tclose\n", Outfile);
  fputs("\textern\tread\n", Outfile);
  fputs("\textern\twrite\n", Outfile);
  fputs("\textern\tprintf\n", Outfile);
  cgtextseg();
  // The routine below expects the switch value in rax and the table
  // address in rdx: its first instructions save rsi and copy rdx into
  // it. It reads the case count, then walks (value, label) pairs and
  // jumps to the matching label, or to the final label if none match.
  // NOTE(review): the emitted header comment says "rsi = switch table",
  // but the code takes the table from rdx -- confirm which is intended.
  fprintf(Outfile,
	  "; internal switch(expr) routine\n"
	  "; rsi = switch table, rax = expr\n"
	  "; from SubC: http://www.t3x.org/subc/\n"
	  "\n"
	  "switch:\n"
	  " push rsi\n"
	  " mov rsi, rdx\n"
	  " mov rbx, rax\n"
	  " cld\n"
	  " lodsq\n"
	  " mov rcx, rax\n"
	  "next:\n"
	  " lodsq\n"
	  " mov rdx, rax\n"
	  " lodsq\n"
	  " cmp rbx, rdx\n"
	  " jnz no\n"
	  " pop rsi\n"
	  " jmp rax\n"
	  "no:\n"
	  " loop next\n"
	  " lodsq\n"
	  " pop rsi\n"
	  " jmp rax\n"
	  "\n");
}

// Nothing to do
void cgpostamble() {
}

// Print out a function preamble:
// the function label and frame setup, then stack slots for the
// parameters and locals
void cgfuncpreamble(struct symtable *sym) {
  char *name = sym->name;
  struct symtable *parm, *locvar;
  int cnt;
  int paramOffset = 16;		// Any pushed params start at this stack offset
  int paramReg = FIRSTPARAMREG;	// Index to the first param register in above reg lists

  // Output in the text segment, reset local offset
  cgtextseg();
  localOffset = 0;

  // Output the function start, save the rsp and rbp
  if (sym->class == C_GLOBAL)
    fprintf(Outfile, "\tglobal\t%s\n", name);
  fprintf(Outfile,
	  "%s:\n"
	  "\tpush\trbp\n"
	  "\tmov\trbp, rsp\n", name);

  // Copy any in-register parameters to the stack, up to six of them
  // The remaining parameters are already on the stack
  for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) {
    if (cnt > 6) {
      // Seventh and later parameters: record their position above rbp,
      // 8 bytes apart
      parm->st_posn = paramOffset;
      paramOffset += 8;
    } else {
      // First six: give each a new local slot and store its parameter
      // register there, walking down the register list
      parm->st_posn = newlocaloffset(parm->type);
      cgstorlocal(paramReg--, parm);
    }
  }

  // For the remainder, if they are a parameter then they are
  // already on the stack. If only a local, make a stack position.
for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) {
    locvar->st_posn = newlocaloffset(locvar->type);
  }

  // Align the stack pointer to be a multiple of 16
  // less than its previous value
  // (localOffset is rounded up to the next multiple of 16, and the
  // negated amount is added to rsp to reserve the space)
  stackOffset = (localOffset + 15) & ~15;
  fprintf(Outfile, "\tadd\trsp, %d\n", -stackOffset);
}

// Print out a function postamble:
// the end label, release of the local stack space, and the return
void cgfuncpostamble(struct symtable *sym) {
  cglabel(sym->st_endlabel);
  fprintf(Outfile, "\tadd\trsp, %d\n", stackOffset);
  fputs("\tpop	rbp\n" "\tret\n", Outfile);
}

// Load an integer literal value into a register.
// Return the number of the register.
// For x86-64, we don't need to worry about the type.
int cgloadint(int value, int type) {
  // Get a new register
  int r = alloc_register();

  fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value);
  return (r);
}

// Load a value from a variable into a register.
// Return the number of the register. If the
// operation is pre- or post-increment/decrement,
// also perform this action.
//
// sym: the global's symbol; its name is used as the memory operand
// op:  A_PREINC, A_PREDEC, A_POSTINC or A_POSTDEC to also modify the
//      variable in memory; any other value emits a plain load
int cgloadglob(struct symtable *sym, int op) {
  // Get a new register
  int r = alloc_register();

  // In every branch the pre-operations are emitted before the load
  // and the post-operations after it.
  if (cgprimsize(sym->type) == 8) {
    if (op == A_PREINC)
      fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name);
    if (op == A_PREDEC)
      fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name);
    fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], sym->name);
    if (op == A_POSTINC)
      fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name);
    if (op == A_POSTDEC)
      fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name);
  } else
    // Print out the code to initialise it
    switch (sym->type) {
      case P_CHAR:
	if (op == A_PREINC)
	  fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name);
	if (op == A_PREDEC)
	  fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name);
	fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], sym->name);
	if (op == A_POSTINC)
	  fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name);
	if (op == A_POSTDEC)
	  fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name);
	break;
      case P_INT:
	if (op == A_PREINC)
	  fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name);
	if (op == A_PREDEC)
fprintf(Outfile, "\tdec\tdword [%s]\n", sym->name); fprintf(Outfile, "\tmovsx\t%s, word [%s]\n", dreglist[r], sym->name); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword [%s]\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadlocal(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->st_posn); fprintf(Outfile, "\tmov\t%s, [rbp+%d]\n", reglist[r], sym->st_posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->st_posn); } else switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->st_posn); fprintf(Outfile, "\tmovzx\t%s, byte [rbp+%d]\n", reglist[r], sym->st_posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->st_posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", sym->st_posn); fprintf(Outfile, "\tmovsx\t%s, dword [rbp+%d]\n", reglist[r], sym->st_posn); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", 
sym->st_posn);
	if (op == A_POSTDEC)
	  fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", sym->st_posn);
	break;
      default:
	fatald("Bad type in cgloadlocal:", sym->type);
    }
  return (r);
}

// Given the label number of a global string,
// load its address into a new register
int cgloadglobstr(int label) {
  // Get a new register
  int r = alloc_register();
  fprintf(Outfile, "\tmov\t%s, L%d\n", reglist[r], label);
  return (r);
}

// Add two registers together and return
// the number of the register with the result
// (the sum is left in r2; r1 is freed)
int cgadd(int r1, int r2) {
  fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r2], reglist[r1]);
  free_register(r1);
  return (r2);
}

// Subtract the second register from the first and
// return the number of the register with the result
// (the difference is left in r1; r2 is freed)
int cgsub(int r1, int r2) {
  fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]);
  free_register(r2);
  return (r1);
}

// Multiply two registers together and return
// the number of the register with the result
// (the product is left in r2; r1 is freed)
int cgmul(int r1, int r2) {
  fprintf(Outfile, "\timul\t%s, %s\n", reglist[r2], reglist[r1]);
  free_register(r1);
  return (r2);
}

// Divide the first register by the second and
// return the number of the register with the result
int cgdiv(int r1, int r2) {
  // The dividend is moved to rax, widened with cqo, divided by r2,
  // and the quotient is copied from rax back into r1
  fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]);
  fprintf(Outfile, "\tcqo\n");
  fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]);
  fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]);
  free_register(r2);
  return (r1);
}

// Bitwise AND two registers; the result is left in r2 and r1 is freed
int cgand(int r1, int r2) {
  fprintf(Outfile, "\tand\t%s, %s\n", reglist[r2], reglist[r1]);
  free_register(r1);
  return (r2);
}

// Bitwise OR two registers; the result is left in r2 and r1 is freed
int cgor(int r1, int r2) {
  fprintf(Outfile, "\tor\t%s, %s\n", reglist[r2], reglist[r1]);
  free_register(r1);
  return (r2);
}

// Bitwise XOR two registers; the result is left in r2 and r1 is freed
int cgxor(int r1, int r2) {
  fprintf(Outfile, "\txor\t%s, %s\n", reglist[r2], reglist[r1]);
  free_register(r1);
  return (r2);
}

// Shift r1 left by the count held in r2. The low byte of r2 is
// copied into cl first. The result is left in r1 and r2 is freed
int cgshl(int r1, int r2) {
  fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]);
  fprintf(Outfile, "\tshl\t%s, cl\n", reglist[r1]);
  free_register(r2);
  return (r1);
}

// Shift r1 right by the count held in r2, using shr (a logical
// shift). The low byte of r2 is copied into cl first.
// The result is left in r1 and r2 is freed
int cgshr(int r1, int r2) {
  fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]);
  fprintf(Outfile, "\tshr\t%s, cl\n", reglist[r1]);
  free_register(r2);
  return (r1);
}

// Negate a register's value
int cgnegate(int r) {
  fprintf(Outfile, "\tneg\t%s\n", reglist[r]);
  return (r);
}

// Invert a register's value (bitwise NOT, in place)
int cginvert(int r) {
  fprintf(Outfile, "\tnot\t%s\n", reglist[r]);
  return (r);
}

// Logically negate a register's value:
// the register ends up 1 if it was zero, otherwise 0
int cglognot(int r) {
  fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]);
  fprintf(Outfile, "\tsete\t%s\n", breglist[r]);
  fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r], breglist[r]);
  return (r);
}

// Logically OR two registers and return a
// register with the result, 1 or 0.
// Both operands are already in registers when this is called;
// the result is left in r1 and r2 is freed.
int cglogor(int r1, int r2) {
  // Generate two labels
  int Ltrue = genlabel();
  int Lend = genlabel();

  // Test r1 and jump to true label if true
  fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r1], reglist[r1]);
  fprintf(Outfile, "\tjne\tL%d\n", Ltrue);

  // Test r2 and jump to true label if true
  fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r2], reglist[r2]);
  fprintf(Outfile, "\tjne\tL%d\n", Ltrue);

  // Didn't jump, so result is false
  fprintf(Outfile, "\tmov\t%s, 0\n", reglist[r1]);
  fprintf(Outfile, "\tjmp\tL%d\n", Lend);

  // Someone jumped to the true label, so result is true
  cglabel(Ltrue);
  fprintf(Outfile, "\tmov\t%s, 1\n", reglist[r1]);
  cglabel(Lend);
  free_register(r2);
  return(r1);
}

// Logically AND two registers and return a
// register with the result, 1 or 0.
// Both operands are already in registers when this is called;
// the result is left in r1 and r2 is freed.
int cglogand(int r1, int r2) {
  // Generate two labels
  int Lfalse = genlabel();
  int Lend = genlabel();

  // Test r1 and jump to false label if not true
  fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r1], reglist[r1]);
  fprintf(Outfile, "\tje\tL%d\n", Lfalse);

  // Test r2 and jump to false label if not true
  fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r2], reglist[r2]);
  fprintf(Outfile, "\tje\tL%d\n", Lfalse);

  // Didn't jump, so result is true
  fprintf(Outfile, "\tmov\t%s, 1\n", reglist[r1]);
  fprintf(Outfile, "\tjmp\tL%d\n", Lend);

  // Someone jumped to the false label,
so result is false cglabel(Lfalse); fprintf(Outfile, "\tmov\t%s, 0\n", reglist[r1]); cglabel(Lend); free_register(r2); return(r1); } // Convert an integer value to a boolean value. Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, byte %s\n", reglist[r], breglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\tadd\trsp, %d\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmov\t%s, rax\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpush\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmov\t%s, %s\n", reglist[FIRSTPARAMREG - argposn + 1], reglist[r]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsal\t%s, %d\n", reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, dreglist[r]); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\tqword\t[rbp+%d], %s\n", sym->st_posn, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\tbyte\t[rbp+%d], %s\n", sym->st_posn, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\tdword\t[rbp+%d], %s\n", sym->st_posn, dreglist[r]); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size, type; int initvalue; int i; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the variable (or its elements if an array) // and the type of the variable if (node->stype == S_ARRAY) { size = typesize(value_at(node->type), node->ctype); type = value_at(node->type); } else { size = node->size; type = node->type; } // Generate the global identity and the label cgdataseg(); if (node->class == C_GLOBAL) fprintf(Outfile, "\tglobal\t%s\n", 
node->name); fprintf(Outfile, "%s:\n", node->name); // Output space for one or more elements for (i = 0; i < node->nelems; i++) { // Get any initial value initvalue = 0; if (node->initlist != NULL) initvalue = node->initlist[i]; // Generate the space for this type // original version switch(size) { case 1: fprintf(Outfile, "\tdb\t%d\n", initvalue); break; case 4: fprintf(Outfile, "\tdd\t%d\n", initvalue); break; case 8: // Generate the pointer to a string literal. Treat a zero value // as actually zero, not the label L0 if (node->initlist != NULL && type == pointer_to(P_CHAR) && initvalue != 0) fprintf(Outfile, "\tdq\tL%d\n", initvalue); else fprintf(Outfile, "\tdq\t%d\n", initvalue); break; default: for (int i = 0; i < size; i++) fprintf(Outfile, "\tdb\t0\n"); /* compact version using times instead of loop fprintf(Outfile, "\ttimes\t%d\tdb\t0\n", size); */ } } } // Generate a global string and its start label void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\tdb\t%d\n", *cptr); } fprintf(Outfile, "\tdb\t0\n"); /* put string in readable format on a single line // probably went overboard with error checking int comma = 0, quote = 0, start = 1; fprintf(Outfile, "\tdb\t"); for (cptr=strvalue; *cptr; cptr++) { if ( ! isprint(*cptr) ) if (comma || start) { fprintf(Outfile, "%d, ", *cptr); start = 0; comma = 1; } else if (quote) { fprintf(Outfile, "\', %d, ", *cptr); comma = 1; quote = 0; } else { fprintf(Outfile, "%d, ", *cptr); comma = 1; quote = 0; } else if (start || comma) { fprintf(Outfile, "\'%c", *cptr); start = comma = 0; quote = 1; } else { fprintf(Outfile, "%c", *cptr); comma = 0; quote = 1; } } if (comma || start) fprintf(Outfile, "0\n"); else fprintf(Outfile, "\', 0\n"); */ } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(NOREG); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Deal with pointers here as we can't put them in // the switch statement if (ptrtype(sym->type)) fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); else { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzx\teax, %s\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmov\teax, %s\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } } cgjump(sym->st_endlabel); } // Generate code to load the address of an 
// identifier into a variable. Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL || sym->class == C_STATIC) fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r], sym->name); else fprintf(Outfile, "\tlea\t%s, [rbp+%d]\n", reglist[r], sym->st_posn); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovsx\t%s, dword [%s]\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { //Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmov\t[%s], byte %s\n", reglist[r2], breglist[r1]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } // Generate a switch jump table and the code to // load the registers and call the switch() code void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; // Get a label for the switch table label = genlabel(); cglabel(label); // Heuristic. If we have no cases, create one case // which points to the default case if (casecount == 0) { caseval[0] = 0; caselabel[0] = defaultlabel; casecount = 1; } // Generate the switch jump table. 
fprintf(Outfile, "\tdq\t%d\n", casecount); for (i = 0; i < casecount; i++) fprintf(Outfile, "\tdq\t%d, L%d\n", caseval[i], caselabel[i]); fprintf(Outfile, "\tdq\tL%d\n", defaultlabel); // Load the specific registers cglabel(toplabel); fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); fprintf(Outfile, "\tmov\trdx, L%d\n", label); fprintf(Outfile, "\tjmp\tswitch\n"); } // Move value between registers void cgmove(int r1, int r2) { fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r2], reglist[r1]); } ================================================ FILE: 52_Pointers_pt2/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Linestart; // True if at start of a line extern_ int Putback; // Character put back by scanner extern_ struct symtable *Functionid; // Symbol ptr of the current function extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ char *Infilename; // Name of file we are parsing extern_ char *Outfilename; // Name of file we opened as Outfile extern_ struct token Token; // Last token scanned extern_ struct token Peektoken; // A look-ahead token extern_ char Text[TEXTLEN + 1]; // Last identifier scanned extern_ int Looplevel; // Depth of nested loops extern_ int Switchlevel; // Depth of nested switches extern char *Tstring[]; // List of token strings // Symbol table lists extern_ struct symtable *Globhead, *Globtail; // Global variables and functions extern_ struct symtable *Loclhead, *Locltail; // Local variables extern_ struct symtable *Parmhead, *Parmtail; // Local parameters extern_ struct symtable *Membhead, *Membtail; // Temp list of struct/union members extern_ struct symtable *Structhead, *Structtail; // List of struct types extern_ struct symtable *Unionhead, *Uniontail; // List of union types extern_ struct symtable *Enumhead, *Enumtail; // List of enum types and 
values extern_ struct symtable *Typehead, *Typetail; // List of typedefs // Command-line flags extern_ int O_dumpAST; // If true, dump the AST trees extern_ int O_dumpsym; // If true, dump the symbol table extern_ int O_keepasm; // If true, keep any assembly files extern_ int O_assemble; // If true, assemble the assembly files extern_ int O_dolink; // If true, link the object files extern_ int O_verbose; // If true, print info on compilation stages ================================================ FILE: 52_Pointers_pt2/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 static struct symtable *composite_declaration(int type); static int typedef_declaration(struct symtable **ctype); static int type_of_typedef(char *name, struct symtable **ctype); static void enum_declaration(void); // Parse the current token and return a primitive type enum value, // a pointer to any composite type and possibly modify // the class of the type. int parse_type(struct symtable **ctype, int *class) { int type, exstatic = 1; // See if the class has been changed to extern or static while (exstatic) { switch (Token.token) { case T_EXTERN: if (*class == C_STATIC) fatal("Illegal to have extern and static at the same time"); *class = C_EXTERN; scan(&Token); break; case T_STATIC: if (*class == C_LOCAL) fatal("Compiler doesn't support static local declarations"); if (*class == C_EXTERN) fatal("Illegal to have extern and static at the same time"); *class = C_STATIC; scan(&Token); break; default: exstatic = 0; } } // Now work on the actual type keyword switch (Token.token) { case T_VOID: type = P_VOID; scan(&Token); break; case T_CHAR: type = P_CHAR; scan(&Token); break; case T_INT: type = P_INT; scan(&Token); break; case T_LONG: type = P_LONG; scan(&Token); break; // For the following, if we have a ';' after the // parsing then there is no type, so return -1. 
// Example: struct x {int y; int z}; case T_STRUCT: type = P_STRUCT; *ctype = composite_declaration(P_STRUCT); if (Token.token == T_SEMI) type = -1; break; case T_UNION: type = P_UNION; *ctype = composite_declaration(P_UNION); if (Token.token == T_SEMI) type = -1; break; case T_ENUM: type = P_INT; // Enums are really ints enum_declaration(); if (Token.token == T_SEMI) type = -1; break; case T_TYPEDEF: type = typedef_declaration(ctype); if (Token.token == T_SEMI) type = -1; break; case T_IDENT: type = type_of_typedef(Text, ctype); break; default: fatals("Illegal type, token", Token.tokstr); } return (type); } // Given a type parsed by parse_type(), scan in any following // '*' tokens and return the new type int parse_stars(int type) { while (1) { if (Token.token != T_STAR) break; type = pointer_to(type); scan(&Token); } return (type); } // Parse a type which appears inside a cast int parse_cast(struct symtable **ctype) { int type, class = 0; // Get the type inside the parentheses type = parse_stars(parse_type(ctype, &class)); // Do some error checking. I'm sure more can be done if (type == P_STRUCT || type == P_UNION || type == P_VOID) fatal("Cannot cast to a struct, union or void type"); return (type); } // Given a type, parse an expression of literals and ensure // that the type of this expression matches the given type. // Parse any type cast that precedes the expression. // If an integer literal, return this value. // If a string literal, return the label number of the string. 
int parse_literal(int type) { struct ASTnode *tree; // Parse the expression and optimise the resulting AST tree tree = optimise(binexpr(0)); // If there's a cast, get the child and // mark it as having the type from the cast if (tree->op == A_CAST) { tree->left->type = tree->type; tree = tree->left; } // The tree must now have an integer or string literal if (tree->op != A_INTLIT && tree->op != A_STRLIT) fatal("Cannot initialise globals with a general expression"); // If the type is char * and if (type == pointer_to(P_CHAR)) { // We have a string literal, return the label number if (tree->op == A_STRLIT) return (tree->a_intvalue); // We have a zero int literal, so that's a NULL if (tree->op == A_INTLIT && tree->a_intvalue == 0) return (0); } // We only get here with an integer literal. The input type // is an integer type and is wide enough to hold the literal value if (inttype(type) && typesize(type, NULL) >= typesize(tree->type, NULL)) return (tree->a_intvalue); fatal("Type mismatch: literal vs. variable"); return (0); // Keep -Wall happy } // Given the type, name and class of a scalar variable, // parse any initialisation value and allocate storage for it. // Return the variable's symbol table entry. 
static struct symtable *scalar_declaration(char *varname, int type, struct symtable *ctype, int class, struct ASTnode **tree) { struct symtable *sym = NULL; struct ASTnode *varnode, *exprnode; *tree = NULL; // Add this as a known scalar switch (class) { case C_STATIC: case C_EXTERN: case C_GLOBAL: sym = addglob(varname, type, ctype, S_VARIABLE, class, 1, 0); break; case C_LOCAL: sym = addlocl(varname, type, ctype, S_VARIABLE, 1); break; case C_PARAM: sym = addparm(varname, type, ctype, S_VARIABLE); break; case C_MEMBER: sym = addmemb(varname, type, ctype, S_VARIABLE, 1); break; } // The variable is being initialised if (Token.token == T_ASSIGN) { // Only possible for a global or local if (class != C_GLOBAL && class != C_LOCAL && class != C_STATIC) fatals("Variable can not be initialised", varname); scan(&Token); // Globals must be assigned a literal value if (class == C_GLOBAL || class == C_STATIC) { // Create one initial value for the variable and // parse this value sym->initlist = (int *) malloc(sizeof(int)); sym->initlist[0] = parse_literal(type); } if (class == C_LOCAL) { // Make an A_IDENT AST node with the variable varnode = mkastleaf(A_IDENT, sym->type, sym->ctype, sym, 0); // Get the expression for the assignment, make into a rvalue exprnode = binexpr(0); exprnode->rvalue = 1; // Ensure the expression's type matches the variable exprnode = modify_type(exprnode, varnode->type, varnode->ctype, 0); if (exprnode == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree *tree = mkastnode(A_ASSIGN, exprnode->type, exprnode->ctype, exprnode, NULL, varnode, NULL, 0); } } // Generate any global space if (class == C_GLOBAL || class == C_STATIC) genglobsym(sym); return (sym); } // Given the type, name and class of an variable, parse // the size of the array, if any. Then parse any initialisation // value and allocate storage for it. // Return the variable's symbol table entry. 
static struct symtable *array_declaration(char *varname, int type, struct symtable *ctype, int class) { struct symtable *sym; // New symbol table entry int nelems = -1; // Assume the number of elements won't be given int maxelems; // The maximum number of elements in the init list int *initlist; // The list of initial elements int i = 0, j; // Skip past the '[' scan(&Token); // See we have an array size if (Token.token != T_RBRACKET) { nelems = parse_literal(P_INT); if (nelems <= 0) fatald("Array size is illegal", nelems); } // Ensure we have a following ']' match(T_RBRACKET, "]"); // Add this as a known array. We treat the // array as a pointer to its elements' type switch (class) { case C_STATIC: case C_EXTERN: case C_GLOBAL: sym = addglob(varname, pointer_to(type), ctype, S_ARRAY, class, 0, 0); break; default: fatal("For now, declaration of non-global arrays is not implemented"); } // Array initialisation if (Token.token == T_ASSIGN) { if (class != C_GLOBAL && class != C_STATIC) fatals("Variable can not be initialised", varname); scan(&Token); // Get the following left curly bracket match(T_LBRACE, "{"); #define TABLE_INCREMENT 10 // If the array already has nelems, allocate that many elements // in the list. Otherwise, start with TABLE_INCREMENT. 
if (nelems != -1) maxelems = nelems; else maxelems = TABLE_INCREMENT; initlist = (int *) malloc(maxelems * sizeof(int)); // Loop getting a new literal value from the list while (1) { // Check we can add the next value, then parse and add it if (nelems != -1 && i == maxelems) fatal("Too many values in initialisation list"); initlist[i++] = parse_literal(type); // Increase the list size if the original size was // not set and we have hit the end of the current list if (nelems == -1 && i == maxelems) { maxelems += TABLE_INCREMENT; initlist = (int *) realloc(initlist, maxelems * sizeof(int)); } // Leave when we hit the right curly bracket if (Token.token == T_RBRACE) { scan(&Token); break; } // Next token must be a comma, then comma(); } // Zero any unused elements in the initlist. // Attach the list to the symbol table entry for (j = i; j < sym->nelems; j++) initlist[j] = 0; if (i > nelems) nelems = i; sym->initlist = initlist; } // Set the size of the array and the number of elements sym->nelems = nelems; sym->size = sym->nelems * typesize(type, ctype); // Generate any global space if (class == C_GLOBAL || class == C_STATIC) genglobsym(sym); return (sym); } // Given a pointer to the new function being declared and // a possibly NULL pointer to the function's previous declaration, // parse a list of parameters and cross-check them against the // previous declaration. Return the count of parameters static int param_declaration_list(struct symtable *oldfuncsym, struct symtable *newfuncsym) { int type, paramcnt = 0; struct symtable *ctype; struct symtable *protoptr = NULL; struct ASTnode *unused; // Get the pointer to the first prototype parameter if (oldfuncsym != NULL) protoptr = oldfuncsym->member; // Loop getting any parameters while (Token.token != T_RPAREN) { // If the first token is 'void' if (Token.token == T_VOID) { // Peek at the next token. If a ')', the function // has no parameters, so leave the loop. 
scan(&Peektoken); if (Peektoken.token == T_RPAREN) { // Move the Peektoken into the Token paramcnt = 0; scan(&Token); break; } } // Get the type of the next parameter type = declaration_list(&ctype, C_PARAM, T_COMMA, T_RPAREN, &unused); if (type == -1) fatal("Bad type in parameter list"); // Ensure the type of this parameter matches the prototype if (protoptr != NULL) { if (type != protoptr->type) fatald("Type doesn't match prototype for parameter", paramcnt + 1); protoptr = protoptr->next; } paramcnt++; // Stop when we hit the right parenthesis if (Token.token == T_RPAREN) break; // We need a comma as separator comma(); } if (oldfuncsym != NULL && paramcnt != oldfuncsym->nelems) fatals("Parameter count mismatch for function", oldfuncsym->name); // Return the count of parameters return (paramcnt); } // // function_declaration: type identifier '(' parameter_list ')' ; // | type identifier '(' parameter_list ')' compound_statement ; // // Parse the declaration of function. static struct symtable *function_declaration(char *funcname, int type, struct symtable *ctype, int class) { struct ASTnode *tree, *finalstmt; struct symtable *oldfuncsym, *newfuncsym = NULL; int endlabel, paramcnt; // Text has the identifier's name. If this exists and is a // function, get the id. Otherwise, set oldfuncsym to NULL. if ((oldfuncsym = findsymbol(funcname)) != NULL) if (oldfuncsym->stype != S_FUNCTION) oldfuncsym = NULL; // If this is a new function declaration, get a // label-id for the end label, and add the function // to the symbol table, if (oldfuncsym == NULL) { endlabel = genlabel(); // Assumtion: functions only return scalar types, so NULL below newfuncsym = addglob(funcname, type, NULL, S_FUNCTION, class, 0, endlabel); } // Scan in the '(', any parameters and the ')'. 
// Pass in any existing function prototype pointer lparen(); paramcnt = param_declaration_list(oldfuncsym, newfuncsym); rparen(); // If this is a new function declaration, update the // function symbol entry with the number of parameters. // Also copy the parameter list into the function's node. if (newfuncsym) { newfuncsym->nelems = paramcnt; newfuncsym->member = Parmhead; oldfuncsym = newfuncsym; } // Clear out the parameter list Parmhead = Parmtail = NULL; // Declaration ends in a semicolon, only a prototype. if (Token.token == T_SEMI) return (oldfuncsym); // This is not just a prototype. // Set the Functionid global to the function's symbol pointer Functionid = oldfuncsym; // Get the AST tree for the compound statement and mark // that we have parsed no loops or switches yet Looplevel = 0; Switchlevel = 0; lbrace(); tree = compound_statement(0); rbrace(); // If the function type isn't P_VOID .. if (type != P_VOID) { // Error if no statements in the function if (tree == NULL) fatal("No statements in function with non-void type"); // Check that the last AST operation in the // compound statement was a return statement finalstmt = (tree->op == A_GLUE) ? tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); } // Build the A_FUNCTION node which has the function's symbol pointer // and the compound statement sub-tree tree = mkastunary(A_FUNCTION, type, ctype, tree, oldfuncsym, endlabel); // Do optimisations on the AST tree tree = optimise(tree); // Dump the AST tree if requested if (O_dumpAST) { dumpAST(tree, NOLABEL, 0); fprintf(stdout, "\n\n"); } // Generate the assembly code for it genAST(tree, NOLABEL, NOLABEL, NOLABEL, 0); // Now free the symbols associated // with this function freeloclsyms(); return (oldfuncsym); } // Parse composite type declarations: structs or unions. // Either find an existing struct/union declaration, or build // a struct/union symbol table entry and return its pointer. 
static struct symtable *composite_declaration(int type) { struct symtable *ctype = NULL; struct symtable *m; struct ASTnode *unused; int offset; int t; // Skip the struct/union keyword scan(&Token); // See if there is a following struct/union name if (Token.token == T_IDENT) { // Find any matching composite type if (type == P_STRUCT) ctype = findstruct(Text); else ctype = findunion(Text); scan(&Token); } // If the next token isn't an LBRACE , this is // the usage of an existing struct/union type. // Return the pointer to the type. if (Token.token != T_LBRACE) { if (ctype == NULL) fatals("unknown struct/union type", Text); return (ctype); } // Ensure this struct/union type hasn't been // previously defined if (ctype) fatals("previously defined struct/union", Text); // Build the composite type and skip the left brace if (type == P_STRUCT) ctype = addstruct(Text); else ctype = addunion(Text); scan(&Token); // Scan in the list of members while (1) { // Get the next member. m is used as a dummy t = declaration_list(&m, C_MEMBER, T_SEMI, T_RBRACE, &unused); if (t == -1) fatal("Bad type in member list"); if (Token.token == T_SEMI) scan(&Token); if (Token.token == T_RBRACE) break; } // Attach to the struct type's node rbrace(); if (Membhead == NULL) fatals("No members in struct", ctype->name); ctype->member = Membhead; Membhead = Membtail = NULL; // Set the offset of the initial member // and find the first free byte after it m = ctype->member; m->st_posn = 0; offset = typesize(m->type, m->ctype); // Set the position of each successive member in the composite type // Unions are easy. 
For structs, align the member and find the next free byte for (m = m->next; m != NULL; m = m->next) { // Set the offset for this member if (type == P_STRUCT) m->st_posn = genalign(m->type, offset, 1); else m->st_posn = 0; // Get the offset of the next free byte after this member offset += typesize(m->type, m->ctype); } // Set the overall size of the composite type ctype->size = offset; return (ctype); } // Parse an enum declaration static void enum_declaration(void) { struct symtable *etype = NULL; char *name = NULL; int intval = 0; // Skip the enum keyword. scan(&Token); // If there's a following enum type name, get a // pointer to any existing enum type node. if (Token.token == T_IDENT) { etype = findenumtype(Text); name = strdup(Text); // As it gets tromped soon scan(&Token); } // If the next token isn't a LBRACE, check // that we have an enum type name, then return if (Token.token != T_LBRACE) { if (etype == NULL) fatals("undeclared enum type:", name); return; } // We do have an LBRACE. Skip it scan(&Token); // If we have an enum type name, ensure that it // hasn't been declared before. if (etype != NULL) fatals("enum type redeclared:", etype->name); else // Build an enum type node for this identifier etype = addenum(name, C_ENUMTYPE, 0); // Loop to get all the enum values while (1) { // Ensure we have an identifier // Copy it in case there's an int literal coming up ident(); name = strdup(Text); // Ensure this enum value hasn't been declared before etype = findenumval(name); if (etype != NULL) fatals("enum value redeclared:", Text); // If the next token is an '=', skip it and // get the following int literal if (Token.token == T_ASSIGN) { scan(&Token); if (Token.token != T_INTLIT) fatal("Expected int literal after '='"); intval = Token.intvalue; scan(&Token); } // Build an enum value node for this identifier. // Increment the value for the next enum identifier. 
etype = addenum(name, C_ENUMVAL, intval++); // Bail out on a right curly bracket, else get a comma if (Token.token == T_RBRACE) break; comma(); } scan(&Token); // Skip over the right curly bracket } // Parse a typedef declaration and return the type // and ctype that it represents static int typedef_declaration(struct symtable **ctype) { int type, class = 0; // Skip the typedef keyword. scan(&Token); // Get the actual type following the keyword type = parse_type(ctype, &class); if (class != 0) fatal("Can't have extern in a typedef declaration"); // See if the typedef identifier already exists if (findtypedef(Text) != NULL) fatals("redefinition of typedef", Text); // Get any following '*' tokens type = parse_stars(type); // It doesn't exist so add it to the typedef list addtypedef(Text, type, *ctype); scan(&Token); return (type); } // Given a typedef name, return the type it represents static int type_of_typedef(char *name, struct symtable **ctype) { struct symtable *t; // Look up the typedef in the list t = findtypedef(name); if (t == NULL) fatals("unknown type", name); scan(&Token); *ctype = t->ctype; return (t->type); } // Parse the declaration of a variable or function. // The type and any following '*'s have been scanned, and we // have the identifier in the Token variable. // The class argument is the variable's class. // Return a pointer to the symbol's entry in the symbol table static struct symtable *symbol_declaration(int type, struct symtable *ctype, int class, struct ASTnode **tree) { struct symtable *sym = NULL; char *varname = strdup(Text); // Ensure that we have an identifier. // We copied it above so we can scan more tokens in, e.g. // an assignment expression for a local variable. 
ident(); // Deal with function declarations if (Token.token == T_LPAREN) { return (function_declaration(varname, type, ctype, class)); } // See if this array or scalar variable has already been declared switch (class) { case C_EXTERN: case C_STATIC: case C_GLOBAL: if (findglob(varname) != NULL) fatals("Duplicate global variable declaration", varname); case C_LOCAL: case C_PARAM: if (findlocl(varname) != NULL) fatals("Duplicate local variable declaration", varname); case C_MEMBER: if (findmember(varname) != NULL) fatals("Duplicate struct/union member declaration", varname); } // Add the array or scalar variable to the symbol table if (Token.token == T_LBRACKET) sym = array_declaration(varname, type, ctype, class); else sym = scalar_declaration(varname, type, ctype, class, tree); return (sym); } // Parse a list of symbols where there is an initial type. // Return the type of the symbols. et1 and et2 are end tokens. int declaration_list(struct symtable **ctype, int class, int et1, int et2, struct ASTnode **gluetree) { int inittype, type; struct symtable *sym; struct ASTnode *tree; *gluetree = NULL; // Get the initial type. 
If -1, it was // a composite type definition, return this if ((inittype = parse_type(ctype, &class)) == -1) return (inittype); // Now parse the list of symbols while (1) { // See if this symbol is a pointer type = parse_stars(inittype); // Parse this symbol sym = symbol_declaration(type, *ctype, class, &tree); // We parsed a function, there is no list so leave if (sym->stype == S_FUNCTION) { if (class != C_GLOBAL && class != C_STATIC) fatal("Function definition not at global level"); return (type); } // Glue any AST tree from a local declaration // to build a sequence of assignments to perform if (*gluetree == NULL) *gluetree = tree; else *gluetree = mkastnode(A_GLUE, P_NONE, NULL, *gluetree, NULL, tree, NULL, 0); // We are at the end of the list, leave if (Token.token == et1 || Token.token == et2) return (type); // Otherwise, we need a comma as separator comma(); } } // Parse one or more global declarations, either // variables, functions or structs void global_declarations(void) { struct symtable *ctype; struct ASTnode *unused; while (Token.token != T_EOF) { declaration_list(&ctype, C_GLOBAL, T_SEMI, T_EOF, &unused); // Skip any semicolons and right curly brackets if (Token.token == T_SEMI) scan(&Token); } } ================================================ FILE: 52_Pointers_pt2/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c void reject_token(struct token *t); int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, int type, struct symtable *ctype, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue); struct ASTnode *mkastleaf(int op, int type, struct symtable *ctype, struct symtable *sym, int intvalue); struct ASTnode *mkastunary(int op, int type, struct symtable *ctype, struct ASTnode *left, struct symtable *sym, int intvalue); void dumpAST(struct ASTnode *n, int label, int parentASTop); // 
gen.c int genlabel(void); int genAST(struct ASTnode *n, int iflabel, int looptoplabel, int loopendlabel, int parentASTop); void genpreamble(); void genpostamble(); void genfreeregs(int keepreg); void genglobsym(struct symtable *node); int genglobstr(char *strvalue); int genprimsize(int type); int genalign(int type, int offset, int direction); void genreturn(int reg, int id); // cg.c int cgprimsize(int type); int cgalign(int type, int offset, int direction); void cgtextseg(); void cgdataseg(); int alloc_register(void); void freeall_registers(int keepreg); void cgpreamble(); void cgpostamble(); void cgfuncpreamble(struct symtable *sym); void cgfuncpostamble(struct symtable *sym); int cgloadint(int value, int type); int cgloadglob(struct symtable *sym, int op); int cgloadlocal(struct symtable *sym, int op); int cgloadglobstr(int label); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdiv(int r1, int r2); int cgshlconst(int r, int val); int cgcall(struct symtable *sym, int numargs); void cgcopyarg(int r, int argposn); int cgstorglob(int r, struct symtable *sym); int cgstorlocal(int r, struct symtable *sym); void cgglobsym(struct symtable *node); void cgglobstr(int l, char *strvalue); int cgcompare_and_set(int ASTop, int r1, int r2); int cgcompare_and_jump(int ASTop, int r1, int r2, int label); void cglabel(int l); void cgjump(int l); int cgwiden(int r, int oldtype, int newtype); void cgreturn(int reg, struct symtable *sym); int cgaddress(struct symtable *sym); int cgderef(int r, int type); int cgstorderef(int r1, int r2, int type); int cgnegate(int r); int cginvert(int r); int cglognot(int r); int cglogor(int r1, int r2); int cglogand(int r1, int r2); int cgboolean(int r, int op, int label); int cgand(int r1, int r2); int cgor(int r1, int r2); int cgxor(int r1, int r2); int cgshl(int r1, int r2); int cgshr(int r1, int r2); void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel); void 
cgmove(int r1, int r2); // expr.c struct ASTnode *expression_list(int endtoken); struct ASTnode *binexpr(int ptp); // stmt.c struct ASTnode *compound_statement(int inswitch); // misc.c void match(int t, char *what); void semi(void); void lbrace(void); void rbrace(void); void lparen(void); void rparen(void); void ident(void); void comma(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node); struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn); struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn); struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int nelems); struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype); struct symtable *addstruct(char *name); struct symtable *addunion(char *name); struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int nelems); struct symtable *addenum(char *name, int class, int value); struct symtable *addtypedef(char *name, int type, struct symtable *ctype); struct symtable *findglob(char *s); struct symtable *findlocl(char *s); struct symtable *findsymbol(char *s); struct symtable *findmember(char *s); struct symtable *findstruct(char *s); struct symtable *findunion(char *s); struct symtable *findenumtype(char *s); struct symtable *findenumval(char *s); struct symtable *findtypedef(char *s); void clear_symtable(void); void freeloclsyms(void); void freestaticsyms(void); void dumptable(struct symtable *head, char *name, int indent); void dumpsymtables(void); // decl.c int parse_type(struct symtable **ctype, int *class); int parse_stars(int type); int parse_cast(struct symtable **ctype); int declaration_list(struct symtable **ctype, int class, int et1, int et2, struct 
ASTnode **gluetree); void global_declarations(void); // types.c int inttype(int type); int ptrtype(int type); int pointer_to(int type); int value_at(int type); int typesize(int type, struct symtable *ctype); struct ASTnode *modify_type(struct ASTnode *tree, int rtype, struct symtable *rctype, int op); // opt.c struct ASTnode *optimise(struct ASTnode *n); ================================================ FILE: 52_Pointers_pt2/defs.h ================================================ #include #include #include #include // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 enum { TEXTLEN = 512 // Length of identifiers in input }; // Commands and default filenames #define AOUT "a.out" #ifdef __NASM__ #define ASCMD "nasm -f elf64 -w-ptr -o " #define LDCMD "cc -no-pie -fno-plt -Wall -o " #else #define ASCMD "as -o " #define LDCMD "cc -o " #endif #define CPPCMD "cpp -nostdinc -isystem " // Token types enum { T_EOF, // Binary operators T_ASSIGN, T_ASPLUS, T_ASMINUS, T_ASSTAR, T_ASSLASH, T_QUESTION, T_LOGOR, T_LOGAND, T_OR, T_XOR, T_AMPER, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, T_LSHIFT, T_RSHIFT, T_PLUS, T_MINUS, T_STAR, T_SLASH, // Other operators T_INC, T_DEC, T_INVERT, T_LOGNOT, // Type keywords T_VOID, T_CHAR, T_INT, T_LONG, // Other keywords T_IF, T_ELSE, T_WHILE, T_FOR, T_RETURN, T_STRUCT, T_UNION, T_ENUM, T_TYPEDEF, T_EXTERN, T_BREAK, T_CONTINUE, T_SWITCH, T_CASE, T_DEFAULT, T_SIZEOF, T_STATIC, // Structural tokens T_INTLIT, T_STRLIT, T_SEMI, T_IDENT, T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, T_LBRACKET, T_RBRACKET, T_COMMA, T_DOT, T_ARROW, T_COLON }; // Token structure struct token { int token; // Token type, from the enum list above char *tokstr; // String version of the token int intvalue; // For T_INTLIT, the integer value }; // AST node types. 
The first few line up // with the related tokens enum { A_ASSIGN = 1, A_ASPLUS, A_ASMINUS, A_ASSTAR, A_ASSLASH, // 1 A_TERNARY, A_LOGOR, A_LOGAND, A_OR, A_XOR, A_AND, // 6 A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_LSHIFT, A_RSHIFT, // 12 A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, // 20 A_INTLIT, A_STRLIT, A_IDENT, A_GLUE, // 24 A_IF, A_WHILE, A_FUNCTION, A_WIDEN, A_RETURN, // 28 A_FUNCCALL, A_DEREF, A_ADDR, A_SCALE, // 33 A_PREINC, A_PREDEC, A_POSTINC, A_POSTDEC, // 37 A_NEGATE, A_INVERT, A_LOGNOT, A_TOBOOL, A_BREAK, // 41 A_CONTINUE, A_SWITCH, A_CASE, A_DEFAULT, A_CAST // 46 }; // Primitive types. The bottom 4 bits is an integer // value that represents the level of indirection, // e.g. 0= no pointer, 1= pointer, 2= pointer pointer etc. enum { P_NONE, P_VOID = 16, P_CHAR = 32, P_INT = 48, P_LONG = 64, P_STRUCT=80, P_UNION=96 }; // Structural types enum { S_VARIABLE, S_FUNCTION, S_ARRAY }; // Storage classes enum { C_GLOBAL = 1, // Globally visible symbol C_LOCAL, // Locally visible symbol C_PARAM, // Locally visible function parameter C_EXTERN, // External globally visible symbol C_STATIC, // Static symbol, visible in one file C_STRUCT, // A struct C_UNION, // A union C_MEMBER, // Member of a struct or union C_ENUMTYPE, // A named enumeration type C_ENUMVAL, // A named enumeration value C_TYPEDEF // A named typedef }; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol struct symtable *ctype; // If struct/union, ptr to that type int stype; // Structural type for the symbol int class; // Storage class for the symbol int size; // Total size in bytes of this symbol int nelems; // Functions: # params. 
Arrays: # elements #define st_endlabel st_posn // For functions, the end label int st_posn; // For locals, the negative offset // from the stack base pointer int *initlist; // List of initial values struct symtable *next; // Next symbol in one list struct symtable *member; // First member of a function, struct, }; // union or enum // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates struct symtable *ctype; // If struct/union, ptr to that type int rvalue; // True if the node is an rvalue struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; struct symtable *sym; // For many AST nodes, the pointer to // the symbol in the symbol table #define a_intvalue a_size // For A_INTLIT, the integer value int a_size; // For A_SCALE, the size to scale by }; enum { NOREG = -1, // Use NOREG when the AST generation // functions have no register to return NOLABEL = 0 // Use NOLABEL when we have no label to // pass to genAST() }; ================================================ FILE: 52_Pointers_pt2/expr.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of expressions // Copyright (c) 2019 Warren Toomey, GPL3 // expression_list: // | expression // | expression ',' expression_list // ; // Parse a list of zero or more comma-separated expressions and // return an AST composed of A_GLUE nodes with the left-hand child // being the sub-tree of previous expressions (or NULL) and the right-hand // child being the next expression. Each A_GLUE node will have size field // set to the number of expressions in the tree at this point. 
If no // expressions are parsed, NULL is returned struct ASTnode *expression_list(int endtoken) { struct ASTnode *tree = NULL; struct ASTnode *child = NULL; int exprcount = 0; // Loop until the end token while (Token.token != endtoken) { // Parse the next expression and increment the expression count child = binexpr(0); exprcount++; // Build an A_GLUE AST node with the previous tree as the left child // and the new expression as the right child. Store the expression count. tree = mkastnode(A_GLUE, P_NONE, NULL, tree, NULL, child, NULL, exprcount); // Stop when we reach the end token if (Token.token == endtoken) break; // Must have a ',' at this point match(T_COMMA, ","); } // Return the tree of expressions return (tree); } // Parse a function call and return its AST static struct ASTnode *funccall(void) { struct ASTnode *tree; struct symtable *funcptr; // Check that the identifier has been defined as a function, // then make a leaf node for it. if ((funcptr = findsymbol(Text)) == NULL || funcptr->stype != S_FUNCTION) { fatals("Undeclared function", Text); } // Get the '(' lparen(); // Parse the argument expression list tree = expression_list(T_RPAREN); // XXX Check type of each argument against the function's prototype // Build the function call AST node. Store the // function's return type as this node's type. 
// Also record the function's symbol-id tree = mkastunary(A_FUNCCALL, funcptr->type, funcptr->ctype, tree, funcptr, 0); // Get the ')' rparen(); return (tree); } // Parse the index into an array and return an AST tree for it static struct ASTnode *array_access(struct ASTnode *left) { struct ASTnode *right; // Check that the sub-tree is a pointer if (!ptrtype(left->type)) fatal("Not an array or pointer"); // Get the '[' scan(&Token); // Parse the following expression right = binexpr(0); // Get the ']' match(T_RBRACKET, "]"); // Ensure that this is of int type if (!inttype(right->type)) fatal("Array index is not of integer type"); // Make the left tree an rvalue left->rvalue = 1; // Scale the index by the size of the element's type right = modify_type(right, left->type, left->ctype, A_ADD); // Return an AST tree where the array's base has the offset added to it, // and dereference the element. Still an lvalue at this point. left = mkastnode(A_ADD, left->type, left->ctype, left, NULL, right, NULL, 0); left = mkastunary(A_DEREF, value_at(left->type), left->ctype, left, NULL, 0); return (left); } // Parse the member reference of a struct or union // and return an AST tree for it. If withpointer is true, // the access is through a pointer to the member. static struct ASTnode *member_access(struct ASTnode *left, int withpointer) { struct ASTnode *right; struct symtable *typeptr; struct symtable *m; // Check that the left AST tree is a pointer to struct or union if (withpointer && left->type != pointer_to(P_STRUCT) && left->type != pointer_to(P_UNION)) fatal("Expression is not a pointer to a struct/union"); // Or, check that the left AST tree is a struct or union. // If so, change it from an A_IDENT to an A_ADDR so that // we get the base address, not the value at this address. 
if (!withpointer) { if (left->type == P_STRUCT || left->type == P_UNION) left->op = A_ADDR; else fatal("Expression is not a struct/union"); } // Get the details of the composite type typeptr = left->ctype; // Skip the '.' or '->' token and get the member's name scan(&Token); ident(); // Find the matching member's name in the type // Die if we can't find it for (m = typeptr->member; m != NULL; m = m->next) if (!strcmp(m->name, Text)) break; if (m == NULL) fatals("No member found in struct/union: ", Text); // Make the left tree an rvalue left->rvalue = 1; // Build an A_INTLIT node with the offset right = mkastleaf(A_INTLIT, P_INT, NULL, NULL, m->st_posn); // Add the member's offset to the base of the struct/union // and dereference it. Still an lvalue at this point left = mkastnode(A_ADD, pointer_to(m->type), m->ctype, left, NULL, right, NULL, 0); left = mkastunary(A_DEREF, m->type, m->ctype, left, NULL, 0); return (left); } // Parse a parenthesised expression and // return an AST node representing it. static struct ASTnode *paren_expression(void) { struct ASTnode *n; int type = 0; struct symtable *ctype = NULL; // Beginning of a parenthesised expression, skip the '('. scan(&Token); // If the token after is a type identifier, this is a cast expression switch (Token.token) { case T_IDENT: // We have to see if the identifier matches a typedef. // If not, treat it as an expression. if (findtypedef(Text) == NULL) { n = binexpr(0); break; } case T_VOID: case T_CHAR: case T_INT: case T_LONG: case T_STRUCT: case T_UNION: case T_ENUM: // Get the type inside the parentheses type = parse_cast(&ctype); // Skip the closing ')' and then parse the following expression rparen(); default: n = binexpr(0); // Scan in the expression } // We now have at least an expression in n, and possibly a non-zero type // in type if there was a cast. Skip the closing ')' if there was no cast. 
if (type == 0) rparen(); else // Otherwise, make a unary AST node for the cast n = mkastunary(A_CAST, type, ctype, n, NULL, 0); return (n); } // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; struct symtable *enumptr; struct symtable *varptr; int id; int type = 0; int size, class; struct symtable *ctype; switch (Token.token) { case T_STATIC: case T_EXTERN: fatal("Compiler doesn't support static or extern local declarations"); case T_SIZEOF: // Skip the T_SIZEOF and ensure we have a left parenthesis scan(&Token); if (Token.token != T_LPAREN) fatal("Left parenthesis expected after sizeof"); scan(&Token); // Get the type inside the parentheses type = parse_stars(parse_type(&ctype, &class)); // Get the type's size size = typesize(type, ctype); rparen(); // Make a leaf node int literal with the size return (mkastleaf(A_INTLIT, P_INT, NULL, NULL, size)); case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. // Make it a P_CHAR if it's within the P_CHAR range if (Token.intvalue >= 0 && Token.intvalue < 256) n = mkastleaf(A_INTLIT, P_CHAR, NULL, NULL, Token.intvalue); else n = mkastleaf(A_INTLIT, P_INT, NULL, NULL, Token.intvalue); break; case T_STRLIT: // For a STRLIT token, generate the assembly for it. // Then make a leaf AST node for it. id is the string's label. id = genglobstr(Text); n = mkastleaf(A_STRLIT, pointer_to(P_CHAR), NULL, NULL, id); break; case T_IDENT: // If the identifier matches an enum value, // return an A_INTLIT node if ((enumptr = findenumval(Text)) != NULL) { n = mkastleaf(A_INTLIT, P_INT, NULL, NULL, enumptr->st_posn); break; } // See if this identifier exists as a symbol. For arrays, set rvalue to 1. 
if ((varptr = findsymbol(Text)) == NULL) fatals("Unknown variable or function", Text); switch (varptr->stype) { case S_VARIABLE: n = mkastleaf(A_IDENT, varptr->type, varptr->ctype, varptr, 0); break; case S_ARRAY: n = mkastleaf(A_ADDR, varptr->type, varptr->ctype, varptr, 0); n->rvalue = 1; break; case S_FUNCTION: // Function call, see if the next token is a left parenthesis scan(&Token); if (Token.token != T_LPAREN) fatals("Function name used without parentheses", Text); return (funccall()); default: fatals("Identifier not a scalar or array variable", Text); } break; case T_LPAREN: return (paren_expression()); default: fatals("Expecting a primary expression, got token", Token.tokstr); } // Scan in the next token and return the leaf node scan(&Token); return (n); } // Parse a postfix expression and return // an AST node representing it. The // identifier is already in Text. static struct ASTnode *postfix(void) { struct ASTnode *n; // Get the primary expression n = primary(); // Loop until there are no more postfix operators while (1) { switch (Token.token) { case T_LBRACKET: // An array reference n = array_access(n); break; case T_DOT: // Access into a struct or union n = member_access(n, 0); break; case T_ARROW: // Pointer access into a struct or union n = member_access(n, 1); break; case T_INC: // Post-increment: skip over the token if (n->rvalue == 1) fatal("Cannot ++ on rvalue"); scan(&Token); // Can't do it twice if (n->op == A_POSTINC || n->op == A_POSTDEC) fatal("Cannot ++ and/or -- more than once"); // and change the AST operation n->op = A_POSTINC; break; case T_DEC: // Post-decrement: skip over the token if (n->rvalue == 1) fatal("Cannot -- on rvalue"); scan(&Token); // Can't do it twice if (n->op == A_POSTINC || n->op == A_POSTDEC) fatal("Cannot ++ and/or -- more than once"); // and change the AST operation n->op = A_POSTDEC; break; default: return (n); } } return (NULL); // Keep -Wall happy } // Convert a binary operator token into a binary AST 
operation. // We rely on a 1:1 mapping from token to AST operation static int binastop(int tokentype) { if (tokentype > T_EOF && tokentype <= T_SLASH) return (tokentype); fatals("Syntax error, token", Tstring[tokentype]); return (0); // Keep -Wall happy } // Return true if a token is right-associative, // false otherwise. static int rightassoc(int tokentype) { if (tokentype >= T_ASSIGN && tokentype <= T_ASSLASH) return (1); return (0); } // Operator precedence for each token. Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 10, // T_EOF, T_ASSIGN, T_ASPLUS, 10, 10, 10, // T_ASMINUS, T_ASSTAR, T_ASSLASH, 15, // T_QUESTION, 20, 30, // T_LOGOR, T_LOGAND 40, 50, 60, // T_OR, T_XOR, T_AMPER 70, 70, // T_EQ, T_NE 80, 80, 80, 80, // T_LT, T_GT, T_LE, T_GE 90, 90, // T_LSHIFT, T_RSHIFT 100, 100, // T_PLUS, T_MINUS 110, 110 // T_STAR, T_SLASH }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec; if (tokentype > T_SLASH) fatals("Token with no precedence in op_precedence:", Tstring[tokentype]); prec = OpPrec[tokentype]; if (prec == 0) fatals("Syntax error, token", Tstring[tokentype]); return (prec); } // prefix_expression: postfix_expression // | '*' prefix_expression // | '&' prefix_expression // | '-' prefix_expression // | '++' prefix_expression // | '--' prefix_expression // ; // Parse a prefix expression and return // a sub-tree representing it. 
struct ASTnode *prefix(void) { struct ASTnode *tree; switch (Token.token) { case T_AMPER: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Ensure that it's an identifier if (tree->op != A_IDENT) fatal("& operator must be followed by an identifier"); // Prevent '&' being performed on an array if (tree->sym->stype == S_ARRAY) fatal("& operator cannot be performed on an array"); // Now change the operator to A_ADDR and the type to // a pointer to the original type tree->op = A_ADDR; tree->type = pointer_to(tree->type); break; case T_STAR: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's either another deref or an // identifier if (tree->op != A_IDENT && tree->op != A_DEREF) fatal("* operator must be followed by an identifier or *"); // Prepend an A_DEREF operation to the tree tree = mkastunary(A_DEREF, value_at(tree->type), tree->ctype, tree, NULL, 0); break; case T_MINUS: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_NEGATE operation to the tree and // make the child an rvalue. Because chars are unsigned, // also widen this if needed to int so that it's signed tree->rvalue = 1; if (tree->type == P_CHAR) tree->type = P_INT; tree = mkastunary(A_NEGATE, tree->type, tree->ctype, tree, NULL, 0); break; case T_INVERT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_INVERT operation to the tree and // make the child an rvalue. tree->rvalue = 1; tree = mkastunary(A_INVERT, tree->type, tree->ctype, tree, NULL, 0); break; case T_LOGNOT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_LOGNOT operation to the tree and // make the child an rvalue. 
tree->rvalue = 1; tree = mkastunary(A_LOGNOT, tree->type, tree->ctype, tree, NULL, 0); break; case T_INC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("++ operator must be followed by an identifier"); // Prepend an A_PREINC operation to the tree tree = mkastunary(A_PREINC, tree->type, tree->ctype, tree, NULL, 0); break; case T_DEC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("-- operator must be followed by an identifier"); // Prepend an A_PREDEC operation to the tree tree = mkastunary(A_PREDEC, tree->type, tree->ctype, tree, NULL, 0); break; default: tree = postfix(); } return (tree); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; struct ASTnode *ltemp, *rtemp; int ASTop; int tokentype; // Get the tree on the left. // Fetch the next token at the same time. 
left = prefix(); // If we hit one of several terminating tokens, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON || tokentype == T_RBRACE) { left->rvalue = 1; return (left); } // While the precedence of this token is more than that of the // previous token precedence, or it's right associative and // equal to the previous token's precedence while ((op_precedence(tokentype) > ptp) || (rightassoc(tokentype) && op_precedence(tokentype) == ptp)) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Determine the operation to be performed on the sub-trees ASTop = binastop(tokentype); switch (ASTop) { case A_TERNARY: // Ensure we have a ':' token, scan in the expression after it match(T_COLON, ":"); ltemp = binexpr(0); // Build and return the AST for this statement. Use the middle // expression's type as the return type. XXX We should also // consider the third expression's type. return (mkastnode (A_TERNARY, right->type, right->ctype, left, right, ltemp, NULL, 0)); case A_ASSIGN: // Assignment // Make the right tree into an rvalue right->rvalue = 1; // Ensure the right's type matches the left right = modify_type(right, left->type, left->ctype, 0); if (right == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree. However, switch // left and right around, so that the right expression's // code will be generated before the left expression ltemp = left; left = right; right = ltemp; break; default: // We are not doing a ternary or assignment, so both trees should // be rvalues. Convert both trees into rvalue if they are lvalue trees left->rvalue = 1; right->rvalue = 1; // Ensure the two types are compatible by trying // to modify each tree to match the other's type. 
ltemp = modify_type(left, right->type, right->ctype, ASTop); rtemp = modify_type(right, left->type, left->ctype, ASTop); if (ltemp == NULL && rtemp == NULL) fatal("Incompatible types in binary expression"); if (ltemp != NULL) left = ltemp; if (rtemp != NULL) right = rtemp; } // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. left = mkastnode(binastop(tokentype), left->type, left->ctype, left, NULL, right, NULL, 0); // Some operators produce an int result regardless of their operands switch (binastop(tokentype)) { case A_LOGOR: case A_LOGAND: case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: left->type = P_INT; } // Update the details of the current token. // If we hit a terminating token, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON || tokentype == T_RBRACE) { left->rvalue = 1; return (left); } } // Return the tree we have when the precedence // is the same or lower left->rvalue = 1; return (left); } ================================================ FILE: 52_Pointers_pt2/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number static int labelid = 1; int genlabel(void) { return (labelid++); } // Generate the code for an IF statement // and an optional ELSE clause. static int genIF(struct ASTnode *n, int looptoplabel, int loopendlabel) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. // When there is no ELSE clause, Lfalse _is_ // the ending label! Lfalse = genlabel(); if (n->right) Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. 
genAST(n->left, Lfalse, NOLABEL, NOLABEL, n->op); genfreeregs(NOREG); // Generate the true compound statement genAST(n->mid, NOLABEL, looptoplabel, loopendlabel, n->op); genfreeregs(NOREG); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); genfreeregs(NOREG); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = genlabel(); Lend = genlabel(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. genAST(n->left, Lend, Lstart, Lend, n->op); genfreeregs(NOREG); // Generate the compound statement for the body genAST(n->right, NOLABEL, Lstart, Lend, n->op); genfreeregs(NOREG); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Generate the code for a SWITCH statement static int genSWITCH(struct ASTnode *n) { int *caseval, *caselabel; int Ljumptop, Lend; int i, reg, defaultlabel = 0, casecount = 0; struct ASTnode *c; // Create arrays for the case values and associated labels. // Ensure that we have at least one position in each array. caseval = (int *) malloc((n->a_intvalue + 1) * sizeof(int)); caselabel = (int *) malloc((n->a_intvalue + 1) * sizeof(int)); // Generate labels for the top of the jump table, and the // end of the switch statement. Set a default label for // the end of the switch, in case we don't have a default. 
Ljumptop = genlabel(); Lend = genlabel(); defaultlabel = Lend; // Output the code to calculate the switch condition reg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, 0); cgjump(Ljumptop); genfreeregs(reg); // Walk the right-child linked list to // generate the code for each case for (i = 0, c = n->right; c != NULL; i++, c = c->right) { // Get a label for this case. Store it // and the case value in the arrays. // Record if it is the default case. caselabel[i] = genlabel(); caseval[i] = c->a_intvalue; cglabel(caselabel[i]); if (c->op == A_DEFAULT) defaultlabel = caselabel[i]; else casecount++; // Generate the case code. Pass in the end label for the breaks. // If case has no body, we will fall into the following body. if (c->left) genAST(c->left, NOLABEL, NOLABEL, Lend, 0); genfreeregs(NOREG); } // Ensure the last case jumps past the switch table cgjump(Lend); // Now output the switch table and the end label. cgswitch(reg, casecount, Ljumptop, caselabel, caseval, defaultlabel); cglabel(Lend); return (NOREG); } // Generate the code to copy the arguments of a // function call to its parameters, then call the // function itself. Return the register that holds // the function's return value. static int gen_funccall(struct ASTnode *n) { struct ASTnode *gluetree = n->left; int reg; int numargs = 0; // If there is a list of arguments, walk this list // from the last argument (right-hand child) to the // first while (gluetree) { // Calculate the expression's value reg = genAST(gluetree->right, NOLABEL, NOLABEL, NOLABEL, gluetree->op); // Copy this into the n'th function parameter: size is 1, 2, 3, ... 
cgcopyarg(reg, gluetree->a_size); // Keep the first (highest) number of arguments if (numargs == 0) numargs = gluetree->a_size; genfreeregs(NOREG); gluetree = gluetree->left; } // Call the function, clean up the stack (based on numargs), // and return its result return (cgcall(n->sym, numargs)); } // Generate code for a ternary expression static int gen_ternary(struct ASTnode *n) { int Lfalse, Lend; int reg, expreg; // Generate two labels: one for the // false expression, and one for the // end of the overall expression Lfalse = genlabel(); Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. genAST(n->left, Lfalse, NOLABEL, NOLABEL, n->op); genfreeregs(NOREG); // Get a register to hold the result of the two expressions reg = alloc_register(); // Generate the true expression and the false label. // Move the expression result into the known register. expreg = genAST(n->mid, NOLABEL, NOLABEL, NOLABEL, n->op); cgmove(expreg, reg); // Don't free the register holding the result, though! genfreeregs(reg); cgjump(Lend); cglabel(Lfalse); // Generate the false expression and the end label. // Move the expression result into the known register. expreg = genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); cgmove(expreg, reg); // Don't free the register holding the result, though! genfreeregs(reg); cglabel(Lend); return (reg); } // Given an AST, an optional label, and the AST op // of the parent, generate assembly code recursively. // Return the register id with the tree's final value. 
int genAST(struct ASTnode *n, int iflabel, int looptoplabel, int loopendlabel, int parentASTop) { int leftreg, rightreg; // We have some specific AST node handling at the top // so that we don't evaluate the child sub-trees immediately switch (n->op) { case A_IF: return (genIF(n, looptoplabel, loopendlabel)); case A_WHILE: return (genWHILE(n)); case A_SWITCH: return (genSWITCH(n)); case A_FUNCCALL: return (gen_funccall(n)); case A_TERNARY: return (gen_ternary(n)); case A_GLUE: // Do each child statement, and free the // registers after each child if (n->left != NULL) genAST(n->left, iflabel, looptoplabel, loopendlabel, n->op); genfreeregs(NOREG); if (n->right != NULL) genAST(n->right, iflabel, looptoplabel, loopendlabel, n->op); genfreeregs(NOREG); return (NOREG); case A_FUNCTION: // Generate the function's preamble before the code // in the child sub-tree cgfuncpreamble(n->sym); genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op); cgfuncpostamble(n->sym); return (NOREG); } // General AST node handling below // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op); if (n->right) rightreg = genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdiv(leftreg, rightreg)); case A_AND: return (cgand(leftreg, rightreg)); case A_OR: return (cgor(leftreg, rightreg)); case A_XOR: return (cgxor(leftreg, rightreg)); case A_LSHIFT: return (cgshl(leftreg, rightreg)); case A_RSHIFT: return (cgshr(leftreg, rightreg)); case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: // If the parent AST node is an A_IF, A_WHILE or A_TERNARY, // generate a compare followed by a jump. Otherwise, compare // registers and set one to 1 or 0 based on the comparison. 
if (parentASTop == A_IF || parentASTop == A_WHILE || parentASTop == A_TERNARY) return (cgcompare_and_jump(n->op, leftreg, rightreg, iflabel)); else return (cgcompare_and_set(n->op, leftreg, rightreg)); case A_INTLIT: return (cgloadint(n->a_intvalue, n->type)); case A_STRLIT: return (cgloadglobstr(n->a_intvalue)); case A_IDENT: // Load our value if we are an rvalue // or we are being dereferenced if (n->rvalue || parentASTop == A_DEREF) { if (n->sym->class == C_GLOBAL || n->sym->class == C_STATIC) { return (cgloadglob(n->sym, n->op)); } else { return (cgloadlocal(n->sym, n->op)); } } else return (NOREG); case A_ASPLUS: case A_ASMINUS: case A_ASSTAR: case A_ASSLASH: case A_ASSIGN: // For the '+=' and friends operators, generate suitable code // and get the register with the result. Then take the left child, // make it the right child so that we can fall into the assignment code. switch (n->op) { case A_ASPLUS: leftreg = cgadd(leftreg, rightreg); n->right = n->left; break; case A_ASMINUS: leftreg = cgsub(leftreg, rightreg); n->right = n->left; break; case A_ASSTAR: leftreg = cgmul(leftreg, rightreg); n->right = n->left; break; case A_ASSLASH: leftreg = cgdiv(leftreg, rightreg); n->right = n->left; break; } // Now into the assignment code // Are we assigning to an identifier or through a pointer? 
switch (n->right->op) { case A_IDENT: if (n->right->sym->class == C_GLOBAL || n->right->sym->class == C_STATIC) return (cgstorglob(leftreg, n->right->sym)); else return (cgstorlocal(leftreg, n->right->sym)); case A_DEREF: return (cgstorderef(leftreg, rightreg, n->right->type)); default: fatald("Can't A_ASSIGN in genAST(), op", n->op); } case A_WIDEN: // Widen the child's type to the parent's type return (cgwiden(leftreg, n->left->type, n->type)); case A_RETURN: cgreturn(leftreg, Functionid); return (NOREG); case A_ADDR: return (cgaddress(n->sym)); case A_DEREF: // If we are an rvalue, dereference to get the value we point at, // otherwise leave it for A_ASSIGN to store through the pointer if (n->rvalue) return (cgderef(leftreg, n->left->type)); else return (leftreg); case A_SCALE: // Small optimisation: use shift if the // scale value is a known power of two switch (n->a_size) { case 2: return (cgshlconst(leftreg, 1)); case 4: return (cgshlconst(leftreg, 2)); case 8: return (cgshlconst(leftreg, 3)); default: // Load a register with the size and // multiply the leftreg by this size rightreg = cgloadint(n->a_size, P_INT); return (cgmul(leftreg, rightreg)); } case A_POSTINC: case A_POSTDEC: // Load and decrement the variable's value into a register // and post increment/decrement it if (n->sym->class == C_GLOBAL || n->sym->class == C_STATIC) return (cgloadglob(n->sym, n->op)); else return (cgloadlocal(n->sym, n->op)); case A_PREINC: case A_PREDEC: // Load and decrement the variable's value into a register // and pre increment/decrement it if (n->left->sym->class == C_GLOBAL || n->left->sym->class == C_STATIC) return (cgloadglob(n->left->sym, n->op)); else return (cgloadlocal(n->left->sym, n->op)); case A_NEGATE: return (cgnegate(leftreg)); case A_INVERT: return (cginvert(leftreg)); case A_LOGNOT: return (cglognot(leftreg)); case A_LOGOR: return (cglogor(leftreg, rightreg)); case A_LOGAND: return (cglogand(leftreg, rightreg)); case A_TOBOOL: // If the parent AST node 
is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, set the register // to 0 or 1 based on it's zeroeness or non-zeroeness return (cgboolean(leftreg, parentASTop, iflabel)); case A_BREAK: cgjump(loopendlabel); return (NOREG); case A_CONTINUE: cgjump(looptoplabel); return (NOREG); case A_CAST: return (leftreg); // Not much to do default: fatald("Unknown AST operator", n->op); } return (NOREG); // Keep -Wall happy } void genpreamble() { cgpreamble(); } void genpostamble() { cgpostamble(); } void genfreeregs(int keepreg) { freeall_registers(keepreg); } void genglobsym(struct symtable *node) { cgglobsym(node); } int genglobstr(char *strvalue) { int l = genlabel(); cgglobstr(l, strvalue); return (l); } int genprimsize(int type) { return (cgprimsize(type)); } int genalign(int type, int offset, int direction) { return (cgalign(type, offset, direction)); } ================================================ FILE: 52_Pointers_pt2/include/ctype.h ================================================ #ifndef _CTYPE_H_ # define _CTYPE_H_ int isalnum(int c); int isalpha(int c); int iscntrl(int c); int isdigit(int c); int isgraph(int c); int islower(int c); int isprint(int c); int ispunct(int c); int isspace(int c); int isupper(int c); int isxdigit(int c); int isascii(int c); int isblank(int c); #endif // _CTYPE_H_ ================================================ FILE: 52_Pointers_pt2/include/errno.h ================================================ #ifndef _ERRNO_H_ # define _ERRNO_H_ #endif // _ERRNO_H_ ================================================ FILE: 52_Pointers_pt2/include/fcntl.h ================================================ #ifndef _FCNTL_H_ # define _FCNTL_H_ #define O_RDONLY 00 #define O_WRONLY 01 #define O_RDWR 02 int open(char *pathname, int flags); #endif // _FCNTL_H_ ================================================ FILE: 52_Pointers_pt2/include/stddef.h ================================================ #ifndef _STDDEF_H_ # define _STDDEF_H_ 
#ifndef NULL # define NULL (void *)0 #endif typedef long size_t; #endif //_STDDEF_H_ ================================================ FILE: 52_Pointers_pt2/include/stdio.h ================================================ #ifndef _STDIO_H_ # define _STDIO_H_ #include #ifndef NULL # define NULL (void *)0 #endif // This FILE definition will do for now typedef char * FILE; FILE *fopen(char *pathname, char *mode); size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream); size_t fwrite(void *ptr, size_t size, size_t nmemb, FILE *stream); int fclose(FILE *stream); int printf(char *format); int fprintf(FILE *stream, char *format); int fgetc(FILE *stream); int fputc(int c, FILE *stream); int fputs(char *s, FILE *stream); int putc(int c, FILE *stream); int putchar(int c); int puts(char *s); extern FILE *stdin; extern FILE *stdout; extern FILE *stderr; #endif // _STDIO_H_ ================================================ FILE: 52_Pointers_pt2/include/stdlib.h ================================================ #ifndef _STDLIB_H_ # define _STDLIB_H_ void exit(int status); void _Exit(int status); void *malloc(int size); void free(void *ptr); void *calloc(int nmemb, int size); void *realloc(void *ptr, int size); #endif // _STDLIB_H_ ================================================ FILE: 52_Pointers_pt2/include/string.h ================================================ #ifndef _STRING_H_ # define _STRING_H_ char *strdup(char *s); char *strchr(char *s, int c); char *strrchr(char *s, int c); int strcmp(char *s1, char *s2); int strncmp(char *s1, char *s2, size_t n); #endif // _STRING_H_ ================================================ FILE: 52_Pointers_pt2/include/unistd.h ================================================ #ifndef _UNISTD_H_ # define _UNISTD_H_ void _exit(int status); int unlink(char *pathname); #endif // _UNISTD_H_ ================================================ FILE: 52_Pointers_pt2/main.c ================================================ #include "defs.h" 
#define extern_ #include "data.h" #undef extern_ #include "decl.h" #include #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Given a string with a '.' and at least a 1-character suffix // after the '.', change the suffix to be the given character. // Return the new string or NULL if the original string could // not be modified char *alter_suffix(char *str, char suffix) { char *posn; char *newstr; // Clone the string if ((newstr = strdup(str)) == NULL) return (NULL); // Find the '.' if ((posn = strrchr(newstr, '.')) == NULL) return (NULL); // Ensure there is a suffix posn++; if (*posn == '\0') return (NULL); // Change the suffix and NUL-terminate the string *posn = suffix; posn++; *posn = '\0'; return (newstr); } // Given an input filename, compile that file // down to assembly code. Return the new file's name static char *do_compile(char *filename) { char cmd[TEXTLEN]; // Change the input file's suffix to .s Outfilename = alter_suffix(filename, 's'); if (Outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .c on the end\n", filename); exit(1); } // Generate the pre-processor command snprintf(cmd, TEXTLEN, "%s %s %s", CPPCMD, INCDIR, filename); // Open up the pre-processor pipe if ((Infile = popen(cmd, "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", filename, strerror(errno)); exit(1); } Infilename = filename; // Create the output file if ((Outfile = fopen(Outfilename, "w")) == NULL) { fprintf(stderr, "Unable to create %s: %s\n", Outfilename, strerror(errno)); exit(1); } Line = 1; // Reset the scanner Linestart = 1; Putback = '\n'; clear_symtable(); // Clear the symbol table if (O_verbose) printf("compiling %s\n", filename); scan(&Token); // Get the first token from the input Peektoken.token = 0; // and set there is no lookahead token genpreamble(); // Output the preamble global_declarations(); // Parse the global declarations genpostamble(); // Output the postamble fclose(Outfile); // Close 
the output file // Dump the symbol table if requested if (O_dumpsym) { printf("Symbols for %s\n", filename); dumpsymtables(); fprintf(stdout, "\n\n"); } freestaticsyms(); // Free any static symbols in the file return (Outfilename); } // Given an input filename, assemble that file // down to object code. Return the object filename char *do_assemble(char *filename) { char cmd[TEXTLEN]; int err; char *outfilename = alter_suffix(filename, 'o'); if (outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .s on the end\n", filename); exit(1); } // Build the assembly command and run it snprintf(cmd, TEXTLEN, "%s %s %s", ASCMD, outfilename, filename); if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Assembly of %s failed\n", filename); exit(1); } return (outfilename); } // Given a list of object files and an output filename, // link all of the object filenames together. void do_link(char *outfilename, char *objlist[]) { int cnt, size = TEXTLEN; char cmd[TEXTLEN], *cptr; int err; // Start with the linker command and the output file cptr = cmd; cnt = snprintf(cptr, size, "%s %s ", LDCMD, outfilename); cptr += cnt; size -= cnt; // Now append each object file while (*objlist != NULL) { cnt = snprintf(cptr, size, "%s ", *objlist); cptr += cnt; size -= cnt; objlist++; } if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Linking failed\n"); exit(1); } } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s [-vcSTM] [-o outfile] file [file ...]\n", prog); fprintf(stderr, " -v give verbose output of the compilation stages\n"); fprintf(stderr, " -c generate object files but don't link them\n"); fprintf(stderr, " -S generate assembly files but don't link them\n"); fprintf(stderr, " -T dump the AST trees for each input file\n"); fprintf(stderr, " -M dump the symbol table for each input file\n"); fprintf(stderr, " -o outfile, produce the outfile 
executable file\n"); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. enum { MAXOBJ = 100 }; int main(int argc, char *argv[]) { char *outfilename = AOUT; char *asmfile, *objfile; char *objlist[MAXOBJ]; int i, objcnt = 0; // Initialise our variables O_dumpAST = 0; O_dumpsym = 0; O_keepasm = 0; O_assemble = 0; O_verbose = 0; O_dolink = 1; // Scan for command-line options for (i = 1; i < argc; i++) { // No leading '-', stop scanning for options if (*argv[i] != '-') break; // For each option in this argument for (int j = 1; (*argv[i] == '-') && argv[i][j]; j++) { switch (argv[i][j]) { case 'o': outfilename = argv[++i]; // Save & skip to next argument break; case 'T': O_dumpAST = 1; break; case 'M': O_dumpsym = 1; break; case 'c': O_assemble = 1; O_keepasm = 0; O_dolink = 0; break; case 'S': O_keepasm = 1; O_assemble = 0; O_dolink = 0; break; case 'v': O_verbose = 1; break; default: usage(argv[0]); } } } // Ensure we have at lease one input file argument if (i >= argc) usage(argv[0]); // Work on each input file in turn while (i < argc) { asmfile = do_compile(argv[i]); // Compile the source file if (O_dolink || O_assemble) { objfile = do_assemble(asmfile); // Assemble it to object forma if (objcnt == (MAXOBJ - 2)) { fprintf(stderr, "Too many object files for the compiler to handle\n"); exit(1); } objlist[objcnt++] = objfile; // Add the object file's name objlist[objcnt] = NULL; // to the list of object files } if (!O_keepasm) // Remove the assembly file if unlink(asmfile); // we don't need to keep it i++; } // Now link all the object files together if (O_dolink) { do_link(outfilename, objlist); // If we don't need to keep the object // files, then remove them if (!O_assemble) { for (i = 0; objlist[i] != NULL; i++) unlink(objlist[i]); } } return (0); } ================================================ FILE: 52_Pointers_pt2/misc.c 
================================================ #include "defs.h" #include "data.h" #include "decl.h" #include #include // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current token is t, // and fetch the next token. Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Match a comma and fetch the next token void comma(void) { match(T_COMMA, "comma"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d of %s\n", s, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d of %s\n", s1, s2, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d of %s\n", s, d, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d of %s\n", s, c, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } ================================================ FILE: 52_Pointers_pt2/opt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST Tree Optimisation Code // Copyright (c) 2019 Warren Toomey, GPL3 // Fold an AST tree with a binary operator // and two A_INTLIT children. 
Return either // the original tree or a new leaf node. static struct ASTnode *fold2(struct ASTnode *n) { int val, leftval, rightval; // Get the values from each child leftval = n->left->a_intvalue; rightval = n->right->a_intvalue; // Perform some of the binary operations. // For any AST op we can't do, return // the original tree. switch (n->op) { case A_ADD: val = leftval + rightval; break; case A_SUBTRACT: val = leftval - rightval; break; case A_MULTIPLY: val = leftval * rightval; break; case A_DIVIDE: // Don't try to divide by zero. if (rightval == 0) return (n); val = leftval / rightval; break; default: return (n); } // Return a leaf node with the new value return (mkastleaf(A_INTLIT, n->type, NULL, NULL, val)); } // Fold an AST tree with a unary operator // and one INTLIT children. Return either // the original tree or a new leaf node. static struct ASTnode *fold1(struct ASTnode *n) { int val; // Get the child value. Do the // operation if recognised. // Return the new leaf node. val = n->left->a_intvalue; switch (n->op) { case A_WIDEN: break; case A_INVERT: val = ~val; break; case A_LOGNOT: val = !val; break; default: return (n); } // Return a leaf node with the new value return (mkastleaf(A_INTLIT, n->type, NULL, NULL, val)); } // Attempt to do constant folding on // the AST tree with the root node n static struct ASTnode *fold(struct ASTnode *n) { if (n == NULL) return (NULL); // Fold on the left child, then // do the same on the right child n->left = fold(n->left); n->right = fold(n->right); // If both children are A_INTLITs, do a fold2() if (n->left && n->left->op == A_INTLIT) { if (n->right && n->right->op == A_INTLIT) n = fold2(n); else // If only the left is A_INTLIT, do a fold1() n = fold1(n); } // Return the possibly modified tree return (n); } // Optimise an AST tree by // constant folding in all sub-trees struct ASTnode *optimise(struct ASTnode *n) { n = fold(n); return (n); } ================================================ FILE: 
52_Pointers_pt2/scan.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { int i; for (i = 0; s[i] != '\0'; i++) if (s[i] == (char) c) return (i); return (-1); } // Get the next character from the input file. static int next(void) { int c, l; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return (c); } c = fgetc(Infile); // Read from input file while (Linestart && c == '#') { // We've hit a pre-processor statement Linestart = 0; // No longer at the start of the line scan(&Token); // Get the line number into l if (Token.token != T_INTLIT) fatals("Expecting pre-processor line number, got:", Text); l = Token.intvalue; scan(&Token); // Get the filename in Text if (Token.token != T_STRLIT) fatals("Expecting pre-processor file name, got:", Text); if (Text[0] != '<') { // If this is a real filename if (strcmp(Text, Infilename)) // and not the one we have now Infilename = strdup(Text); // save it. Then update the line num Line = l; } while ((c = fgetc(Infile)) != '\n'); // Skip to the end of the line c = fgetc(Infile); // and get the next character Linestart = 1; // Now back at the start of the line } Linestart = 0; // No longer at the start of the line if ('\n' == c) { Line++; // Increment line count Linestart = 1; // Now back at the start of the line } return (c); } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. 
static int skip(void) {
  int ch;

  // Keep reading until the character is not
  // a space, tab, newline, CR or form feed
  do {
    ch = next();
  } while (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r'
           || ch == '\f');
  return (ch);
}

// Read in a hexadecimal constant from the input.
// Called after a "\x" has been seen; the value must
// have at least one digit and fit in 0 .. 255.
static int hexchar(void) {
  int ch, digit;
  int value = 0;
  int seen = 0;			// Set once any hex digit has been read

  // Accumulate hex digits for as long as they keep coming
  for (ch = next(); isxdigit(ch); ch = next()) {
    // Convert from char to int value
    digit = chrpos("0123456789abcdef", tolower(ch));
    value = value * 16 + digit;
    seen = 1;
  }

  // The character that ended the run isn't ours, put it back
  putback(ch);

  if (!seen)
    fatal("missing digits after '\\x'");
  if (value > 255)
    fatal("value out of range after '\\x'");
  return value;
}

// Return the next character from a character
// or string literal, translating any escape
// sequence that starts with a backslash
static int scanch(void) {
  int ndigits, ch, octval;

  // Anything other than a backslash stands for itself
  ch = next();
  if (ch != '\\')
    return (ch);

  // Interpret the character after the backslash
  ch = next();
  switch (ch) {
    case 'a':
      return ('\a');
    case 'b':
      return ('\b');
    case 'f':
      return ('\f');
    case 'n':
      return ('\n');
    case 'r':
      return ('\r');
    case 't':
      return ('\t');
    case 'v':
      return ('\v');
    case '\\':
      return ('\\');
    case '"':
      return ('"');
    case '\'':
      return ('\'');

      // Octal constant: gather digits 0..7 into octval,
      // counting them in ndigits. At most three digits
      // are used; whatever character stopped the scan
      // is put back.
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
      ndigits = 0;
      octval = 0;
      while (isdigit(ch) && ch < '8') {
        if (++ndigits > 3)
          break;
        octval = octval * 8 + (ch - '0');
        ch = next();
      }
      putback(ch);
      return (octval);
    case 'x':
      return hexchar();
    default:
      fatalc("unknown escape sequence", ch);
  }
  return (ch);
}

// Scan and return an integer literal
// value from the input file.
static int scanint(int c) { int k, val = 0, radix = 10; // Assume the radix is 10, but if it starts with 0 if (c == '0') { // and the next character is 'x', it's radix 16 if ((c = next()) == 'x') { radix = 16; c = next(); } else // Otherwise, it's radix 8 radix = 8; } // Convert each character into an int value while ((k = chrpos("0123456789abcdef", tolower(c))) >= 0) { if (k >= radix) fatalc("invalid digit in integer literal", c); val = val * radix + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan in a string literal from the input file, // and store it in buf[]. Return the length of // the string. static int scanstr(char *buf) { int i, c; // Loop while we have enough buffer space for (i = 0; i < TEXTLEN - 1; i++) { // Get the next char and append to buf // Return when we hit the ending double quote if ((c = scanch()) == '"') { buf[i] = 0; return (i); } buf[i] = c; } // Ran out of buf[] space fatal("String literal too long"); return (0); } // Scan an identifier from the input file and // store it in buf[]. Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { fatal("Identifier too long"); } else if (i < lim - 1) { buf[i++] = c; } c = next(); } // We hit a non-valid character, put it back. // NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. 
static int keyword(char *s) { switch (*s) { case 'b': if (!strcmp(s, "break")) return (T_BREAK); break; case 'c': if (!strcmp(s, "case")) return (T_CASE); if (!strcmp(s, "char")) return (T_CHAR); if (!strcmp(s, "continue")) return (T_CONTINUE); break; case 'd': if (!strcmp(s, "default")) return (T_DEFAULT); break; case 'e': if (!strcmp(s, "else")) return (T_ELSE); if (!strcmp(s, "enum")) return (T_ENUM); if (!strcmp(s, "extern")) return (T_EXTERN); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'l': if (!strcmp(s, "long")) return (T_LONG); break; case 'r': if (!strcmp(s, "return")) return (T_RETURN); break; case 's': if (!strcmp(s, "sizeof")) return (T_SIZEOF); if (!strcmp(s, "static")) return (T_STATIC); if (!strcmp(s, "struct")) return (T_STRUCT); if (!strcmp(s, "switch")) return (T_SWITCH); break; case 't': if (!strcmp(s, "typedef")) return (T_TYPEDEF); break; case 'u': if (!strcmp(s, "union")) return (T_UNION); break; case 'v': if (!strcmp(s, "void")) return (T_VOID); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; } return (0); } // List of token strings, for debugging purposes char *Tstring[] = { "EOF", "=", "+=", "-=", "*=", "/=", "?", "||", "&&", "|", "^", "&", "==", "!=", ",", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", "default", "sizeof", "static", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":" }; // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. 
int scan(struct token *t) { int c, tokentype; // If we have a lookahead token, return this token if (Peektoken.token != 0) { t->token = Peektoken.token; t->tokstr = Peektoken.tokstr; t->intvalue = Peektoken.intvalue; Peektoken.token = 0; return (1); } // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': if ((c = next()) == '+') { t->token = T_INC; } else if (c == '=') { t->token = T_ASPLUS; } else { putback(c); t->token = T_PLUS; } break; case '-': if ((c = next()) == '-') { t->token = T_DEC; } else if (c == '>') { t->token = T_ARROW; } else if (c == '=') { t->token = T_ASMINUS; } else if (isdigit(c)) { // Negative int literal t->intvalue = -scanint(c); t->token = T_INTLIT; } else { putback(c); t->token = T_MINUS; } break; case '*': if ((c = next()) == '=') { t->token = T_ASSTAR; } else { putback(c); t->token = T_STAR; } break; case '/': if ((c = next()) == '=') { t->token = T_ASSLASH; } else { putback(c); t->token = T_SLASH; } break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case '[': t->token = T_LBRACKET; break; case ']': t->token = T_RBRACKET; break; case '~': t->token = T_INVERT; break; case '^': t->token = T_XOR; break; case ',': t->token = T_COMMA; break; case '.': t->token = T_DOT; break; case ':': t->token = T_COLON; break; case '?': t->token = T_QUESTION; break; case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { putback(c); t->token = T_LOGNOT; } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else if (c == '<') { t->token = T_LSHIFT; } else { putback(c); t->token = T_LT; } break; case '>': if ((c = next()) == '=') { t->token = T_GE; } else if (c == '>') { t->token = T_RSHIFT; } else { 
putback(c); t->token = T_GT; } break; case '&': if ((c = next()) == '&') { t->token = T_LOGAND; } else { putback(c); t->token = T_AMPER; } break; case '|': if ((c = next()) == '|') { t->token = T_LOGOR; } else { putback(c); t->token = T_OR; } break; case '\'': // If it's a quote, scan in the // literal character value and // the trailing quote t->intvalue = scanch(); t->token = T_INTLIT; if (next() != '\'') fatal("Expected '\\'' at end of char literal"); break; case '"': // Scan in a literal string scanstr(Text); t->token = T_STRLIT; break; default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if ((tokentype = keyword(Text)) != 0) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token t->tokstr = Tstring[t->token]; return (1); } ================================================ FILE: 52_Pointers_pt2/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // Prototypes static struct ASTnode *single_statement(void); // compound_statement: // empty, i.e. 
no statement // | statement // | statement statements // ; // // statement: declaration // | expression_statement // | function_call // | if_statement // | while_statement // | for_statement // | return_statement // ; // if_statement: if_head // | if_head 'else' statement // ; // // if_head: 'if' '(' true_false_expression ')' statement ; // // Parse an IF statement including any // optional ELSE clause and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST->ctype, condAST, NULL, 0); rparen(); // Get the AST for the statement trueAST = single_statement(); // If we have an 'else', skip it // and get the AST for the statement if (Token.token == T_ELSE) { scan(&Token); falseAST = single_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, P_NONE, NULL, condAST, trueAST, falseAST, NULL, 0)); } // while_statement: 'while' '(' true_false_expression ')' statement ; // // Parse a WHILE statement and return its AST static struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST->ctype, condAST, NULL, 0); rparen(); // Get the AST for the statement. 
// Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Build and return the AST for this statement return (mkastnode(A_WHILE, P_NONE, NULL, condAST, NULL, bodyAST, NULL, 0)); } // for_statement: 'for' '(' expression_list ';' // true_false_expression ';' // expression_list ')' statement ; // // Parse a FOR statement and return its AST static struct ASTnode *for_statement(void) { struct ASTnode *condAST, *bodyAST; struct ASTnode *preopAST, *postopAST; struct ASTnode *tree; // Ensure we have 'for' '(' match(T_FOR, "for"); lparen(); // Get the pre_op expression and the ';' preopAST = expression_list(T_SEMI); semi(); // Get the condition and the ';'. // Force a non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST->ctype, condAST, NULL, 0); semi(); // Get the post_op expression and the ')' postopAST = expression_list(T_RPAREN); rparen(); // Get the statement which is the body // Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Glue the statement and the postop tree tree = mkastnode(A_GLUE, P_NONE, NULL, bodyAST, NULL, postopAST, NULL, 0); // Make a WHILE loop with the condition and this new body tree = mkastnode(A_WHILE, P_NONE, NULL, condAST, NULL, tree, NULL, 0); // And glue the preop tree to the A_WHILE tree return (mkastnode(A_GLUE, P_NONE, NULL, preopAST, NULL, tree, NULL, 0)); } // return_statement: 'return' '(' expression ')' ; // // Parse a return statement and return its AST static struct ASTnode *return_statement(void) { struct ASTnode *tree; // Can't return a value if function returns P_VOID if (Functionid->type == P_VOID) fatal("Can't return from a void function"); // Ensure we have 'return' '(' match(T_RETURN, "return"); lparen(); // Parse the following expression tree = binexpr(0); // Ensure this is compatible with the function's 
type tree = modify_type(tree, Functionid->type, Functionid->ctype, 0); if (tree == NULL) fatal("Incompatible type to return"); // Add on the A_RETURN node tree = mkastunary(A_RETURN, P_NONE, NULL, tree, NULL, 0); // Get the ')' and ';' rparen(); semi(); return (tree); } // break_statement: 'break' ; // // Parse a break statement and return its AST static struct ASTnode *break_statement(void) { if (Looplevel == 0 && Switchlevel == 0) fatal("no loop or switch to break out from"); scan(&Token); semi(); return (mkastleaf(A_BREAK, P_NONE, NULL, NULL, 0)); } // continue_statement: 'continue' ; // // Parse a continue statement and return its AST static struct ASTnode *continue_statement(void) { if (Looplevel == 0) fatal("no loop to continue to"); scan(&Token); semi(); return (mkastleaf(A_CONTINUE, P_NONE, NULL, NULL, 0)); } // Parse a switch statement and return its AST static struct ASTnode *switch_statement(void) { struct ASTnode *left, *body, *n, *c; struct ASTnode *casetree = NULL, *casetail; int inloop = 1, casecount = 0; int seendefault = 0; int ASTop, casevalue; // Skip the 'switch' and '(' scan(&Token); lparen(); // Get the switch expression, the ')' and the '{' left = binexpr(0); rparen(); lbrace(); // Ensure that this is of int type if (!inttype(left->type)) fatal("Switch expression is not of integer type"); // Build an A_SWITCH subtree with the expression as // the child n = mkastunary(A_SWITCH, P_NONE, NULL, left, NULL, 0); // Now parse the cases Switchlevel++; while (inloop) { switch (Token.token) { // Leave the loop when we hit a '}' case T_RBRACE: if (casecount == 0) fatal("No cases in switch"); inloop = 0; break; case T_CASE: case T_DEFAULT: // Ensure this isn't after a previous 'default' if (seendefault) fatal("case or default after existing default"); // Set the AST operation. 
Scan the case value if required if (Token.token == T_DEFAULT) { ASTop = A_DEFAULT; seendefault = 1; scan(&Token); } else { ASTop = A_CASE; scan(&Token); left = binexpr(0); // Ensure the case value is an integer literal if (left->op != A_INTLIT) fatal("Expecting integer literal for case value"); casevalue = left->a_intvalue; // Walk the list of existing case values to ensure // that there isn't a duplicate case value for (c = casetree; c != NULL; c = c->right) if (casevalue == c->a_intvalue) fatal("Duplicate case value"); } // Scan the ':' and increment the casecount match(T_COLON, ":"); casecount++; // If the next token is a T_CASE, the existing case will fall // into the next case. Otherwise, parse the case body. if (Token.token == T_CASE) body = NULL; else body = compound_statement(1); // Build a sub-tree with any compound statement as the left child // and link it in to the growing A_CASE tree if (casetree == NULL) { casetree = casetail = mkastunary(ASTop, P_NONE, NULL, body, NULL, casevalue); } else { casetail->right = mkastunary(ASTop, P_NONE, NULL, body, NULL, casevalue); casetail = casetail->right; } break; default: fatals("Unexpected token in switch", Token.tokstr); } } Switchlevel--; // We have a sub-tree with the cases and any default. Put the // case count into the A_SWITCH node and attach the case tree. n->a_intvalue = casecount; n->right = casetree; rbrace(); return (n); } // Parse a single statement and return its AST. static struct ASTnode *single_statement(void) { struct ASTnode *stmt; struct symtable *ctype; switch (Token.token) { case T_LBRACE: // We have a '{', so this is a compound statement lbrace(); stmt = compound_statement(0); rbrace(); return (stmt); case T_IDENT: // We have to see if the identifier matches a typedef. // If not, treat it as an expression. // Otherwise, fall down to the parse_type() call. 
if (findtypedef(Text) == NULL) { stmt = binexpr(0); semi(); return (stmt); } case T_CHAR: case T_INT: case T_LONG: case T_STRUCT: case T_UNION: case T_ENUM: case T_TYPEDEF: // The beginning of a variable declaration list. declaration_list(&ctype, C_LOCAL, T_SEMI, T_EOF, &stmt); semi(); return (stmt); // Any assignments from the declarations case T_IF: return (if_statement()); case T_WHILE: return (while_statement()); case T_FOR: return (for_statement()); case T_RETURN: return (return_statement()); case T_BREAK: return (break_statement()); case T_CONTINUE: return (continue_statement()); case T_SWITCH: return (switch_statement()); default: // For now, see if this is an expression. // This catches assignment statements. stmt = binexpr(0); semi(); return (stmt); } return (NULL); // Keep -Wall happy } // Parse a compound statement // and return its AST. If inswitch is true, // we look for a '}', 'case' or 'default' token // to end the parsing. Otherwise, look for // just a '}' to end the parsing. 
struct ASTnode *compound_statement(int inswitch) { struct ASTnode *left = NULL; struct ASTnode *tree; while (1) { // Parse a single statement tree = single_statement(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, P_NONE, NULL, left, NULL, tree, NULL, 0); } // Leave if we've hit the end token if (Token.token == T_RBRACE) return (left); if (inswitch && (Token.token == T_CASE || Token.token == T_DEFAULT)) return (left); } return (NULL); // Keep -Wall happy } ================================================ FILE: 52_Pointers_pt2/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 // Append a node to the singly-linked list pointed to by head or tail void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node) { // Check for valid pointers if (head == NULL || tail == NULL || node == NULL) fatal("Either head, tail or node is NULL in appendsym"); // Append to the list if (*tail) { (*tail)->next = node; *tail = node; } else *head = *tail = node; node->next = NULL; } // Create a symbol node to be added to a symbol table list. // Set up the node's: // + type: char, int etc. // + ctype: composite type pointer for struct/union // + structural type: var, function, array etc. 
// + size: number of elements, or endlabel: end label for a function // + posn: Position information for local symbols // Return a pointer to the new node struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn) { // Get a new node struct symtable *node = (struct symtable *) malloc(sizeof(struct symtable)); if (node == NULL) fatal("Unable to malloc a symbol table node in newsym"); // Fill in the values if (name == NULL) node->name = NULL; else node->name = strdup(name); node->type = type; node->ctype = ctype; node->stype = stype; node->class = class; node->nelems = nelems; // For pointers and integer types, set the size // of the symbol. structs and union declarations // manually set this up themselves. if (ptrtype(type) || inttype(type)) node->size = nelems * typesize(type, ctype); node->st_posn = posn; node->next = NULL; node->member = NULL; node->initlist = NULL; return (node); } // Add a symbol to the global symbol list struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn) { struct symtable *sym = newsym(name, type, ctype, stype, class, nelems, posn); // For structs and unions, copy the size from the type node if (type == P_STRUCT || type == P_UNION) sym->size = ctype->size; appendsym(&Globhead, &Globtail, sym); return (sym); } // Add a symbol to the local symbol list struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int nelems) { struct symtable *sym = newsym(name, type, ctype, stype, C_LOCAL, nelems, 0); // For structs and unions, copy the size from the type node if (type == P_STRUCT || type == P_UNION) sym->size = ctype->size; appendsym(&Loclhead, &Locltail, sym); return (sym); } // Add a symbol to the parameter list struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype) { struct symtable *sym = newsym(name, type, ctype, stype, C_PARAM, 1, 0); appendsym(&Parmhead, &Parmtail, sym); 
return (sym); } // Add a symbol to the temporary member list struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int nelems) { struct symtable *sym = newsym(name, type, ctype, stype, C_MEMBER, nelems, 0); // For structs and unions, copy the size from the type node if (type == P_STRUCT || type == P_UNION) sym->size = ctype->size; appendsym(&Membhead, &Membtail, sym); return (sym); } // Add a struct to the struct list struct symtable *addstruct(char *name) { struct symtable *sym = newsym(name, P_STRUCT, NULL, 0, C_STRUCT, 0, 0); appendsym(&Structhead, &Structtail, sym); return (sym); } // Add a struct to the union list struct symtable *addunion(char *name) { struct symtable *sym = newsym(name, P_UNION, NULL, 0, C_UNION, 0, 0); appendsym(&Unionhead, &Uniontail, sym); return (sym); } // Add an enum type or value to the enum list. // Class is C_ENUMTYPE or C_ENUMVAL. // Use posn to store the int value. struct symtable *addenum(char *name, int class, int value) { struct symtable *sym = newsym(name, P_INT, NULL, 0, class, 0, value); appendsym(&Enumhead, &Enumtail, sym); return (sym); } // Add a typedef to the typedef list struct symtable *addtypedef(char *name, int type, struct symtable *ctype) { struct symtable *sym = newsym(name, type, ctype, 0, C_TYPEDEF, 0, 0); appendsym(&Typehead, &Typetail, sym); return (sym); } // Search for a symbol in a specific list. // Return a pointer to the found node or NULL if not found. // If class is not zero, also match on the given class static struct symtable *findsyminlist(char *s, struct symtable *list, int class) { for (; list != NULL; list = list->next) if ((list->name != NULL) && !strcmp(s, list->name)) if (class == 0 || class == list->class) return (list); return (NULL); } // Determine if the symbol s is in the global symbol table. // Return a pointer to the found node or NULL if not found. 
struct symtable *findglob(char *s) { return (findsyminlist(s, Globhead, 0)); } // Determine if the symbol s is in the local symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findlocl(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } return (findsyminlist(s, Loclhead, 0)); } // Determine if the symbol s is in the symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findsymbol(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } // Otherwise, try the local and global symbol lists node = findsyminlist(s, Loclhead, 0); if (node) return (node); return (findsyminlist(s, Globhead, 0)); } // Find a member in the member list // Return a pointer to the found node or NULL if not found. struct symtable *findmember(char *s) { return (findsyminlist(s, Membhead, 0)); } // Find a struct in the struct list // Return a pointer to the found node or NULL if not found. struct symtable *findstruct(char *s) { return (findsyminlist(s, Structhead, 0)); } // Find a struct in the union list // Return a pointer to the found node or NULL if not found. struct symtable *findunion(char *s) { return (findsyminlist(s, Unionhead, 0)); } // Find an enum type in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumtype(char *s) { return (findsyminlist(s, Enumhead, C_ENUMTYPE)); } // Find an enum value in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumval(char *s) { return (findsyminlist(s, Enumhead, C_ENUMVAL)); } // Find a type in the tyedef list // Return a pointer to the found node or NULL if not found. 
struct symtable *findtypedef(char *s) { return (findsyminlist(s, Typehead, 0)); } // Reset the contents of the symbol table void clear_symtable(void) { Globhead = Globtail = NULL; Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Membhead = Membtail = NULL; Structhead = Structtail = NULL; Unionhead = Uniontail = NULL; Enumhead = Enumtail = NULL; Typehead = Typetail = NULL; } // Clear all the entries in the local symbol table void freeloclsyms(void) { Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Functionid = NULL; } // Remove all static symbols from the global symbol table void freestaticsyms(void) { // g points at current node, prev at the previous one struct symtable *g, *prev = NULL; // Walk the global table looking for static entries for (g = Globhead; g != NULL; g = g->next) { if (g->class == C_STATIC) { // If there's a previous node, rearrange the prev pointer // to skip over the current node. If not, g is the head, // so do the same to Globhead if (prev != NULL) prev->next = g->next; else Globhead->next = g->next; // If g is the tail, point Globtail at the previous node // (if there is one), or Globhead if (g == Globtail) { if (prev != NULL) Globtail = prev; else Globtail = Globhead; } } } // Point prev at g before we move up to the next node prev = g; } // Dump a single symbol static void dumpsym(struct symtable *sym, int indent) { int i; for (i = 0; i < indent; i++) printf(" "); switch (sym->type & (~0xf)) { case P_VOID: printf("void "); break; case P_CHAR: printf("char "); break; case P_INT: printf("int "); break; case P_LONG: printf("long "); break; case P_STRUCT: if (sym->ctype != NULL) printf("struct %s ", sym->ctype->name); else printf("struct %s ", sym->name); break; case P_UNION: if (sym->ctype != NULL) printf("union %s ", sym->ctype->name); else printf("union %s ", sym->name); break; default: printf("unknown type "); } for (i = 0; i < (sym->type & 0xf); i++) printf("*"); printf("%s", sym->name); switch (sym->stype) { case 
S_VARIABLE: break; case S_FUNCTION: printf("()"); break; case S_ARRAY: printf("[]"); break; default: printf(" unknown stype"); } switch (sym->class) { case C_GLOBAL: printf(": global"); break; case C_LOCAL: printf(": local"); break; case C_PARAM: printf(": param"); break; case C_EXTERN: printf(": extern"); break; case C_STATIC: printf(": static"); break; case C_STRUCT: printf(": struct"); break; case C_UNION: printf(": union"); break; case C_MEMBER: printf(": member"); break; case C_ENUMTYPE: printf(": enumtype"); break; case C_ENUMVAL: printf(": enumval"); break; case C_TYPEDEF: printf(": typedef"); break; default: printf(": unknown class"); } switch (sym->stype) { case S_VARIABLE: if (sym->class == C_ENUMVAL) printf(", value %d\n", sym->st_posn); else printf(", size %d\n", sym->size); break; case S_FUNCTION: printf(", %d params\n", sym->nelems); break; case S_ARRAY: printf(", %d elems, size %d\n", sym->nelems, sym->size); break; } switch (sym->type & (~0xf)) { case P_STRUCT: case P_UNION: dumptable(sym->member, NULL, 4); } switch (sym->stype) { case S_FUNCTION: dumptable(sym->member, NULL, 4); } } // Dump one symbol table void dumptable(struct symtable *head, char *name, int indent) { struct symtable *sym; if (head != NULL && name != NULL) printf("%s\n--------\n", name); for (sym = head; sym != NULL; sym = sym->next) dumpsym(sym, indent); } void dumpsymtables(void) { dumptable(Globhead, "Global", 0); printf("\n"); dumptable(Enumhead, "Enums", 0); printf("\n"); dumptable(Typehead, "Typedefs", 0); } ================================================ FILE: 52_Pointers_pt2/tests/err.input031.c ================================================ Expecting a primary expression, got token:+ on line 5 of input031.c ================================================ FILE: 52_Pointers_pt2/tests/err.input032.c ================================================ Unknown variable or function:pizza on line 4 of input032.c ================================================ FILE: 
52_Pointers_pt2/tests/err.input033.c ================================================ Incompatible type to return on line 4 of input033.c ================================================ FILE: 52_Pointers_pt2/tests/err.input034.c ================================================ For now, declaration of non-global arrays is not implemented on line 4 of input034.c ================================================ FILE: 52_Pointers_pt2/tests/err.input035.c ================================================ Duplicate local variable declaration:a on line 4 of input035.c ================================================ FILE: 52_Pointers_pt2/tests/err.input036.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input036.c ================================================ FILE: 52_Pointers_pt2/tests/err.input037.c ================================================ Expected:comma on line 3 of input037.c ================================================ FILE: 52_Pointers_pt2/tests/err.input038.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input038.c ================================================ FILE: 52_Pointers_pt2/tests/err.input039.c ================================================ No statements in function with non-void type on line 4 of input039.c ================================================ FILE: 52_Pointers_pt2/tests/err.input040.c ================================================ No return for function with non-void type on line 4 of input040.c ================================================ FILE: 52_Pointers_pt2/tests/err.input041.c ================================================ Can't return from a void function on line 3 of input041.c ================================================ FILE: 52_Pointers_pt2/tests/err.input042.c ================================================ Unknown variable or function:fred on line 3 of input042.c 
================================================ FILE: 52_Pointers_pt2/tests/err.input043.c ================================================ Unknown variable or function:b on line 3 of input043.c ================================================ FILE: 52_Pointers_pt2/tests/err.input044.c ================================================ Unknown variable or function:z on line 3 of input044.c ================================================ FILE: 52_Pointers_pt2/tests/err.input045.c ================================================ & operator must be followed by an identifier on line 3 of input045.c ================================================ FILE: 52_Pointers_pt2/tests/err.input046.c ================================================ * operator must be followed by an identifier or * on line 3 of input046.c ================================================ FILE: 52_Pointers_pt2/tests/err.input047.c ================================================ ++ operator must be followed by an identifier on line 3 of input047.c ================================================ FILE: 52_Pointers_pt2/tests/err.input048.c ================================================ -- operator must be followed by an identifier on line 3 of input048.c ================================================ FILE: 52_Pointers_pt2/tests/err.input049.c ================================================ Incompatible expression in assignment on line 6 of input049.c ================================================ FILE: 52_Pointers_pt2/tests/err.input050.c ================================================ Incompatible types in binary expression on line 6 of input050.c ================================================ FILE: 52_Pointers_pt2/tests/err.input051.c ================================================ Expected '\'' at end of char literal on line 4 of input051.c ================================================ FILE: 52_Pointers_pt2/tests/err.input052.c ================================================ 
Unrecognised character:$ on line 5 of input052.c ================================================ FILE: 52_Pointers_pt2/tests/err.input056.c ================================================ unknown struct/union type:var1 on line 2 of input056.c ================================================ FILE: 52_Pointers_pt2/tests/err.input057.c ================================================ previously defined struct/union:fred on line 2 of input057.c ================================================ FILE: 52_Pointers_pt2/tests/err.input059.c ================================================ Unknown variable or function:y on line 3 of input059.c ================================================ FILE: 52_Pointers_pt2/tests/err.input060.c ================================================ Expression is not a struct/union on line 3 of input060.c ================================================ FILE: 52_Pointers_pt2/tests/err.input061.c ================================================ Expression is not a pointer to a struct/union on line 3 of input061.c ================================================ FILE: 52_Pointers_pt2/tests/err.input064.c ================================================ undeclared enum type::fred on line 1 of input064.c ================================================ FILE: 52_Pointers_pt2/tests/err.input065.c ================================================ enum type redeclared::fred on line 2 of input065.c ================================================ FILE: 52_Pointers_pt2/tests/err.input066.c ================================================ enum value redeclared::z on line 2 of input066.c ================================================ FILE: 52_Pointers_pt2/tests/err.input068.c ================================================ redefinition of typedef:FOO on line 2 of input068.c ================================================ FILE: 52_Pointers_pt2/tests/err.input069.c ================================================ unknown type:FLOO on line 2 of 
input069.c ================================================ FILE: 52_Pointers_pt2/tests/err.input072.c ================================================ no loop or switch to break out from on line 1 of input072.c ================================================ FILE: 52_Pointers_pt2/tests/err.input073.c ================================================ no loop to continue to on line 1 of input073.c ================================================ FILE: 52_Pointers_pt2/tests/err.input075.c ================================================ Unexpected token in switch:if on line 4 of input075.c ================================================ FILE: 52_Pointers_pt2/tests/err.input076.c ================================================ No cases in switch on line 3 of input076.c ================================================ FILE: 52_Pointers_pt2/tests/err.input077.c ================================================ case or default after existing default on line 6 of input077.c ================================================ FILE: 52_Pointers_pt2/tests/err.input078.c ================================================ case or default after existing default on line 6 of input078.c ================================================ FILE: 52_Pointers_pt2/tests/err.input079.c ================================================ Duplicate case value on line 6 of input079.c ================================================ FILE: 52_Pointers_pt2/tests/err.input085.c ================================================ Bad type in parameter list on line 1 of input085.c ================================================ FILE: 52_Pointers_pt2/tests/err.input086.c ================================================ Function definition not at global level on line 3 of input086.c ================================================ FILE: 52_Pointers_pt2/tests/err.input087.c ================================================ Bad type in member list on line 4 of input087.c 
================================================ FILE: 52_Pointers_pt2/tests/err.input092.c ================================================ Type mismatch: literal vs. variable on line 1 of input092.c ================================================ FILE: 52_Pointers_pt2/tests/err.input093.c ================================================ Unknown variable or function:fred on line 1 of input093.c ================================================ FILE: 52_Pointers_pt2/tests/err.input094.c ================================================ Type mismatch: literal vs. variable on line 1 of input094.c ================================================ FILE: 52_Pointers_pt2/tests/err.input095.c ================================================ Variable can not be initialised:x on line 1 of input095.c ================================================ FILE: 52_Pointers_pt2/tests/err.input096.c ================================================ Array size is illegal:0 on line 1 of input096.c ================================================ FILE: 52_Pointers_pt2/tests/err.input097.c ================================================ For now, declaration of non-global arrays is not implemented on line 2 of input097.c ================================================ FILE: 52_Pointers_pt2/tests/err.input098.c ================================================ Too many values in initialisation list on line 1 of input098.c ================================================ FILE: 52_Pointers_pt2/tests/err.input102.c ================================================ Cannot cast to a struct, union or void type on line 3 of input102.c ================================================ FILE: 52_Pointers_pt2/tests/err.input103.c ================================================ Cannot cast to a struct, union or void type on line 3 of input103.c ================================================ FILE: 52_Pointers_pt2/tests/err.input104.c ================================================ Cannot cast to a 
struct, union or void type on line 2 of input104.c ================================================ FILE: 52_Pointers_pt2/tests/err.input105.c ================================================ Incompatible expression in assignment on line 4 of input105.c ================================================ FILE: 52_Pointers_pt2/tests/err.input118.c ================================================ Compiler doesn't support static or extern local declarations on line 2 of input118.c ================================================ FILE: 52_Pointers_pt2/tests/err.input124.c ================================================ Cannot ++ on rvalue on line 6 of input124.c ================================================ FILE: 52_Pointers_pt2/tests/err.input126.c ================================================ Unknown variable or function:ptr on line 7 of input126.c ================================================ FILE: 52_Pointers_pt2/tests/err.input129.c ================================================ Cannot ++ and/or -- more than once on line 6 of input129.c ================================================ FILE: 52_Pointers_pt2/tests/input001.c ================================================ int printf(char *fmt); void main() { printf("%d\n", 12 * 3); printf("%d\n", 18 - 2 * 4); printf("%d\n", 1 + 2 + 9 - 5/2 + 3*5); } ================================================ FILE: 52_Pointers_pt2/tests/input002.c ================================================ int printf(char *fmt); void main() { int fred; int jim; fred= 5; jim= 12; printf("%d\n", fred + jim); } ================================================ FILE: 52_Pointers_pt2/tests/input003.c ================================================ int printf(char *fmt); void main() { int x; x= 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); } ================================================ FILE: 52_Pointers_pt2/tests/input004.c 
================================================ int printf(char *fmt); void main() { int x; x= 7 < 9; printf("%d\n", x); x= 7 <= 9; printf("%d\n", x); x= 7 != 9; printf("%d\n", x); x= 7 == 7; printf("%d\n", x); x= 7 >= 7; printf("%d\n", x); x= 7 <= 7; printf("%d\n", x); x= 9 > 7; printf("%d\n", x); x= 9 >= 7; printf("%d\n", x); x= 9 != 7; printf("%d\n", x); } ================================================ FILE: 52_Pointers_pt2/tests/input005.c ================================================ int printf(char *fmt); void main() { int i; int j; i=6; j=12; if (i < j) { printf("%d\n", i); } else { printf("%d\n", j); } } ================================================ FILE: 52_Pointers_pt2/tests/input006.c ================================================ int printf(char *fmt); void main() { int i; i=1; while (i <= 10) { printf("%d\n", i); i= i + 1; } } ================================================ FILE: 52_Pointers_pt2/tests/input007.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 52_Pointers_pt2/tests/input008.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 52_Pointers_pt2/tests/input009.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { printf("%d\n", 2 * b - a); } } ================================================ FILE: 52_Pointers_pt2/tests/input010.c ================================================ int printf(char *fmt); void main() { int i; char j; j= 20; printf("%d\n", j); i= 10; printf("%d\n", i); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 2; j= j + 1) { 
printf("%d\n", j); } } ================================================ FILE: 52_Pointers_pt2/tests/input011.c ================================================ int printf(char *fmt); int main() { int i; char j; long k; i= 10; printf("%d\n", i); j= 20; printf("%d\n", j); k= 30; printf("%d\n", k); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 4; j= j + 1) { printf("%d\n", j); } for (k= 1; k <= 5; k= k + 1) { printf("%d\n", k); } return(i); printf("%d\n", 12345); return(3); } ================================================ FILE: 52_Pointers_pt2/tests/input012.c ================================================ int printf(char *fmt); int fred() { return(5); } void main() { int x; x= fred(2); printf("%d\n", x); } ================================================ FILE: 52_Pointers_pt2/tests/input013.c ================================================ int printf(char *fmt); int fred() { return(56); } void main() { int dummy; int result; dummy= printf("%d\n", 23); result= fred(10); dummy= printf("%d\n", result); } ================================================ FILE: 52_Pointers_pt2/tests/input014.c ================================================ int printf(char *fmt); int fred() { return(20); } int main() { int result; printf("%d\n", 10); result= fred(15); printf("%d\n", result); printf("%d\n", fred(15)+10); return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input015.c ================================================ int printf(char *fmt); int main() { char a; char *b; char c; int d; int *e; int f; a= 18; printf("%d\n", a); b= &a; c= *b; printf("%d\n", c); d= 12; printf("%d\n", d); e= &d; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input016.c ================================================ int printf(char *fmt); int c; int d; int *e; int f; int main() { c= 12; d=18; printf("%d\n", c); e= &c + 1; f= *e; printf("%d\n", f); return(0); } 
================================================ FILE: 52_Pointers_pt2/tests/input017.c ================================================ int printf(char *fmt); int main() { char a; char *b; int d; int *e; b= &a; *b= 19; printf("%d\n", a); e= &d; *e= 12; printf("%d\n", d); return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input018.c ================================================ int printf(char *fmt); int main() { int a; int b; a= b= 34; printf("%d\n", a); printf("%d\n", b); return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input018a.c ================================================ int printf(char *fmt); int a; int *b; char c; char *d; int main() { b= &a; *b= 15; printf("%d\n", a); d= &c; *d= 16; printf("%d\n", c); return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input019.c ================================================ int printf(char *fmt); int a; int b; int c; int d; int e; int main() { a= 2; b= 4; c= 3; d= 2; e= (a+b) * (c+d); printf("%d\n", e); return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input020.c ================================================ int printf(char *fmt); int a; int b[25]; int main() { b[3]= 12; a= b[3]; printf("%d\n", a); return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input021.c ================================================ int printf(char *fmt); char c; char *str; int main() { c= '\n'; printf("%d\n", c); for (str= "Hello world\n"; *str != 0; str= str + 1) { printf("%c", *str); } return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input022.c ================================================ int printf(char *fmt); char a; char b; char c; int d; int e; int f; long g; long h; long i; int main() { b= 5; c= 7; a= b + c++; printf("%d\n", a); e= 5; f= 7; d= e + f++; printf("%d\n", d); h= 5; i= 7; g= h 
+ i++; printf("%d\n", g); a= b-- + c; printf("%d\n", a); d= e-- + f; printf("%d\n", d); g= h-- + i; printf("%d\n", g); a= ++b + c; printf("%d\n", a); d= ++e + f; printf("%d\n", d); g= ++h + i; printf("%d\n", g); a= b * --c; printf("%d\n", a); d= e * --f; printf("%d\n", d); g= h * --i; printf("%d\n", g); return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input023.c ================================================ int printf(char *fmt); char *str; int x; int main() { x= -23; printf("%d\n", x); printf("%d\n", -10 * -10); x= 1; x= ~x; printf("%d\n", x); x= 2 > 5; printf("%d\n", x); x= !x; printf("%d\n", x); x= !x; printf("%d\n", x); x= 13; if (x) { printf("%d\n", 13); } x= 0; if (!x) { printf("%d\n", 14); } for (str= "Hello world\n"; *str; str++) { printf("%c", *str); } return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input024.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { a= 42; b= 19; printf("%d\n", a & b); printf("%d\n", a | b); printf("%d\n", a ^ b); printf("%d\n", 1 << 3); printf("%d\n", 63 >> 3); return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input025.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { char z; int y; int x; x= 10; y= 20; z= 30; printf("%d\n", x); printf("%d\n", y); printf("%d\n", z); a= 5; b= 15; c= 25; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input026.c ================================================ int printf(char *fmt); int main(int a, char b, long c, int d, int e, int f, int g, int h) { int i; int j; int k; a= 13; printf("%d\n", a); b= 23; printf("%d\n", b); c= 34; printf("%d\n", c); d= 44; printf("%d\n", d); e= 54; printf("%d\n", e); f= 64; printf("%d\n", f); g= 74; printf("%d\n", g); h= 84; 
printf("%d\n", h); i= 94; printf("%d\n", i); j= 95; printf("%d\n", j); k= 96; printf("%d\n", k); return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input027.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int param5(int a, int b, int c, int d, int e) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param2(int a, int b) { int c; int d; int e; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param0() { int a; int b; int c; int d; int e; a= 1; b= 2; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int main() { param8(1,2,3,4,5,6,7,8); param5(1,2,3,4,5); param2(1,2); param0(); return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input028.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input029.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h); int fred(int a, int b, int c); int main(); int param8(int a, int b, int c, int d, int e, int f, int g, int 
h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input030.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input030.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 52_Pointers_pt2/tests/input031.c ================================================ int printf(char *fmt); int main() { int x; x= 2 + + 3 - * / ; } ================================================ FILE: 52_Pointers_pt2/tests/input032.c ================================================ int printf(char *fmt); int main() { pizza cow llama sausage; } ================================================ FILE: 52_Pointers_pt2/tests/input033.c ================================================ int printf(char *fmt); int main() { char *z; return(z); } ================================================ FILE: 52_Pointers_pt2/tests/input034.c ================================================ int printf(char *fmt); int main() { int a[12]; return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input035.c ================================================ int printf(char *fmt); int fred(int a, int b) { int a; return(a); } ================================================ FILE: 52_Pointers_pt2/tests/input036.c ================================================ int printf(char *fmt); int fred(int a, 
char b, int c); int fred(int a, int b, char c); ================================================ FILE: 52_Pointers_pt2/tests/input037.c ================================================ int printf(char *fmt); int fred(int a, char b +, int z); ================================================ FILE: 52_Pointers_pt2/tests/input038.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c, int g); ================================================ FILE: 52_Pointers_pt2/tests/input039.c ================================================ int printf(char *fmt); int main() { int a; } ================================================ FILE: 52_Pointers_pt2/tests/input040.c ================================================ int printf(char *fmt); int main() { int a; a= 5; } ================================================ FILE: 52_Pointers_pt2/tests/input041.c ================================================ int printf(char *fmt); void fred() { return(5); } ================================================ FILE: 52_Pointers_pt2/tests/input042.c ================================================ int printf(char *fmt); int main() { fred(5); } ================================================ FILE: 52_Pointers_pt2/tests/input043.c ================================================ int printf(char *fmt); int main() { int a; a= b[4]; } ================================================ FILE: 52_Pointers_pt2/tests/input044.c ================================================ int printf(char *fmt); int main() { int a; a= z; } ================================================ FILE: 52_Pointers_pt2/tests/input045.c ================================================ int printf(char *fmt); int main() { int a; a= &5; } ================================================ FILE: 52_Pointers_pt2/tests/input046.c ================================================ int printf(char *fmt); int main() { int a; a= *5; } 
================================================ FILE: 52_Pointers_pt2/tests/input047.c ================================================ int printf(char *fmt); int main() { int a; a= ++5; } ================================================ FILE: 52_Pointers_pt2/tests/input048.c ================================================ int printf(char *fmt); int main() { int a; a= --5; } ================================================ FILE: 52_Pointers_pt2/tests/input049.c ================================================ int printf(char *fmt); int main() { int x; char y; y= x; } ================================================ FILE: 52_Pointers_pt2/tests/input050.c ================================================ int printf(char *fmt); int main() { char *a; char *b; a= a + b; } ================================================ FILE: 52_Pointers_pt2/tests/input051.c ================================================ int printf(char *fmt); int main() { char a; a= 'fred'; } ================================================ FILE: 52_Pointers_pt2/tests/input052.c ================================================ int printf(char *fmt); int main() { int a; a= $5.00; } ================================================ FILE: 52_Pointers_pt2/tests/input053.c ================================================ int printf(char *fmt); int main() { printf("Hello world, %d\n", 23); return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input054.c ================================================ int printf(char *fmt); int main() { int i; for (i=0; i < 20; i++) { printf("Hello world, %d\n", i); } return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input055.c ================================================ int printf(char *fmt); int main(int argc, char **argv) { int i; char *argument; printf("Hello world\n"); for (i=0; i < argc; i++) { argument= *argv; argv= argv + 1; printf("Argument %d is %s\n", i, argument); } return(0); } 
================================================ FILE: 52_Pointers_pt2/tests/input056.c ================================================ struct foo { int x; }; struct mary var1; ================================================ FILE: 52_Pointers_pt2/tests/input057.c ================================================ struct fred { int x; } ; struct fred { char y; } ; ================================================ FILE: 52_Pointers_pt2/tests/input058.c ================================================ int printf(char *fmt); struct fred { int x; char y; long z; }; struct fred var2; struct fred *varptr; int main() { long result; var2.x= 12; printf("%d\n", var2.x); var2.y= 'c'; printf("%d\n", var2.y); var2.z= 4005; printf("%d\n", var2.z); result= var2.x + var2.y + var2.z; printf("%d\n", result); varptr= &var2; result= varptr->x + varptr->y + varptr->z; printf("%d\n", result); return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input059.c ================================================ int main() { int x; x= y.foo; } ================================================ FILE: 52_Pointers_pt2/tests/input060.c ================================================ int main() { int x; x= x.foo; } ================================================ FILE: 52_Pointers_pt2/tests/input061.c ================================================ int main() { int x; x= x->foo; } ================================================ FILE: 52_Pointers_pt2/tests/input062.c ================================================ int printf(char *fmt); union fred { char w; int x; int y; long z; }; union fred var1; union fred *varptr; int main() { var1.x= 65; printf("%d\n", var1.x); var1.x= 66; printf("%d\n", var1.x); printf("%d\n", var1.y); printf("The next two depend on the endian of the platform\n"); printf("%d\n", var1.w); printf("%d\n", var1.z); varptr= &var1; varptr->x= 67; printf("%d\n", varptr->x); printf("%d\n", varptr->y); return(0); } 
================================================ FILE: 52_Pointers_pt2/tests/input063.c ================================================ int printf(char *fmt); enum fred { apple=1, banana, carrot, pear=10, peach, mango, papaya }; enum jane { aple=1, bnana, crrot, par=10, pech, mago, paaya }; enum fred var1; enum jane var2; enum fred var3; int main() { var1= carrot + pear + mango; printf("%d\n", var1); return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input064.c ================================================ enum fred var3; ================================================ FILE: 52_Pointers_pt2/tests/input065.c ================================================ enum fred { x, y, z }; enum fred { a, b }; ================================================ FILE: 52_Pointers_pt2/tests/input066.c ================================================ enum fred { x, y, z }; enum mary { a, b, z }; ================================================ FILE: 52_Pointers_pt2/tests/input067.c ================================================ int printf(char *fmt); typedef int FOO; FOO var1; struct bar { int x; int y} ; typedef struct bar BAR; BAR var2; int main() { var1= 5; printf("%d\n", var1); var2.x= 7; var2.y= 10; printf("%d\n", var2.x + var2.y); return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input068.c ================================================ typedef int FOO; typedef char FOO; ================================================ FILE: 52_Pointers_pt2/tests/input069.c ================================================ typedef int FOO; FLOO y; ================================================ FILE: 52_Pointers_pt2/tests/input070.c ================================================ #include typedef int FOO; int main() { FOO x; x= 56; printf("%d\n", x); return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input071.c ================================================ #include 
int main() { int x; x = 0; while (x < 100) { if (x == 5) { x = x + 2; continue; } printf("%d\n", x); if (x == 14) { break; } x = x + 1; } printf("Done\n"); return (0); } ================================================ FILE: 52_Pointers_pt2/tests/input072.c ================================================ int main() { break; } ================================================ FILE: 52_Pointers_pt2/tests/input073.c ================================================ int main() { continue; } ================================================ FILE: 52_Pointers_pt2/tests/input074.c ================================================ #include int main() { int x; int y; y= 0; for (x=0; x < 5; x++) { switch(x) { case 1: { y= 5; break; } case 2: { y= 7; break; } case 3: { y= 9; } default: { y= 100; } } printf("%d\n", y); } return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input075.c ================================================ int main() { int x; switch(x) { if (x<5); } } ================================================ FILE: 52_Pointers_pt2/tests/input076.c ================================================ int main() { int x; switch(x) { } } ================================================ FILE: 52_Pointers_pt2/tests/input077.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } case 4: { x= 2; } } } ================================================ FILE: 52_Pointers_pt2/tests/input078.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } default: { x= 2; } } } ================================================ FILE: 52_Pointers_pt2/tests/input079.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } case 2: { x= 2; } case 1: { x= 2; } default: { x= 2; } } } ================================================ FILE: 52_Pointers_pt2/tests/input080.c 
================================================ #include int x; int y; int main() { for (x=0, y=1; x < 6; x++, y=y+2) { printf("%d %d\n", x, y); } return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input081.c ================================================ #include int x; int y; int main() { x= 0; y=1; for (;x<5;) { printf("%d %d\n", x, y); x=x+1; y=y+2; } return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input082.c ================================================ #include int main() { int x; x= 10; // Dangling else test if (x > 5) if (x > 15) printf("x > 15\n"); else printf("15 >= x > 5\n"); else printf("5 >= x\n"); return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input083.c ================================================ #include int main() { int x; // Dangling else test. // We should not print anything for x<= 5 for (x=0; x < 12; x++) if (x > 5) if (x > 10) printf("10 < %2d\n", x); else printf(" 5 < %2d <= 10\n", x); return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input084.c ================================================ #include int main() { int x, y; x=2; y=3; printf("%d %d\n", x, y); char a, *b; a= 'f'; b= &a; printf("%c %c\n", a, *b); return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input085.c ================================================ int main(struct foo { int x; }; , int a) { return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input086.c ================================================ int main() { int fred() { return(5); } int x; x=2; return(x); } ================================================ FILE: 52_Pointers_pt2/tests/input087.c ================================================ struct foo { int x; int y; struct blah { int g; }; }; ================================================ FILE: 
52_Pointers_pt2/tests/input088.c ================================================ #include struct foo { int x; int y; } fred, mary; struct foo james; int main() { fred.x= 5; mary.y= 6; printf("%d %d\n", fred.x, mary.y); return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input089.c ================================================ #include int x= 23; char y= 'H'; char *z= "Hello world"; int main() { printf("%d %c %s\n", x, y, z); return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input090.c ================================================ #include int a = 23, b = 100; char y = 'H', *z = "Hello world"; int main() { printf("%d %d %c %s\n", a, b, y, z); return (0); } ================================================ FILE: 52_Pointers_pt2/tests/input091.c ================================================ #include int fred[] = { 1, 2, 3, 4, 5 }; int jim[10] = { 1, 2, 3, 4, 5 }; int main() { int i; for (i=0; i < 5; i++) printf("%d\n", fred[i]); for (i=0; i < 10; i++) printf("%d\n", jim[i]); return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input092.c ================================================ char x= 3000; ================================================ FILE: 52_Pointers_pt2/tests/input093.c ================================================ char x= fred; ================================================ FILE: 52_Pointers_pt2/tests/input094.c ================================================ char *s= 54; ================================================ FILE: 52_Pointers_pt2/tests/input095.c ================================================ int fred(int x=2) { return(x); } ================================================ FILE: 52_Pointers_pt2/tests/input096.c ================================================ int fred[0]; ================================================ FILE: 52_Pointers_pt2/tests/input097.c ================================================ int 
main() { int x[45]; return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input098.c ================================================ int fred[3]= { 1, 2, 3, 4, 5 }; ================================================ FILE: 52_Pointers_pt2/tests/input099.c ================================================ #include // List of token strings, for debugging purposes. // As yet, we can't store a NULL into the list char *Tstring[] = { "EOF", "=", "||", "&&", "|", "^", "&", "==", "!=", ",", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", "default", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":", "" }; int main() { int i; char *str; i=0; while (1) { str= Tstring[i]; if (*str == 0) break; printf("%s\n", str); i++; } return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input100.c ================================================ #include int main() { int x= 3, y=14; int z= 2 * x + y; char *str= "Hello world"; printf("%s %d %d\n", str, x+y, z); return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input101.c ================================================ #include int main() { int x= 65535; char y; char *str; y= (char )x; printf("0x%x\n", y); str= (char *)0; printf("0x%lx\n", (long)str); return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input102.c ================================================ int main() { struct foo { int p; }; int y= (struct foo) x; } ================================================ FILE: 52_Pointers_pt2/tests/input103.c ================================================ int main() { union foo { int p; }; int y= (union foo) x; } ================================================ FILE: 
52_Pointers_pt2/tests/input104.c ================================================ int main() { int y= (void) x; } ================================================ FILE: 52_Pointers_pt2/tests/input105.c ================================================ int main() { int x; char *y; y= (char) x; } ================================================ FILE: 52_Pointers_pt2/tests/input106.c ================================================ #include char *y= (char *)0; int main() { printf("0x%lx\n", (long)y); return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input107.c ================================================ #include char *y[] = { "fish", "cow", NULL }; char *z= NULL; int main() { int i; char *ptr; for (i=0; i < 3; i++) { ptr= y[i]; if (ptr != (char *)0) printf("%s\n", y[i]); else printf("NULL\n"); } return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input108.c ================================================ int main() { char *str= (void *)0; return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input109.c ================================================ #include int main() { int x= 17 - 1; printf("%d\n", x); return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input110.c ================================================ #include int x; int y; int main() { x= 3; y= 15; y += x; printf("%d\n", y); x= 3; y= 15; y -= x; printf("%d\n", y); x= 3; y= 15; y *= x; printf("%d\n", y); x= 3; y= 15; y /= x; printf("%d\n", y); return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input111.c ================================================ #include int main() { int x= 2000 + 3 + 4 * 5 + 6; printf("%d\n", x); return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input112.c ================================================ #include char* y = NULL; int x= 10 + 6; int 
fred [ 2 + 3 ]; int main() { fred[2]= x; printf("%d\n", fred[2]); return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input113.c ================================================ #include void fred(void); void fred(void) { printf("fred says hello\n"); } int main(void) { fred(); return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input114.c ================================================ #include int main() { int x= 0x4a; printf("%c\n", x); return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input115.c ================================================ #include struct foo { int x; char y; long z; }; typedef struct foo blah; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol struct symtable *ctype; // If struct/union, ptr to that type int stype; // Structural type for the symbol int class; // Storage class for the symbol int size; // Total size in bytes of this symbol int nelems; // Functions: # params. 
Arrays: # elements #define st_endlabel st_posn // For functions, the end label int st_posn; // For locals, the negative offset // from the stack base pointer int *initlist; // List of initial values struct symtable *next; // Next symbol in one list struct symtable *member; // First member of a function, struct, }; // union or enum // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates int rvalue; // True if the node is an rvalue struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; struct symtable *sym; // For many AST nodes, the pointer to // the symbol in the symbol table #define a_intvalue a_size // For A_INTLIT, the integer value int a_size; // For A_SCALE, the size to scale by }; int main() { printf("%ld\n", sizeof(char)); printf("%ld\n", sizeof(int)); printf("%ld\n", sizeof(long)); printf("%ld\n", sizeof(char *)); printf("%ld\n", sizeof(blah)); printf("%ld\n", sizeof(struct symtable)); printf("%ld\n", sizeof(struct ASTnode)); return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input116.c ================================================ #include static int counter=0; static int fred(void) { return(counter++); } int main(void) { int i; for (i=0; i < 5; i++) printf("%d\n", fred()); return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input117.c ================================================ #include static char *fred(void) { return("Hello"); } int main(void) { printf("%s\n", fred()); return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input118.c ================================================ int main(void) { static int x; } ================================================ FILE: 52_Pointers_pt2/tests/input119.c ================================================ #include int x; int y= 3; int main() { 
x= y != 3 ? 6 : 8; printf("%d\n", x); x= (y == 3) ? 6 : 8; printf("%d\n", x); return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input120.c ================================================ #include int x; int y= 3; int main() { for (y= 0; y < 10; y++) { x= y > 4 ? y + 2 : y + 9; printf("%d\n", x); } return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input121.c ================================================ #include int x; int y= 3; int main() { for (y= 0; y < 10; y++) { x= (y < 4) ? y + 2 : (y > 7) ? 1000 : y + 9; printf("%d\n", x); } return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input122.c ================================================ #include int x, y, z1, z2; int main() { for (x= 0; x <= 1; x++) { for (y= 0; y <= 1; y++) { z1= x || y; z2= x && y; printf("x %d, y %d, x || y %d, x && y %d\n", x, y, z1, z2); } } //z= x || y; return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input123.c ================================================ #include int main() { int x; for (x=0; x < 20; x++) switch(x) { case 2: case 3: case 5: case 7: case 11: printf("%2d infant prime\n", x); break; case 13: case 17: case 19: printf("%2d teen prime\n", x); break; case 0: case 1: case 4: case 6: case 8: case 9: case 10: case 12: printf("%2d infant composite\n", x); break; default: printf("%2d teen composite\n", x); break; } return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input124.c ================================================ #include int ary[5]; int main() { ary++; return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input125.c ================================================ #include int ary[5]; int *ptr; int x; int main() { ary[3]= 2008; ptr= ary; // Load ary's address into ptr x= ary[3]; printf("%d\n", x); x= ptr[3]; printf("%d\n", x); // 
Treat ptr as an array return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input126.c ================================================ #include <stdio.h> int ary[5]; int main() { ary[3]= 2008; ptr= &ary; return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input127.c ================================================ #include <stdio.h> int ary[5]; void fred(int *ptr) { // Receive a pointer printf("%d\n", ptr[3]); } int main() { ary[3]= 2008; printf("%d\n", ary[3]); fred(ary); // Pass ary as a pointer return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input128.c ================================================ #include <stdio.h> struct foo { int val; struct foo *next; }; struct foo head, mid, tail; int main() { struct foo *ptr; tail.val= 20; tail.next= NULL; mid.val= 15; mid.next= &tail; head.val= 10; head.next= &mid; ptr= &head; printf("%d %d\n", head.val, ptr->val); printf("%d %d\n", mid.val, ptr->next->val); printf("%d %d\n", tail.val, ptr->next->next->val); return(0); } ================================================ FILE: 52_Pointers_pt2/tests/input129.c ================================================ #include <stdio.h> int x= 6; int main() { printf("%d\n", x++ ++); return(0); } ================================================ FILE: 52_Pointers_pt2/tests/mktests ================================================ #!/bin/sh # Make the output files for each test # Build our compiler if needed if [ ! -f ../cwj ] then (cd ..; make install) fi for i in input*c do if [ ! -f "out.$i" -a ! -f "err.$i" ] then ../cwj -o out $i 2> "err.$i" # If the err file is empty if [ ! 
-s "err.$i" ] then rm -f "err.$i" cc -o out $i ./out > "out.$i" fi fi rm -f out out.s done ================================================ FILE: 52_Pointers_pt2/tests/out.input001.c ================================================ 36 10 25 ================================================ FILE: 52_Pointers_pt2/tests/out.input002.c ================================================ 17 ================================================ FILE: 52_Pointers_pt2/tests/out.input003.c ================================================ 1 2 3 4 5 ================================================ FILE: 52_Pointers_pt2/tests/out.input004.c ================================================ 1 1 1 1 1 1 1 1 1 ================================================ FILE: 52_Pointers_pt2/tests/out.input005.c ================================================ 6 ================================================ FILE: 52_Pointers_pt2/tests/out.input006.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 52_Pointers_pt2/tests/out.input007.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 52_Pointers_pt2/tests/out.input008.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 52_Pointers_pt2/tests/out.input009.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 52_Pointers_pt2/tests/out.input010.c ================================================ 20 10 1 2 3 4 5 253 254 255 0 1 ================================================ FILE: 52_Pointers_pt2/tests/out.input011.c ================================================ 10 20 30 1 2 3 4 5 253 254 255 0 1 2 3 1 2 3 4 5 ================================================ FILE: 52_Pointers_pt2/tests/out.input012.c ================================================ 5 
================================================ FILE: 52_Pointers_pt2/tests/out.input013.c ================================================ 23 56 ================================================ FILE: 52_Pointers_pt2/tests/out.input014.c ================================================ 10 20 30 ================================================ FILE: 52_Pointers_pt2/tests/out.input015.c ================================================ 18 18 12 12 ================================================ FILE: 52_Pointers_pt2/tests/out.input016.c ================================================ 12 18 ================================================ FILE: 52_Pointers_pt2/tests/out.input017.c ================================================ 19 12 ================================================ FILE: 52_Pointers_pt2/tests/out.input018.c ================================================ 34 34 ================================================ FILE: 52_Pointers_pt2/tests/out.input018a.c ================================================ 15 16 ================================================ FILE: 52_Pointers_pt2/tests/out.input019.c ================================================ 30 ================================================ FILE: 52_Pointers_pt2/tests/out.input020.c ================================================ 12 ================================================ FILE: 52_Pointers_pt2/tests/out.input021.c ================================================ 10 Hello world ================================================ FILE: 52_Pointers_pt2/tests/out.input022.c ================================================ 12 12 12 13 13 13 13 13 13 35 35 35 ================================================ FILE: 52_Pointers_pt2/tests/out.input023.c ================================================ -23 100 -2 0 1 0 13 14 Hello world ================================================ FILE: 52_Pointers_pt2/tests/out.input024.c ================================================ 2 59 57 8 7 
================================================ FILE: 52_Pointers_pt2/tests/out.input025.c ================================================ 10 20 30 5 15 25 ================================================ FILE: 52_Pointers_pt2/tests/out.input026.c ================================================ 13 23 34 44 54 64 74 84 94 95 96 ================================================ FILE: 52_Pointers_pt2/tests/out.input027.c ================================================ 1 2 3 4 5 6 7 8 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 ================================================ FILE: 52_Pointers_pt2/tests/out.input028.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 52_Pointers_pt2/tests/out.input029.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 52_Pointers_pt2/tests/out.input030.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input030.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 52_Pointers_pt2/tests/out.input053.c ================================================ Hello world, 23 ================================================ FILE: 52_Pointers_pt2/tests/out.input054.c ================================================ Hello world, 0 Hello world, 1 Hello world, 2 Hello world, 3 Hello world, 4 Hello world, 5 Hello world, 6 Hello world, 7 Hello world, 8 Hello world, 9 Hello world, 10 Hello world, 11 Hello world, 12 Hello world, 13 Hello world, 14 Hello world, 15 Hello world, 16 Hello world, 17 Hello world, 18 Hello world, 19 
================================================ FILE: 52_Pointers_pt2/tests/out.input055.c ================================================ Hello world Argument 0 is ./out ================================================ FILE: 52_Pointers_pt2/tests/out.input058.c ================================================ 12 99 4005 4116 4116 ================================================ FILE: 52_Pointers_pt2/tests/out.input062.c ================================================ 65 66 66 The next two depend on the endian of the platform 66 66 67 67 ================================================ FILE: 52_Pointers_pt2/tests/out.input063.c ================================================ 25 ================================================ FILE: 52_Pointers_pt2/tests/out.input067.c ================================================ 5 17 ================================================ FILE: 52_Pointers_pt2/tests/out.input070.c ================================================ 56 ================================================ FILE: 52_Pointers_pt2/tests/out.input071.c ================================================ 0 1 2 3 4 7 8 9 10 11 12 13 14 Done ================================================ FILE: 52_Pointers_pt2/tests/out.input074.c ================================================ 100 5 7 100 100 ================================================ FILE: 52_Pointers_pt2/tests/out.input080.c ================================================ 0 1 1 3 2 5 3 7 4 9 5 11 ================================================ FILE: 52_Pointers_pt2/tests/out.input081.c ================================================ 0 1 1 3 2 5 3 7 4 9 ================================================ FILE: 52_Pointers_pt2/tests/out.input082.c ================================================ 15 >= x > 5 ================================================ FILE: 52_Pointers_pt2/tests/out.input083.c ================================================ 5 < 6 <= 10 5 < 7 <= 10 5 < 8 <= 10 5 < 9 <= 10 5 < 10 <= 10 10 
< 11 ================================================ FILE: 52_Pointers_pt2/tests/out.input084.c ================================================ 2 3 f f ================================================ FILE: 52_Pointers_pt2/tests/out.input088.c ================================================ 5 6 ================================================ FILE: 52_Pointers_pt2/tests/out.input089.c ================================================ 23 H Hello world ================================================ FILE: 52_Pointers_pt2/tests/out.input090.c ================================================ 23 100 H Hello world ================================================ FILE: 52_Pointers_pt2/tests/out.input091.c ================================================ 1 2 3 4 5 1 2 3 4 5 0 0 0 0 0 ================================================ FILE: 52_Pointers_pt2/tests/out.input099.c ================================================ EOF = || && | ^ & == != , > <= >= << >> + - * / ++ -- ~ ! void char int long if else while for return struct union enum typedef extern break continue switch case default intlit strlit ; identifier { } ( ) [ ] , . 
-> : ================================================ FILE: 52_Pointers_pt2/tests/out.input100.c ================================================ Hello world 17 20 ================================================ FILE: 52_Pointers_pt2/tests/out.input101.c ================================================ 0xff 0x0 ================================================ FILE: 52_Pointers_pt2/tests/out.input106.c ================================================ 0x0 ================================================ FILE: 52_Pointers_pt2/tests/out.input107.c ================================================ fish cow NULL ================================================ FILE: 52_Pointers_pt2/tests/out.input108.c ================================================ ================================================ FILE: 52_Pointers_pt2/tests/out.input109.c ================================================ 16 ================================================ FILE: 52_Pointers_pt2/tests/out.input110.c ================================================ 18 12 45 5 ================================================ FILE: 52_Pointers_pt2/tests/out.input111.c ================================================ 2029 ================================================ FILE: 52_Pointers_pt2/tests/out.input112.c ================================================ 16 ================================================ FILE: 52_Pointers_pt2/tests/out.input113.c ================================================ fred says hello ================================================ FILE: 52_Pointers_pt2/tests/out.input114.c ================================================ J ================================================ FILE: 52_Pointers_pt2/tests/out.input115.c ================================================ 1 4 8 8 13 64 48 ================================================ FILE: 52_Pointers_pt2/tests/out.input116.c ================================================ 0 1 2 3 4 ================================================ 
FILE: 52_Pointers_pt2/tests/out.input117.c ================================================ Hello ================================================ FILE: 52_Pointers_pt2/tests/out.input119.c ================================================ 8 6 ================================================ FILE: 52_Pointers_pt2/tests/out.input120.c ================================================ 9 10 11 12 13 7 8 9 10 11 ================================================ FILE: 52_Pointers_pt2/tests/out.input121.c ================================================ 2 3 4 5 13 14 15 16 1000 1000 ================================================ FILE: 52_Pointers_pt2/tests/out.input122.c ================================================ x 0, y 0, x || y 0, x && y 0 x 0, y 1, x || y 1, x && y 0 x 1, y 0, x || y 1, x && y 0 x 1, y 1, x || y 1, x && y 1 ================================================ FILE: 52_Pointers_pt2/tests/out.input123.c ================================================ 0 infant composite 1 infant composite 2 infant prime 3 infant prime 4 infant composite 5 infant prime 6 infant composite 7 infant prime 8 infant composite 9 infant composite 10 infant composite 11 infant prime 12 infant composite 13 teen prime 14 teen composite 15 teen composite 16 teen composite 17 teen prime 18 teen composite 19 teen prime ================================================ FILE: 52_Pointers_pt2/tests/out.input125.c ================================================ 2008 2008 ================================================ FILE: 52_Pointers_pt2/tests/out.input127.c ================================================ 2008 2008 ================================================ FILE: 52_Pointers_pt2/tests/out.input128.c ================================================ 10 10 15 15 20 20 ================================================ FILE: 52_Pointers_pt2/tests/runtests ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our 
#!/bin/sh
# Run each test and compare
# against known good output

# Build our compiler if needed
if [ ! -f ../cwj ]
then (cd ..; make install)
fi

# Compare a known-good file ($1) against a trial file ($2) and
# announce the result after the test name already printed.
# Any non-zero status from cmp is a failure: 1 means the files
# differ, anything higher means cmp could not read one of them.
# Testing only for status 1 would report ": OK" in the latter case.
check() {
  if cmp -s "$1" "$2"
  then echo ": OK"
  else echo ": failed"
    diff -c "$1" "$2"
    echo
  fi
}

# Try to use each input source file
for i in input*c
do
  # Output file: compile the source, run it and
  # capture the output, and compare it against
  # the known-good output
  if [ -f "out.$i" ]
  then
    # Print the test name without a newline. printf is used
    # because the behaviour of "echo -n" varies between shells.
    printf '%s' "$i"
    ../cwj -o out "$i"
    ./out > "trial.$i"
    check "out.$i" "trial.$i"

  # Error file: compile the source and
  # capture the error messages. Compare
  # against the known-bad output.
  elif [ -f "err.$i" ]
  then
    printf '%s' "$i"
    ../cwj "$i" 2> "trial.$i"
    check "err.$i" "trial.$i"

  # We can't do anything if there's no file to test against
  else
    echo "Can't run test on $i, no output file!"
  fi

  rm -f out out.s "trial.$i"
done
# Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../compn -o out $i ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../compn $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.o out.s "trial.$i" done ================================================ FILE: 52_Pointers_pt2/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct symtable *ctype, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->type = type; n->ctype = ctype; n->left = left; n->mid = mid; n->right = right; n->sym = sym; n->a_intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, struct symtable *ctype, struct symtable *sym, int intvalue) { return (mkastnode(op, type, ctype, NULL, NULL, NULL, sym, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, int type, struct symtable *ctype, struct 
ASTnode *left, struct symtable *sym, int intvalue) { return (mkastnode(op, type, ctype, left, NULL, NULL, sym, intvalue)); } // Generate and return a new label number // just for AST dumping purposes static int dumpid = 1; static int gendumplabel(void) { return (dumpid++); } // Given an AST tree, print it out and follow the // traversal of the tree that genAST() follows void dumpAST(struct ASTnode *n, int label, int level) { int Lfalse, Lstart, Lend; int i; switch (n->op) { case A_IF: Lfalse = gendumplabel(); for (i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_IF"); if (n->right) { Lend = gendumplabel(); fprintf(stdout, ", end L%d", Lend); } fprintf(stdout, "\n"); dumpAST(n->left, Lfalse, level + 2); dumpAST(n->mid, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); return; case A_WHILE: Lstart = gendumplabel(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_WHILE, start L%d\n", Lstart); Lend = gendumplabel(); dumpAST(n->left, Lend, level + 2); dumpAST(n->right, NOLABEL, level + 2); return; } // Reset level to -2 for A_GLUE if (n->op == A_GLUE) level = -2; // General AST node handling if (n->left) dumpAST(n->left, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); for (int i = 0; i < level; i++) fprintf(stdout, " "); switch (n->op) { case A_GLUE: fprintf(stdout, "\n\n"); return; case A_FUNCTION: fprintf(stdout, "A_FUNCTION %s\n", n->sym->name); return; case A_ADD: fprintf(stdout, "A_ADD\n"); return; case A_SUBTRACT: fprintf(stdout, "A_SUBTRACT\n"); return; case A_MULTIPLY: fprintf(stdout, "A_MULTIPLY\n"); return; case A_DIVIDE: fprintf(stdout, "A_DIVIDE\n"); return; case A_EQ: fprintf(stdout, "A_EQ\n"); return; case A_NE: fprintf(stdout, "A_NE\n"); return; case A_LT: fprintf(stdout, "A_LE\n"); return; case A_GT: fprintf(stdout, "A_GT\n"); return; case A_LE: fprintf(stdout, "A_LE\n"); return; case A_GE: fprintf(stdout, "A_GE\n"); return; case A_INTLIT: fprintf(stdout, "A_INTLIT 
%d\n", n->a_intvalue); return; case A_STRLIT: fprintf(stdout, "A_STRLIT rval label L%d\n", n->a_intvalue); return; case A_IDENT: if (n->rvalue) fprintf(stdout, "A_IDENT rval %s\n", n->sym->name); else fprintf(stdout, "A_IDENT %s\n", n->sym->name); return; case A_ASSIGN: fprintf(stdout, "A_ASSIGN\n"); return; case A_WIDEN: fprintf(stdout, "A_WIDEN\n"); return; case A_RETURN: fprintf(stdout, "A_RETURN\n"); return; case A_FUNCCALL: fprintf(stdout, "A_FUNCCALL %s\n", n->sym->name); return; case A_ADDR: fprintf(stdout, "A_ADDR %s\n", n->sym->name); return; case A_DEREF: if (n->rvalue) fprintf(stdout, "A_DEREF rval\n"); else fprintf(stdout, "A_DEREF\n"); return; case A_SCALE: fprintf(stdout, "A_SCALE %d\n", n->a_size); return; case A_PREINC: fprintf(stdout, "A_PREINC %s\n", n->sym->name); return; case A_PREDEC: fprintf(stdout, "A_PREDEC %s\n", n->sym->name); return; case A_POSTINC: fprintf(stdout, "A_POSTINC\n"); return; case A_POSTDEC: fprintf(stdout, "A_POSTDEC\n"); return; case A_NEGATE: fprintf(stdout, "A_NEGATE\n"); return; case A_BREAK: fprintf(stdout, "A_BREAK\n"); return; case A_CONTINUE: fprintf(stdout, "A_CONTINUE\n"); return; case A_CASE: fprintf(stdout, "A_CASE %d\n", n->a_intvalue); return; case A_DEFAULT: fprintf(stdout, "A_DEFAULT\n"); return; case A_SWITCH: fprintf(stdout, "A_SWITCH\n"); return; case A_CAST: fprintf(stdout, "A_CAST %d\n", n->type); return; case A_ASPLUS: fprintf(stdout, "A_ASPLUS\n"); return; case A_ASMINUS: fprintf(stdout, "A_ASMINUS\n"); return; case A_ASSTAR: fprintf(stdout, "A_ASSTAR\n"); return; case A_ASSLASH: fprintf(stdout, "A_ASSLASH\n"); return; default: fatald("Unknown dumpAST operator", n->op); } } ================================================ FILE: 52_Pointers_pt2/types.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Types and type handling // Copyright (c) 2019 Warren Toomey, GPL3 // Return true if a type is an int type // of any size, false otherwise int 
inttype(int type) { return (((type & 0xf) == 0) && (type >= P_CHAR && type <= P_LONG)); } // Return true if a type is of pointer type int ptrtype(int type) { return ((type & 0xf) != 0); } // Given a primitive type, return // the type which is a pointer to it int pointer_to(int type) { if ((type & 0xf) == 0xf) fatald("Unrecognised in pointer_to: type", type); return (type + 1); } // Given a primitive pointer type, return // the type which it points to int value_at(int type) { if ((type & 0xf) == 0x0) fatald("Unrecognised in value_at: type", type); return (type - 1); } // Given a type and a composite type pointer, return // the size of this type in bytes int typesize(int type, struct symtable *ctype) { if (type == P_STRUCT || type == P_UNION) return (ctype->size); return (genprimsize(type)); } // Given an AST tree and a type which we want it to become, // possibly modify the tree by widening or scaling so that // it is compatible with this type. Return the original tree // if no changes occurred, a modified tree, or NULL if the // tree is not compatible with the given type. // If this will be part of a binary operation, the AST op is not zero. 
struct ASTnode *modify_type(struct ASTnode *tree, int rtype, struct symtable *rctype, int op) { int ltype; int lsize, rsize; ltype = tree->type; // XXX No idea on these yet if (ltype == P_STRUCT || ltype == P_UNION) fatal("Don't know how to do this yet"); if (rtype == P_STRUCT || rtype == P_UNION) fatal("Don't know how to do this yet"); // Compare scalar int types if (inttype(ltype) && inttype(rtype)) { // Both types same, nothing to do if (ltype == rtype) return (tree); // Get the sizes for each type lsize = typesize(ltype, NULL); rsize = typesize(rtype, NULL); // Tree's size is too big if (lsize > rsize) return (NULL); // Widen to the right if (rsize > lsize) return (mkastunary(A_WIDEN, rtype, NULL, tree, NULL, 0)); } // For pointers if (ptrtype(ltype) && ptrtype(rtype)) { // We can compare them if (op >= A_EQ && op <= A_GE) return (tree); // A comparison of the same type for a non-binary operation is OK, // or when the left tree is of `void *` type. if (op == 0 && (ltype == rtype || ltype == pointer_to(P_VOID))) return (tree); } // We can scale only on A_ADD or A_SUBTRACT operation if (op == A_ADD || op == A_SUBTRACT) { // Left is int type, right is pointer type and the size // of the original type is >1: scale the left if (inttype(ltype) && ptrtype(rtype)) { rsize = genprimsize(value_at(rtype)); if (rsize > 1) return (mkastunary(A_SCALE, rtype, rctype, tree, NULL, rsize)); else return (tree); // Size 1, no need to scale } } // If we get here, the types are not compatible return (NULL); } ================================================ FILE: 53_Mop_up_pt2/Makefile ================================================ # Define the location of the include directory # and the location to install the compiler binary INCDIR=/tmp/include BINDIR=/tmp HSRCS= data.h decl.h defs.h SRCS= cg.c decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c ARMSRCS= cg_arm.c 
decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c cwj: $(SRCS) $(HSRCS) cc -o cwj -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCS) compn: $(SRCN) $(HSRCS) cc -D__NASM__ -o compn -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCN) cwjarm: $(ARMSRCS) $(HSRCS) cc -o cwjarm -g -Wall $(ARMSRCS) cp cwjarm cwj install: cwj mkdir -p $(INCDIR) rsync -a include/. $(INCDIR) cp cwj $(BINDIR) chmod +x $(BINDIR)/cwj installn: compn mkdir -p $(INCDIR) rsync -a include/. $(INCDIR) cp compn $(BINDIR) chmod +x $(BINDIR)/compn clean: rm -f cwj cwjarm compn *.o *.s out a.out test: install tests/runtests (cd tests; chmod +x runtests; ./runtests) armtest: cwjarm tests/runtests (cd tests; chmod +x runtests; ./runtests) testn: installn tests/runtestsn (cd tests; chmod +x runtestsn; ./runtestsn) ================================================ FILE: 53_Mop_up_pt2/Readme.md ================================================ # Part 53: Mopping Up, part 2 In this part of our compiler writing journey, I fix a few annoying things that we use in the compiler's own source code. ## Consecutive String Literals C allows the declaration of string literals by splitting them across multiple lines or as multiple strings, e.g. ```c char *c= "hello " "there, " "how " "are " "you?"; ``` Now, we could fix this problem up in the lexical scanner. However, I spent a lot of time trying to do this. The problem is that the scanner code is now complicated by having to deal with the C pre-processor, and I couldn't find a clean way of allowing consecutive string literals. My solution is to do it in the parser, with a bit of help from the code generator. In `primary()` in `expr.c`, the code that deals with string literals now looks like this: ```c case T_STRLIT: // For a STRLIT token, generate the assembly for it.
id = genglobstr(Text, 0); // 0 means generate a label // For successive STRLIT tokens, append their contents // to this one while (1) { scan(&Peektoken); if (Peektoken.token != T_STRLIT) break; genglobstr(Text, 1); // 1 means don't generate a label scan(&Token); // Skip it } // Now make a leaf AST node for it. id is the string's label. genglobstrend(); n = mkastleaf(A_STRLIT, pointer_to(P_CHAR), NULL, NULL, id); break; ``` `genglobstr()` now takes a second argument which tells it if this is the first part of the string or a successive part of the string. Also, `genglobstrend()` now has the job of NUL terminating the string literal. ## Empty Statements C allows both empty statements and empty compound statements, e.g. ```c while ((c=getc()) != 'x') ; // ';' is an empty statement int fred() { } // Function with empty body ``` and I use both of these in the compiler, so we need to support both of them. In `stmt.c`, the code now does this: ```c static struct ASTnode *single_statement(void) { struct ASTnode *stmt; struct symtable *ctype; switch (Token.token) { case T_SEMI: // An empty statement semi(); break; ... } ... } struct ASTnode *compound_statement(int inswitch) { struct ASTnode *left = NULL; struct ASTnode *tree; while (1) { // Leave if we've hit the end token. We do this first to allow // an empty compound statement if (Token.token == T_RBRACE) return (left); ... } ... } ``` and that fixes both shortcomings. ## Redeclared Symbols C allows a global variable to later be declared extern, and an extern variable to later be declared as a global variable. However, the types of both declarations have to match. We also want to ensure that only one version of the symbol ends up in the symbol table: we don't want both a C_GLOBAL and a C_EXTERN entry! In `stmt.c` I've added a new function called `is_new_symbol()`. We call this after we have parsed the name of a variable and after we have tried to find it in the symbol table.
Thus, `sym` may be NULL (no existing variable) or not NULL (is an existing variable). If the symbol exists, it's actually quite complicated to ensure that it's a safe redeclaration. ```c // Given a pointer to a symbol that may already exist // return true if this symbol doesn't exist. We use // this function to convert externs into globals int is_new_symbol(struct symtable *sym, int class, int type, struct symtable *ctype) { // There is no existing symbol, thus is new if (sym==NULL) return(1); // global versus extern: if they match that it's not new // and we can convert the class to global if ((sym->class== C_GLOBAL && class== C_EXTERN) || (sym->class== C_EXTERN && class== C_GLOBAL)) { // If the types don't match, there's a problem if (type != sym->type) fatals("Type mismatch between global/extern", sym->name); // Struct/unions, also compare the ctype if (type >= P_STRUCT && ctype != sym->ctype) fatals("Type mismatch between global/extern", sym->name); // If we get to here, the types match, so mark the symbol // as global sym->class= C_GLOBAL; // Return that symbol is not new return(0); } // It must be a duplicate symbol if we get here fatals("Duplicate global variable declaration", sym->name); return(-1); // Keep -Wall happy } ``` The code is straight-forward but not elegant. Also note that any redeclared extern symbol is turned into a global symbol. This means we don't have to remove the symbol from the symbol table and add in a new, global, symbol. ## Operand Types to Logical Operations The next bug I hit was something like this: ```c int *x; int y; if (x && y > 12) ... ``` The compiler evaluates the `&&` operation in `binexpr()`. To do this, it ensures that the types of each side of the binary operator are compatible. Well, if the operator above was a `+` then, definitely, the types are incompatible. But with a logical comparison, we can *AND* these together. I've fixed this by adding some more code to the top of `modify_type()` in `types.c`. 
If we are doing an `&&` or an `||` operation, then we need either integer or pointer types on each side of the operation. ```c struct ASTnode *modify_type(struct ASTnode *tree, int rtype, struct symtable *rctype, int op) { int ltype; int lsize, rsize; ltype = tree->type; // For A_LOGOR and A_LOGAND, both types have to be int or pointer types if (op==A_LOGOR || op==A_LOGAND) { if (!inttype(ltype) && !ptrtype(ltype)) return(NULL); if (!inttype(ltype) && !ptrtype(rtype)) return(NULL); return (tree); } ... } ``` I've also realised that I've implemented `&&` and `||` incorrectly, so I'll have to fix that. Not now, but soon. ## Return with No Value One other C feature that is missing is the ability to return from a void function, i.e. just leave without returning any value. However, the current parser expects to see parentheses and an expression after the `return` keyword. So, in `return_statement()` in `stmt.c`, we now have: ```c // Parse a return statement and return its AST static struct ASTnode *return_statement(void) { struct ASTnode *tree= NULL; // Ensure we have 'return' match(T_RETURN, "return"); // See if we have a return value if (Token.token == T_LPAREN) { // Code to parse the parentheses and the expression ... } else { if (Functionid->type != P_VOID) fatal("Must return a value from a non-void function"); } // Add on the A_RETURN node tree = mkastunary(A_RETURN, P_NONE, NULL, tree, NULL, 0); // Get the ';' semi(); return (tree); } ``` If the `return` token isn't followed by a left parenthesis, we leave the expression `tree` set to NULL. We also check that this is a void returning function, and print out a fatal error if not. Now that we have parsed the `return` statement, we may create an A_RETURN AST node with a NULL child. So now we have to deal with this in the code generator.
The top of `cgreturn()` in `cg.c` now has: ```c // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Only return a value if we have a value to return if (reg != NOREG) { .. } cgjump(sym->st_endlabel); } ``` If there was no child AST tree, then there is no register with the expression's value. Thus, we only output the jump to the function's end label. ## Conclusion and What's Next We've fixed five minor issues in the compiler: things that we need to work to get the compiler to compile itself. I did identify a problem with `&&` and `||`. However, before we get to that I need to solve an important, pressing, problem: we have a limited set of CPU registers and, for large source files, we are running out of them. In the next part of our compiler writing journey, I will have to work on implementing register spills. I've been delaying this, but now most of the fatal errors from the compiler (when compiling itself) are register issues. So now it's time to sort this out. [Next step](../54_Reg_Spills/Readme.md) ================================================ FILE: 53_Mop_up_pt2/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\t.text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\t.data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. 
// Given a scalar type value, return the
// size of the type in bytes: 8 for any pointer,
// 1, 4 and 8 for char, int and long.
// Dies through fatald() on any other type.
int cgprimsize(int type) {
  if (ptrtype(type))
    return (8);
  switch (type) {
    case P_CHAR:
      return (1);
    case P_INT:
      return (4);
    case P_LONG:
      return (8);
    default:
      fatald("Bad type in cgprimsize:", type);
  }
  return (0);                   // Keep -Wall happy
}

// Given a scalar type, an existing memory offset
// (which hasn't been allocated to anything yet)
// and a direction (1 is up, -1 is down), calculate
// and return a suitably aligned memory offset
// for this scalar type. This could be the original
// offset, or it could be above/below the original.
// Dies through fatald() if the type is not char, int,
// long or a pointer.
int cgalign(int type, int offset, int direction) {
  int alignment;

  // We don't need to do this on x86-64, but let's
  // align chars on any offset and align ints/pointers
  // on a 4-byte alignment
  switch (type) {
    case P_CHAR:
      return (offset);
    case P_INT:
    case P_LONG:
      break;
    default:
      if (!ptrtype(type))
        fatald("Bad type in cg_align:", type);
  }

  // Here we have an int, a long or a pointer. Align it on a
  // 4-byte offset. I put the generic code here so it can be
  // reused elsewhere.
  alignment = 4;
  if (direction > 0) {
    // Going up: round to the next multiple, unless already on one
    offset = (offset + (alignment - 1)) & ~(alignment - 1);
  } else {
    // Going down: drop the low bits. The previous formula subtracted
    // (alignment - 1) first, which pushed an offset that was already
    // aligned a further 4 bytes down (8 became 4, 4 became 0).
    offset = offset & ~(alignment - 1);
  }
  return (offset);
}

// Position of next local variable relative to stack base pointer.
// We store the offset as positive to make aligning the stack pointer easier
static int localOffset;
static int stackOffset;

// Create the position of a new local variable and
// return it as a negative offset.
static int newlocaloffset(int type) {
  int size;

  // Decrement the offset by a minimum of 4 bytes
  // and allocate on the stack
  size = cgprimsize(type);
  localOffset += (size > 4) ? size : 4;
  return (-localOffset);
}
// We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "%r10", "%r11", "%r12", "%r13", "%r9", "%r8", "%rcx", "%rdx", "%rsi", "%rdi" }; static char *breglist[] = { "%r10b", "%r11b", "%r12b", "%r13b", "%r9b", "%r8b", "%cl", "%dl", "%sil", "%dil" }; static char *dreglist[] = { "%r10d", "%r11d", "%r12d", "%r13d", "%r9d", "%r8d", "%ecx", "%edx", "%esi", "%edi" }; // Set all registers as available. // But if reg is positive, don't free that one. void freeall_registers(int keepreg) { int i; for (i = 0; i < NUMFREEREGS; i++) if (i != keepreg) freereg[i] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. int alloc_register(void) { int i; for (i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) {
  // A non-zero freereg[] entry means the register is already
  // marked free, so freeing it again is a bookkeeping error
  if (freereg[reg] != 0)
    fatald("Error trying to free register", reg);
  freereg[reg] = 1;
}

// Print out the assembly preamble: mark all registers free,
// select the text segment and emit the shared "switch" helper
// routine that the code produced by cgswitch() jumps to.
void cgpreamble() {
  freeall_registers(NOREG);
  cgtextseg();
  fprintf(Outfile,
	  "# internal switch(expr) routine\n"
	  "# %%rsi = switch table, %%rax = expr\n"
	  "# from SubC: http://www.t3x.org/subc/\n"
	  "\n"
	  "switch:\n"
	  " pushq %%rsi\n"
	  " movq %%rdx, %%rsi\n"
	  " movq %%rax, %%rbx\n"
	  " cld\n"
	  " lodsq\n"
	  " movq %%rax, %%rcx\n"
	  "next:\n"
	  " lodsq\n"
	  " movq %%rax, %%rdx\n"
	  " lodsq\n"
	  " cmpq %%rdx, %%rbx\n"
	  " jnz no\n"
	  " popq %%rsi\n"
	  " jmp *%%rax\n"
	  "no:\n"
	  " loop next\n"
	  " lodsq\n"
	  " popq %%rsi\n"
	  " jmp *%%rax\n"
	  "\n");
}

// Nothing to do
void cgpostamble() {
}

// Print out a function preamble.
//   sym - the function's symbol; its member list holds the parameters
// Assigns a stack position (st_posn) to every parameter and to every
// symbol on the Loclhead list, then lowers %rsp to make room for them.
void cgfuncpreamble(struct symtable *sym) {
  char *name = sym->name;
  struct symtable *parm, *locvar;
  int cnt;
  int paramOffset = 16;		// Any pushed params start at this stack offset
  int paramReg = FIRSTPARAMREG;	// Index to the first param register in above reg lists

  // Output in the text segment, reset local offset
  cgtextseg();
  localOffset = 0;

  // Output the function start: save the caller's %rbp and
  // make %rbp point at the new stack frame
  if (sym->class == C_GLOBAL)
    fprintf(Outfile, "\t.globl\t%s\n" "\t.type\t%s, @function\n", name, name);
  fprintf(Outfile, "%s:\n" "\tpushq\t%%rbp\n" "\tmovq\t%%rsp, %%rbp\n", name);

  // Copy any in-register parameters to the stack, up to six of them
  // The remaining parameters are already on the stack
  for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) {
    if (cnt > 6) {
      // Seventh and later parameters: record where they already live,
      // 8 bytes apart starting at paramOffset
      parm->st_posn = paramOffset;
      paramOffset += 8;
    } else {
      // paramReg walks down the register lists from FIRSTPARAMREG
      parm->st_posn = newlocaloffset(parm->type);
      cgstorlocal(paramReg--, parm);
    }
  }

  // For the remainder, if they are a parameter then they are
  // already on the stack. If only a local, make a stack position.
  for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) {
    locvar->st_posn = newlocaloffset(locvar->type);
  }

  // Align the stack pointer to be a multiple of 16
  // less than its previous value
  stackOffset = (localOffset + 15) & ~15;
  fprintf(Outfile, "\taddq\t$%d,%%rsp\n", -stackOffset);
}

// Print out a function postamble: emit the function's end label,
// undo the stack adjustment made by the preamble, restore %rbp
// and return.
void cgfuncpostamble(struct symtable *sym) {
  cglabel(sym->st_endlabel);
  fprintf(Outfile, "\taddq\t$%d,%%rsp\n", stackOffset);
  fputs("\tpopq %rbp\n" "\tret\n", Outfile);
}

// Load an integer literal value into a register.
// Return the number of the register.
// For x86-64, we don't need to worry about the type.
int cgloadint(int value, int type) {
  // Get a new register
  int r = alloc_register();

  fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]);
  return (r);
}

// Load a value from a variable into a register.
// Return the number of the register. If the
// operation is pre- or post-increment/decrement,
// also perform this action.
//   sym - the variable, addressed by name relative to %rip
//   op  - A_PREINC/A_PREDEC/A_POSTINC/A_POSTDEC, or any other
//         value for a plain load
int cgloadglob(struct symtable *sym, int op) {
  // Get a new register
  int r = alloc_register();

  // 8-byte values (longs and pointers) use the q-suffixed instructions
  if (cgprimsize(sym->type) == 8) {
    // Pre-operations modify memory before the load ...
    if (op == A_PREINC)
      fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name);
    if (op == A_PREDEC)
      fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name);
    fprintf(Outfile, "\tmovq\t%s(%%rip), %s\n", sym->name, reglist[r]);
    // ... and post-operations modify it after the load
    if (op == A_POSTINC)
      fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name);
    if (op == A_POSTDEC)
      fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name);
  } else
    // Print out the code to initialise it
    switch (sym->type) {
      case P_CHAR:
	if (op == A_PREINC)
	  fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name);
	if (op == A_PREDEC)
	  fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name);
	// Chars are zero-extended to 64 bits
	fprintf(Outfile, "\tmovzbq\t%s(%%rip), %s\n", sym->name, reglist[r]);
	if (op == A_POSTINC)
	  fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name);
	if (op == A_POSTDEC)
	  fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name);
	break;
      case P_INT:
	if (op == A_PREINC)
	  fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name);
	if (op == A_PREDEC)
	  fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name);
	// Ints are sign-extended to 64 bits
	fprintf(Outfile, "\tmovslq\t%s(%%rip), %s\n", sym->name, reglist[r]);
	if (op == A_POSTINC)
	  fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name);
	if (op == A_POSTDEC)
	  fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name);
	break;
      default:
	fatald("Bad type in cgloadglob:", sym->type);
    }
  return (r);
}

// Load a value from a local variable into a register.
// Return the number of the register. If the
// operation is pre- or post-increment/decrement,
// also perform this action.
//   sym - the variable, addressed as st_posn(%rbp)
//   op  - A_PREINC/A_PREDEC/A_POSTINC/A_POSTDEC, or any other
//         value for a plain load
int cgloadlocal(struct symtable *sym, int op) {
  // Get a new register
  int r = alloc_register();

  // Print out the code to initialise it
  if (cgprimsize(sym->type) == 8) {
    if (op == A_PREINC)
      fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->st_posn);
    if (op == A_PREDEC)
      fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->st_posn);
    fprintf(Outfile, "\tmovq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]);
    if (op == A_POSTINC)
      fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->st_posn);
    if (op == A_POSTDEC)
      fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->st_posn);
  } else
    switch (sym->type) {
      case P_CHAR:
	if (op == A_PREINC)
	  fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->st_posn);
	if (op == A_PREDEC)
	  fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->st_posn);
	// Chars are zero-extended to 64 bits
	fprintf(Outfile, "\tmovzbq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]);
	if (op == A_POSTINC)
	  fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->st_posn);
	if (op == A_POSTDEC)
	  fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->st_posn);
	break;
      case P_INT:
	if (op == A_PREINC)
	  fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->st_posn);
	if (op == A_PREDEC)
	  fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->st_posn);
	// Ints are sign-extended to 64 bits
	fprintf(Outfile, "\tmovslq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]);
	if (op == A_POSTINC)
	  fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->st_posn);
	if (op == A_POSTDEC)
	  fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->st_posn);
	break;
      default:
	fatald("Bad type in cgloadlocal:", sym->type);
    }
  return (r);
}

// Given the label number of a
// global string,
// load its address into a new register
int cgloadglobstr(int label) {
  // Get a new register
  int r = alloc_register();
  fprintf(Outfile, "\tleaq\tL%d(%%rip), %s\n", label, reglist[r]);
  return (r);
}

// Add two registers together and return
// the number of the register with the result.
// The sum is left in r2; r1 is freed.
int cgadd(int r1, int r2) {
  fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r1], reglist[r2]);
  free_register(r1);
  return (r2);
}

// Subtract the second register from the first and
// return the number of the register with the result.
// The difference is left in r1; r2 is freed.
int cgsub(int r1, int r2) {
  fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]);
  free_register(r2);
  return (r1);
}

// Multiply two registers together and return
// the number of the register with the result.
// The product is left in r2; r1 is freed.
int cgmul(int r1, int r2) {
  fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]);
  free_register(r1);
  return (r2);
}

// Divide the first register by the second and
// return the number of the register with the result.
// The emitted code goes through %rax: the dividend is copied in,
// cqo sign-extends it, idivq divides, and %rax is copied back to r1.
int cgdiv(int r1, int r2) {
  fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]);
  fprintf(Outfile, "\tcqo\n");
  fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]);
  fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]);
  free_register(r2);
  return (r1);
}

// Bitwise AND two registers; result in r2, r1 is freed
int cgand(int r1, int r2) {
  fprintf(Outfile, "\tandq\t%s, %s\n", reglist[r1], reglist[r2]);
  free_register(r1);
  return (r2);
}

// Bitwise OR two registers; result in r2, r1 is freed
int cgor(int r1, int r2) {
  fprintf(Outfile, "\torq\t%s, %s\n", reglist[r1], reglist[r2]);
  free_register(r1);
  return (r2);
}

// Bitwise XOR two registers; result in r2, r1 is freed
int cgxor(int r1, int r2) {
  fprintf(Outfile, "\txorq\t%s, %s\n", reglist[r1], reglist[r2]);
  free_register(r1);
  return (r2);
}

// Shift r1 left by the amount in r2; result in r1, r2 is freed.
// The shift count is first copied into %cl.
int cgshl(int r1, int r2) {
  fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]);
  fprintf(Outfile, "\tshlq\t%%cl, %s\n", reglist[r1]);
  free_register(r2);
  return (r1);
}

// Shift r1 right by the amount in r2; result in r1, r2 is freed.
// The shift count is first copied into %cl. The emitted
// instruction is shrq.
int cgshr(int r1, int r2) {
  fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]);
  fprintf(Outfile, "\tshrq\t%%cl, %s\n", reglist[r1]);
  free_register(r2);
  return (r1);
}

// Negate a register's value
int cgnegate(int r) {
  fprintf(Outfile, "\tnegq\t%s\n", reglist[r]);
  return (r);
}

// Invert a register's value
int cginvert(int r) {
  fprintf(Outfile, "\tnotq\t%s\n", reglist[r]);
  return (r);
}

// Logically negate a register's value:
// the register ends up holding 1 if it was zero, otherwise 0
int cglognot(int r) {
  fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]);
  fprintf(Outfile, "\tsete\t%s\n", breglist[r]);
  fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]);
  return (r);
}

// Logically OR two registers and return a
// register with the result, 1 or 0.
// Both operands are already in registers when this is called;
// the result is left in r1 and r2 is freed.
int cglogor(int r1, int r2) {
  // Generate two labels
  int Ltrue = genlabel();
  int Lend = genlabel();

  // Test r1 and jump to true label if true
  fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r1], reglist[r1]);
  fprintf(Outfile, "\tjne\tL%d\n", Ltrue);

  // Test r2 and jump to true label if true
  fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r2], reglist[r2]);
  fprintf(Outfile, "\tjne\tL%d\n", Ltrue);

  // Didn't jump, so result is false
  fprintf(Outfile, "\tmovq\t$0, %s\n", reglist[r1]);
  fprintf(Outfile, "\tjmp\tL%d\n", Lend);

  // Someone jumped to the true label, so result is true
  cglabel(Ltrue);
  fprintf(Outfile, "\tmovq\t$1, %s\n", reglist[r1]);
  cglabel(Lend);
  free_register(r2);
  return (r1);
}

// Logically AND two registers and return a
// register with the result, 1 or 0.
// Both operands are already in registers when this is called;
// the result is left in r1 and r2 is freed.
int cglogand(int r1, int r2) {
  // Generate two labels
  int Lfalse = genlabel();
  int Lend = genlabel();

  // Test r1 and jump to false label if not true
  fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r1], reglist[r1]);
  fprintf(Outfile, "\tje\tL%d\n", Lfalse);

  // Test r2 and jump to false label if not true
  fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r2], reglist[r2]);
  fprintf(Outfile, "\tje\tL%d\n", Lfalse);

  // Didn't jump, so result is true
  fprintf(Outfile, "\tmovq\t$1, %s\n", reglist[r1]);
  fprintf(Outfile, "\tjmp\tL%d\n", Lend);

  // Someone jumped to the false label, so result is false
  cglabel(Lfalse);
  fprintf(Outfile, "\tmovq\t$0, %s\n", reglist[r1]);
  cglabel(Lend);
  free_register(r2);
  return (r1);
}

// Convert an integer
value to a boolean value. Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s@PLT\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\taddq\t$%d, %%rsp\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpushq\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmovq\t%s, %s\n", reglist[r], reglist[FIRSTPARAMREG - argposn + 1]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsalq\t$%d, %s\n", val, reglist[r]); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %s(%%rip)\n", reglist[r], sym->name); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %s(%%rip)\n", breglist[r], sym->name); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %s(%%rip)\n", dreglist[r], sym->name); break; default: fatald("Bad type in cgstorglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %d(%%rbp)\n", reglist[r], sym->st_posn); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %d(%%rbp)\n", breglist[r], sym->st_posn); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %d(%%rbp)\n", dreglist[r], sym->st_posn); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size, type; int initvalue; int i; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the variable (or its elements if an array) // and the type of the variable if (node->stype == S_ARRAY) { size = typesize(value_at(node->type), node->ctype); type = value_at(node->type); } else { size = node->size; type = node->type; } // Generate the global identity and the label cgdataseg(); if (node->class == C_GLOBAL) fprintf(Outfile, 
"\t.globl\t%s\n", node->name); fprintf(Outfile, "%s:\n", node->name); // Output space for one or more elements for (i = 0; i < node->nelems; i++) { // Get any initial value initvalue = 0; if (node->initlist != NULL) initvalue = node->initlist[i]; // Generate the space for this type switch (size) { case 1: fprintf(Outfile, "\t.byte\t%d\n", initvalue); break; case 4: fprintf(Outfile, "\t.long\t%d\n", initvalue); break; case 8: // Generate the pointer to a string literal. Treat a zero value // as actually zero, not the label L0 if (node->initlist != NULL && type == pointer_to(P_CHAR) && initvalue != 0) fprintf(Outfile, "\t.quad\tL%d\n", initvalue); else fprintf(Outfile, "\t.quad\t%d\n", initvalue); break; default: for (i = 0; i < size; i++) fprintf(Outfile, "\t.byte\t0\n"); } } } // Generate a global string and its start label // Don't output the label if append is true. void cgglobstr(int l, char *strvalue, int append) { char *cptr; if (!append) cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\t.byte\t%d\n", *cptr); } } void cgglobstrend(void) { fprintf(Outfile, "\t.byte\t0\n"); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(NOREG); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Only return a value if we have a value to return if (reg != NOREG) { // Deal with pointers here as we can't put them in // the switch statement if (ptrtype(sym->type)) fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); else { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzbl\t%s, %%eax\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %%eax\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", 
sym->type); } } } cgjump(sym->st_endlabel); } // Generate code to load the address of an // identifier into a variable. Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL || sym->class == C_STATIC) fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", sym->name, reglist[r]); else fprintf(Outfile, "\tleaq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzbq\t(%s), %s\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovslq\t(%s), %s\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { // Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmovb\t%s, (%s)\n", breglist[r1], reglist[r2]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } // Generate a switch jump table and the code to // load the registers and call the switch() code void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; // Get a label for the switch table label = genlabel(); cglabel(label); // Heuristic. If we have no cases, create one case // which points to the default case if (casecount == 0) { caseval[0] = 0; caselabel[0] = defaultlabel; casecount = 1; } // Generate the switch jump table. 
fprintf(Outfile, "\t.quad\t%d\n", casecount); for (i = 0; i < casecount; i++) fprintf(Outfile, "\t.quad\t%d, L%d\n", caseval[i], caselabel[i]); fprintf(Outfile, "\t.quad\tL%d\n", defaultlabel); // Load the specific registers cglabel(toplabel); fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); fprintf(Outfile, "\tleaq\tL%d(%%rip), %%rdx\n", label); fprintf(Outfile, "\tjmp\tswitch\n"); } // Move value between registers void cgmove(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s, %s\n", reglist[r1], reglist[r2]); } ================================================ FILE: 53_Mop_up_pt2/cg_arm.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for ARMv6 on Raspberry Pi // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. static int freereg[4]; static char *reglist[4] = { "r4", "r5", "r6", "r7" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // We have to store large integer literal values in memory. // Keep a list of them which will be output in the postamble #define MAXINTS 1024 int Intlist[MAXINTS]; static int Intslot = 0; // Determine the offset of a large integer // literal from the .L3 label. If the integer // isn't in the list, add it. 
static void set_int_offset(int val) {
  int offset = -1;

  // See if it is already there
  for (int i = 0; i < Intslot; i++) {
    if (Intlist[i] == val) {
      // Each list entry is emitted as one 4-byte .word
      offset = 4 * i;
      break;
    }
  }

  // Not in the list, so add it
  if (offset == -1) {
    offset = 4 * Intslot;
    if (Intslot == MAXINTS)
      fatal("Out of int slots in set_int_offset()");
    Intlist[Intslot++] = val;
  }
  // Load r3 with this offset
  fprintf(Outfile, "\tldr\tr3, .L3+%d\n", offset);
}

// Print out the assembly preamble
void cgpreamble() {
  freeall_registers();
  fputs("\t.text\n", Outfile);
}

// Print out the assembly postamble: the .L2 table with one word
// per S_VARIABLE symbol, then the .L3 table of integer literals
// collected by set_int_offset()
void cgpostamble() {

  // Print out the global variables
  fprintf(Outfile, ".L2:\n");
  for (int i = 0; i < Globs; i++) {
    if (Symtable[i].stype == S_VARIABLE)
      fprintf(Outfile, "\t.word %s\n", Symtable[i].name);
  }

  // Print out the integer literals
  fprintf(Outfile, ".L3:\n");
  for (int i = 0; i < Intslot; i++) {
    fprintf(Outfile, "\t.word %d\n", Intlist[i]);
  }
}

// Print out a function preamble
//   id - symbol table slot of the function
void cgfuncpreamble(int id) {
  char *name = Symtable[id].name;
  fprintf(Outfile,
	  "\t.text\n"
	  "\t.globl\t%s\n"
	  "\t.type\t%s, \%%function\n"
	  "%s:\n" "\tpush\t{fp, lr}\n"
	  "\tadd\tfp, sp, #4\n"
	  "\tsub\tsp, sp, #8\n" "\tstr\tr0, [fp, #-8]\n", name, name, name);
}

// Print out a function postamble
//   id - symbol table slot of the function
void cgfuncpostamble(int id) {
  cglabel(Symtable[id].endlabel);
  fputs("\tsub\tsp, fp, #4\n" "\tpop\t{fp, pc}\n" "\t.align\t2\n", Outfile);
}

// Load an integer literal value into a register.
// Return the number of the register.
int cgloadint(int value, int type) {
  // Get a new register
  int r = alloc_register();

  // If the literal value is small, do it with one instruction
  // NOTE(review): the test is only "value <= 1000"; whether every such
  // value is a legal mov immediate is not checked here - confirm
  if (value <= 1000)
    fprintf(Outfile, "\tmov\t%s, #%d\n", reglist[r], value);
  else {
    // Otherwise go through the .L3 literal table via r3
    set_int_offset(value);
    fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]);
  }
  return (r);
}

// Determine the offset of a variable from the .L2
// label. Yes, this is inefficient code.
static void set_var_offset(int id) {
  int offset = 0;

  // Walk the symbol table up to id.
  // Find S_VARIABLEs and add on 4 until
  // we get to our variable
  for (int i = 0; i < id; i++) {
    if (Symtable[i].stype == S_VARIABLE)
      offset += 4;
  }
  // Load r3 with this offset
  fprintf(Outfile, "\tldr\tr3, .L2+%d\n", offset);
}

// Load a value from a variable into a register.
// Return the number of the register
int cgloadglob(int id) {
  // Get a new register
  int r = alloc_register();

  // Get the offset to the variable
  set_var_offset(id);

  // Chars use a byte load, every other supported type a word load
  switch (Symtable[id].type) {
    case P_CHAR:
      fprintf(Outfile, "\tldrb\t%s, [r3]\n", reglist[r]);
      break;
    case P_INT:
    case P_LONG:
    case P_CHARPTR:
    case P_INTPTR:
    case P_LONGPTR:
      fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]);
      break;
    default:
      fatald("Bad type in cgloadglob:", Symtable[id].type);
  }
  return (r);
}

// Add two registers together and return
// the number of the register with the result.
// The sum is left in r2; r1 is freed.
int cgadd(int r1, int r2) {
  fprintf(Outfile, "\tadd\t%s, %s, %s\n", reglist[r2], reglist[r1],
	  reglist[r2]);
  free_register(r1);
  return (r2);
}

// Subtract the second register from the first and
// return the number of the register with the result.
// The difference is left in r1; r2 is freed.
int cgsub(int r1, int r2) {
  fprintf(Outfile, "\tsub\t%s, %s, %s\n", reglist[r1], reglist[r1],
	  reglist[r2]);
  free_register(r2);
  return (r1);
}

// Multiply two registers together and return
// the number of the register with the result.
// The product is left in r2; r1 is freed.
int cgmul(int r1, int r2) {
  fprintf(Outfile, "\tmul\t%s, %s, %s\n", reglist[r2], reglist[r1],
	  reglist[r2]);
  free_register(r1);
  return (r2);
}

// Divide the first register by the second and
// return the number of the register with the result
int cgdiv(int r1, int r2) {

  // To do a divide: r0 holds the dividend, r1 holds the divisor.
  // The quotient is in r0.
  fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r1]);
  fprintf(Outfile, "\tmov\tr1, %s\n", reglist[r2]);
  fprintf(Outfile, "\tbl\t__aeabi_idiv\n");
  fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r1]);
  free_register(r2);
  return (r1);
}

// Call a function with one argument from the given register
// Return the register with the result.
// The argument register is reused to hold the result.
int cgcall(int r, int id) {
  fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]);
  fprintf(Outfile, "\tbl\t%s\n", Symtable[id].name);
  fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r]);
  return (r);
}

// Shift a register left by a constant
int cgshlconst(int r, int val) {
  fprintf(Outfile, "\tlsl\t%s, %s, #%d\n", reglist[r], reglist[r], val);
  return (r);
}

// Store a register's value into a variable.
// The register is returned unchanged and is not freed.
int cgstorglob(int r, int id) {

  // Get the offset to the variable
  set_var_offset(id);

  // Chars use a byte store, every other supported type a word store
  switch (Symtable[id].type) {
    case P_CHAR:
      fprintf(Outfile, "\tstrb\t%s, [r3]\n", reglist[r]);
      break;
    case P_INT:
    case P_LONG:
    case P_CHARPTR:
    case P_INTPTR:
    case P_LONGPTR:
      fprintf(Outfile, "\tstr\t%s, [r3]\n", reglist[r]);
      break;
    default:
      fatald("Bad type in cgstorglob:", Symtable[id].type);
  }
  return (r);
}

// Given a P_XXX type value, return the
// size of a primitive type in bytes.
int cgprimsize(int type) { if (ptrtype(type)) return (4); switch (type) { case P_CHAR: return (1); case P_INT: case P_LONG: return (4); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Symtable[id].type); fprintf(Outfile, "\t.data\n" "\t.globl\t%s\n", Symtable[id].name); switch (typesize) { case 1: fprintf(Outfile, "%s:\t.byte\t0\n", Symtable[id].name); break; case 4: fprintf(Outfile, "%s:\t.long\t0\n", Symtable[id].name); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "moveq", "movne", "movlt", "movgt", "movle", "movge" }; // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "movne", "moveq", "movge", "movle", "movgt", "movlt" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #1\n", cmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #0\n", invcmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\tuxtb\t%s, %s\n", reglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tb\tL%d\n", l); } // List of inverted branch instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *brlist[] = { "bne", "beq", "bge", "ble", "bgt", "blt" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", brlist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[reg]); cgjump(Symtable[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. Return a new register int cgaddress(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); fprintf(Outfile, "\tmov\t%s, r3\n", reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tldrb\t%s, [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [%s]\n", reglist[r1], reglist[r2]); break; case P_INT: case P_LONG: fprintf(Outfile, "\tstr\t%s, [%s]\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 53_Mop_up_pt2/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { no_seg, 
text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\tsection .text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\tsection .data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch (type) { case P_CHAR: return (offset); case P_INT: case P_LONG: break; default: if (!ptrtype(type)) fatald("Bad type in cg_align:", type); } // Here we have an int or a long. Align it on a 4-byte offset // I put the generic code here so it can be reused elsewhere. alignment = 4; offset = (offset + direction * (alignment - 1)) & ~(alignment - 1); return (offset); } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. 
// We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "r10", "r11", "r12", "r13", "r9", "r8", "rcx", "rdx", "rsi", "rdi" }; static char *breglist[] = { "r10b", "r11b", "r12b", "r13b", "r9b", "r8b", "cl", "dl", "sil", "dil" }; static char *dreglist[] = { "r10d", "r11d", "r12d", "r13d", "r9d", "r8d", "ecx", "edx", "esi", "edi" }; // Set all registers as available // But if reg is positive, don't free that one. void freeall_registers(int keepreg) { int i; for (i = 0; i < NUMFREEREGS; i++) if (i != keepreg) freereg[i] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. int alloc_register(void) { int i; for (i = 0; i < NUMFREEREGS; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // Print out the assembly preamble void cgpreamble() { freeall_registers(NOREG); fputs("\textern\tprintint\n", Outfile); fputs("\textern\tprintchar\n", Outfile); fputs("\textern\topen\n", Outfile); fputs("\textern\tclose\n", Outfile); fputs("\textern\tread\n", Outfile); fputs("\textern\twrite\n", Outfile); fputs("\textern\tprintf\n", Outfile); cgtextseg(); fprintf(Outfile, "; internal switch(expr) routine\n" "; rsi = switch table, rax = expr\n" "; from SubC: http://www.t3x.org/subc/\n" "\n" "switch:\n" " push rsi\n" " mov rsi, rdx\n" " mov rbx, rax\n" " cld\n" " lodsq\n" " mov rcx, rax\n" "next:\n" " lodsq\n" " mov rdx, rax\n" " lodsq\n" " cmp rbx, rdx\n" " jnz no\n" " pop rsi\n" " jmp rax\n" "no:\n" " loop next\n" " lodsq\n" " pop rsi\n" " jmp rax\n" "\n"); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(struct symtable *sym) { char *name = sym->name; struct symtable *parm, *locvar; int cnt; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the rsp and rbp if (sym->class == C_GLOBAL) fprintf(Outfile, "\tglobal\t%s\n", name); fprintf(Outfile, "%s:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n", name); // Copy any in-register parameters to the stack, up to six of them // The remaining parameters are already on the stack for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) { if (cnt > 6) { parm->st_posn = paramOffset; paramOffset += 8; } else { parm->st_posn = newlocaloffset(parm->type); cgstorlocal(paramReg--, parm); } } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. 
for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) { locvar->st_posn = newlocaloffset(locvar->type); } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\tadd\trsp, %d\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->st_endlabel); fprintf(Outfile, "\tadd\trsp, %d\n", stackOffset); fputs("\tpop rbp\n" "\tret\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadglob(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); } else // Print out the code to initialise it switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_PREDEC) 
fprintf(Outfile, "\tdec\tdword [%s]\n", sym->name); fprintf(Outfile, "\tmovsx\t%s, word [%s]\n", dreglist[r], sym->name); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword [%s]\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadlocal(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->st_posn); fprintf(Outfile, "\tmov\t%s, [rbp+%d]\n", reglist[r], sym->st_posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->st_posn); } else switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->st_posn); fprintf(Outfile, "\tmovzx\t%s, byte [rbp+%d]\n", reglist[r], sym->st_posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->st_posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", sym->st_posn); fprintf(Outfile, "\tmovsx\t%s, dword [rbp+%d]\n", reglist[r], sym->st_posn); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", 
sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", sym->st_posn); break; default: fatald("Bad type in cgloadlocal:", sym->type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, L%d\n", reglist[r], label); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tand\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgor(int r1, int r2) { fprintf(Outfile, "\tor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txor\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r1); return (r2); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshl\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, 
"\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshr\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tneg\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnot\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r], breglist[r]); return (r); } // Logically OR two registers and return a // register with the result, 1 or 0 int cglogor(int r1, int r2) { // Generate two labels int Ltrue = genlabel(); int Lend = genlabel(); // Test r1 and jump to true label if true fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r1], reglist[r1]); fprintf(Outfile, "\tjne\tL%d\n", Ltrue); // Test r2 and jump to true label if true fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r2], reglist[r2]); fprintf(Outfile, "\tjne\tL%d\n", Ltrue); // Didn't jump, so result is false fprintf(Outfile, "\tmov\t%s, 0\n", reglist[r1]); fprintf(Outfile, "\tjmp\tL%d\n", Lend); // Someone jumped to the true label, so result is true cglabel(Ltrue); fprintf(Outfile, "\tmov\t%s, 1\n", reglist[r1]); cglabel(Lend); free_register(r2); return(r1); } // Logically AND two registers and return a // register with the result, 1 or 0 int cglogand(int r1, int r2) { // Generate two labels int Lfalse = genlabel(); int Lend = genlabel(); // Test r1 and jump to false label if not true fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r1], reglist[r1]); fprintf(Outfile, "\tje\tL%d\n", Lfalse); // Test r2 and jump to false label if not true fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r2], reglist[r2]); fprintf(Outfile, "\tje\tL%d\n", Lfalse); // Didn't jump, so result is true fprintf(Outfile, "\tmov\t%s, 1\n", reglist[r1]); fprintf(Outfile, "\tjmp\tL%d\n", Lend); // Someone jumped to the false label, 
so result is false cglabel(Lfalse); fprintf(Outfile, "\tmov\t%s, 0\n", reglist[r1]); cglabel(Lend); free_register(r2); return(r1); } // Convert an integer value to a boolean value. Jump if // it's an IF or WHILE operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); if (op == A_IF || op == A_WHILE) fprintf(Outfile, "\tje\tL%d\n", label); else { fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, byte %s\n", reglist[r], breglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { // Get a new register int outr = alloc_register(); // Call the function fprintf(Outfile, "\tcall\t%s\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\tadd\trsp, %d\n", 8 * (numargs - 6)); // and copy the return value into our register fprintf(Outfile, "\tmov\t%s, rax\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpush\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmov\t%s, %s\n", reglist[FIRSTPARAMREG - argposn + 1], reglist[r]); } } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsal\t%s, %d\n", reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, dreglist[r]); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\tqword\t[rbp+%d], %s\n", sym->st_posn, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\tbyte\t[rbp+%d], %s\n", sym->st_posn, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\tdword\t[rbp+%d], %s\n", sym->st_posn, dreglist[r]); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size, type; int initvalue; int i; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the variable (or its elements if an array) // and the type of the variable if (node->stype == S_ARRAY) { size = typesize(value_at(node->type), node->ctype); type = value_at(node->type); } else { size = node->size; type = node->type; } // Generate the global identity and the label cgdataseg(); if (node->class == C_GLOBAL) fprintf(Outfile, "\tglobal\t%s\n", 
node->name); fprintf(Outfile, "%s:\n", node->name); // Output space for one or more elements for (i = 0; i < node->nelems; i++) { // Get any initial value initvalue = 0; if (node->initlist != NULL) initvalue = node->initlist[i]; // Generate the space for this type // original version switch(size) { case 1: fprintf(Outfile, "\tdb\t%d\n", initvalue); break; case 4: fprintf(Outfile, "\tdd\t%d\n", initvalue); break; case 8: // Generate the pointer to a string literal. Treat a zero value // as actually zero, not the label L0 if (node->initlist != NULL && type == pointer_to(P_CHAR) && initvalue != 0) fprintf(Outfile, "\tdq\tL%d\n", initvalue); else fprintf(Outfile, "\tdq\t%d\n", initvalue); break; default: for (i = 0; i < size; i++) fprintf(Outfile, "\tdb\t0\n"); /* compact version using times instead of loop fprintf(Outfile, "\ttimes\t%d\tdb\t0\n", size); */ } } } // Generate a global string and its start label // Don't output the label if append is true. void cgglobstr(int l, char *strvalue, int append) { char *cptr; if (!append) cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\tdb\t%d\n", *cptr); } } void cgglobstrend(void) { fprintf(Outfile, "\tdb\t0\n"); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(NOREG); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Only return a value if we have a value to return if (reg != NOREG) { // Deal with pointers here as we can't put them in // the switch statement if (ptrtype(sym->type)) fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); else { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzx\teax, %s\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmov\teax, %s\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } } 
} cgjump(sym->st_endlabel); } // Generate code to load the address of an // identifier into a variable. Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL || sym->class == C_STATIC) fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r], sym->name); else fprintf(Outfile, "\tlea\t%s, [rbp+%d]\n", reglist[r], sym->st_posn); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], reglist[r]); break; case 2: fprintf(Outfile, "\tmovsx\t%s, dword [%s]\n", reglist[r], reglist[r]); break; case 4: case 8: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { //Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmov\t[%s], byte %s\n", reglist[r2], breglist[r1]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } // Generate a switch jump table and the code to // load the registers and call the switch() code void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; // Get a label for the switch table label = genlabel(); cglabel(label); // Heuristic. If we have no cases, create one case // which points to the default case if (casecount == 0) { caseval[0] = 0; caselabel[0] = defaultlabel; casecount = 1; } // Generate the switch jump table. 
fprintf(Outfile, "\tdq\t%d\n", casecount); for (i = 0; i < casecount; i++) fprintf(Outfile, "\tdq\t%d, L%d\n", caseval[i], caselabel[i]); fprintf(Outfile, "\tdq\tL%d\n", defaultlabel); // Load the specific registers cglabel(toplabel); fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); fprintf(Outfile, "\tmov\trdx, L%d\n", label); fprintf(Outfile, "\tjmp\tswitch\n"); } // Move value between registers void cgmove(int r1, int r2) { fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r2], reglist[r1]); } ================================================ FILE: 53_Mop_up_pt2/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Linestart; // True if at start of a line extern_ int Putback; // Character put back by scanner extern_ struct symtable *Functionid; // Symbol ptr of the current function extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ char *Infilename; // Name of file we are parsing extern_ char *Outfilename; // Name of file we opened as Outfile extern_ struct token Token; // Last token scanned extern_ struct token Peektoken; // A look-ahead token extern_ char Text[TEXTLEN + 1]; // Last identifier scanned extern_ int Looplevel; // Depth of nested loops extern_ int Switchlevel; // Depth of nested switches extern char *Tstring[]; // List of token strings // Symbol table lists extern_ struct symtable *Globhead, *Globtail; // Global variables and functions extern_ struct symtable *Loclhead, *Locltail; // Local variables extern_ struct symtable *Parmhead, *Parmtail; // Local parameters extern_ struct symtable *Membhead, *Membtail; // Temp list of struct/union members extern_ struct symtable *Structhead, *Structtail; // List of struct types extern_ struct symtable *Unionhead, *Uniontail; // List of union types extern_ struct symtable *Enumhead, *Enumtail; // List of enum types and values 
extern_ struct symtable *Typehead, *Typetail; // List of typedefs // Command-line flags extern_ int O_dumpAST; // If true, dump the AST trees extern_ int O_dumpsym; // If true, dump the symbol table extern_ int O_keepasm; // If true, keep any assembly files extern_ int O_assemble; // If true, assemble the assembly files extern_ int O_dolink; // If true, link the object files extern_ int O_verbose; // If true, print info on compilation stages ================================================ FILE: 53_Mop_up_pt2/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 static struct symtable *composite_declaration(int type); static int typedef_declaration(struct symtable **ctype); static int type_of_typedef(char *name, struct symtable **ctype); static void enum_declaration(void); // Parse the current token and return a primitive type enum value, // a pointer to any composite type and possibly modify // the class of the type. int parse_type(struct symtable **ctype, int *class) { int type, exstatic = 1; // See if the class has been changed to extern or static while (exstatic) { switch (Token.token) { case T_EXTERN: if (*class == C_STATIC) fatal("Illegal to have extern and static at the same time"); *class = C_EXTERN; scan(&Token); break; case T_STATIC: if (*class == C_LOCAL) fatal("Compiler doesn't support static local declarations"); if (*class == C_EXTERN) fatal("Illegal to have extern and static at the same time"); *class = C_STATIC; scan(&Token); break; default: exstatic = 0; } } // Now work on the actual type keyword switch (Token.token) { case T_VOID: type = P_VOID; scan(&Token); break; case T_CHAR: type = P_CHAR; scan(&Token); break; case T_INT: type = P_INT; scan(&Token); break; case T_LONG: type = P_LONG; scan(&Token); break; // For the following, if we have a ';' after the // parsing then there is no type, so return -1. 
// Example: struct x {int y; int z}; case T_STRUCT: type = P_STRUCT; *ctype = composite_declaration(P_STRUCT); if (Token.token == T_SEMI) type = -1; break; case T_UNION: type = P_UNION; *ctype = composite_declaration(P_UNION); if (Token.token == T_SEMI) type = -1; break; case T_ENUM: type = P_INT; // Enums are really ints enum_declaration(); if (Token.token == T_SEMI) type = -1; break; case T_TYPEDEF: type = typedef_declaration(ctype); if (Token.token == T_SEMI) type = -1; break; case T_IDENT: type = type_of_typedef(Text, ctype); break; default: fatals("Illegal type, token", Token.tokstr); } return (type); } // Given a type parsed by parse_type(), scan in any following // '*' tokens and return the new type int parse_stars(int type) { while (1) { if (Token.token != T_STAR) break; type = pointer_to(type); scan(&Token); } return (type); } // Parse a type which appears inside a cast int parse_cast(struct symtable **ctype) { int type, class = 0; // Get the type inside the parentheses type = parse_stars(parse_type(ctype, &class)); // Do some error checking. I'm sure more can be done if (type == P_STRUCT || type == P_UNION || type == P_VOID) fatal("Cannot cast to a struct, union or void type"); return (type); } // Given a type, parse an expression of literals and ensure // that the type of this expression matches the given type. // Parse any type cast that precedes the expression. // If an integer literal, return this value. // If a string literal, return the label number of the string. 
int parse_literal(int type) { struct ASTnode *tree; // Parse the expression and optimise the resulting AST tree tree = optimise(binexpr(0)); // If there's a cast, get the child and // mark it as having the type from the cast if (tree->op == A_CAST) { tree->left->type = tree->type; tree = tree->left; } // The tree must now have an integer or string literal if (tree->op != A_INTLIT && tree->op != A_STRLIT) fatal("Cannot initialise globals with a general expression"); // If the type is char * and if (type == pointer_to(P_CHAR)) { // We have a string literal, return the label number if (tree->op == A_STRLIT) return (tree->a_intvalue); // We have a zero int literal, so that's a NULL if (tree->op == A_INTLIT && tree->a_intvalue == 0) return (0); } // We only get here with an integer literal. The input type // is an integer type and is wide enough to hold the literal value if (inttype(type) && typesize(type, NULL) >= typesize(tree->type, NULL)) return (tree->a_intvalue); fatal("Type mismatch: literal vs. variable"); return (0); // Keep -Wall happy } // Given a pointer to a symbol that may already exist // return true if this symbol doesn't exist. 
We use // this function to convert externs into globals int is_new_symbol(struct symtable *sym, int class, int type, struct symtable *ctype) { // There is no existing symbol, thus is new if (sym==NULL) return(1); // global versus extern: if they match that it's not new // and we can convert the class to global if ((sym->class== C_GLOBAL && class== C_EXTERN) || (sym->class== C_EXTERN && class== C_GLOBAL)) { // If the types don't match, there's a problem if (type != sym->type) fatals("Type mismatch between global/extern", sym->name); // Struct/unions, also compare the ctype if (type >= P_STRUCT && ctype != sym->ctype) fatals("Type mismatch between global/extern", sym->name); // If we get to here, the types match, so mark the symbol // as global sym->class= C_GLOBAL; // Return that symbol is not new return(0); } // It must be a duplicate symbol if we get here fatals("Duplicate global variable declaration", sym->name); return(-1); // Keep -Wall happy } // Given the type, name and class of a scalar variable, // parse any initialisation value and allocate storage for it. // Return the variable's symbol table entry. 
static struct symtable *scalar_declaration(char *varname, int type, struct symtable *ctype, int class, struct ASTnode **tree) { struct symtable *sym = NULL; struct ASTnode *varnode, *exprnode; *tree = NULL; // Add this as a known scalar switch (class) { case C_STATIC: case C_EXTERN: case C_GLOBAL: // See if this variable is new or already exists sym= findglob(varname); if (is_new_symbol(sym, class, type, ctype)) sym = addglob(varname, type, ctype, S_VARIABLE, class, 1, 0); break; case C_LOCAL: sym = addlocl(varname, type, ctype, S_VARIABLE, 1); break; case C_PARAM: sym = addparm(varname, type, ctype, S_VARIABLE); break; case C_MEMBER: sym = addmemb(varname, type, ctype, S_VARIABLE, 1); break; } // The variable is being initialised if (Token.token == T_ASSIGN) { // Only possible for a global or local if (class != C_GLOBAL && class != C_LOCAL && class != C_STATIC) fatals("Variable can not be initialised", varname); scan(&Token); // Globals must be assigned a literal value if (class == C_GLOBAL || class == C_STATIC) { // Create one initial value for the variable and // parse this value sym->initlist = (int *) malloc(sizeof(int)); sym->initlist[0] = parse_literal(type); } if (class == C_LOCAL) { // Make an A_IDENT AST node with the variable varnode = mkastleaf(A_IDENT, sym->type, sym->ctype, sym, 0); // Get the expression for the assignment, make into a rvalue exprnode = binexpr(0); exprnode->rvalue = 1; // Ensure the expression's type matches the variable exprnode = modify_type(exprnode, varnode->type, varnode->ctype, 0); if (exprnode == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree *tree = mkastnode(A_ASSIGN, exprnode->type, exprnode->ctype, exprnode, NULL, varnode, NULL, 0); } } // Generate any global space if (class == C_GLOBAL || class == C_STATIC) genglobsym(sym); return (sym); } // Given the type, name and class of an variable, parse // the size of the array, if any. 
Then parse any initialisation // value and allocate storage for it. // Return the variable's symbol table entry. static struct symtable *array_declaration(char *varname, int type, struct symtable *ctype, int class) { struct symtable *sym; // New symbol table entry int nelems = -1; // Assume the number of elements won't be given int maxelems; // The maximum number of elements in the init list int *initlist; // The list of initial elements int i = 0, j; // Skip past the '[' scan(&Token); // See we have an array size if (Token.token != T_RBRACKET) { nelems = parse_literal(P_INT); if (nelems <= 0) fatald("Array size is illegal", nelems); } // Ensure we have a following ']' match(T_RBRACKET, "]"); // Add this as a known array. We treat the // array as a pointer to its elements' type switch (class) { case C_STATIC: case C_EXTERN: case C_GLOBAL: // See if this variable is new or already exists sym= findglob(varname); if (is_new_symbol(sym, class, pointer_to(type), ctype)) sym = addglob(varname, pointer_to(type), ctype, S_ARRAY, class, 0, 0); break; default: fatal("For now, declaration of non-global arrays is not implemented"); } // Array initialisation if (Token.token == T_ASSIGN) { if (class != C_GLOBAL && class != C_STATIC) fatals("Variable can not be initialised", varname); scan(&Token); // Get the following left curly bracket match(T_LBRACE, "{"); #define TABLE_INCREMENT 10 // If the array already has nelems, allocate that many elements // in the list. Otherwise, start with TABLE_INCREMENT. 
if (nelems != -1) maxelems = nelems; else maxelems = TABLE_INCREMENT; initlist = (int *) malloc(maxelems * sizeof(int)); // Loop getting a new literal value from the list while (1) { // Check we can add the next value, then parse and add it if (nelems != -1 && i == maxelems) fatal("Too many values in initialisation list"); initlist[i++] = parse_literal(type); // Increase the list size if the original size was // not set and we have hit the end of the current list if (nelems == -1 && i == maxelems) { maxelems += TABLE_INCREMENT; initlist = (int *) realloc(initlist, maxelems * sizeof(int)); } // Leave when we hit the right curly bracket if (Token.token == T_RBRACE) { scan(&Token); break; } // Next token must be a comma, then comma(); } // Zero any unused elements in the initlist. // Attach the list to the symbol table entry for (j = i; j < sym->nelems; j++) initlist[j] = 0; if (i > nelems) nelems = i; sym->initlist = initlist; } // Set the size of the array and the number of elements sym->nelems = nelems; sym->size = sym->nelems * typesize(type, ctype); // Generate any global space if (class == C_GLOBAL || class == C_STATIC) genglobsym(sym); return (sym); } // Given a pointer to the new function being declared and // a possibly NULL pointer to the function's previous declaration, // parse a list of parameters and cross-check them against the // previous declaration. Return the count of parameters static int param_declaration_list(struct symtable *oldfuncsym, struct symtable *newfuncsym) { int type, paramcnt = 0; struct symtable *ctype; struct symtable *protoptr = NULL; struct ASTnode *unused; // Get the pointer to the first prototype parameter if (oldfuncsym != NULL) protoptr = oldfuncsym->member; // Loop getting any parameters while (Token.token != T_RPAREN) { // If the first token is 'void' if (Token.token == T_VOID) { // Peek at the next token. If a ')', the function // has no parameters, so leave the loop. 
scan(&Peektoken); if (Peektoken.token == T_RPAREN) { // Move the Peektoken into the Token paramcnt = 0; scan(&Token); break; } } // Get the type of the next parameter type = declaration_list(&ctype, C_PARAM, T_COMMA, T_RPAREN, &unused); if (type == -1) fatal("Bad type in parameter list"); // Ensure the type of this parameter matches the prototype if (protoptr != NULL) { if (type != protoptr->type) fatald("Type doesn't match prototype for parameter", paramcnt + 1); protoptr = protoptr->next; } paramcnt++; // Stop when we hit the right parenthesis if (Token.token == T_RPAREN) break; // We need a comma as separator comma(); } if (oldfuncsym != NULL && paramcnt != oldfuncsym->nelems) fatals("Parameter count mismatch for function", oldfuncsym->name); // Return the count of parameters return (paramcnt); } // // function_declaration: type identifier '(' parameter_list ')' ; // | type identifier '(' parameter_list ')' compound_statement ; // // Parse the declaration of function. static struct symtable *function_declaration(char *funcname, int type, struct symtable *ctype, int class) { struct ASTnode *tree, *finalstmt; struct symtable *oldfuncsym, *newfuncsym = NULL; int endlabel, paramcnt; // Text has the identifier's name. If this exists and is a // function, get the id. Otherwise, set oldfuncsym to NULL. if ((oldfuncsym = findsymbol(funcname)) != NULL) if (oldfuncsym->stype != S_FUNCTION) oldfuncsym = NULL; // If this is a new function declaration, get a // label-id for the end label, and add the function // to the symbol table, if (oldfuncsym == NULL) { endlabel = genlabel(); // Assumtion: functions only return scalar types, so NULL below newfuncsym = addglob(funcname, type, NULL, S_FUNCTION, class, 0, endlabel); } // Scan in the '(', any parameters and the ')'. 
// Pass in any existing function prototype pointer lparen(); paramcnt = param_declaration_list(oldfuncsym, newfuncsym); rparen(); // If this is a new function declaration, update the // function symbol entry with the number of parameters. // Also copy the parameter list into the function's node. if (newfuncsym) { newfuncsym->nelems = paramcnt; newfuncsym->member = Parmhead; oldfuncsym = newfuncsym; } // Clear out the parameter list Parmhead = Parmtail = NULL; // Declaration ends in a semicolon, only a prototype. if (Token.token == T_SEMI) return (oldfuncsym); // This is not just a prototype. // Set the Functionid global to the function's symbol pointer Functionid = oldfuncsym; // Get the AST tree for the compound statement and mark // that we have parsed no loops or switches yet Looplevel = 0; Switchlevel = 0; lbrace(); tree = compound_statement(0); rbrace(); // If the function type isn't P_VOID .. if (type != P_VOID) { // Error if no statements in the function if (tree == NULL) fatal("No statements in function with non-void type"); // Check that the last AST operation in the // compound statement was a return statement finalstmt = (tree->op == A_GLUE) ? tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); } // Build the A_FUNCTION node which has the function's symbol pointer // and the compound statement sub-tree tree = mkastunary(A_FUNCTION, type, ctype, tree, oldfuncsym, endlabel); // Do optimisations on the AST tree tree = optimise(tree); // Dump the AST tree if requested if (O_dumpAST) { dumpAST(tree, NOLABEL, 0); fprintf(stdout, "\n\n"); } // Generate the assembly code for it genAST(tree, NOLABEL, NOLABEL, NOLABEL, 0); // Now free the symbols associated // with this function freeloclsyms(); return (oldfuncsym); } // Parse composite type declarations: structs or unions. // Either find an existing struct/union declaration, or build // a struct/union symbol table entry and return its pointer. 
static struct symtable *composite_declaration(int type) { struct symtable *ctype = NULL; struct symtable *m; struct ASTnode *unused; int offset; int t; // Skip the struct/union keyword scan(&Token); // See if there is a following struct/union name if (Token.token == T_IDENT) { // Find any matching composite type if (type == P_STRUCT) ctype = findstruct(Text); else ctype = findunion(Text); scan(&Token); } // If the next token isn't an LBRACE , this is // the usage of an existing struct/union type. // Return the pointer to the type. if (Token.token != T_LBRACE) { if (ctype == NULL) fatals("unknown struct/union type", Text); return (ctype); } // Ensure this struct/union type hasn't been // previously defined if (ctype) fatals("previously defined struct/union", Text); // Build the composite type and skip the left brace if (type == P_STRUCT) ctype = addstruct(Text); else ctype = addunion(Text); scan(&Token); // Scan in the list of members while (1) { // Get the next member. m is used as a dummy t = declaration_list(&m, C_MEMBER, T_SEMI, T_RBRACE, &unused); if (t == -1) fatal("Bad type in member list"); if (Token.token == T_SEMI) scan(&Token); if (Token.token == T_RBRACE) break; } // Attach to the struct type's node rbrace(); if (Membhead == NULL) fatals("No members in struct", ctype->name); ctype->member = Membhead; Membhead = Membtail = NULL; // Set the offset of the initial member // and find the first free byte after it m = ctype->member; m->st_posn = 0; offset = typesize(m->type, m->ctype); // Set the position of each successive member in the composite type // Unions are easy. 
For structs, align the member and find the next free byte for (m = m->next; m != NULL; m = m->next) { // Set the offset for this member if (type == P_STRUCT) m->st_posn = genalign(m->type, offset, 1); else m->st_posn = 0; // Get the offset of the next free byte after this member offset += typesize(m->type, m->ctype); } // Set the overall size of the composite type ctype->size = offset; return (ctype); } // Parse an enum declaration static void enum_declaration(void) { struct symtable *etype = NULL; char *name = NULL; int intval = 0; // Skip the enum keyword. scan(&Token); // If there's a following enum type name, get a // pointer to any existing enum type node. if (Token.token == T_IDENT) { etype = findenumtype(Text); name = strdup(Text); // As it gets tromped soon scan(&Token); } // If the next token isn't a LBRACE, check // that we have an enum type name, then return if (Token.token != T_LBRACE) { if (etype == NULL) fatals("undeclared enum type:", name); return; } // We do have an LBRACE. Skip it scan(&Token); // If we have an enum type name, ensure that it // hasn't been declared before. if (etype != NULL) fatals("enum type redeclared:", etype->name); else // Build an enum type node for this identifier etype = addenum(name, C_ENUMTYPE, 0); // Loop to get all the enum values while (1) { // Ensure we have an identifier // Copy it in case there's an int literal coming up ident(); name = strdup(Text); // Ensure this enum value hasn't been declared before etype = findenumval(name); if (etype != NULL) fatals("enum value redeclared:", Text); // If the next token is an '=', skip it and // get the following int literal if (Token.token == T_ASSIGN) { scan(&Token); if (Token.token != T_INTLIT) fatal("Expected int literal after '='"); intval = Token.intvalue; scan(&Token); } // Build an enum value node for this identifier. // Increment the value for the next enum identifier. 
etype = addenum(name, C_ENUMVAL, intval++); // Bail out on a right curly bracket, else get a comma if (Token.token == T_RBRACE) break; comma(); } scan(&Token); // Skip over the right curly bracket } // Parse a typedef declaration and return the type // and ctype that it represents static int typedef_declaration(struct symtable **ctype) { int type, class = 0; // Skip the typedef keyword. scan(&Token); // Get the actual type following the keyword type = parse_type(ctype, &class); if (class != 0) fatal("Can't have extern in a typedef declaration"); // See if the typedef identifier already exists if (findtypedef(Text) != NULL) fatals("redefinition of typedef", Text); // Get any following '*' tokens type = parse_stars(type); // It doesn't exist so add it to the typedef list addtypedef(Text, type, *ctype); scan(&Token); return (type); } // Given a typedef name, return the type it represents static int type_of_typedef(char *name, struct symtable **ctype) { struct symtable *t; // Look up the typedef in the list t = findtypedef(name); if (t == NULL) fatals("unknown type", name); scan(&Token); *ctype = t->ctype; return (t->type); } // Parse the declaration of a variable or function. // The type and any following '*'s have been scanned, and we // have the identifier in the Token variable. // The class argument is the variable's class. // Return a pointer to the symbol's entry in the symbol table static struct symtable *symbol_declaration(int type, struct symtable *ctype, int class, struct ASTnode **tree) { struct symtable *sym = NULL; char *varname = strdup(Text); // Ensure that we have an identifier. // We copied it above so we can scan more tokens in, e.g. // an assignment expression for a local variable. 
ident(); // Deal with function declarations if (Token.token == T_LPAREN) { return (function_declaration(varname, type, ctype, class)); } // See if this array or scalar variable has already been declared switch (class) { case C_EXTERN: case C_STATIC: case C_GLOBAL: case C_LOCAL: case C_PARAM: if (findlocl(varname) != NULL) fatals("Duplicate local variable declaration", varname); case C_MEMBER: if (findmember(varname) != NULL) fatals("Duplicate struct/union member declaration", varname); } // Add the array or scalar variable to the symbol table if (Token.token == T_LBRACKET) sym = array_declaration(varname, type, ctype, class); else sym = scalar_declaration(varname, type, ctype, class, tree); return (sym); } // Parse a list of symbols where there is an initial type. // Return the type of the symbols. et1 and et2 are end tokens. int declaration_list(struct symtable **ctype, int class, int et1, int et2, struct ASTnode **gluetree) { int inittype, type; struct symtable *sym; struct ASTnode *tree; *gluetree = NULL; // Get the initial type. 
If -1, it was // a composite type definition, return this if ((inittype = parse_type(ctype, &class)) == -1) return (inittype); // Now parse the list of symbols while (1) { // See if this symbol is a pointer type = parse_stars(inittype); // Parse this symbol sym = symbol_declaration(type, *ctype, class, &tree); // We parsed a function, there is no list so leave if (sym->stype == S_FUNCTION) { if (class != C_GLOBAL && class != C_STATIC) fatal("Function definition not at global level"); return (type); } // Glue any AST tree from a local declaration // to build a sequence of assignments to perform if (*gluetree == NULL) *gluetree = tree; else *gluetree = mkastnode(A_GLUE, P_NONE, NULL, *gluetree, NULL, tree, NULL, 0); // We are at the end of the list, leave if (Token.token == et1 || Token.token == et2) return (type); // Otherwise, we need a comma as separator comma(); } } // Parse one or more global declarations, either // variables, functions or structs void global_declarations(void) { struct symtable *ctype= NULL; struct ASTnode *unused; while (Token.token != T_EOF) { declaration_list(&ctype, C_GLOBAL, T_SEMI, T_EOF, &unused); // Skip any semicolons and right curly brackets if (Token.token == T_SEMI) scan(&Token); } } ================================================ FILE: 53_Mop_up_pt2/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c void reject_token(struct token *t); int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, int type, struct symtable *ctype, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue); struct ASTnode *mkastleaf(int op, int type, struct symtable *ctype, struct symtable *sym, int intvalue); struct ASTnode *mkastunary(int op, int type, struct symtable *ctype, struct ASTnode *left, struct symtable *sym, int intvalue); void dumpAST(struct ASTnode *n, int label, int level); // gen.c 
int genlabel(void);
int genAST(struct ASTnode *n, int iflabel, int looptoplabel,
           int loopendlabel, int parentASTop);
// NOTE: several prototypes below use an empty parameter list "()".
// Before C23 that declares a function with unspecified parameters,
// which is not the same as "(void)".
void genpreamble();
void genpostamble();
void genfreeregs(int keepreg);
void genglobsym(struct symtable *node);
int genglobstr(char *strvalue, int append);
void genglobstrend(void);
int genprimsize(int type);
int genalign(int type, int offset, int direction);
void genreturn(int reg, int id);

// cg.c
int cgprimsize(int type);
int cgalign(int type, int offset, int direction);
void cgtextseg();
void cgdataseg();
int alloc_register(void);
void freeall_registers(int keepreg);
void cgpreamble();
void cgpostamble();
void cgfuncpreamble(struct symtable *sym);
void cgfuncpostamble(struct symtable *sym);
int cgloadint(int value, int type);
int cgloadglob(struct symtable *sym, int op);
int cgloadlocal(struct symtable *sym, int op);
int cgloadglobstr(int label);
int cgadd(int r1, int r2);
int cgsub(int r1, int r2);
int cgmul(int r1, int r2);
int cgdiv(int r1, int r2);
int cgshlconst(int r, int val);
int cgcall(struct symtable *sym, int numargs);
void cgcopyarg(int r, int argposn);
int cgstorglob(int r, struct symtable *sym);
int cgstorlocal(int r, struct symtable *sym);
void cgglobsym(struct symtable *node);
void cgglobstr(int l, char *strvalue, int append);
void cgglobstrend(void);
int cgcompare_and_set(int ASTop, int r1, int r2);
int cgcompare_and_jump(int ASTop, int r1, int r2, int label);
void cglabel(int l);
void cgjump(int l);
int cgwiden(int r, int oldtype, int newtype);
void cgreturn(int reg, struct symtable *sym);
int cgaddress(struct symtable *sym);
int cgderef(int r, int type);
int cgstorderef(int r1, int r2, int type);
int cgnegate(int r);
int cginvert(int r);
int cglognot(int r);
int cglogor(int r1, int r2);
int cglogand(int r1, int r2);
int cgboolean(int r, int op, int label);
int cgand(int r1, int r2);
int cgor(int r1, int r2);
int cgxor(int r1, int r2);
int cgshl(int r1, int r2);
int cgshr(int r1, int r2);
void cgswitch(int reg, int casecount, int toplabel,
              int *caselabel, int *caseval, int defaultlabel);
void cgmove(int r1, int r2);

// expr.c
struct ASTnode *expression_list(int endtoken);
struct ASTnode *binexpr(int ptp);

// stmt.c
struct ASTnode *compound_statement(int inswitch);

// misc.c
// The single-token helpers (semi, lbrace, ..., comma) take no arguments.
// NOTE(review): the fatal*() family is called throughout the parser as if
// it does not return -- confirm against misc.c.
void match(int t, char *what);
void semi(void);
void lbrace(void);
void rbrace(void);
void lparen(void);
void rparen(void);
void ident(void);
void comma(void);
void fatal(char *s);
void fatals(char *s1, char *s2);
void fatald(char *s, int d);
void fatalc(char *s, int c);

// sym.c
void appendsym(struct symtable **head, struct symtable **tail,
               struct symtable *node);
struct symtable *newsym(char *name, int type, struct symtable *ctype,
                        int stype, int class, int nelems, int posn);
// For functions, the caller passes the function's end label as posn
struct symtable *addglob(char *name, int type, struct symtable *ctype,
                         int stype, int class, int nelems, int posn);
struct symtable *addlocl(char *name, int type, struct symtable *ctype,
                         int stype, int nelems);
struct symtable *addparm(char *name, int type, struct symtable *ctype,
                         int stype);
struct symtable *addstruct(char *name);
struct symtable *addunion(char *name);
struct symtable *addmemb(char *name, int type, struct symtable *ctype,
                         int stype, int nelems);
struct symtable *addenum(char *name, int class, int value);
struct symtable *addtypedef(char *name, int type, struct symtable *ctype);
// The find*() lookups are tested against NULL by their callers
// to detect a missing entry
struct symtable *findglob(char *s);
struct symtable *findlocl(char *s);
struct symtable *findsymbol(char *s);
struct symtable *findmember(char *s);
struct symtable *findstruct(char *s);
struct symtable *findunion(char *s);
struct symtable *findenumtype(char *s);
struct symtable *findenumval(char *s);
struct symtable *findtypedef(char *s);
void clear_symtable(void);
void freeloclsyms(void);
void freestaticsyms(void);
void dumptable(struct symtable *head, char *name, int indent);
void dumpsymtables(void);

// decl.c
int parse_type(struct symtable **ctype, int *class);
int parse_stars(int type);
int parse_cast(struct symtable **ctype);
// Return type of declaration_list(); its prototype continues below
int
declaration_list(struct symtable **ctype, int class, int et1, int et2, struct ASTnode **gluetree); void global_declarations(void); // types.c int inttype(int type); int ptrtype(int type); int pointer_to(int type); int value_at(int type); int typesize(int type, struct symtable *ctype); struct ASTnode *modify_type(struct ASTnode *tree, int rtype, struct symtable *rctype, int op); // opt.c struct ASTnode *optimise(struct ASTnode *n); ================================================ FILE: 53_Mop_up_pt2/defs.h ================================================ #include #include #include #include // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 enum { TEXTLEN = 512 // Length of identifiers in input }; // Commands and default filenames #define AOUT "a.out" #ifdef __NASM__ #define ASCMD "nasm -f elf64 -w-ptr -o " #define LDCMD "cc -no-pie -fno-plt -Wall -o " #else #define ASCMD "as -o " #define LDCMD "cc -o " #endif #define CPPCMD "cpp -nostdinc -isystem " // Token types enum { T_EOF, // Binary operators T_ASSIGN, T_ASPLUS, T_ASMINUS, T_ASSTAR, T_ASSLASH, T_QUESTION, T_LOGOR, T_LOGAND, T_OR, T_XOR, T_AMPER, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, T_LSHIFT, T_RSHIFT, T_PLUS, T_MINUS, T_STAR, T_SLASH, // Other operators T_INC, T_DEC, T_INVERT, T_LOGNOT, // Type keywords T_VOID, T_CHAR, T_INT, T_LONG, // Other keywords T_IF, T_ELSE, T_WHILE, T_FOR, T_RETURN, T_STRUCT, T_UNION, T_ENUM, T_TYPEDEF, T_EXTERN, T_BREAK, T_CONTINUE, T_SWITCH, T_CASE, T_DEFAULT, T_SIZEOF, T_STATIC, // Structural tokens T_INTLIT, T_STRLIT, T_SEMI, T_IDENT, T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, T_LBRACKET, T_RBRACKET, T_COMMA, T_DOT, T_ARROW, T_COLON }; // Token structure struct token { int token; // Token type, from the enum list above char *tokstr; // String version of the token int intvalue; // For T_INTLIT, the integer value }; // AST node types. 
The first few line up // with the related tokens enum { A_ASSIGN = 1, A_ASPLUS, A_ASMINUS, A_ASSTAR, A_ASSLASH, // 1 A_TERNARY, A_LOGOR, A_LOGAND, A_OR, A_XOR, A_AND, // 6 A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_LSHIFT, A_RSHIFT, // 12 A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, // 20 A_INTLIT, A_STRLIT, A_IDENT, A_GLUE, // 24 A_IF, A_WHILE, A_FUNCTION, A_WIDEN, A_RETURN, // 28 A_FUNCCALL, A_DEREF, A_ADDR, A_SCALE, // 33 A_PREINC, A_PREDEC, A_POSTINC, A_POSTDEC, // 37 A_NEGATE, A_INVERT, A_LOGNOT, A_TOBOOL, A_BREAK, // 41 A_CONTINUE, A_SWITCH, A_CASE, A_DEFAULT, A_CAST // 46 }; // Primitive types. The bottom 4 bits is an integer // value that represents the level of indirection, // e.g. 0= no pointer, 1= pointer, 2= pointer pointer etc. enum { P_NONE, P_VOID = 16, P_CHAR = 32, P_INT = 48, P_LONG = 64, P_STRUCT=80, P_UNION=96 }; // Structural types enum { S_VARIABLE, S_FUNCTION, S_ARRAY }; // Storage classes enum { C_GLOBAL = 1, // Globally visible symbol C_LOCAL, // Locally visible symbol C_PARAM, // Locally visible function parameter C_EXTERN, // External globally visible symbol C_STATIC, // Static symbol, visible in one file C_STRUCT, // A struct C_UNION, // A union C_MEMBER, // Member of a struct or union C_ENUMTYPE, // A named enumeration type C_ENUMVAL, // A named enumeration value C_TYPEDEF // A named typedef }; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol struct symtable *ctype; // If struct/union, ptr to that type int stype; // Structural type for the symbol int class; // Storage class for the symbol int size; // Total size in bytes of this symbol int nelems; // Functions: # params. 
Arrays: # elements #define st_endlabel st_posn // For functions, the end label int st_posn; // For locals, the negative offset // from the stack base pointer int *initlist; // List of initial values struct symtable *next; // Next symbol in one list struct symtable *member; // First member of a function, struct, }; // union or enum // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates struct symtable *ctype; // If struct/union, ptr to that type int rvalue; // True if the node is an rvalue struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; struct symtable *sym; // For many AST nodes, the pointer to // the symbol in the symbol table #define a_intvalue a_size // For A_INTLIT, the integer value int a_size; // For A_SCALE, the size to scale by }; enum { NOREG = -1, // Use NOREG when the AST generation // functions have no register to return NOLABEL = 0 // Use NOLABEL when we have no label to // pass to genAST() }; ================================================ FILE: 53_Mop_up_pt2/expr.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of expressions // Copyright (c) 2019 Warren Toomey, GPL3 // expression_list: // | expression // | expression ',' expression_list // ; // Parse a list of zero or more comma-separated expressions and // return an AST composed of A_GLUE nodes with the left-hand child // being the sub-tree of previous expressions (or NULL) and the right-hand // child being the next expression. Each A_GLUE node will have size field // set to the number of expressions in the tree at this point. 
If no // expressions are parsed, NULL is returned struct ASTnode *expression_list(int endtoken) { struct ASTnode *tree = NULL; struct ASTnode *child = NULL; int exprcount = 0; // Loop until the end token while (Token.token != endtoken) { // Parse the next expression and increment the expression count child = binexpr(0); exprcount++; // Build an A_GLUE AST node with the previous tree as the left child // and the new expression as the right child. Store the expression count. tree = mkastnode(A_GLUE, P_NONE, NULL, tree, NULL, child, NULL, exprcount); // Stop when we reach the end token if (Token.token == endtoken) break; // Must have a ',' at this point match(T_COMMA, ","); } // Return the tree of expressions return (tree); } // Parse a function call and return its AST static struct ASTnode *funccall(void) { struct ASTnode *tree; struct symtable *funcptr; // Check that the identifier has been defined as a function, // then make a leaf node for it. if ((funcptr = findsymbol(Text)) == NULL || funcptr->stype != S_FUNCTION) { fatals("Undeclared function", Text); } // Get the '(' lparen(); // Parse the argument expression list tree = expression_list(T_RPAREN); // XXX Check type of each argument against the function's prototype // Build the function call AST node. Store the // function's return type as this node's type. 
// Also record the function's symbol-id tree = mkastunary(A_FUNCCALL, funcptr->type, funcptr->ctype, tree, funcptr, 0); // Get the ')' rparen(); return (tree); } // Parse the index into an array and return an AST tree for it static struct ASTnode *array_access(struct ASTnode *left) { struct ASTnode *right; // Check that the sub-tree is a pointer if (!ptrtype(left->type)) fatal("Not an array or pointer"); // Get the '[' scan(&Token); // Parse the following expression right = binexpr(0); // Get the ']' match(T_RBRACKET, "]"); // Ensure that this is of int type if (!inttype(right->type)) fatal("Array index is not of integer type"); // Make the left tree an rvalue left->rvalue = 1; // Scale the index by the size of the element's type right = modify_type(right, left->type, left->ctype, A_ADD); // Return an AST tree where the array's base has the offset added to it, // and dereference the element. Still an lvalue at this point. left = mkastnode(A_ADD, left->type, left->ctype, left, NULL, right, NULL, 0); left = mkastunary(A_DEREF, value_at(left->type), left->ctype, left, NULL, 0); return (left); } // Parse the member reference of a struct or union // and return an AST tree for it. If withpointer is true, // the access is through a pointer to the member. static struct ASTnode *member_access(struct ASTnode *left, int withpointer) { struct ASTnode *right; struct symtable *typeptr; struct symtable *m; // Check that the left AST tree is a pointer to struct or union if (withpointer && left->type != pointer_to(P_STRUCT) && left->type != pointer_to(P_UNION)) fatal("Expression is not a pointer to a struct/union"); // Or, check that the left AST tree is a struct or union. // If so, change it from an A_IDENT to an A_ADDR so that // we get the base address, not the value at this address. 
if (!withpointer) { if (left->type == P_STRUCT || left->type == P_UNION) left->op = A_ADDR; else fatal("Expression is not a struct/union"); } // Get the details of the composite type typeptr = left->ctype; // Skip the '.' or '->' token and get the member's name scan(&Token); ident(); // Find the matching member's name in the type // Die if we can't find it for (m = typeptr->member; m != NULL; m = m->next) if (!strcmp(m->name, Text)) break; if (m == NULL) fatals("No member found in struct/union: ", Text); // Make the left tree an rvalue left->rvalue = 1; // Build an A_INTLIT node with the offset right = mkastleaf(A_INTLIT, P_INT, NULL, NULL, m->st_posn); // Add the member's offset to the base of the struct/union // and dereference it. Still an lvalue at this point left = mkastnode(A_ADD, pointer_to(m->type), m->ctype, left, NULL, right, NULL, 0); left = mkastunary(A_DEREF, m->type, m->ctype, left, NULL, 0); return (left); } // Parse a parenthesised expression and // return an AST node representing it. static struct ASTnode *paren_expression(void) { struct ASTnode *n; int type = 0; struct symtable *ctype = NULL; // Beginning of a parenthesised expression, skip the '('. scan(&Token); // If the token after is a type identifier, this is a cast expression switch (Token.token) { case T_IDENT: // We have to see if the identifier matches a typedef. // If not, treat it as an expression. if (findtypedef(Text) == NULL) { n = binexpr(0); break; } case T_VOID: case T_CHAR: case T_INT: case T_LONG: case T_STRUCT: case T_UNION: case T_ENUM: // Get the type inside the parentheses type = parse_cast(&ctype); // Skip the closing ')' and then parse the following expression rparen(); default: n = binexpr(0); // Scan in the expression } // We now have at least an expression in n, and possibly a non-zero type // in type if there was a cast. Skip the closing ')' if there was no cast. 
if (type == 0) rparen(); else // Otherwise, make a unary AST node for the cast n = mkastunary(A_CAST, type, ctype, n, NULL, 0); return (n); } // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; struct symtable *enumptr; struct symtable *varptr; int id; int type = 0; int size, class; struct symtable *ctype; switch (Token.token) { case T_STATIC: case T_EXTERN: fatal("Compiler doesn't support static or extern local declarations"); case T_SIZEOF: // Skip the T_SIZEOF and ensure we have a left parenthesis scan(&Token); if (Token.token != T_LPAREN) fatal("Left parenthesis expected after sizeof"); scan(&Token); // Get the type inside the parentheses type = parse_stars(parse_type(&ctype, &class)); // Get the type's size size = typesize(type, ctype); rparen(); // Make a leaf node int literal with the size return (mkastleaf(A_INTLIT, P_INT, NULL, NULL, size)); case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. // Make it a P_CHAR if it's within the P_CHAR range if (Token.intvalue >= 0 && Token.intvalue < 256) n = mkastleaf(A_INTLIT, P_CHAR, NULL, NULL, Token.intvalue); else n = mkastleaf(A_INTLIT, P_INT, NULL, NULL, Token.intvalue); break; case T_STRLIT: // For a STRLIT token, generate the assembly for it. id = genglobstr(Text, 0); // For successive STRLIT tokens, append their contents // to this one while (1) { scan(&Peektoken); if (Peektoken.token != T_STRLIT) break; genglobstr(Text, 1); scan(&Token); // To skip it properly } // Now make a leaf AST node for it. id is the string's label. genglobstrend(); n = mkastleaf(A_STRLIT, pointer_to(P_CHAR), NULL, NULL, id); break; case T_IDENT: // If the identifier matches an enum value, // return an A_INTLIT node if ((enumptr = findenumval(Text)) != NULL) { n = mkastleaf(A_INTLIT, P_INT, NULL, NULL, enumptr->st_posn); break; } // See if this identifier exists as a symbol. For arrays, set rvalue to 1. 
if ((varptr = findsymbol(Text)) == NULL) fatals("Unknown variable or function", Text); switch (varptr->stype) { case S_VARIABLE: n = mkastleaf(A_IDENT, varptr->type, varptr->ctype, varptr, 0); break; case S_ARRAY: n = mkastleaf(A_ADDR, varptr->type, varptr->ctype, varptr, 0); n->rvalue = 1; break; case S_FUNCTION: // Function call, see if the next token is a left parenthesis scan(&Token); if (Token.token != T_LPAREN) fatals("Function name used without parentheses", Text); return (funccall()); default: fatals("Identifier not a scalar or array variable", Text); } break; case T_LPAREN: return (paren_expression()); default: fatals("Expecting a primary expression, got token", Token.tokstr); } // Scan in the next token and return the leaf node scan(&Token); return (n); } // Parse a postfix expression and return // an AST node representing it. The // identifier is already in Text. static struct ASTnode *postfix(void) { struct ASTnode *n; // Get the primary expression n = primary(); // Loop until there are no more postfix operators while (1) { switch (Token.token) { case T_LBRACKET: // An array reference n = array_access(n); break; case T_DOT: // Access into a struct or union n = member_access(n, 0); break; case T_ARROW: // Pointer access into a struct or union n = member_access(n, 1); break; case T_INC: // Post-increment: skip over the token if (n->rvalue == 1) fatal("Cannot ++ on rvalue"); scan(&Token); // Can't do it twice if (n->op == A_POSTINC || n->op == A_POSTDEC) fatal("Cannot ++ and/or -- more than once"); // and change the AST operation n->op = A_POSTINC; break; case T_DEC: // Post-decrement: skip over the token if (n->rvalue == 1) fatal("Cannot -- on rvalue"); scan(&Token); // Can't do it twice if (n->op == A_POSTINC || n->op == A_POSTDEC) fatal("Cannot ++ and/or -- more than once"); // and change the AST operation n->op = A_POSTDEC; break; default: return (n); } } return (NULL); // Keep -Wall happy } // Convert a binary operator token into a binary AST 
operation. // We rely on a 1:1 mapping from token to AST operation static int binastop(int tokentype) { if (tokentype > T_EOF && tokentype <= T_SLASH) return (tokentype); fatals("Syntax error, token", Tstring[tokentype]); return (0); // Keep -Wall happy } // Return true if a token is right-associative, // false otherwise. static int rightassoc(int tokentype) { if (tokentype >= T_ASSIGN && tokentype <= T_ASSLASH) return (1); return (0); } // Operator precedence for each token. Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 10, // T_EOF, T_ASSIGN, T_ASPLUS, 10, 10, 10, // T_ASMINUS, T_ASSTAR, T_ASSLASH, 15, // T_QUESTION, 20, 30, // T_LOGOR, T_LOGAND 40, 50, 60, // T_OR, T_XOR, T_AMPER 70, 70, // T_EQ, T_NE 80, 80, 80, 80, // T_LT, T_GT, T_LE, T_GE 90, 90, // T_LSHIFT, T_RSHIFT 100, 100, // T_PLUS, T_MINUS 110, 110 // T_STAR, T_SLASH }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec; if (tokentype > T_SLASH) fatals("Token with no precedence in op_precedence:", Tstring[tokentype]); prec = OpPrec[tokentype]; if (prec == 0) fatals("Syntax error, token", Tstring[tokentype]); return (prec); } // prefix_expression: postfix_expression // | '*' prefix_expression // | '&' prefix_expression // | '-' prefix_expression // | '++' prefix_expression // | '--' prefix_expression // ; // Parse a prefix expression and return // a sub-tree representing it. 
struct ASTnode *prefix(void) { struct ASTnode *tree; switch (Token.token) { case T_AMPER: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Ensure that it's an identifier if (tree->op != A_IDENT) fatal("& operator must be followed by an identifier"); // Prevent '&' being performed on an array if (tree->sym->stype == S_ARRAY) fatal("& operator cannot be performed on an array"); // Now change the operator to A_ADDR and the type to // a pointer to the original type tree->op = A_ADDR; tree->type = pointer_to(tree->type); break; case T_STAR: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's either another deref or an // identifier if (tree->op != A_IDENT && tree->op != A_DEREF) fatal("* operator must be followed by an identifier or *"); // Prepend an A_DEREF operation to the tree tree = mkastunary(A_DEREF, value_at(tree->type), tree->ctype, tree, NULL, 0); break; case T_MINUS: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_NEGATE operation to the tree and // make the child an rvalue. Because chars are unsigned, // also widen this if needed to int so that it's signed tree->rvalue = 1; if (tree->type == P_CHAR) tree->type = P_INT; tree = mkastunary(A_NEGATE, tree->type, tree->ctype, tree, NULL, 0); break; case T_INVERT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_INVERT operation to the tree and // make the child an rvalue. tree->rvalue = 1; tree = mkastunary(A_INVERT, tree->type, tree->ctype, tree, NULL, 0); break; case T_LOGNOT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_LOGNOT operation to the tree and // make the child an rvalue. 
tree->rvalue = 1; tree = mkastunary(A_LOGNOT, tree->type, tree->ctype, tree, NULL, 0); break; case T_INC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("++ operator must be followed by an identifier"); // Prepend an A_PREINC operation to the tree tree = mkastunary(A_PREINC, tree->type, tree->ctype, tree, NULL, 0); break; case T_DEC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("-- operator must be followed by an identifier"); // Prepend an A_PREDEC operation to the tree tree = mkastunary(A_PREDEC, tree->type, tree->ctype, tree, NULL, 0); break; default: tree = postfix(); } return (tree); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; struct ASTnode *ltemp, *rtemp; int ASTop; int tokentype; // Get the tree on the left. // Fetch the next token at the same time. 
left = prefix(); // If we hit one of several terminating tokens, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON || tokentype == T_RBRACE) { left->rvalue = 1; return (left); } // While the precedence of this token is more than that of the // previous token precedence, or it's right associative and // equal to the previous token's precedence while ((op_precedence(tokentype) > ptp) || (rightassoc(tokentype) && op_precedence(tokentype) == ptp)) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Determine the operation to be performed on the sub-trees ASTop = binastop(tokentype); switch (ASTop) { case A_TERNARY: // Ensure we have a ':' token, scan in the expression after it match(T_COLON, ":"); ltemp = binexpr(0); // Build and return the AST for this statement. Use the middle // expression's type as the return type. XXX We should also // consider the third expression's type. return (mkastnode (A_TERNARY, right->type, right->ctype, left, right, ltemp, NULL, 0)); case A_ASSIGN: // Assignment // Make the right tree into an rvalue right->rvalue = 1; // Ensure the right's type matches the left right = modify_type(right, left->type, left->ctype, 0); if (right == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree. However, switch // left and right around, so that the right expression's // code will be generated before the left expression ltemp = left; left = right; right = ltemp; break; default: // We are not doing a ternary or assignment, so both trees should // be rvalues. Convert both trees into rvalue if they are lvalue trees left->rvalue = 1; right->rvalue = 1; // Ensure the two types are compatible by trying // to modify each tree to match the other's type. 
ltemp = modify_type(left, right->type, right->ctype, ASTop); rtemp = modify_type(right, left->type, left->ctype, ASTop); if (ltemp == NULL && rtemp == NULL) fatal("Incompatible types in binary expression"); if (ltemp != NULL) left = ltemp; if (rtemp != NULL) right = rtemp; } // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. left = mkastnode(binastop(tokentype), left->type, left->ctype, left, NULL, right, NULL, 0); // Some operators produce an int result regardless of their operands switch (binastop(tokentype)) { case A_LOGOR: case A_LOGAND: case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: left->type = P_INT; } // Update the details of the current token. // If we hit a terminating token, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON || tokentype == T_RBRACE) { left->rvalue = 1; return (left); } } // Return the tree we have when the precedence // is the same or lower left->rvalue = 1; return (left); } ================================================ FILE: 53_Mop_up_pt2/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number static int labelid = 1; int genlabel(void) { return (labelid++); } // Generate the code for an IF statement // and an optional ELSE clause. static int genIF(struct ASTnode *n, int looptoplabel, int loopendlabel) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. // When there is no ELSE clause, Lfalse _is_ // the ending label! Lfalse = genlabel(); if (n->right) Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. 
genAST(n->left, Lfalse, NOLABEL, NOLABEL, n->op); genfreeregs(NOREG); // Generate the true compound statement genAST(n->mid, NOLABEL, looptoplabel, loopendlabel, n->op); genfreeregs(NOREG); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); genfreeregs(NOREG); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = genlabel(); Lend = genlabel(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. genAST(n->left, Lend, Lstart, Lend, n->op); genfreeregs(NOREG); // Generate the compound statement for the body genAST(n->right, NOLABEL, Lstart, Lend, n->op); genfreeregs(NOREG); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Generate the code for a SWITCH statement static int genSWITCH(struct ASTnode *n) { int *caseval, *caselabel; int Ljumptop, Lend; int i, reg, defaultlabel = 0, casecount = 0; struct ASTnode *c; // Create arrays for the case values and associated labels. // Ensure that we have at least one position in each array. caseval = (int *) malloc((n->a_intvalue + 1) * sizeof(int)); caselabel = (int *) malloc((n->a_intvalue + 1) * sizeof(int)); // Generate labels for the top of the jump table, and the // end of the switch statement. Set a default label for // the end of the switch, in case we don't have a default. 
Ljumptop = genlabel(); Lend = genlabel(); defaultlabel = Lend; // Output the code to calculate the switch condition reg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, 0); cgjump(Ljumptop); genfreeregs(reg); // Walk the right-child linked list to // generate the code for each case for (i = 0, c = n->right; c != NULL; i++, c = c->right) { // Get a label for this case. Store it // and the case value in the arrays. // Record if it is the default case. caselabel[i] = genlabel(); caseval[i] = c->a_intvalue; cglabel(caselabel[i]); if (c->op == A_DEFAULT) defaultlabel = caselabel[i]; else casecount++; // Generate the case code. Pass in the end label for the breaks. // If case has no body, we will fall into the following body. if (c->left) genAST(c->left, NOLABEL, NOLABEL, Lend, 0); genfreeregs(NOREG); } // Ensure the last case jumps past the switch table cgjump(Lend); // Now output the switch table and the end label. cgswitch(reg, casecount, Ljumptop, caselabel, caseval, defaultlabel); cglabel(Lend); return (NOREG); } // Generate the code to copy the arguments of a // function call to its parameters, then call the // function itself. Return the register that holds // the function's return value. static int gen_funccall(struct ASTnode *n) { struct ASTnode *gluetree = n->left; int reg; int numargs = 0; // If there is a list of arguments, walk this list // from the last argument (right-hand child) to the // first while (gluetree) { // Calculate the expression's value reg = genAST(gluetree->right, NOLABEL, NOLABEL, NOLABEL, gluetree->op); // Copy this into the n'th function parameter: size is 1, 2, 3, ... 
cgcopyarg(reg, gluetree->a_size); // Keep the first (highest) number of arguments if (numargs == 0) numargs = gluetree->a_size; genfreeregs(NOREG); gluetree = gluetree->left; } // Call the function, clean up the stack (based on numargs), // and return its result return (cgcall(n->sym, numargs)); } // Generate code for a ternary expression static int gen_ternary(struct ASTnode *n) { int Lfalse, Lend; int reg, expreg; // Generate two labels: one for the // false expression, and one for the // end of the overall expression Lfalse = genlabel(); Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. genAST(n->left, Lfalse, NOLABEL, NOLABEL, n->op); genfreeregs(NOREG); // Get a register to hold the result of the two expressions reg = alloc_register(); // Generate the true expression and the false label. // Move the expression result into the known register. expreg = genAST(n->mid, NOLABEL, NOLABEL, NOLABEL, n->op); cgmove(expreg, reg); // Don't free the register holding the result, though! genfreeregs(reg); cgjump(Lend); cglabel(Lfalse); // Generate the false expression and the end label. // Move the expression result into the known register. expreg = genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); cgmove(expreg, reg); // Don't free the register holding the result, though! genfreeregs(reg); cglabel(Lend); return (reg); } // Given an AST, an optional label, and the AST op // of the parent, generate assembly code recursively. // Return the register id with the tree's final value. 
int genAST(struct ASTnode *n, int iflabel, int looptoplabel, int loopendlabel, int parentASTop) { int leftreg= NOREG, rightreg= NOREG; // Empty tree, do nothing if (n==NULL) return(NOREG); // We have some specific AST node handling at the top // so that we don't evaluate the child sub-trees immediately switch (n->op) { case A_IF: return (genIF(n, looptoplabel, loopendlabel)); case A_WHILE: return (genWHILE(n)); case A_SWITCH: return (genSWITCH(n)); case A_FUNCCALL: return (gen_funccall(n)); case A_TERNARY: return (gen_ternary(n)); case A_GLUE: // Do each child statement, and free the // registers after each child if (n->left != NULL) genAST(n->left, iflabel, looptoplabel, loopendlabel, n->op); genfreeregs(NOREG); if (n->right != NULL) genAST(n->right, iflabel, looptoplabel, loopendlabel, n->op); genfreeregs(NOREG); return (NOREG); case A_FUNCTION: // Generate the function's preamble before the code // in the child sub-tree cgfuncpreamble(n->sym); genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op); cgfuncpostamble(n->sym); return (NOREG); } // General AST node handling below // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op); if (n->right) rightreg = genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdiv(leftreg, rightreg)); case A_AND: return (cgand(leftreg, rightreg)); case A_OR: return (cgor(leftreg, rightreg)); case A_XOR: return (cgxor(leftreg, rightreg)); case A_LSHIFT: return (cgshl(leftreg, rightreg)); case A_RSHIFT: return (cgshr(leftreg, rightreg)); case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: // If the parent AST node is an A_IF, A_WHILE or A_TERNARY, // generate a compare followed by a jump. Otherwise, compare // registers and set one to 1 or 0 based on the comparison. 
if (parentASTop == A_IF || parentASTop == A_WHILE || parentASTop == A_TERNARY) return (cgcompare_and_jump(n->op, leftreg, rightreg, iflabel)); else return (cgcompare_and_set(n->op, leftreg, rightreg)); case A_INTLIT: return (cgloadint(n->a_intvalue, n->type)); case A_STRLIT: return (cgloadglobstr(n->a_intvalue)); case A_IDENT: // Load our value if we are an rvalue // or we are being dereferenced if (n->rvalue || parentASTop == A_DEREF) { if (n->sym->class == C_GLOBAL || n->sym->class == C_STATIC) { return (cgloadglob(n->sym, n->op)); } else { return (cgloadlocal(n->sym, n->op)); } } else return (NOREG); case A_ASPLUS: case A_ASMINUS: case A_ASSTAR: case A_ASSLASH: case A_ASSIGN: // For the '+=' and friends operators, generate suitable code // and get the register with the result. Then take the left child, // make it the right child so that we can fall into the assignment code. switch (n->op) { case A_ASPLUS: leftreg = cgadd(leftreg, rightreg); n->right = n->left; break; case A_ASMINUS: leftreg = cgsub(leftreg, rightreg); n->right = n->left; break; case A_ASSTAR: leftreg = cgmul(leftreg, rightreg); n->right = n->left; break; case A_ASSLASH: leftreg = cgdiv(leftreg, rightreg); n->right = n->left; break; } // Now into the assignment code // Are we assigning to an identifier or through a pointer? 
switch (n->right->op) { case A_IDENT: if (n->right->sym->class == C_GLOBAL || n->right->sym->class == C_STATIC) return (cgstorglob(leftreg, n->right->sym)); else return (cgstorlocal(leftreg, n->right->sym)); case A_DEREF: return (cgstorderef(leftreg, rightreg, n->right->type)); default: fatald("Can't A_ASSIGN in genAST(), op", n->op); } case A_WIDEN: // Widen the child's type to the parent's type return (cgwiden(leftreg, n->left->type, n->type)); case A_RETURN: cgreturn(leftreg, Functionid); return (NOREG); case A_ADDR: return (cgaddress(n->sym)); case A_DEREF: // If we are an rvalue, dereference to get the value we point at, // otherwise leave it for A_ASSIGN to store through the pointer if (n->rvalue) return (cgderef(leftreg, n->left->type)); else return (leftreg); case A_SCALE: // Small optimisation: use shift if the // scale value is a known power of two switch (n->a_size) { case 2: return (cgshlconst(leftreg, 1)); case 4: return (cgshlconst(leftreg, 2)); case 8: return (cgshlconst(leftreg, 3)); default: // Load a register with the size and // multiply the leftreg by this size rightreg = cgloadint(n->a_size, P_INT); return (cgmul(leftreg, rightreg)); } case A_POSTINC: case A_POSTDEC: // Load and decrement the variable's value into a register // and post increment/decrement it if (n->sym->class == C_GLOBAL || n->sym->class == C_STATIC) return (cgloadglob(n->sym, n->op)); else return (cgloadlocal(n->sym, n->op)); case A_PREINC: case A_PREDEC: // Load and decrement the variable's value into a register // and pre increment/decrement it if (n->left->sym->class == C_GLOBAL || n->left->sym->class == C_STATIC) return (cgloadglob(n->left->sym, n->op)); else return (cgloadlocal(n->left->sym, n->op)); case A_NEGATE: return (cgnegate(leftreg)); case A_INVERT: return (cginvert(leftreg)); case A_LOGNOT: return (cglognot(leftreg)); case A_LOGOR: return (cglogor(leftreg, rightreg)); case A_LOGAND: return (cglogand(leftreg, rightreg)); case A_TOBOOL: // If the parent AST node 
is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, set the register // to 0 or 1 based on it's zeroeness or non-zeroeness return (cgboolean(leftreg, parentASTop, iflabel)); case A_BREAK: cgjump(loopendlabel); return (NOREG); case A_CONTINUE: cgjump(looptoplabel); return (NOREG); case A_CAST: return (leftreg); // Not much to do default: fatald("Unknown AST operator", n->op); } return (NOREG); // Keep -Wall happy } void genpreamble() { cgpreamble(); } void genpostamble() { cgpostamble(); } void genfreeregs(int keepreg) { freeall_registers(keepreg); } void genglobsym(struct symtable *node) { cgglobsym(node); } // Generate a global string. // If append is true, append to // previous genglobstr() call. int genglobstr(char *strvalue, int append) { int l = genlabel(); cgglobstr(l, strvalue, append); return (l); } void genglobstrend(void) { cgglobstrend(); } int genprimsize(int type) { return (cgprimsize(type)); } int genalign(int type, int offset, int direction) { return (cgalign(type, offset, direction)); } ================================================ FILE: 53_Mop_up_pt2/include/ctype.h ================================================ #ifndef _CTYPE_H_ # define _CTYPE_H_ int isalnum(int c); int isalpha(int c); int iscntrl(int c); int isdigit(int c); int isgraph(int c); int islower(int c); int isprint(int c); int ispunct(int c); int isspace(int c); int isupper(int c); int isxdigit(int c); int isascii(int c); int isblank(int c); int toupper(int c); int tolower(int c); #endif // _CTYPE_H_ ================================================ FILE: 53_Mop_up_pt2/include/errno.h ================================================ #ifndef _ERRNO_H_ # define _ERRNO_H_ #endif // _ERRNO_H_ ================================================ FILE: 53_Mop_up_pt2/include/fcntl.h ================================================ #ifndef _FCNTL_H_ # define _FCNTL_H_ #define O_RDONLY 00 #define O_WRONLY 01 #define O_RDWR 02 int open(char *pathname, int flags); #endif 
// _FCNTL_H_ ================================================ FILE: 53_Mop_up_pt2/include/stddef.h ================================================ #ifndef _STDDEF_H_ # define _STDDEF_H_ #ifndef NULL # define NULL (void *)0 #endif typedef long size_t; #endif //_STDDEF_H_ ================================================ FILE: 53_Mop_up_pt2/include/stdio.h ================================================ #ifndef _STDIO_H_ # define _STDIO_H_ #include #ifndef NULL # define NULL (void *)0 #endif #ifndef EOF # define EOF (-1) #endif // This FILE definition will do for now typedef char * FILE; FILE *fopen(char *pathname, char *mode); size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream); size_t fwrite(void *ptr, size_t size, size_t nmemb, FILE *stream); int fclose(FILE *stream); int printf(char *format); int fprintf(FILE *stream, char *format); int fgetc(FILE *stream); int fputc(int c, FILE *stream); int fputs(char *s, FILE *stream); int putc(int c, FILE *stream); int putchar(int c); int puts(char *s); extern FILE *stdin; extern FILE *stdout; extern FILE *stderr; #endif // _STDIO_H_ ================================================ FILE: 53_Mop_up_pt2/include/stdlib.h ================================================ #ifndef _STDLIB_H_ # define _STDLIB_H_ void exit(int status); void _Exit(int status); void *malloc(int size); void free(void *ptr); void *calloc(int nmemb, int size); void *realloc(void *ptr, int size); #endif // _STDLIB_H_ ================================================ FILE: 53_Mop_up_pt2/include/string.h ================================================ #ifndef _STRING_H_ # define _STRING_H_ char *strdup(char *s); char *strchr(char *s, int c); char *strrchr(char *s, int c); int strcmp(char *s1, char *s2); int strncmp(char *s1, char *s2, size_t n); #endif // _STRING_H_ ================================================ FILE: 53_Mop_up_pt2/include/unistd.h ================================================ #ifndef _UNISTD_H_ # define _UNISTD_H_ void 
_exit(int status); int unlink(char *pathname); #endif // _UNISTD_H_ ================================================ FILE: 53_Mop_up_pt2/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Given a string with a '.' and at least a 1-character suffix // after the '.', change the suffix to be the given character. // Return the new string or NULL if the original string could // not be modified char *alter_suffix(char *str, char suffix) { char *posn; char *newstr; // Clone the string if ((newstr = strdup(str)) == NULL) return (NULL); // Find the '.' if ((posn = strrchr(newstr, '.')) == NULL) return (NULL); // Ensure there is a suffix posn++; if (*posn == '\0') return (NULL); // Change the suffix and NUL-terminate the string *posn = suffix; posn++; *posn = '\0'; return (newstr); } // Given an input filename, compile that file // down to assembly code. 
Return the new file's name static char *do_compile(char *filename) { char cmd[TEXTLEN]; // Change the input file's suffix to .s Outfilename = alter_suffix(filename, 's'); if (Outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .c on the end\n", filename); exit(1); } // Generate the pre-processor command snprintf(cmd, TEXTLEN, "%s %s %s", CPPCMD, INCDIR, filename); // Open up the pre-processor pipe if ((Infile = popen(cmd, "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", filename, strerror(errno)); exit(1); } Infilename = filename; // Create the output file if ((Outfile = fopen(Outfilename, "w")) == NULL) { fprintf(stderr, "Unable to create %s: %s\n", Outfilename, strerror(errno)); exit(1); } Line = 1; // Reset the scanner Linestart = 1; Putback = '\n'; clear_symtable(); // Clear the symbol table if (O_verbose) printf("compiling %s\n", filename); scan(&Token); // Get the first token from the input Peektoken.token = 0; // and set there is no lookahead token genpreamble(); // Output the preamble global_declarations(); // Parse the global declarations genpostamble(); // Output the postamble fclose(Outfile); // Close the output file // Dump the symbol table if requested if (O_dumpsym) { printf("Symbols for %s\n", filename); dumpsymtables(); fprintf(stdout, "\n\n"); } freestaticsyms(); // Free any static symbols in the file return (Outfilename); } // Given an input filename, assemble that file // down to object code. 
Return the object filename char *do_assemble(char *filename) { char cmd[TEXTLEN]; int err; char *outfilename = alter_suffix(filename, 'o'); if (outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .s on the end\n", filename); exit(1); } // Build the assembly command and run it snprintf(cmd, TEXTLEN, "%s %s %s", ASCMD, outfilename, filename); if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Assembly of %s failed\n", filename); exit(1); } return (outfilename); } // Given a list of object files and an output filename, // link all of the object filenames together. void do_link(char *outfilename, char *objlist[]) { int cnt, size = TEXTLEN; char cmd[TEXTLEN], *cptr; int err; // Start with the linker command and the output file cptr = cmd; cnt = snprintf(cptr, size, "%s %s ", LDCMD, outfilename); cptr += cnt; size -= cnt; // Now append each object file while (*objlist != NULL) { cnt = snprintf(cptr, size, "%s ", *objlist); cptr += cnt; size -= cnt; objlist++; } if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Linking failed\n"); exit(1); } } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s [-vcSTM] [-o outfile] file [file ...]\n", prog); fprintf(stderr, " -v give verbose output of the compilation stages\n"); fprintf(stderr, " -c generate object files but don't link them\n"); fprintf(stderr, " -S generate assembly files but don't link them\n"); fprintf(stderr, " -T dump the AST trees for each input file\n"); fprintf(stderr, " -M dump the symbol table for each input file\n"); fprintf(stderr, " -o outfile, produce the outfile executable file\n"); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. 
enum { MAXOBJ = 100 }; int main(int argc, char *argv[]) { char *outfilename = AOUT; char *asmfile, *objfile; char *objlist[MAXOBJ]; int i, objcnt = 0; // Initialise our variables O_dumpAST = 0; O_dumpsym = 0; O_keepasm = 0; O_assemble = 0; O_verbose = 0; O_dolink = 1; // Scan for command-line options for (i = 1; i < argc; i++) { // No leading '-', stop scanning for options if (*argv[i] != '-') break; // For each option in this argument for (int j = 1; (*argv[i] == '-') && argv[i][j]; j++) { switch (argv[i][j]) { case 'o': outfilename = argv[++i]; // Save & skip to next argument break; case 'T': O_dumpAST = 1; break; case 'M': O_dumpsym = 1; break; case 'c': O_assemble = 1; O_keepasm = 0; O_dolink = 0; break; case 'S': O_keepasm = 1; O_assemble = 0; O_dolink = 0; break; case 'v': O_verbose = 1; break; default: usage(argv[0]); } } } // Ensure we have at lease one input file argument if (i >= argc) usage(argv[0]); // Work on each input file in turn while (i < argc) { asmfile = do_compile(argv[i]); // Compile the source file if (O_dolink || O_assemble) { objfile = do_assemble(asmfile); // Assemble it to object forma if (objcnt == (MAXOBJ - 2)) { fprintf(stderr, "Too many object files for the compiler to handle\n"); exit(1); } objlist[objcnt++] = objfile; // Add the object file's name objlist[objcnt] = NULL; // to the list of object files } if (!O_keepasm) // Remove the assembly file if unlink(asmfile); // we don't need to keep it i++; } // Now link all the object files together if (O_dolink) { do_link(outfilename, objlist); // If we don't need to keep the object // files, then remove them if (!O_assemble) { for (i = 0; objlist[i] != NULL; i++) unlink(objlist[i]); } } return (0); } ================================================ FILE: 53_Mop_up_pt2/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" #include #include // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current 
token is t, // and fetch the next token. Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Match a comma and fetch the next token void comma(void) { match(T_COMMA, "comma"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d of %s\n", s, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d of %s\n", s1, s2, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d of %s\n", s, d, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d of %s\n", s, c, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } ================================================ FILE: 53_Mop_up_pt2/opt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST Tree Optimisation Code // Copyright (c) 2019 Warren Toomey, GPL3 // Fold an AST tree with a binary operator // and two A_INTLIT children. Return either // the original tree or a new leaf node. 
static struct ASTnode *fold2(struct ASTnode *n) { int val, leftval, rightval; // Get the values from each child leftval = n->left->a_intvalue; rightval = n->right->a_intvalue; // Perform some of the binary operations. // For any AST op we can't do, return // the original tree. switch (n->op) { case A_ADD: val = leftval + rightval; break; case A_SUBTRACT: val = leftval - rightval; break; case A_MULTIPLY: val = leftval * rightval; break; case A_DIVIDE: // Don't try to divide by zero. if (rightval == 0) return (n); val = leftval / rightval; break; default: return (n); } // Return a leaf node with the new value return (mkastleaf(A_INTLIT, n->type, NULL, NULL, val)); } // Fold an AST tree with a unary operator // and one INTLIT children. Return either // the original tree or a new leaf node. static struct ASTnode *fold1(struct ASTnode *n) { int val; // Get the child value. Do the // operation if recognised. // Return the new leaf node. val = n->left->a_intvalue; switch (n->op) { case A_WIDEN: break; case A_INVERT: val = ~val; break; case A_LOGNOT: val = !val; break; default: return (n); } // Return a leaf node with the new value return (mkastleaf(A_INTLIT, n->type, NULL, NULL, val)); } // Attempt to do constant folding on // the AST tree with the root node n static struct ASTnode *fold(struct ASTnode *n) { if (n == NULL) return (NULL); // Fold on the left child, then // do the same on the right child n->left = fold(n->left); n->right = fold(n->right); // If both children are A_INTLITs, do a fold2() if (n->left && n->left->op == A_INTLIT) { if (n->right && n->right->op == A_INTLIT) n = fold2(n); else // If only the left is A_INTLIT, do a fold1() n = fold1(n); } // Return the possibly modified tree return (n); } // Optimise an AST tree by // constant folding in all sub-trees struct ASTnode *optimise(struct ASTnode *n) { n = fold(n); return (n); } ================================================ FILE: 53_Mop_up_pt2/scan.c ================================================ 
#include "defs.h"
#include "data.h"
#include "decl.h"

// Lexical scanning
// Copyright (c) 2019 Warren Toomey, GPL3

// Return the position of character c
// in string s, or -1 if c not found
static int chrpos(char *s, int c) {
  int i;
  for (i = 0; s[i] != '\0'; i++)
    if (s[i] == (char) c)
      return (i);
  return (-1);
}

// Get the next character from the input file.
// A put-back character, if there is one, is returned first.
// Pre-processor lines (a '#' at the start of a line) are consumed
// here and used to update Line and Infilename.
static int next(void) {
  int c, l;

  if (Putback) {                // Use the character put
    c = Putback;                // back if there is one
    Putback = 0;
    return (c);
  }

  c = fgetc(Infile);            // Read from input file

  while (Linestart && c == '#') {       // We've hit a pre-processor statement
    Linestart = 0;              // No longer at the start of the line
    scan(&Token);               // Get the line number into l
    if (Token.token != T_INTLIT)
      fatals("Expecting pre-processor line number, got:", Text);
    l = Token.intvalue;

    scan(&Token);               // Get the filename in Text
    if (Token.token != T_STRLIT)
      fatals("Expecting pre-processor file name, got:", Text);

    if (Text[0] != '<') {       // If this is a real filename
      if (strcmp(Text, Infilename))     // and not the one we have now
        Infilename = strdup(Text);      // save it. Then update the line num
      Line = l;
    }

    // Skip to the end of the line. Also stop at end of file:
    // fgetc() keeps returning EOF from then on, so a final
    // line with no newline would otherwise loop forever
    while ((c = fgetc(Infile)) != '\n' && c != EOF);
    c = fgetc(Infile);          // and get the next character
    Linestart = 1;              // Now back at the start of the line
  }

  Linestart = 0;                // No longer at the start of the line
  if ('\n' == c) {
    Line++;                     // Increment line count
    Linestart = 1;              // Now back at the start of the line
  }
  return (c);
}

// Put back an unwanted character
static void putback(int c) {
  Putback = c;
}

// Skip past input that we don't need to deal with,
// i.e. whitespace, newlines. Return the first
// character we do need to deal with.
static int skip(void) {
  int c;

  // Keep reading while we see space, tab, newline,
  // carriage return or form feed
  c = next();
  while (' ' == c || '\t' == c || '\n' == c || '\r' == c || '\f' == c) {
    c = next();
  }
  return (c);
}

// Read in a hexadecimal constant from the input.
// Return its value, which must fit in the range 0 .. 255.
static int hexchar(void) {
  int c, h, n = 0, f = 0;

  // Loop getting characters
  while (isxdigit(c = next())) {
    // Convert from char to int value
    h = chrpos("0123456789abcdef", tolower(c));

    // Add to running hex value
    n = n * 16 + h;
    f = 1;

    // Check the range as we go: a long run of hex digits
    // would otherwise overflow n before the check was made
    if (n > 255)
      fatal("value out of range after '\\x'");
  }

  // We hit a non-hex character, put it back
  putback(c);

  // Flag tells us we never saw any hex characters
  if (!f)
    fatal("missing digits after '\\x'");

  return (n);
}

// Return the next character from a character
// or string literal
static int scanch(void) {
  int i, c, c2;

  // Get the next input character and interpret
  // metacharacters that start with a backslash
  c = next();
  if (c == '\\') {
    switch (c = next()) {
      case 'a':
        return ('\a');
      case 'b':
        return ('\b');
      case 'f':
        return ('\f');
      case 'n':
        return ('\n');
      case 'r':
        return ('\r');
      case 't':
        return ('\t');
      case 'v':
        return ('\v');
      case '\\':
        return ('\\');
      case '"':
        return ('"');
      case '\'':
        return ('\'');

        // Deal with octal constants by reading in
        // characters until we hit a non-octal digit.
        // Build up the octal value in c2 and count
        // # digits in i. Permit only 3 octal digits.
      case '0':
      case '1':
      case '2':
      case '3':
      case '4':
      case '5':
      case '6':
      case '7':
        for (i = c2 = 0; isdigit(c) && c < '8'; c = next()) {
          if (++i > 3)
            break;
          c2 = c2 * 8 + (c - '0');
        }

        putback(c);             // Put back the first non-octal char
        return (c2);
      case 'x':
        return (hexchar());
      default:
        fatalc("unknown escape sequence", c);
    }
  }
  return (c);                   // Just an ordinary old character!
}

// Scan and return an integer literal
// value from the input file.
static int scanint(int c) { int k, val = 0, radix = 10; // Assume the radix is 10, but if it starts with 0 if (c == '0') { // and the next character is 'x', it's radix 16 if ((c = next()) == 'x') { radix = 16; c = next(); } else // Otherwise, it's radix 8 radix = 8; } // Convert each character into an int value while ((k = chrpos("0123456789abcdef", tolower(c))) >= 0) { if (k >= radix) fatalc("invalid digit in integer literal", c); val = val * radix + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan in a string literal from the input file, // and store it in buf[]. Return the length of // the string. static int scanstr(char *buf) { int i, c; // Loop while we have enough buffer space for (i = 0; i < TEXTLEN - 1; i++) { // Get the next char and append to buf // Return when we hit the ending double quote if ((c = scanch()) == '"') { buf[i] = 0; return (i); } buf[i] = (char)c; } // Ran out of buf[] space fatal("String literal too long"); return (0); } // Scan an identifier from the input file and // store it in buf[]. Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { fatal("Identifier too long"); } else if (i < lim - 1) { buf[i++] = (char)c; } c = next(); } // We hit a non-valid character, put it back. // NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. 
static int keyword(char *s) { switch (*s) { case 'b': if (!strcmp(s, "break")) return (T_BREAK); break; case 'c': if (!strcmp(s, "case")) return (T_CASE); if (!strcmp(s, "char")) return (T_CHAR); if (!strcmp(s, "continue")) return (T_CONTINUE); break; case 'd': if (!strcmp(s, "default")) return (T_DEFAULT); break; case 'e': if (!strcmp(s, "else")) return (T_ELSE); if (!strcmp(s, "enum")) return (T_ENUM); if (!strcmp(s, "extern")) return (T_EXTERN); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'l': if (!strcmp(s, "long")) return (T_LONG); break; case 'r': if (!strcmp(s, "return")) return (T_RETURN); break; case 's': if (!strcmp(s, "sizeof")) return (T_SIZEOF); if (!strcmp(s, "static")) return (T_STATIC); if (!strcmp(s, "struct")) return (T_STRUCT); if (!strcmp(s, "switch")) return (T_SWITCH); break; case 't': if (!strcmp(s, "typedef")) return (T_TYPEDEF); break; case 'u': if (!strcmp(s, "union")) return (T_UNION); break; case 'v': if (!strcmp(s, "void")) return (T_VOID); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; } return (0); } // List of token strings, for debugging purposes char *Tstring[] = { "EOF", "=", "+=", "-=", "*=", "/=", "?", "||", "&&", "|", "^", "&", "==", "!=", ",", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", "default", "sizeof", "static", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":" }; // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. 
int scan(struct token *t) { int c, tokentype; // If we have a lookahead token, return this token if (Peektoken.token != 0) { t->token = Peektoken.token; t->tokstr = Peektoken.tokstr; t->intvalue = Peektoken.intvalue; Peektoken.token = 0; return (1); } // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': if ((c = next()) == '+') { t->token = T_INC; } else if (c == '=') { t->token = T_ASPLUS; } else { putback(c); t->token = T_PLUS; } break; case '-': if ((c = next()) == '-') { t->token = T_DEC; } else if (c == '>') { t->token = T_ARROW; } else if (c == '=') { t->token = T_ASMINUS; } else if (isdigit(c)) { // Negative int literal t->intvalue = -scanint(c); t->token = T_INTLIT; } else { putback(c); t->token = T_MINUS; } break; case '*': if ((c = next()) == '=') { t->token = T_ASSTAR; } else { putback(c); t->token = T_STAR; } break; case '/': if ((c = next()) == '=') { t->token = T_ASSLASH; } else { putback(c); t->token = T_SLASH; } break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case '[': t->token = T_LBRACKET; break; case ']': t->token = T_RBRACKET; break; case '~': t->token = T_INVERT; break; case '^': t->token = T_XOR; break; case ',': t->token = T_COMMA; break; case '.': t->token = T_DOT; break; case ':': t->token = T_COLON; break; case '?': t->token = T_QUESTION; break; case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { putback(c); t->token = T_LOGNOT; } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else if (c == '<') { t->token = T_LSHIFT; } else { putback(c); t->token = T_LT; } break; case '>': if ((c = next()) == '=') { t->token = T_GE; } else if (c == '>') { t->token = T_RSHIFT; } else { 
putback(c); t->token = T_GT; } break; case '&': if ((c = next()) == '&') { t->token = T_LOGAND; } else { putback(c); t->token = T_AMPER; } break; case '|': if ((c = next()) == '|') { t->token = T_LOGOR; } else { putback(c); t->token = T_OR; } break; case '\'': // If it's a quote, scan in the // literal character value and // the trailing quote t->intvalue = scanch(); t->token = T_INTLIT; if (next() != '\'') fatal("Expected '\\'' at end of char literal"); break; case '"': // Scan in a literal string scanstr(Text); t->token = T_STRLIT; break; default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if ((tokentype = keyword(Text)) != 0) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token t->tokstr = Tstring[t->token]; return (1); } ================================================ FILE: 53_Mop_up_pt2/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // Prototypes static struct ASTnode *single_statement(void); // compound_statement: // empty, i.e. 
no statement // | statement // | statement statements // ; // // statement: declaration // | expression_statement // | function_call // | if_statement // | while_statement // | for_statement // | return_statement // ; // if_statement: if_head // | if_head 'else' statement // ; // // if_head: 'if' '(' true_false_expression ')' statement ; // // Parse an IF statement including any // optional ELSE clause and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST->ctype, condAST, NULL, 0); rparen(); // Get the AST for the statement trueAST = single_statement(); // If we have an 'else', skip it // and get the AST for the statement if (Token.token == T_ELSE) { scan(&Token); falseAST = single_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, P_NONE, NULL, condAST, trueAST, falseAST, NULL, 0)); } // while_statement: 'while' '(' true_false_expression ')' statement ; // // Parse a WHILE statement and return its AST static struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST->ctype, condAST, NULL, 0); rparen(); // Get the AST for the statement. 
// Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Build and return the AST for this statement return (mkastnode(A_WHILE, P_NONE, NULL, condAST, NULL, bodyAST, NULL, 0)); } // for_statement: 'for' '(' expression_list ';' // true_false_expression ';' // expression_list ')' statement ; // // Parse a FOR statement and return its AST static struct ASTnode *for_statement(void) { struct ASTnode *condAST, *bodyAST; struct ASTnode *preopAST, *postopAST; struct ASTnode *tree; // Ensure we have 'for' '(' match(T_FOR, "for"); lparen(); // Get the pre_op expression and the ';' preopAST = expression_list(T_SEMI); semi(); // Get the condition and the ';'. // Force a non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST->ctype, condAST, NULL, 0); semi(); // Get the post_op expression and the ')' postopAST = expression_list(T_RPAREN); rparen(); // Get the statement which is the body // Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Glue the statement and the postop tree tree = mkastnode(A_GLUE, P_NONE, NULL, bodyAST, NULL, postopAST, NULL, 0); // Make a WHILE loop with the condition and this new body tree = mkastnode(A_WHILE, P_NONE, NULL, condAST, NULL, tree, NULL, 0); // And glue the preop tree to the A_WHILE tree return (mkastnode(A_GLUE, P_NONE, NULL, preopAST, NULL, tree, NULL, 0)); } // return_statement: 'return' '(' expression ')' ; // // Parse a return statement and return its AST static struct ASTnode *return_statement(void) { struct ASTnode *tree= NULL; // Ensure we have 'return' match(T_RETURN, "return"); // See if we have a return value if (Token.token == T_LPAREN) { // Can't return a value if function returns P_VOID if (Functionid->type == P_VOID) fatal("Can't return from a void function"); // Skip the left parenthesis lparen(); // Parse 
the following expression tree = binexpr(0); // Ensure this is compatible with the function's type tree = modify_type(tree, Functionid->type, Functionid->ctype, 0); if (tree == NULL) fatal("Incompatible type to return"); // Get the ')' rparen(); } else { if (Functionid->type != P_VOID) fatal("Must return a value from a non-void function"); } // Add on the A_RETURN node tree = mkastunary(A_RETURN, P_NONE, NULL, tree, NULL, 0); // Get the ';' semi(); return (tree); } // break_statement: 'break' ; // // Parse a break statement and return its AST static struct ASTnode *break_statement(void) { if (Looplevel == 0 && Switchlevel == 0) fatal("no loop or switch to break out from"); scan(&Token); semi(); return (mkastleaf(A_BREAK, P_NONE, NULL, NULL, 0)); } // continue_statement: 'continue' ; // // Parse a continue statement and return its AST static struct ASTnode *continue_statement(void) { if (Looplevel == 0) fatal("no loop to continue to"); scan(&Token); semi(); return (mkastleaf(A_CONTINUE, P_NONE, NULL, NULL, 0)); } // Parse a switch statement and return its AST static struct ASTnode *switch_statement(void) { struct ASTnode *left, *body, *n, *c; struct ASTnode *casetree = NULL, *casetail; int inloop = 1, casecount = 0; int seendefault = 0; int ASTop, casevalue; // Skip the 'switch' and '(' scan(&Token); lparen(); // Get the switch expression, the ')' and the '{' left = binexpr(0); rparen(); lbrace(); // Ensure that this is of int type if (!inttype(left->type)) fatal("Switch expression is not of integer type"); // Build an A_SWITCH subtree with the expression as // the child n = mkastunary(A_SWITCH, P_NONE, NULL, left, NULL, 0); // Now parse the cases Switchlevel++; while (inloop) { switch (Token.token) { // Leave the loop when we hit a '}' case T_RBRACE: if (casecount == 0) fatal("No cases in switch"); inloop = 0; break; case T_CASE: case T_DEFAULT: // Ensure this isn't after a previous 'default' if (seendefault) fatal("case or default after existing default"); // Set 
the AST operation. Scan the case value if required if (Token.token == T_DEFAULT) { ASTop = A_DEFAULT; seendefault = 1; scan(&Token); } else { ASTop = A_CASE; scan(&Token); left = binexpr(0); // Ensure the case value is an integer literal if (left->op != A_INTLIT) fatal("Expecting integer literal for case value"); casevalue = left->a_intvalue; // Walk the list of existing case values to ensure // that there isn't a duplicate case value for (c = casetree; c != NULL; c = c->right) if (casevalue == c->a_intvalue) fatal("Duplicate case value"); } // Scan the ':' and increment the casecount match(T_COLON, ":"); casecount++; // If the next token is a T_CASE, the existing case will fall // into the next case. Otherwise, parse the case body. if (Token.token == T_CASE) body = NULL; else body = compound_statement(1); // Build a sub-tree with any compound statement as the left child // and link it in to the growing A_CASE tree if (casetree == NULL) { casetree = casetail = mkastunary(ASTop, P_NONE, NULL, body, NULL, casevalue); } else { casetail->right = mkastunary(ASTop, P_NONE, NULL, body, NULL, casevalue); casetail = casetail->right; } break; default: fatals("Unexpected token in switch", Token.tokstr); } } Switchlevel--; // We have a sub-tree with the cases and any default. Put the // case count into the A_SWITCH node and attach the case tree. n->a_intvalue = casecount; n->right = casetree; rbrace(); return (n); } // Parse a single statement and return its AST. static struct ASTnode *single_statement(void) { struct ASTnode *stmt; struct symtable *ctype; switch (Token.token) { case T_SEMI: // An empty statement semi(); break; case T_LBRACE: // We have a '{', so this is a compound statement lbrace(); stmt = compound_statement(0); rbrace(); return (stmt); case T_IDENT: // We have to see if the identifier matches a typedef. // If not, treat it as an expression. // Otherwise, fall down to the parse_type() call. 
if (findtypedef(Text) == NULL) { stmt = binexpr(0); semi(); return (stmt); } case T_CHAR: case T_INT: case T_LONG: case T_STRUCT: case T_UNION: case T_ENUM: case T_TYPEDEF: // The beginning of a variable declaration list. declaration_list(&ctype, C_LOCAL, T_SEMI, T_EOF, &stmt); semi(); return (stmt); // Any assignments from the declarations case T_IF: return (if_statement()); case T_WHILE: return (while_statement()); case T_FOR: return (for_statement()); case T_RETURN: return (return_statement()); case T_BREAK: return (break_statement()); case T_CONTINUE: return (continue_statement()); case T_SWITCH: return (switch_statement()); default: // For now, see if this is an expression. // This catches assignment statements. stmt = binexpr(0); semi(); return (stmt); } return (NULL); // Keep -Wall happy } // Parse a compound statement // and return its AST. If inswitch is true, // we look for a '}', 'case' or 'default' token // to end the parsing. Otherwise, look for // just a '}' to end the parsing. struct ASTnode *compound_statement(int inswitch) { struct ASTnode *left = NULL; struct ASTnode *tree; while (1) { // Leave if we've hit the end token. 
We do this first to allow // an empty compound statement if (Token.token == T_RBRACE) return (left); if (inswitch && (Token.token == T_CASE || Token.token == T_DEFAULT)) return (left); // Parse a single statement tree = single_statement(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, P_NONE, NULL, left, NULL, tree, NULL, 0); } } return (NULL); // Keep -Wall happy } ================================================ FILE: 53_Mop_up_pt2/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 // Append a node to the singly-linked list pointed to by head or tail void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node) { // Check for valid pointers if (head == NULL || tail == NULL || node == NULL) fatal("Either head, tail or node is NULL in appendsym"); // Append to the list if (*tail) { (*tail)->next = node; *tail = node; } else *head = *tail = node; node->next = NULL; } // Create a symbol node to be added to a symbol table list. // Set up the node's: // + type: char, int etc. // + ctype: composite type pointer for struct/union // + structural type: var, function, array etc. 
// + size: number of elements, or endlabel: end label for a function // + posn: Position information for local symbols // Return a pointer to the new node struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn) { // Get a new node struct symtable *node = (struct symtable *) malloc(sizeof(struct symtable)); if (node == NULL) fatal("Unable to malloc a symbol table node in newsym"); // Fill in the values if (name == NULL) node->name = NULL; else node->name = strdup(name); node->type = type; node->ctype = ctype; node->stype = stype; node->class = class; node->nelems = nelems; // For pointers and integer types, set the size // of the symbol. structs and union declarations // manually set this up themselves. if (ptrtype(type) || inttype(type)) node->size = nelems * typesize(type, ctype); node->st_posn = posn; node->next = NULL; node->member = NULL; node->initlist = NULL; return (node); } // Add a symbol to the global symbol list struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn) { struct symtable *sym = newsym(name, type, ctype, stype, class, nelems, posn); // For structs and unions, copy the size from the type node if (type == P_STRUCT || type == P_UNION) sym->size = ctype->size; appendsym(&Globhead, &Globtail, sym); return (sym); } // Add a symbol to the local symbol list struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int nelems) { struct symtable *sym = newsym(name, type, ctype, stype, C_LOCAL, nelems, 0); // For structs and unions, copy the size from the type node if (type == P_STRUCT || type == P_UNION) sym->size = ctype->size; appendsym(&Loclhead, &Locltail, sym); return (sym); } // Add a symbol to the parameter list struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype) { struct symtable *sym = newsym(name, type, ctype, stype, C_PARAM, 1, 0); appendsym(&Parmhead, &Parmtail, sym); 
return (sym); } // Add a symbol to the temporary member list struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int nelems) { struct symtable *sym = newsym(name, type, ctype, stype, C_MEMBER, nelems, 0); // For structs and unions, copy the size from the type node if (type == P_STRUCT || type == P_UNION) sym->size = ctype->size; appendsym(&Membhead, &Membtail, sym); return (sym); } // Add a struct to the struct list struct symtable *addstruct(char *name) { struct symtable *sym = newsym(name, P_STRUCT, NULL, 0, C_STRUCT, 0, 0); appendsym(&Structhead, &Structtail, sym); return (sym); } // Add a struct to the union list struct symtable *addunion(char *name) { struct symtable *sym = newsym(name, P_UNION, NULL, 0, C_UNION, 0, 0); appendsym(&Unionhead, &Uniontail, sym); return (sym); } // Add an enum type or value to the enum list. // Class is C_ENUMTYPE or C_ENUMVAL. // Use posn to store the int value. struct symtable *addenum(char *name, int class, int value) { struct symtable *sym = newsym(name, P_INT, NULL, 0, class, 0, value); appendsym(&Enumhead, &Enumtail, sym); return (sym); } // Add a typedef to the typedef list struct symtable *addtypedef(char *name, int type, struct symtable *ctype) { struct symtable *sym = newsym(name, type, ctype, 0, C_TYPEDEF, 0, 0); appendsym(&Typehead, &Typetail, sym); return (sym); } // Search for a symbol in a specific list. // Return a pointer to the found node or NULL if not found. // If class is not zero, also match on the given class static struct symtable *findsyminlist(char *s, struct symtable *list, int class) { for (; list != NULL; list = list->next) if ((list->name != NULL) && !strcmp(s, list->name)) if (class == 0 || class == list->class) return (list); return (NULL); } // Determine if the symbol s is in the global symbol table. // Return a pointer to the found node or NULL if not found. 
struct symtable *findglob(char *s) { return (findsyminlist(s, Globhead, 0)); } // Determine if the symbol s is in the local symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findlocl(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } return (findsyminlist(s, Loclhead, 0)); } // Determine if the symbol s is in the symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findsymbol(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } // Otherwise, try the local and global symbol lists node = findsyminlist(s, Loclhead, 0); if (node) return (node); return (findsyminlist(s, Globhead, 0)); } // Find a member in the member list // Return a pointer to the found node or NULL if not found. struct symtable *findmember(char *s) { return (findsyminlist(s, Membhead, 0)); } // Find a struct in the struct list // Return a pointer to the found node or NULL if not found. struct symtable *findstruct(char *s) { return (findsyminlist(s, Structhead, 0)); } // Find a struct in the union list // Return a pointer to the found node or NULL if not found. struct symtable *findunion(char *s) { return (findsyminlist(s, Unionhead, 0)); } // Find an enum type in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumtype(char *s) { return (findsyminlist(s, Enumhead, C_ENUMTYPE)); } // Find an enum value in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumval(char *s) { return (findsyminlist(s, Enumhead, C_ENUMVAL)); } // Find a type in the tyedef list // Return a pointer to the found node or NULL if not found. 
struct symtable *findtypedef(char *s) { return (findsyminlist(s, Typehead, 0)); } // Reset the contents of the symbol table void clear_symtable(void) { Globhead = Globtail = NULL; Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Membhead = Membtail = NULL; Structhead = Structtail = NULL; Unionhead = Uniontail = NULL; Enumhead = Enumtail = NULL; Typehead = Typetail = NULL; } // Clear all the entries in the local symbol table void freeloclsyms(void) { Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Functionid = NULL; } // Remove all static symbols from the global symbol table void freestaticsyms(void) { // g points at current node, prev at the previous one struct symtable *g, *prev = NULL; // Walk the global table looking for static entries for (g = Globhead; g != NULL; g = g->next) { if (g->class == C_STATIC) { // If there's a previous node, rearrange the prev pointer // to skip over the current node. If not, g is the head, // so do the same to Globhead if (prev != NULL) prev->next = g->next; else Globhead->next = g->next; // If g is the tail, point Globtail at the previous node // (if there is one), or Globhead if (g == Globtail) { if (prev != NULL) Globtail = prev; else Globtail = Globhead; } } } // Point prev at g before we move up to the next node prev = g; } // Dump a single symbol static void dumpsym(struct symtable *sym, int indent) { int i; for (i = 0; i < indent; i++) printf(" "); switch (sym->type & (~0xf)) { case P_VOID: printf("void "); break; case P_CHAR: printf("char "); break; case P_INT: printf("int "); break; case P_LONG: printf("long "); break; case P_STRUCT: if (sym->ctype != NULL) printf("struct %s ", sym->ctype->name); else printf("struct %s ", sym->name); break; case P_UNION: if (sym->ctype != NULL) printf("union %s ", sym->ctype->name); else printf("union %s ", sym->name); break; default: printf("unknown type "); } for (i = 0; i < (sym->type & 0xf); i++) printf("*"); printf("%s", sym->name); switch (sym->stype) { case 
S_VARIABLE: break; case S_FUNCTION: printf("()"); break; case S_ARRAY: printf("[]"); break; default: printf(" unknown stype"); } switch (sym->class) { case C_GLOBAL: printf(": global"); break; case C_LOCAL: printf(": local"); break; case C_PARAM: printf(": param"); break; case C_EXTERN: printf(": extern"); break; case C_STATIC: printf(": static"); break; case C_STRUCT: printf(": struct"); break; case C_UNION: printf(": union"); break; case C_MEMBER: printf(": member"); break; case C_ENUMTYPE: printf(": enumtype"); break; case C_ENUMVAL: printf(": enumval"); break; case C_TYPEDEF: printf(": typedef"); break; default: printf(": unknown class"); } switch (sym->stype) { case S_VARIABLE: if (sym->class == C_ENUMVAL) printf(", value %d\n", sym->st_posn); else printf(", size %d\n", sym->size); break; case S_FUNCTION: printf(", %d params\n", sym->nelems); break; case S_ARRAY: printf(", %d elems, size %d\n", sym->nelems, sym->size); break; } switch (sym->type & (~0xf)) { case P_STRUCT: case P_UNION: dumptable(sym->member, NULL, 4); } switch (sym->stype) { case S_FUNCTION: dumptable(sym->member, NULL, 4); } } // Dump one symbol table void dumptable(struct symtable *head, char *name, int indent) { struct symtable *sym; if (head != NULL && name != NULL) printf("%s\n--------\n", name); for (sym = head; sym != NULL; sym = sym->next) dumpsym(sym, indent); } void dumpsymtables(void) { dumptable(Globhead, "Global", 0); printf("\n"); dumptable(Enumhead, "Enums", 0); printf("\n"); dumptable(Typehead, "Typedefs", 0); } ================================================ FILE: 53_Mop_up_pt2/tests/err.input031.c ================================================ Expecting a primary expression, got token:+ on line 5 of input031.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input032.c ================================================ Unknown variable or function:pizza on line 4 of input032.c ================================================ FILE: 
53_Mop_up_pt2/tests/err.input033.c ================================================ Incompatible type to return on line 4 of input033.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input034.c ================================================ For now, declaration of non-global arrays is not implemented on line 4 of input034.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input035.c ================================================ Duplicate local variable declaration:a on line 4 of input035.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input036.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input036.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input037.c ================================================ Expected:comma on line 3 of input037.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input038.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input038.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input039.c ================================================ No statements in function with non-void type on line 4 of input039.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input040.c ================================================ No return for function with non-void type on line 4 of input040.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input041.c ================================================ Can't return from a void function on line 3 of input041.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input042.c ================================================ Unknown variable or function:fred on line 3 of input042.c 
================================================ FILE: 53_Mop_up_pt2/tests/err.input043.c ================================================ Unknown variable or function:b on line 3 of input043.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input044.c ================================================ Unknown variable or function:z on line 3 of input044.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input045.c ================================================ & operator must be followed by an identifier on line 3 of input045.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input046.c ================================================ * operator must be followed by an identifier or * on line 3 of input046.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input047.c ================================================ ++ operator must be followed by an identifier on line 3 of input047.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input048.c ================================================ -- operator must be followed by an identifier on line 3 of input048.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input049.c ================================================ Incompatible expression in assignment on line 6 of input049.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input050.c ================================================ Incompatible types in binary expression on line 6 of input050.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input051.c ================================================ Expected '\'' at end of char literal on line 4 of input051.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input052.c ================================================ Unrecognised character:$ on 
line 5 of input052.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input056.c ================================================ unknown struct/union type:var1 on line 2 of input056.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input057.c ================================================ previously defined struct/union:fred on line 2 of input057.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input059.c ================================================ Unknown variable or function:y on line 3 of input059.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input060.c ================================================ Expression is not a struct/union on line 3 of input060.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input061.c ================================================ Expression is not a pointer to a struct/union on line 3 of input061.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input064.c ================================================ undeclared enum type::fred on line 1 of input064.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input065.c ================================================ enum type redeclared::fred on line 2 of input065.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input066.c ================================================ enum value redeclared::z on line 2 of input066.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input068.c ================================================ redefinition of typedef:FOO on line 2 of input068.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input069.c ================================================ unknown type:FLOO on line 2 of input069.c 
================================================ FILE: 53_Mop_up_pt2/tests/err.input072.c ================================================ no loop or switch to break out from on line 1 of input072.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input073.c ================================================ no loop to continue to on line 1 of input073.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input075.c ================================================ Unexpected token in switch:if on line 4 of input075.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input076.c ================================================ No cases in switch on line 3 of input076.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input077.c ================================================ case or default after existing default on line 6 of input077.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input078.c ================================================ case or default after existing default on line 6 of input078.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input079.c ================================================ Duplicate case value on line 6 of input079.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input085.c ================================================ Bad type in parameter list on line 1 of input085.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input086.c ================================================ Function definition not at global level on line 3 of input086.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input087.c ================================================ Bad type in member list on line 4 of input087.c ================================================ FILE: 
53_Mop_up_pt2/tests/err.input092.c ================================================ Type mismatch: literal vs. variable on line 1 of input092.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input093.c ================================================ Unknown variable or function:fred on line 1 of input093.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input094.c ================================================ Type mismatch: literal vs. variable on line 1 of input094.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input095.c ================================================ Variable can not be initialised:x on line 1 of input095.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input096.c ================================================ Array size is illegal:0 on line 1 of input096.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input097.c ================================================ For now, declaration of non-global arrays is not implemented on line 2 of input097.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input098.c ================================================ Too many values in initialisation list on line 1 of input098.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input102.c ================================================ Cannot cast to a struct, union or void type on line 3 of input102.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input103.c ================================================ Cannot cast to a struct, union or void type on line 3 of input103.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input104.c ================================================ Cannot cast to a struct, union or void type on line 2 of input104.c 
================================================ FILE: 53_Mop_up_pt2/tests/err.input105.c ================================================ Incompatible expression in assignment on line 4 of input105.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input118.c ================================================ Compiler doesn't support static or extern local declarations on line 2 of input118.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input124.c ================================================ Cannot ++ on rvalue on line 6 of input124.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input126.c ================================================ Unknown variable or function:ptr on line 7 of input126.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input129.c ================================================ Cannot ++ and/or -- more than once on line 6 of input129.c ================================================ FILE: 53_Mop_up_pt2/tests/err.input136.c ================================================ Must return a value from a non-void function on line 4 of input136.c ================================================ FILE: 53_Mop_up_pt2/tests/input001.c ================================================ int printf(char *fmt); void main() { printf("%d\n", 12 * 3); printf("%d\n", 18 - 2 * 4); printf("%d\n", 1 + 2 + 9 - 5/2 + 3*5); } ================================================ FILE: 53_Mop_up_pt2/tests/input002.c ================================================ int printf(char *fmt); void main() { int fred; int jim; fred= 5; jim= 12; printf("%d\n", fred + jim); } ================================================ FILE: 53_Mop_up_pt2/tests/input003.c ================================================ int printf(char *fmt); void main() { int x; x= 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; 
printf("%d\n", x); x= x + 1; printf("%d\n", x); } ================================================ FILE: 53_Mop_up_pt2/tests/input004.c ================================================ int printf(char *fmt); void main() { int x; x= 7 < 9; printf("%d\n", x); x= 7 <= 9; printf("%d\n", x); x= 7 != 9; printf("%d\n", x); x= 7 == 7; printf("%d\n", x); x= 7 >= 7; printf("%d\n", x); x= 7 <= 7; printf("%d\n", x); x= 9 > 7; printf("%d\n", x); x= 9 >= 7; printf("%d\n", x); x= 9 != 7; printf("%d\n", x); } ================================================ FILE: 53_Mop_up_pt2/tests/input005.c ================================================ int printf(char *fmt); void main() { int i; int j; i=6; j=12; if (i < j) { printf("%d\n", i); } else { printf("%d\n", j); } } ================================================ FILE: 53_Mop_up_pt2/tests/input006.c ================================================ int printf(char *fmt); void main() { int i; i=1; while (i <= 10) { printf("%d\n", i); i= i + 1; } } ================================================ FILE: 53_Mop_up_pt2/tests/input007.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 53_Mop_up_pt2/tests/input008.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 53_Mop_up_pt2/tests/input009.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { printf("%d\n", 2 * b - a); } } ================================================ FILE: 53_Mop_up_pt2/tests/input010.c ================================================ int printf(char *fmt); void main() { int i; char j; j= 20; printf("%d\n", j); 
i= 10; printf("%d\n", i); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 2; j= j + 1) { printf("%d\n", j); } } ================================================ FILE: 53_Mop_up_pt2/tests/input011.c ================================================ int printf(char *fmt); int main() { int i; char j; long k; i= 10; printf("%d\n", i); j= 20; printf("%d\n", j); k= 30; printf("%d\n", k); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 4; j= j + 1) { printf("%d\n", j); } for (k= 1; k <= 5; k= k + 1) { printf("%d\n", k); } return(i); printf("%d\n", 12345); return(3); } ================================================ FILE: 53_Mop_up_pt2/tests/input012.c ================================================ int printf(char *fmt); int fred() { return(5); } void main() { int x; x= fred(2); printf("%d\n", x); } ================================================ FILE: 53_Mop_up_pt2/tests/input013.c ================================================ int printf(char *fmt); int fred() { return(56); } void main() { int dummy; int result; dummy= printf("%d\n", 23); result= fred(10); dummy= printf("%d\n", result); } ================================================ FILE: 53_Mop_up_pt2/tests/input014.c ================================================ int printf(char *fmt); int fred() { return(20); } int main() { int result; printf("%d\n", 10); result= fred(15); printf("%d\n", result); printf("%d\n", fred(15)+10); return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input015.c ================================================ int printf(char *fmt); int main() { char a; char *b; char c; int d; int *e; int f; a= 18; printf("%d\n", a); b= &a; c= *b; printf("%d\n", c); d= 12; printf("%d\n", d); e= &d; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input016.c ================================================ int printf(char *fmt); int c; int d; int *e; int f; 
int main() { c= 12; d=18; printf("%d\n", c); e= &c + 1; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input017.c ================================================ int printf(char *fmt); int main() { char a; char *b; int d; int *e; b= &a; *b= 19; printf("%d\n", a); e= &d; *e= 12; printf("%d\n", d); return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input018.c ================================================ int printf(char *fmt); int main() { int a; int b; a= b= 34; printf("%d\n", a); printf("%d\n", b); return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input018a.c ================================================ int printf(char *fmt); int a; int *b; char c; char *d; int main() { b= &a; *b= 15; printf("%d\n", a); d= &c; *d= 16; printf("%d\n", c); return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input019.c ================================================ int printf(char *fmt); int a; int b; int c; int d; int e; int main() { a= 2; b= 4; c= 3; d= 2; e= (a+b) * (c+d); printf("%d\n", e); return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input020.c ================================================ int printf(char *fmt); int a; int b[25]; int main() { b[3]= 12; a= b[3]; printf("%d\n", a); return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input021.c ================================================ int printf(char *fmt); char c; char *str; int main() { c= '\n'; printf("%d\n", c); for (str= "Hello world\n"; *str != 0; str= str + 1) { printf("%c", *str); } return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input022.c ================================================ int printf(char *fmt); char a; char b; char c; int d; int e; int f; long g; long h; long i; int main() { b= 5; c= 7; a= b + 
c++; printf("%d\n", a); e= 5; f= 7; d= e + f++; printf("%d\n", d); h= 5; i= 7; g= h + i++; printf("%d\n", g); a= b-- + c; printf("%d\n", a); d= e-- + f; printf("%d\n", d); g= h-- + i; printf("%d\n", g); a= ++b + c; printf("%d\n", a); d= ++e + f; printf("%d\n", d); g= ++h + i; printf("%d\n", g); a= b * --c; printf("%d\n", a); d= e * --f; printf("%d\n", d); g= h * --i; printf("%d\n", g); return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input023.c ================================================ int printf(char *fmt); char *str; int x; int main() { x= -23; printf("%d\n", x); printf("%d\n", -10 * -10); x= 1; x= ~x; printf("%d\n", x); x= 2 > 5; printf("%d\n", x); x= !x; printf("%d\n", x); x= !x; printf("%d\n", x); x= 13; if (x) { printf("%d\n", 13); } x= 0; if (!x) { printf("%d\n", 14); } for (str= "Hello world\n"; *str; str++) { printf("%c", *str); } return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input024.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { a= 42; b= 19; printf("%d\n", a & b); printf("%d\n", a | b); printf("%d\n", a ^ b); printf("%d\n", 1 << 3); printf("%d\n", 63 >> 3); return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input025.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { char z; int y; int x; x= 10; y= 20; z= 30; printf("%d\n", x); printf("%d\n", y); printf("%d\n", z); a= 5; b= 15; c= 25; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input026.c ================================================ int printf(char *fmt); int main(int a, char b, long c, int d, int e, int f, int g, int h) { int i; int j; int k; a= 13; printf("%d\n", a); b= 23; printf("%d\n", b); c= 34; printf("%d\n", c); d= 44; printf("%d\n", d); e= 54; 
printf("%d\n", e); f= 64; printf("%d\n", f); g= 74; printf("%d\n", g); h= 84; printf("%d\n", h); i= 94; printf("%d\n", i); j= 95; printf("%d\n", j); k= 96; printf("%d\n", k); return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input027.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int param5(int a, int b, int c, int d, int e) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param2(int a, int b) { int c; int d; int e; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param0() { int a; int b; int c; int d; int e; a= 1; b= 2; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int main() { param8(1,2,3,4,5,6,7,8); param5(1,2,3,4,5); param2(1,2); param0(); return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input028.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input029.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h); int fred(int a, int b, int c); int 
main(); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input030.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input030.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 53_Mop_up_pt2/tests/input031.c ================================================ int printf(char *fmt); int main() { int x; x= 2 + + 3 - * / ; } ================================================ FILE: 53_Mop_up_pt2/tests/input032.c ================================================ int printf(char *fmt); int main() { pizza cow llama sausage; } ================================================ FILE: 53_Mop_up_pt2/tests/input033.c ================================================ int printf(char *fmt); int main() { char *z; return(z); } ================================================ FILE: 53_Mop_up_pt2/tests/input034.c ================================================ int printf(char *fmt); int main() { int a[12]; return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input035.c ================================================ int printf(char *fmt); int fred(int a, int b) { int a; return(a); } ================================================ FILE: 53_Mop_up_pt2/tests/input036.c 
================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c); ================================================ FILE: 53_Mop_up_pt2/tests/input037.c ================================================ int printf(char *fmt); int fred(int a, char b +, int z); ================================================ FILE: 53_Mop_up_pt2/tests/input038.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c, int g); ================================================ FILE: 53_Mop_up_pt2/tests/input039.c ================================================ int printf(char *fmt); int main() { int a; } ================================================ FILE: 53_Mop_up_pt2/tests/input040.c ================================================ int printf(char *fmt); int main() { int a; a= 5; } ================================================ FILE: 53_Mop_up_pt2/tests/input041.c ================================================ int printf(char *fmt); void fred() { return(5); } ================================================ FILE: 53_Mop_up_pt2/tests/input042.c ================================================ int printf(char *fmt); int main() { fred(5); } ================================================ FILE: 53_Mop_up_pt2/tests/input043.c ================================================ int printf(char *fmt); int main() { int a; a= b[4]; } ================================================ FILE: 53_Mop_up_pt2/tests/input044.c ================================================ int printf(char *fmt); int main() { int a; a= z; } ================================================ FILE: 53_Mop_up_pt2/tests/input045.c ================================================ int printf(char *fmt); int main() { int a; a= &5; } ================================================ FILE: 53_Mop_up_pt2/tests/input046.c ================================================ int 
printf(char *fmt); int main() { int a; a= *5; } ================================================ FILE: 53_Mop_up_pt2/tests/input047.c ================================================ int printf(char *fmt); int main() { int a; a= ++5; } ================================================ FILE: 53_Mop_up_pt2/tests/input048.c ================================================ int printf(char *fmt); int main() { int a; a= --5; } ================================================ FILE: 53_Mop_up_pt2/tests/input049.c ================================================ int printf(char *fmt); int main() { int x; char y; y= x; } ================================================ FILE: 53_Mop_up_pt2/tests/input050.c ================================================ int printf(char *fmt); int main() { char *a; char *b; a= a + b; } ================================================ FILE: 53_Mop_up_pt2/tests/input051.c ================================================ int printf(char *fmt); int main() { char a; a= 'fred'; } ================================================ FILE: 53_Mop_up_pt2/tests/input052.c ================================================ int printf(char *fmt); int main() { int a; a= $5.00; } ================================================ FILE: 53_Mop_up_pt2/tests/input053.c ================================================ int printf(char *fmt); int main() { printf("Hello world, %d\n", 23); return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input054.c ================================================ int printf(char *fmt); int main() { int i; for (i=0; i < 20; i++) { printf("Hello world, %d\n", i); } return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input055.c ================================================ int printf(char *fmt); int main(int argc, char **argv) { int i; char *argument; printf("Hello world\n"); for (i=0; i < argc; i++) { argument= *argv; argv= argv + 1; printf("Argument %d is %s\n", 
i, argument); } return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input056.c ================================================ struct foo { int x; }; struct mary var1; ================================================ FILE: 53_Mop_up_pt2/tests/input057.c ================================================ struct fred { int x; } ; struct fred { char y; } ; ================================================ FILE: 53_Mop_up_pt2/tests/input058.c ================================================ int printf(char *fmt); struct fred { int x; char y; long z; }; struct fred var2; struct fred *varptr; int main() { long result; var2.x= 12; printf("%d\n", var2.x); var2.y= 'c'; printf("%d\n", var2.y); var2.z= 4005; printf("%d\n", var2.z); result= var2.x + var2.y + var2.z; printf("%d\n", result); varptr= &var2; result= varptr->x + varptr->y + varptr->z; printf("%d\n", result); return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input059.c ================================================ int main() { int x; x= y.foo; } ================================================ FILE: 53_Mop_up_pt2/tests/input060.c ================================================ int main() { int x; x= x.foo; } ================================================ FILE: 53_Mop_up_pt2/tests/input061.c ================================================ int main() { int x; x= x->foo; } ================================================ FILE: 53_Mop_up_pt2/tests/input062.c ================================================ int printf(char *fmt); union fred { char w; int x; int y; long z; }; union fred var1; union fred *varptr; int main() { var1.x= 65; printf("%d\n", var1.x); var1.x= 66; printf("%d\n", var1.x); printf("%d\n", var1.y); printf("The next two depend on the endian of the platform\n"); printf("%d\n", var1.w); printf("%d\n", var1.z); varptr= &var1; varptr->x= 67; printf("%d\n", varptr->x); printf("%d\n", varptr->y); return(0); } 
================================================ FILE: 53_Mop_up_pt2/tests/input063.c ================================================ int printf(char *fmt); enum fred { apple=1, banana, carrot, pear=10, peach, mango, papaya }; enum jane { aple=1, bnana, crrot, par=10, pech, mago, paaya }; enum fred var1; enum jane var2; enum fred var3; int main() { var1= carrot + pear + mango; printf("%d\n", var1); return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input064.c ================================================ enum fred var3; ================================================ FILE: 53_Mop_up_pt2/tests/input065.c ================================================ enum fred { x, y, z }; enum fred { a, b }; ================================================ FILE: 53_Mop_up_pt2/tests/input066.c ================================================ enum fred { x, y, z }; enum mary { a, b, z }; ================================================ FILE: 53_Mop_up_pt2/tests/input067.c ================================================ int printf(char *fmt); typedef int FOO; FOO var1; struct bar { int x; int y} ; typedef struct bar BAR; BAR var2; int main() { var1= 5; printf("%d\n", var1); var2.x= 7; var2.y= 10; printf("%d\n", var2.x + var2.y); return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input068.c ================================================ typedef int FOO; typedef char FOO; ================================================ FILE: 53_Mop_up_pt2/tests/input069.c ================================================ typedef int FOO; FLOO y; ================================================ FILE: 53_Mop_up_pt2/tests/input070.c ================================================ #include typedef int FOO; int main() { FOO x; x= 56; printf("%d\n", x); return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input071.c ================================================ #include int main() { int 
x; x = 0; while (x < 100) { if (x == 5) { x = x + 2; continue; } printf("%d\n", x); if (x == 14) { break; } x = x + 1; } printf("Done\n"); return (0); } ================================================ FILE: 53_Mop_up_pt2/tests/input072.c ================================================ int main() { break; } ================================================ FILE: 53_Mop_up_pt2/tests/input073.c ================================================ int main() { continue; } ================================================ FILE: 53_Mop_up_pt2/tests/input074.c ================================================ #include int main() { int x; int y; y= 0; for (x=0; x < 5; x++) { switch(x) { case 1: { y= 5; break; } case 2: { y= 7; break; } case 3: { y= 9; } default: { y= 100; } } printf("%d\n", y); } return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input075.c ================================================ int main() { int x; switch(x) { if (x<5); } } ================================================ FILE: 53_Mop_up_pt2/tests/input076.c ================================================ int main() { int x; switch(x) { } } ================================================ FILE: 53_Mop_up_pt2/tests/input077.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } case 4: { x= 2; } } } ================================================ FILE: 53_Mop_up_pt2/tests/input078.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } default: { x= 2; } } } ================================================ FILE: 53_Mop_up_pt2/tests/input079.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } case 2: { x= 2; } case 1: { x= 2; } default: { x= 2; } } } ================================================ FILE: 53_Mop_up_pt2/tests/input080.c ================================================ 
#include int x; int y; int main() { for (x=0, y=1; x < 6; x++, y=y+2) { printf("%d %d\n", x, y); } return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input081.c ================================================ #include int x; int y; int main() { x= 0; y=1; for (;x<5;) { printf("%d %d\n", x, y); x=x+1; y=y+2; } return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input082.c ================================================ #include int main() { int x; x= 10; // Dangling else test if (x > 5) if (x > 15) printf("x > 15\n"); else printf("15 >= x > 5\n"); else printf("5 >= x\n"); return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input083.c ================================================ #include int main() { int x; // Dangling else test. // We should not print anything for x<= 5 for (x=0; x < 12; x++) if (x > 5) if (x > 10) printf("10 < %2d\n", x); else printf(" 5 < %2d <= 10\n", x); return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input084.c ================================================ #include int main() { int x, y; x=2; y=3; printf("%d %d\n", x, y); char a, *b; a= 'f'; b= &a; printf("%c %c\n", a, *b); return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input085.c ================================================ int main(struct foo { int x; }; , int a) { return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input086.c ================================================ int main() { int fred() { return(5); } int x; x=2; return(x); } ================================================ FILE: 53_Mop_up_pt2/tests/input087.c ================================================ struct foo { int x; int y; struct blah { int g; }; }; ================================================ FILE: 53_Mop_up_pt2/tests/input088.c ================================================ 
#include struct foo { int x; int y; } fred, mary; struct foo james; int main() { fred.x= 5; mary.y= 6; printf("%d %d\n", fred.x, mary.y); return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input089.c ================================================ #include int x= 23; char y= 'H'; char *z= "Hello world"; int main() { printf("%d %c %s\n", x, y, z); return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input090.c ================================================ #include int a = 23, b = 100; char y = 'H', *z = "Hello world"; int main() { printf("%d %d %c %s\n", a, b, y, z); return (0); } ================================================ FILE: 53_Mop_up_pt2/tests/input091.c ================================================ #include int fred[] = { 1, 2, 3, 4, 5 }; int jim[10] = { 1, 2, 3, 4, 5 }; int main() { int i; for (i=0; i < 5; i++) printf("%d\n", fred[i]); for (i=0; i < 10; i++) printf("%d\n", jim[i]); return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input092.c ================================================ char x= 3000; ================================================ FILE: 53_Mop_up_pt2/tests/input093.c ================================================ char x= fred; ================================================ FILE: 53_Mop_up_pt2/tests/input094.c ================================================ char *s= 54; ================================================ FILE: 53_Mop_up_pt2/tests/input095.c ================================================ int fred(int x=2) { return(x); } ================================================ FILE: 53_Mop_up_pt2/tests/input096.c ================================================ int fred[0]; ================================================ FILE: 53_Mop_up_pt2/tests/input097.c ================================================ int main() { int x[45]; return(0); } ================================================ FILE: 
53_Mop_up_pt2/tests/input098.c ================================================ int fred[3]= { 1, 2, 3, 4, 5 }; ================================================ FILE: 53_Mop_up_pt2/tests/input099.c ================================================ #include // List of token strings, for debugging purposes. // As yet, we can't store a NULL into the list char *Tstring[] = { "EOF", "=", "||", "&&", "|", "^", "&", "==", "!=", ",", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", "default", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":", "" }; int main() { int i; char *str; i=0; while (1) { str= Tstring[i]; if (*str == 0) break; printf("%s\n", str); i++; } return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input100.c ================================================ #include int main() { int x= 3, y=14; int z= 2 * x + y; char *str= "Hello world"; printf("%s %d %d\n", str, x+y, z); return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input101.c ================================================ #include int main() { int x= 65535; char y; char *str; y= (char )x; printf("0x%x\n", y); str= (char *)0; printf("0x%lx\n", (long)str); return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input102.c ================================================ int main() { struct foo { int p; }; int y= (struct foo) x; } ================================================ FILE: 53_Mop_up_pt2/tests/input103.c ================================================ int main() { union foo { int p; }; int y= (union foo) x; } ================================================ FILE: 53_Mop_up_pt2/tests/input104.c ================================================ int main() { int y= (void) x; } 
================================================ FILE: 53_Mop_up_pt2/tests/input105.c ================================================ int main() { int x; char *y; y= (char) x; } ================================================ FILE: 53_Mop_up_pt2/tests/input106.c ================================================ #include char *y= (char *)0; int main() { printf("0x%lx\n", (long)y); return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input107.c ================================================ #include char *y[] = { "fish", "cow", NULL }; char *z= NULL; int main() { int i; char *ptr; for (i=0; i < 3; i++) { ptr= y[i]; if (ptr != (char *)0) printf("%s\n", y[i]); else printf("NULL\n"); } return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input108.c ================================================ int main() { char *str= (void *)0; return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input109.c ================================================ #include int main() { int x= 17 - 1; printf("%d\n", x); return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input110.c ================================================ #include int x; int y; int main() { x= 3; y= 15; y += x; printf("%d\n", y); x= 3; y= 15; y -= x; printf("%d\n", y); x= 3; y= 15; y *= x; printf("%d\n", y); x= 3; y= 15; y /= x; printf("%d\n", y); return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input111.c ================================================ #include int main() { int x= 2000 + 3 + 4 * 5 + 6; printf("%d\n", x); return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input112.c ================================================ #include char* y = NULL; int x= 10 + 6; int fred [ 2 + 3 ]; int main() { fred[2]= x; printf("%d\n", fred[2]); return(0); } ================================================ 
FILE: 53_Mop_up_pt2/tests/input113.c ================================================ #include void fred(void); void fred(void) { printf("fred says hello\n"); } int main(void) { fred(); return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input114.c ================================================ #include int main() { int x= 0x4a; printf("%c\n", x); return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input115.c ================================================ #include struct foo { int x; char y; long z; }; typedef struct foo blah; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol struct symtable *ctype; // If struct/union, ptr to that type int stype; // Structural type for the symbol int class; // Storage class for the symbol int size; // Total size in bytes of this symbol int nelems; // Functions: # params. Arrays: # elements #define st_endlabel st_posn // For functions, the end label int st_posn; // For locals, the negative offset // from the stack base pointer int *initlist; // List of initial values struct symtable *next; // Next symbol in one list struct symtable *member; // First member of a function, struct, }; // union or enum // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates int rvalue; // True if the node is an rvalue struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; struct symtable *sym; // For many AST nodes, the pointer to // the symbol in the symbol table #define a_intvalue a_size // For A_INTLIT, the integer value int a_size; // For A_SCALE, the size to scale by }; int main() { printf("%ld\n", sizeof(char)); printf("%ld\n", sizeof(int)); printf("%ld\n", sizeof(long)); printf("%ld\n", sizeof(char *)); printf("%ld\n", sizeof(blah)); printf("%ld\n", 
sizeof(struct symtable)); printf("%ld\n", sizeof(struct ASTnode)); return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input116.c ================================================ #include static int counter=0; static int fred(void) { return(counter++); } int main(void) { int i; for (i=0; i < 5; i++) printf("%d\n", fred()); return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input117.c ================================================ #include static char *fred(void) { return("Hello"); } int main(void) { printf("%s\n", fred()); return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input118.c ================================================ int main(void) { static int x; } ================================================ FILE: 53_Mop_up_pt2/tests/input119.c ================================================ #include int x; int y= 3; int main() { x= y != 3 ? 6 : 8; printf("%d\n", x); x= (y == 3) ? 6 : 8; printf("%d\n", x); return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input120.c ================================================ #include int x; int y= 3; int main() { for (y= 0; y < 10; y++) { x= y > 4 ? y + 2 : y + 9; printf("%d\n", x); } return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input121.c ================================================ #include int x; int y= 3; int main() { for (y= 0; y < 10; y++) { x= (y < 4) ? y + 2 : (y > 7) ? 
1000 : y + 9; printf("%d\n", x); } return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input122.c ================================================ #include int x, y, z1, z2; int main() { for (x= 0; x <= 1; x++) { for (y= 0; y <= 1; y++) { z1= x || y; z2= x && y; printf("x %d, y %d, x || y %d, x && y %d\n", x, y, z1, z2); } } //z= x || y; return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input123.c ================================================ #include int main() { int x; for (x=0; x < 20; x++) switch(x) { case 2: case 3: case 5: case 7: case 11: printf("%2d infant prime\n", x); break; case 13: case 17: case 19: printf("%2d teen prime\n", x); break; case 0: case 1: case 4: case 6: case 8: case 9: case 10: case 12: printf("%2d infant composite\n", x); break; default: printf("%2d teen composite\n", x); break; } return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input124.c ================================================ #include int ary[5]; int main() { ary++; return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input125.c ================================================ #include int ary[5]; int *ptr; int x; int main() { ary[3]= 2008; ptr= ary; // Load ary's address into ptr x= ary[3]; printf("%d\n", x); x= ptr[3]; printf("%d\n", x); // Treat ptr as an array return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input126.c ================================================ #include int ary[5]; int main() { ary[3]= 2008; ptr= &ary; return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input127.c ================================================ #include int ary[5]; void fred(int *ptr) { // Receive a pointer printf("%d\n", ptr[3]); } int main() { ary[3]= 2008; printf("%d\n", ary[3]); fred(ary); // Pass ary as a pointer return(0); } 
================================================ FILE: 53_Mop_up_pt2/tests/input128.c ================================================ #include <stdio.h> struct foo { int val; struct foo *next; }; struct foo head, mid, tail; int main() { struct foo *ptr; tail.val= 20; tail.next= NULL; mid.val= 15; mid.next= &tail; head.val= 10; head.next= &mid; ptr= &head; printf("%d %d\n", head.val, ptr->val); printf("%d %d\n", mid.val, ptr->next->val); printf("%d %d\n", tail.val, ptr->next->next->val); return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input129.c ================================================ #include <stdio.h> int x= 6; int main() { printf("%d\n", x++ ++); return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input130.c ================================================ #include <stdio.h> char *x= "foo"; int main() { printf("Hello " "world" "\n"); return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input131.c ================================================ #include <stdio.h> void donothing() { } int main() { int x=0; printf("Doing nothing... 
"); donothing(); printf("nothing done\n"); while (++x < 100) ; printf("x is now %d\n", x); return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input132.c ================================================ extern int fred; int fred; int mary; extern int mary; int main() { return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input133.c ================================================ #include extern int fred[]; int fred[23]; char mary[100]; extern char mary[]; void main() { printf("OK\n"); } ================================================ FILE: 53_Mop_up_pt2/tests/input134.c ================================================ #include char y = 'a'; char *x; int main() { x= &y; if (x && y == 'a') printf("1st match\n"); x= NULL; if (x && y == 'a') printf("2nd match\n"); x= &y; y='b'; if (x && y == 'a') printf("3rd match\n"); return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input135.c ================================================ #include void fred() { int x= 5; printf("testing x\n"); if (x > 4) return; printf("x below 5\n"); } int main() { fred(); return(0); } ================================================ FILE: 53_Mop_up_pt2/tests/input136.c ================================================ #include int main() { return; } ================================================ FILE: 53_Mop_up_pt2/tests/mktests ================================================ #!/bin/sh # Make the output files for each test # Build our compiler if needed if [ ! -f ../cwj ] then (cd ..; make install) fi for i in input*c do if [ ! -f "out.$i" -a ! -f "err.$i" ] then ../cwj -o out $i 2> "err.$i" # If the err file is empty if [ ! 
-s "err.$i" ] then rm -f "err.$i" cc -o out $i ./out > "out.$i" fi fi rm -f out out.s done ================================================ FILE: 53_Mop_up_pt2/tests/out.input001.c ================================================ 36 10 25 ================================================ FILE: 53_Mop_up_pt2/tests/out.input002.c ================================================ 17 ================================================ FILE: 53_Mop_up_pt2/tests/out.input003.c ================================================ 1 2 3 4 5 ================================================ FILE: 53_Mop_up_pt2/tests/out.input004.c ================================================ 1 1 1 1 1 1 1 1 1 ================================================ FILE: 53_Mop_up_pt2/tests/out.input005.c ================================================ 6 ================================================ FILE: 53_Mop_up_pt2/tests/out.input006.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 53_Mop_up_pt2/tests/out.input007.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 53_Mop_up_pt2/tests/out.input008.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 53_Mop_up_pt2/tests/out.input009.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 53_Mop_up_pt2/tests/out.input010.c ================================================ 20 10 1 2 3 4 5 253 254 255 0 1 ================================================ FILE: 53_Mop_up_pt2/tests/out.input011.c ================================================ 10 20 30 1 2 3 4 5 253 254 255 0 1 2 3 1 2 3 4 5 ================================================ FILE: 53_Mop_up_pt2/tests/out.input012.c ================================================ 5 
================================================ FILE: 53_Mop_up_pt2/tests/out.input013.c ================================================ 23 56 ================================================ FILE: 53_Mop_up_pt2/tests/out.input014.c ================================================ 10 20 30 ================================================ FILE: 53_Mop_up_pt2/tests/out.input015.c ================================================ 18 18 12 12 ================================================ FILE: 53_Mop_up_pt2/tests/out.input016.c ================================================ 12 18 ================================================ FILE: 53_Mop_up_pt2/tests/out.input017.c ================================================ 19 12 ================================================ FILE: 53_Mop_up_pt2/tests/out.input018.c ================================================ 34 34 ================================================ FILE: 53_Mop_up_pt2/tests/out.input018a.c ================================================ 15 16 ================================================ FILE: 53_Mop_up_pt2/tests/out.input019.c ================================================ 30 ================================================ FILE: 53_Mop_up_pt2/tests/out.input020.c ================================================ 12 ================================================ FILE: 53_Mop_up_pt2/tests/out.input021.c ================================================ 10 Hello world ================================================ FILE: 53_Mop_up_pt2/tests/out.input022.c ================================================ 12 12 12 13 13 13 13 13 13 35 35 35 ================================================ FILE: 53_Mop_up_pt2/tests/out.input023.c ================================================ -23 100 -2 0 1 0 13 14 Hello world ================================================ FILE: 53_Mop_up_pt2/tests/out.input024.c ================================================ 2 59 57 8 7 
================================================ FILE: 53_Mop_up_pt2/tests/out.input025.c ================================================ 10 20 30 5 15 25 ================================================ FILE: 53_Mop_up_pt2/tests/out.input026.c ================================================ 13 23 34 44 54 64 74 84 94 95 96 ================================================ FILE: 53_Mop_up_pt2/tests/out.input027.c ================================================ 1 2 3 4 5 6 7 8 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 ================================================ FILE: 53_Mop_up_pt2/tests/out.input028.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 53_Mop_up_pt2/tests/out.input029.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 53_Mop_up_pt2/tests/out.input030.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input030.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 53_Mop_up_pt2/tests/out.input053.c ================================================ Hello world, 23 ================================================ FILE: 53_Mop_up_pt2/tests/out.input054.c ================================================ Hello world, 0 Hello world, 1 Hello world, 2 Hello world, 3 Hello world, 4 Hello world, 5 Hello world, 6 Hello world, 7 Hello world, 8 Hello world, 9 Hello world, 10 Hello world, 11 Hello world, 12 Hello world, 13 Hello world, 14 Hello world, 15 Hello world, 16 Hello world, 17 Hello world, 18 Hello world, 19 ================================================ FILE: 
53_Mop_up_pt2/tests/out.input055.c ================================================ Hello world Argument 0 is ./out ================================================ FILE: 53_Mop_up_pt2/tests/out.input058.c ================================================ 12 99 4005 4116 4116 ================================================ FILE: 53_Mop_up_pt2/tests/out.input062.c ================================================ 65 66 66 The next two depend on the endian of the platform 66 66 67 67 ================================================ FILE: 53_Mop_up_pt2/tests/out.input063.c ================================================ 25 ================================================ FILE: 53_Mop_up_pt2/tests/out.input067.c ================================================ 5 17 ================================================ FILE: 53_Mop_up_pt2/tests/out.input070.c ================================================ 56 ================================================ FILE: 53_Mop_up_pt2/tests/out.input071.c ================================================ 0 1 2 3 4 7 8 9 10 11 12 13 14 Done ================================================ FILE: 53_Mop_up_pt2/tests/out.input074.c ================================================ 100 5 7 100 100 ================================================ FILE: 53_Mop_up_pt2/tests/out.input080.c ================================================ 0 1 1 3 2 5 3 7 4 9 5 11 ================================================ FILE: 53_Mop_up_pt2/tests/out.input081.c ================================================ 0 1 1 3 2 5 3 7 4 9 ================================================ FILE: 53_Mop_up_pt2/tests/out.input082.c ================================================ 15 >= x > 5 ================================================ FILE: 53_Mop_up_pt2/tests/out.input083.c ================================================ 5 < 6 <= 10 5 < 7 <= 10 5 < 8 <= 10 5 < 9 <= 10 5 < 10 <= 10 10 < 11 ================================================ FILE: 
53_Mop_up_pt2/tests/out.input084.c ================================================ 2 3 f f ================================================ FILE: 53_Mop_up_pt2/tests/out.input088.c ================================================ 5 6 ================================================ FILE: 53_Mop_up_pt2/tests/out.input089.c ================================================ 23 H Hello world ================================================ FILE: 53_Mop_up_pt2/tests/out.input090.c ================================================ 23 100 H Hello world ================================================ FILE: 53_Mop_up_pt2/tests/out.input091.c ================================================ 1 2 3 4 5 1 2 3 4 5 0 0 0 0 0 ================================================ FILE: 53_Mop_up_pt2/tests/out.input099.c ================================================ EOF = || && | ^ & == != , > <= >= << >> + - * / ++ -- ~ ! void char int long if else while for return struct union enum typedef extern break continue switch case default intlit strlit ; identifier { } ( ) [ ] , . 
-> : ================================================ FILE: 53_Mop_up_pt2/tests/out.input100.c ================================================ Hello world 17 20 ================================================ FILE: 53_Mop_up_pt2/tests/out.input101.c ================================================ 0xff 0x0 ================================================ FILE: 53_Mop_up_pt2/tests/out.input106.c ================================================ 0x0 ================================================ FILE: 53_Mop_up_pt2/tests/out.input107.c ================================================ fish cow NULL ================================================ FILE: 53_Mop_up_pt2/tests/out.input108.c ================================================ ================================================ FILE: 53_Mop_up_pt2/tests/out.input109.c ================================================ 16 ================================================ FILE: 53_Mop_up_pt2/tests/out.input110.c ================================================ 18 12 45 5 ================================================ FILE: 53_Mop_up_pt2/tests/out.input111.c ================================================ 2029 ================================================ FILE: 53_Mop_up_pt2/tests/out.input112.c ================================================ 16 ================================================ FILE: 53_Mop_up_pt2/tests/out.input113.c ================================================ fred says hello ================================================ FILE: 53_Mop_up_pt2/tests/out.input114.c ================================================ J ================================================ FILE: 53_Mop_up_pt2/tests/out.input115.c ================================================ 1 4 8 8 13 64 48 ================================================ FILE: 53_Mop_up_pt2/tests/out.input116.c ================================================ 0 1 2 3 4 ================================================ FILE: 
53_Mop_up_pt2/tests/out.input117.c ================================================ Hello ================================================ FILE: 53_Mop_up_pt2/tests/out.input119.c ================================================ 8 6 ================================================ FILE: 53_Mop_up_pt2/tests/out.input120.c ================================================ 9 10 11 12 13 7 8 9 10 11 ================================================ FILE: 53_Mop_up_pt2/tests/out.input121.c ================================================ 2 3 4 5 13 14 15 16 1000 1000 ================================================ FILE: 53_Mop_up_pt2/tests/out.input122.c ================================================ x 0, y 0, x || y 0, x && y 0 x 0, y 1, x || y 1, x && y 0 x 1, y 0, x || y 1, x && y 0 x 1, y 1, x || y 1, x && y 1 ================================================ FILE: 53_Mop_up_pt2/tests/out.input123.c ================================================ 0 infant composite 1 infant composite 2 infant prime 3 infant prime 4 infant composite 5 infant prime 6 infant composite 7 infant prime 8 infant composite 9 infant composite 10 infant composite 11 infant prime 12 infant composite 13 teen prime 14 teen composite 15 teen composite 16 teen composite 17 teen prime 18 teen composite 19 teen prime ================================================ FILE: 53_Mop_up_pt2/tests/out.input125.c ================================================ 2008 2008 ================================================ FILE: 53_Mop_up_pt2/tests/out.input127.c ================================================ 2008 2008 ================================================ FILE: 53_Mop_up_pt2/tests/out.input128.c ================================================ 10 10 15 15 20 20 ================================================ FILE: 53_Mop_up_pt2/tests/out.input130.c ================================================ Hello world ================================================ FILE: 
53_Mop_up_pt2/tests/out.input131.c ================================================ Doing nothing... nothing done x is now 100 ================================================ FILE: 53_Mop_up_pt2/tests/out.input132.c ================================================ ================================================ FILE: 53_Mop_up_pt2/tests/out.input133.c ================================================ OK ================================================ FILE: 53_Mop_up_pt2/tests/out.input134.c ================================================ 1st match ================================================ FILE: 53_Mop_up_pt2/tests/out.input135.c ================================================ testing x ================================================ FILE: 53_Mop_up_pt2/tests/runtests ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! -f ../cwj ] then (cd ..; make install) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" # Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../cwj -o out $i ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../cwj $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" 
-eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.s "trial.$i" done ================================================ FILE: 53_Mop_up_pt2/tests/runtestsn ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! -f ../compn ] then (cd ..; make install) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" # Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../compn -o out $i ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../compn $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" 
-eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.o out.s "trial.$i" done ================================================ FILE: 53_Mop_up_pt2/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct symtable *ctype, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->type = type; n->ctype = ctype; n->left = left; n->mid = mid; n->right = right; n->sym = sym; n->a_intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, struct symtable *ctype, struct symtable *sym, int intvalue) { return (mkastnode(op, type, ctype, NULL, NULL, NULL, sym, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, int type, struct symtable *ctype, struct ASTnode *left, struct symtable *sym, int intvalue) { return (mkastnode(op, type, ctype, left, NULL, NULL, sym, intvalue)); } // Generate and return a new label number // just for AST dumping purposes static int dumpid = 1; static int gendumplabel(void) { return (dumpid++); } // Given an AST tree, print it out and follow the // traversal of the tree that genAST() follows void dumpAST(struct ASTnode *n, int label, int level) { int Lfalse, Lstart, Lend; int i; switch (n->op) { case A_IF: Lfalse = gendumplabel(); for (i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_IF"); if (n->right) { Lend = gendumplabel(); fprintf(stdout, ", end L%d", Lend); } fprintf(stdout, "\n"); dumpAST(n->left, Lfalse, level + 2); dumpAST(n->mid, 
NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); return; case A_WHILE: Lstart = gendumplabel(); for (i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_WHILE, start L%d\n", Lstart); Lend = gendumplabel(); dumpAST(n->left, Lend, level + 2); dumpAST(n->right, NOLABEL, level + 2); return; } // Reset level to -2 for A_GLUE if (n->op == A_GLUE) level = -2; // General AST node handling if (n->left) dumpAST(n->left, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); for (i = 0; i < level; i++) fprintf(stdout, " "); switch (n->op) { case A_GLUE: fprintf(stdout, "\n\n"); return; case A_FUNCTION: fprintf(stdout, "A_FUNCTION %s\n", n->sym->name); return; case A_ADD: fprintf(stdout, "A_ADD\n"); return; case A_SUBTRACT: fprintf(stdout, "A_SUBTRACT\n"); return; case A_MULTIPLY: fprintf(stdout, "A_MULTIPLY\n"); return; case A_DIVIDE: fprintf(stdout, "A_DIVIDE\n"); return; case A_EQ: fprintf(stdout, "A_EQ\n"); return; case A_NE: fprintf(stdout, "A_NE\n"); return; case A_LT: fprintf(stdout, "A_LE\n"); return; case A_GT: fprintf(stdout, "A_GT\n"); return; case A_LE: fprintf(stdout, "A_LE\n"); return; case A_GE: fprintf(stdout, "A_GE\n"); return; case A_INTLIT: fprintf(stdout, "A_INTLIT %d\n", n->a_intvalue); return; case A_STRLIT: fprintf(stdout, "A_STRLIT rval label L%d\n", n->a_intvalue); return; case A_IDENT: if (n->rvalue) fprintf(stdout, "A_IDENT rval %s\n", n->sym->name); else fprintf(stdout, "A_IDENT %s\n", n->sym->name); return; case A_ASSIGN: fprintf(stdout, "A_ASSIGN\n"); return; case A_WIDEN: fprintf(stdout, "A_WIDEN\n"); return; case A_RETURN: fprintf(stdout, "A_RETURN\n"); return; case A_FUNCCALL: fprintf(stdout, "A_FUNCCALL %s\n", n->sym->name); return; case A_ADDR: fprintf(stdout, "A_ADDR %s\n", n->sym->name); return; case A_DEREF: if (n->rvalue) fprintf(stdout, "A_DEREF rval\n"); else fprintf(stdout, "A_DEREF\n"); return; case A_SCALE: fprintf(stdout, "A_SCALE %d\n", n->a_size); return; case A_PREINC: 
fprintf(stdout, "A_PREINC %s\n", n->sym->name); return; case A_PREDEC: fprintf(stdout, "A_PREDEC %s\n", n->sym->name); return; case A_POSTINC: fprintf(stdout, "A_POSTINC\n"); return; case A_POSTDEC: fprintf(stdout, "A_POSTDEC\n"); return; case A_NEGATE: fprintf(stdout, "A_NEGATE\n"); return; case A_BREAK: fprintf(stdout, "A_BREAK\n"); return; case A_CONTINUE: fprintf(stdout, "A_CONTINUE\n"); return; case A_CASE: fprintf(stdout, "A_CASE %d\n", n->a_intvalue); return; case A_DEFAULT: fprintf(stdout, "A_DEFAULT\n"); return; case A_SWITCH: fprintf(stdout, "A_SWITCH\n"); return; case A_CAST: fprintf(stdout, "A_CAST %d\n", n->type); return; case A_ASPLUS: fprintf(stdout, "A_ASPLUS\n"); return; case A_ASMINUS: fprintf(stdout, "A_ASMINUS\n"); return; case A_ASSTAR: fprintf(stdout, "A_ASSTAR\n"); return; case A_ASSLASH: fprintf(stdout, "A_ASSLASH\n"); return; case A_TOBOOL: fprintf(stdout, "A_TOBOOL\n"); return; case A_LOGOR: fprintf(stdout, "A_LOGOR\n"); return; case A_LOGAND: fprintf(stdout, "A_LOGAND\n"); return; default: fatald("Unknown dumpAST operator", n->op); } } ================================================ FILE: 53_Mop_up_pt2/types.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Types and type handling // Copyright (c) 2019 Warren Toomey, GPL3 // Return true if a type is an int type // of any size, false otherwise int inttype(int type) { return (((type & 0xf) == 0) && (type >= P_CHAR && type <= P_LONG)); } // Return true if a type is of pointer type int ptrtype(int type) { return ((type & 0xf) != 0); } // Given a primitive type, return // the type which is a pointer to it int pointer_to(int type) { if ((type & 0xf) == 0xf) fatald("Unrecognised in pointer_to: type", type); return (type + 1); } // Given a primitive pointer type, return // the type which it points to int value_at(int type) { if ((type & 0xf) == 0x0) fatald("Unrecognised in value_at: type", type); return (type - 1); } // Given a type and a 
composite type pointer, return // the size of this type in bytes int typesize(int type, struct symtable *ctype) { if (type == P_STRUCT || type == P_UNION) return (ctype->size); return (genprimsize(type)); } // Given an AST tree and a type which we want it to become, // possibly modify the tree by widening or scaling so that // it is compatible with this type. Return the original tree // if no changes occurred, a modified tree, or NULL if the // tree is not compatible with the given type. // If this will be part of a binary operation, the AST op is not zero. struct ASTnode *modify_type(struct ASTnode *tree, int rtype, struct symtable *rctype, int op) { int ltype; int lsize, rsize; ltype = tree->type; // For A_LOGOR and A_LOGAND, both types have to be int or pointer types if (op==A_LOGOR || op==A_LOGAND) { if (!inttype(ltype) && !ptrtype(ltype)) return(NULL); if (!inttype(ltype) && !ptrtype(rtype)) return(NULL); return (tree); } // XXX No idea on these yet if (ltype == P_STRUCT || ltype == P_UNION) fatal("Don't know how to do this yet"); if (rtype == P_STRUCT || rtype == P_UNION) fatal("Don't know how to do this yet"); // Compare scalar int types if (inttype(ltype) && inttype(rtype)) { // Both types same, nothing to do if (ltype == rtype) return (tree); // Get the sizes for each type lsize = typesize(ltype, NULL); rsize = typesize(rtype, NULL); // Tree's size is too big if (lsize > rsize) return (NULL); // Widen to the right if (rsize > lsize) return (mkastunary(A_WIDEN, rtype, NULL, tree, NULL, 0)); } // For pointers if (ptrtype(ltype) && ptrtype(rtype)) { // We can compare them if (op >= A_EQ && op <= A_GE) return (tree); // A comparison of the same type for a non-binary operation is OK, // or when the left tree is of `void *` type. 
if (op == 0 && (ltype == rtype || ltype == pointer_to(P_VOID))) return (tree); } // We can scale only on A_ADD or A_SUBTRACT operation if (op == A_ADD || op == A_SUBTRACT) { // Left is int type, right is pointer type and the size // of the original type is >1: scale the left if (inttype(ltype) && ptrtype(rtype)) { rsize = genprimsize(value_at(rtype)); if (rsize > 1) return (mkastunary(A_SCALE, rtype, rctype, tree, NULL, rsize)); else return (tree); // Size 1, no need to scale } } // If we get here, the types are not compatible return (NULL); } ================================================ FILE: 54_Reg_Spills/Makefile ================================================ # Define the location of the include directory # and the location to install the compiler binary INCDIR=/tmp/include BINDIR=/tmp HSRCS= data.h decl.h defs.h SRCS= cg.c decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c ARMSRCS= cg_arm.c decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c cwj: $(SRCS) $(HSRCS) cc -o cwj -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCS) compn: $(SRCN) $(HSRCS) cc -D__NASM__ -o compn -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCN) cwjarm: $(ARMSRCS) $(HSRCS) cc -o cwjarm -g -Wall $(ARMSRCS) cp cwjarm cwj install: cwj mkdir -p $(INCDIR) rsync -a include/. $(INCDIR) cp cwj $(BINDIR) chmod +x $(BINDIR)/cwj installn: compn mkdir -p $(INCDIR) rsync -a include/. 
$(INCDIR) cp compn $(BINDIR) chmod +x $(BINDIR)/compn clean: rm -f cwj cwjarm compn *.o *.s out a.out test: install tests/runtests (cd tests; chmod +x runtests; ./runtests) armtest: cwjarm tests/runtests (cd tests; chmod +x runtests; ./runtests) testn: installn tests/runtestsn (cd tests; chmod +x runtestsn; ./runtestsn) ================================================ FILE: 54_Reg_Spills/Readme.md ================================================ # Part 54: Spilling Registers I've been putting off dealing with [register spilling](https://en.wikipedia.org/wiki/Register_allocation#Spilling) for a while because I knew the issue was going to be thorny. I think what I've done here is a first cut at the problem. It's naive, but it is a start. ## The Issues Registers are a limited commodity in most CPUs. They are the fastest storage units, and we use them to hold temporary results while we evaluate expressions. Once we have stored a result into a more permanent location (e.g. a memory location which represents a variable) we can free the in-use registers and re-use them. Once we hit expressions of large complexity we run out of enough registers to hold the intermediate results, and this prevents us from evaluating the expression. At present the compiler can allocate up to four registers. Yes, I know this is a bit artificial; however, there will always be an expression so complex that it can't be evaluated with a fixed number of registers. Consider this expression, and remember the order of precedence of the C operators: ```c int x= 5 || 6 && 7 | 8 & 9 << 2 + 3 * 4; ``` Each operator on the right has higher precedence than the one on its left. Thus, we need to store 5 into a register, but then evaluate the rest of the expression. Now we store 6 into a register, and ditto. Now, 7 in a register and ditto. Now 8 in a register and ditto. Oops! We now need to load 9 into a register, but all four registers are allocated. 
In fact, we'll need to allocate another *four* registers to evaluate this expression. What is the solution? The solution is to [spill registers](https://en.wikipedia.org/wiki/Register_allocation#Spilling) somewhere in main memory so that we free up a register. However, we also need to reload the spilled register at the point when we need it; this means that it must now be free to have its old value reloaded. So, we need not only the ability to spill registers somewhere but also track which ones were spilled and when, and reload them as needed. It's tricky. You can see by the external link above that there is a tonne of theory behind optimal register allocation and spilling. This isn't going to be the place for that theory. I'll implement a simple solution and leave you the opportunity to improve the code based on the theory! Now, where do registers get spilled? We could allocate an arbitrary sized [memory heap](https://en.wikipedia.org/wiki/Memory_management#Dynamic_memory_allocation) and store all the spilled registers here. Generally, though, most register spill implementations use the existing stack. Why? The answers are that we already have hardware-defined *push* and *pop* operations on the stack which are quick. We can (usually) rely on the operating system extending the stack size indefinitely. Also, we divide our stack up into stack frames, one per function. At the end of a function we can simply move the stack pointer, and we don't have to worry about popping off any registers that we spilled and somehow forgot about. I'm going to use the stack for spilling registers in our compiler. Let's look at the implications of spilling and of using the stack. ## The Implications To do register spilling, we need the ability to: + Choose and spill one register's value when we need to allocate a register and none are free. It will be pushed on to the stack. + Reload the spilled register's value from the stack when we need it. 
+ Ensure that the register is free at the point when we need to reload its value. + Before a function call, we need to spill all in-use registers. This is because a function call is an expression. We need to be able to do `2 + 3 * fred(4,5) - 7`, and still have the 2 and 3 in registers once the function returns with its value. + Thus, we need to reload all the registers that we spilled before a function call. The above is what we need, regardless of the mechanism. Now let's bring the stack in and see how it will constrain us. If we can only push a register's value on the stack to spill it, and pop a register's value from the stack, this implies that we have to reload registers in the reverse order in which we spilled them on the stack. Is this something that we can guarantee? In other words, will we ever need to reload a register out of order? If so, the stack isn't going to be the mechanism that we need. Alternatively, can we write our compiler to ensure that the registers reload in reverse spill order? ## Some Optimisations If you have read the external link above, or you know something about register allocation already, then you know there are so many ways we can optimise register allocation and spilling. You probably know much more than I do, so don't giggle too much in the next section. When we call a function, not all of our registers will be allocated already. Also, some registers will be used to hold some of the argument values for the function. Also, the function will likely return a value and hence destroy a register. Thus, we don't have to spill all of our registers onto the stack before we do a function call. If we were clever, we could work out which registers have to be spilled and only spill these ones. We can even take a step back and rewrite the AST tree to ease the pressure on our expression evaluation. 
For example, we could use a form of [strength reduction](https://en.wikipedia.org/wiki/Strength_reduction) to lower the number of registers allocated. Consider the expression: ```c 2 + (3 + (4 + (5 + (6 + (7 + 8))))) ``` The way it is written, we would have to load 2 into a register, start to evaluate the rest, load 3 into a register and ditto. We would end up with seven register allocations. However, addition is *commutative* and *associative*, and therefore we can re-visualise the above expression as: ```c (((((2 + 3) + 4) + 5) + 6) + 7) + 8 ``` Now we can evaluate `2+3` and put it into a register, add on `4` and still only need one register, etc. This is something that the [SubC](http://www.t3x.org/subc/) compiler does with its AST trees, and it is something that I'll implement later. But for now, no optimisations. In fact, the spilling code is going to produce some pretty bad assembly. But at least the assembly that it produces works. Remember, "*premature optimisation is the root of all evil*" -- Donald Knuth. ## The Nuts and Bolts Let's start with the most primitive new functions in `cg.c`: ```c // Push and pop a register on/off the stack static void pushreg(int r) { fprintf(Outfile, "\tpushq\t%s\n", reglist[r]); } static void popreg(int r) { fprintf(Outfile, "\tpopq\t%s\n", reglist[r]); } ``` We can use these to spill and reload a register on the stack. Note that I didn't call them `spillreg()` and `reloadreg()`. They are general-purpose and we might use them for something else later. ## The `spillreg` Next up is a new static variable in `cg.c`: ```c static int spillreg=0; ``` This is the next register that we will choose to spill on the stack. Each time we spill a register, we will increment `spillreg`. So it eventually will be 4, then 5, ... then 8, ... then 3002 etc. Question: why not reset it to zero when we got past the maximum number of registers? The answer is that, when we pop registers from the stack, we need to know when to *stop* popping registers. 
If we had used modulo arithmetic, we would pop in a fixed cycle and not know when to stop. That said, we must only spill registers from 0 to `NUMFREEREGS-1`, so we will do some modulo arithmetic in the following code. ## Spilling One Register We spill a register when there are no free registers. We will choose the `spillreg` (modulo NUMFREEREGS) register to spill. In the `alloc_register()` function in `cg.c`: ```c int alloc_register(void) { int reg; // Try to allocate a register but fail ... // We have no registers, so we must spill one reg= (spillreg % NUMFREEREGS); spillreg++; fprintf(Outfile, "# spilling reg %d\n", reg); pushreg(reg); return (reg); } ``` We choose `spillreg % NUMFREEREGS` as the register to spill, and we `pushreg(reg)` to do so. We increment `spillreg` to be the next register to spill, and we return the newly spilled register number as that is now free. I also have a debug statement in there which I'll remove later. ## Reloading One Register We can only reload a register when a) it becomes free and b) it's the most recent register that was spilled onto the stack. Here is where we insert an implicit assumption into our code: we must always reload the most recently-spilled register. We had better make sure that the compiler can keep this promise. The new code in `free_register()` in `cg.c` is: ```c static void free_register(int reg) { ... // If this was a spilled register, get it back if (spillreg > 0) { spillreg--; reg= (spillreg % NUMFREEREGS); fprintf(Outfile, "# unspilling reg %d\n", reg); popreg(reg); } else // Simply free the in-use register ... } ``` We simply undo the most recent spill, and decrement `spillreg`. Note that this is why we didn't store `spillreg` with a modulo value. Once it hits zero, we know that there are no spilled registers on the stack and there is no point in trying to pop a register value from the stack. 
## Register Spills Before a Function Call As I mentioned before, a clever compiler would determine which registers *had* to be spilled before a function call. This is not a clever compiler, and so we have these new functions: ```c // Spill all registers on the stack void spill_all_regs(void) { int i; for (i = 0; i < NUMFREEREGS; i++) pushreg(i); } // Unspill all registers from the stack static void unspill_all_regs(void) { int i; for (i = NUMFREEREGS-1; i >= 0; i--) popreg(i); } ``` At this point, while you are either laughing or crying (or both), I'll remind you of a Ken Thompson quote: "*When in doubt, use brute force.*" ## Keeping Our Assumptions Intact We have an implicit assumption built into this code: any reloaded register was the one last spilled. We had better check that this is the case. For binary expressions, `genAST()` in `gen.c` does this: ```c // Get the left and right sub-tree values leftreg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op); rightreg = genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); switch (n->op) { // Do the specific binary operation } ``` We allocate the register for the left-hand expression first, then the register for the right-hand expression. If we have to spill registers, then the register for the right-hand expression will be the most recently-spilled register. Therefore, we had better *free* the register for the right-hand expression first, to ensure that any spilled value will get reloaded back into this register. I've gone through `cg.c` and made some modifications to the binary expression generators to do this. An example is `cgadd()` in `cg.c`: ```c // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } ``` The code used to add into `r2`, free `r1` and return `r2`. Not good, but luckily addition is commutative. 
We can save the result in either register, so now `r1` returns the result and `r2` is freed. If it was spilled, it will get its old value back. I *hope* that I've done this everywhere that is needed, and I *hope* that our assumption is now satisfied, but I'm not completely sure yet. We will have to do a lot of testing to be reasonably satisfied. ## Changes to Function Calls We now have the spill/reload nuts and bolts in place. For ordinary register allocations and frees, the above code will spill and reload as required. We also try to ensure that we free the most recently spilled register. The last thing we need to do is spill the registers before a function call and reload them afterwards. There is a wrinkle: the function may be part of an expression. We need to: 1. Spill the registers first. 1. Copy the arguments to the function (using the registers). 1. Call the function. 1. Reload the registers before we 1. Copy the register's return value. If we do the last two out of order, we will lose the returned value as we reload all the old registers. To make the above happen, I've had to share the spill/reload duties between `gen.c` and `cg.c` as follows. In `gen_funccall()` in `gen.c`: ```c static int gen_funccall(struct ASTnode *n) { ... // Save the registers before we copy the arguments spill_all_regs(); // Walk the list of arguments and copy them ... // Call the function, clean up the stack (based on numargs), // and return its result return (cgcall(n->sym, numargs)); } ``` which does steps 1, 2 and 3: spill, copy, call. And in `cgcall()` in `cg.c`: ```c int cgcall(struct symtable *sym, int numargs) { int outr; // Call the function ... // Remove any arguments pushed on the stack ... // Unspill all the registers unspill_all_regs(); // Get a new register and copy the return value into it outr = alloc_register(); fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]); return (outr); } ``` which does the final two steps: reload and copy the return value. 
## Example Time Here are some examples which cause register spills: function calls and complex expressions. We'll start with `tests/input136.c`: ```c int add(int x, int y) { return(x+y); } int main() { int result; result= 3 * add(2,3) - 5 * add(4,6); printf("%d\n", result); return(0); } ``` `add()` needs to be treated as an expression. We put 3 into a register, and spill all the registers before we call `add(2,3)`. We reload the registers before we get the return value. The assembly code is: ``` movq $3, %r10 # Get 3 into %r10 pushq %r10 pushq %r11 # Spill all four registers, thus pushq %r12 # preserving the %r10 value pushq %r13 movq $3, %r11 # Copy the 3 and 2 arguments movq %r11, %rsi movq $2, %r11 movq %r11, %rdi call add@PLT # Call add() popq %r13 popq %r12 # Reload all four registers, thus popq %r11 # restoring the %r10 value popq %r10 movq %rax, %r11 # Get the return value into %r11 imulq %r11, %r10 # Multiply 3 * add(2,3) ``` Yes, there is plenty of scope for optimisation here. KISS, though. In `tests/input137.c`, there is this expression: ```c x= a + (b + (c + (d + (e + (f + (g + h)))))); ``` which requires eight registers and so we'll need to spill four of them. The generated assembly code is: ``` movslq a(%rip), %r10 movslq b(%rip), %r11 movslq c(%rip), %r12 movslq d(%rip), %r13 pushq %r10 # spilling %r10 movslq e(%rip), %r10 pushq %r11 # spilling %r11 movslq f(%rip), %r11 pushq %r12 # spilling %r12 movslq g(%rip), %r12 pushq %r13 # spilling %r13 movslq h(%rip), %r13 addq %r13, %r12 popq %r13 # unspilling %r13 addq %r12, %r11 popq %r12 # unspilling %r12 addq %r11, %r10 popq %r11 # unspilling %r11 addq %r10, %r13 popq %r10 # unspilling %r10 addq %r13, %r12 addq %r12, %r11 addq %r11, %r10 movl %r10d, -4(%rbp) ``` and overall we end up with the correct expression evaluation. ## Conclusion and What's Next Register allocating and spilling is hard to get right, and there is a lot of optimisation theory which can be brought to bear. 
I've implemented quite a naive approach to register allocating and spilling. It will work but there is substantial room for improvement. While doing the above, I also fixed the problem with `&&` and `||`. I've decided to write these changes up in the next part, even though the code here already has these changes. [Next step](../55_Lazy_Evaluation/Readme.md) ================================================ FILE: 54_Reg_Spills/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\t.text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\t.data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch (type) { case P_CHAR: return (offset); case P_INT: case P_LONG: break; default: if (!ptrtype(type)) fatald("Bad type in cg_align:", type); } // Here we have an int or a long. Align it on a 4-byte offset // I put the generic code here so it can be reused elsewhere. 
alignment = 4; offset = (offset + direction * (alignment - 1)) & ~(alignment - 1); return (offset); } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. static int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. // We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "%r10", "%r11", "%r12", "%r13", "%r9", "%r8", "%rcx", "%rdx", "%rsi", "%rdi" }; static char *breglist[] = { "%r10b", "%r11b", "%r12b", "%r13b", "%r9b", "%r8b", "%cl", "%dl", "%sil", "%dil" }; static char *dreglist[] = { "%r10d", "%r11d", "%r12d", "%r13d", "%r9d", "%r8d", "%ecx", "%edx", "%esi", "%edi" }; // Push and pop a register on/off the stack static void pushreg(int r) { fprintf(Outfile, "\tpushq\t%s\n", reglist[r]); } static void popreg(int r) { fprintf(Outfile, "\tpopq\t%s\n", reglist[r]); } // Set all registers as available. // But if reg is positive, don't free that one. void freeall_registers(int keepreg) { int i; for (i = 0; i < NUMFREEREGS; i++) if (i != keepreg) freereg[i] = 1; } // When we need to spill a register, we choose // the following register and then cycle through // the remaining registers. The spillreg increments // continually, so we need to take a modulo NUMFREEREGS // on it. static int spillreg=0; // Allocate a free register. Return the number of // the register. Die if no available registers. 
int alloc_register(void) { int reg; for (reg = 0; reg < NUMFREEREGS; reg++) { if (freereg[reg]) { freereg[reg] = 0; return (reg); } } // We have no registers, so we must spill one reg= (spillreg % NUMFREEREGS); spillreg++; fprintf(Outfile, "# spilling reg %d\n", reg); pushreg(reg); return (reg); } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) { fprintf(Outfile, "# error trying to free register %d\n", reg); fatald("Error trying to free register", reg); } // If this was a spilled register, get it back if (spillreg > 0) { spillreg--; reg= (spillreg % NUMFREEREGS); fprintf(Outfile, "# unspilling reg %d\n", reg); popreg(reg); } else { freereg[reg] = 1; } } // Spill all registers on the stack void spill_all_regs(void) { int i; for (i = 0; i < NUMFREEREGS; i++) pushreg(i); } // Unspill all registers from the stack static void unspill_all_regs(void) { int i; for (i = NUMFREEREGS-1; i >= 0; i--) popreg(i); } // Print out the assembly preamble void cgpreamble() { freeall_registers(NOREG); cgtextseg(); fprintf(Outfile, "# internal switch(expr) routine\n" "# %%rsi = switch table, %%rax = expr\n" "# from SubC: http://www.t3x.org/subc/\n" "\n" "switch:\n" " pushq %%rsi\n" " movq %%rdx, %%rsi\n" " movq %%rax, %%rbx\n" " cld\n" " lodsq\n" " movq %%rax, %%rcx\n" "next:\n" " lodsq\n" " movq %%rax, %%rdx\n" " lodsq\n" " cmpq %%rdx, %%rbx\n" " jnz no\n" " popq %%rsi\n" " jmp *%%rax\n" "no:\n" " loop next\n" " lodsq\n" " popq %%rsi\n" " jmp *%%rax\n" "\n"); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(struct symtable *sym) { char *name = sym->name; struct symtable *parm, *locvar; int cnt; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // 
Output the function start, save the %rsp and %rsp if (sym->class == C_GLOBAL) fprintf(Outfile, "\t.globl\t%s\n" "\t.type\t%s, @function\n", name, name); fprintf(Outfile, "%s:\n" "\tpushq\t%%rbp\n" "\tmovq\t%%rsp, %%rbp\n", name); // Copy any in-register parameters to the stack, up to six of them // The remaining parameters are already on the stack for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) { if (cnt > 6) { parm->st_posn = paramOffset; paramOffset += 8; } else { parm->st_posn = newlocaloffset(parm->type); cgstorlocal(paramReg--, parm); } } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) { locvar->st_posn = newlocaloffset(locvar->type); } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\taddq\t$%d,%%rsp\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->st_endlabel); fprintf(Outfile, "\taddq\t$%d,%%rsp\n", stackOffset); fputs("\tpopq %rbp\n" "\tret\n", Outfile); freeall_registers(NOREG); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. 
int cgloadglob(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name); } else // Print out the code to initialise it switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovzbq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovslq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. 
int cgloadlocal(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->st_posn); fprintf(Outfile, "\tmovq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->st_posn); } else switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->st_posn); fprintf(Outfile, "\tmovzbq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->st_posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->st_posn); fprintf(Outfile, "\tmovslq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->st_posn); break; default: fatald("Bad type in cgloadlocal:", sym->type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tleaq\tL%d(%%rip), %s\n", label, reglist[r]); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } // Subtract the second register from the first and // return the number of the register with the result int 
cgsub(int r1, int r2) { fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tandq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } int cgor(int r1, int r2) { fprintf(Outfile, "\torq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txorq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshlq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshrq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tnegq\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnotq\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); return (r); } // Load a boolean value (only 0 or 1) // into the given register void cgloadboolean(int r, int val) { fprintf(Outfile, 
"\tmovq\t$%d, %s\n", val, reglist[r]); } // Convert an integer value to a boolean value. Jump if // it's an IF, WHILE, LOGAND or LOGOR operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); switch(op) { case A_IF: case A_WHILE: case A_LOGAND: fprintf(Outfile, "\tje\tL%d\n", label); break; case A_LOGOR: fprintf(Outfile, "\tjne\tL%d\n", label); break; default: fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { int outr; // Call the function fprintf(Outfile, "\tcall\t%s@PLT\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\taddq\t$%d, %%rsp\n", 8 * (numargs - 6)); // Unspill all the registers unspill_all_regs(); // Get a new register and copy the return value into it outr = alloc_register(); fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpushq\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmovq\t%s, %s\n", reglist[r], reglist[FIRSTPARAMREG - argposn + 1]); } free_register(r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsalq\t$%d, %s\n", val, reglist[r]); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %s(%%rip)\n", reglist[r], sym->name); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %s(%%rip)\n", breglist[r], sym->name); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %s(%%rip)\n", dreglist[r], sym->name); break; default: fatald("Bad type in cgstorglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %d(%%rbp)\n", reglist[r], sym->st_posn); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %d(%%rbp)\n", breglist[r], sym->st_posn); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %d(%%rbp)\n", dreglist[r], sym->st_posn); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size, type; int initvalue; int i; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the variable (or its elements if an array) // and the type of the variable if (node->stype == S_ARRAY) { size = typesize(value_at(node->type), node->ctype); type = value_at(node->type); } else { size = node->size; type = node->type; } // Generate the global identity and the label cgdataseg(); if (node->class == C_GLOBAL) 
fprintf(Outfile, "\t.globl\t%s\n", node->name); fprintf(Outfile, "%s:\n", node->name); // Output space for one or more elements for (i = 0; i < node->nelems; i++) { // Get any initial value initvalue = 0; if (node->initlist != NULL) initvalue = node->initlist[i]; // Generate the space for this type switch (size) { case 1: fprintf(Outfile, "\t.byte\t%d\n", initvalue); break; case 4: fprintf(Outfile, "\t.long\t%d\n", initvalue); break; case 8: // Generate the pointer to a string literal. Treat a zero value // as actually zero, not the label L0 if (node->initlist != NULL && type == pointer_to(P_CHAR) && initvalue != 0) fprintf(Outfile, "\t.quad\tL%d\n", initvalue); else fprintf(Outfile, "\t.quad\t%d\n", initvalue); break; default: for (i = 0; i < size; i++) fprintf(Outfile, "\t.byte\t0\n"); } } } // Generate a global string and its start label // Don't output the label if append is true. void cgglobstr(int l, char *strvalue, int append) { char *cptr; if (!append) cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\t.byte\t%d\n", *cptr); } } void cgglobstrend(void) { fprintf(Outfile, "\t.byte\t0\n"); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(NOREG); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Only return a value if we have a value to return if (reg != NOREG) { // Deal with pointers here as we can't put them in // the switch statement if (ptrtype(sym->type)) fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); else { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzbl\t%s, %%eax\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %%eax\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", 
sym->type); } } } cgjump(sym->st_endlabel); } // Generate code to load the address of an // identifier into a variable. Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL || sym->class == C_STATIC) fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", sym->name, reglist[r]); else fprintf(Outfile, "\tleaq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzbq\t(%s), %s\n", reglist[r], reglist[r]); break; case 4: fprintf(Outfile, "\tmovslq\t(%s), %s\n", reglist[r], reglist[r]); break; case 8: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { // Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmovb\t%s, (%s)\n", breglist[r1], reglist[r2]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } // Generate a switch jump table and the code to // load the registers and call the switch() code void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; // Get a label for the switch table label = genlabel(); cglabel(label); // Heuristic. If we have no cases, create one case // which points to the default case if (casecount == 0) { caseval[0] = 0; caselabel[0] = defaultlabel; casecount = 1; } // Generate the switch jump table. 
fprintf(Outfile, "\t.quad\t%d\n", casecount); for (i = 0; i < casecount; i++) fprintf(Outfile, "\t.quad\t%d, L%d\n", caseval[i], caselabel[i]); fprintf(Outfile, "\t.quad\tL%d\n", defaultlabel); // Load the specific registers cglabel(toplabel); fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); fprintf(Outfile, "\tleaq\tL%d(%%rip), %%rdx\n", label); fprintf(Outfile, "\tjmp\tswitch\n"); } // Move value between registers void cgmove(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s, %s\n", reglist[r1], reglist[r2]); } ================================================ FILE: 54_Reg_Spills/cg_arm.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for ARMv6 on Raspberry Pi // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. static int freereg[4]; static char *reglist[4] = { "r4", "r5", "r6", "r7" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // We have to store large integer literal values in memory. // Keep a list of them which will be output in the postamble #define MAXINTS 1024 int Intlist[MAXINTS]; static int Intslot = 0; // Determine the offset of a large integer // literal from the .L3 label. If the integer // isn't in the list, add it. 
static void set_int_offset(int val) { int offset = -1; // See if it is already there for (int i = 0; i < Intslot; i++) { if (Intlist[i] == val) { offset = 4 * i; break; } } // Not in the list, so add it if (offset == -1) { offset = 4 * Intslot; if (Intslot == MAXINTS) fatal("Out of int slots in set_int_offset()"); Intlist[Intslot++] = val; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L3+%d\n", offset); } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Print out the assembly postamble void cgpostamble() { // Print out the global variables fprintf(Outfile, ".L2:\n"); for (int i = 0; i < Globs; i++) { if (Symtable[i].stype == S_VARIABLE) fprintf(Outfile, "\t.word %s\n", Symtable[i].name); } // Print out the integer literals fprintf(Outfile, ".L3:\n"); for (int i = 0; i < Intslot; i++) { fprintf(Outfile, "\t.word %d\n", Intlist[i]); } } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, \%%function\n" "%s:\n" "\tpush\t{fp, lr}\n" "\tadd\tfp, sp, #4\n" "\tsub\tsp, sp, #8\n" "\tstr\tr0, [fp, #-8]\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Symtable[id].endlabel); fputs("\tsub\tsp, fp, #4\n" "\tpop\t{fp, pc}\n" "\t.align\t2\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); // If the literal value is small, do it with one instruction if (value <= 1000) fprintf(Outfile, "\tmov\t%s, #%d\n", reglist[r], value); else { set_int_offset(value); fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); } return (r); } // Determine the offset of a variable from the .L2 // label. Yes, this is inefficient code. static void set_var_offset(int id) { int offset = 0; // Walk the symbol table up to id. 
// Find S_VARIABLEs and add on 4 until // we get to our variable for (int i = 0; i < id; i++) { if (Symtable[i].stype == S_VARIABLE) offset += 4; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L2+%d\n", offset); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tldrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s, %s\n", reglist[r1], reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\tmul\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { // To do a divide: r0 holds the dividend, r1 holds the divisor. // The quotient is in r0. 
fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r1]); fprintf(Outfile, "\tmov\tr1, %s\n", reglist[r2]); fprintf(Outfile, "\tbl\t__aeabi_idiv\n"); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r1]); free_register(r2); return (r1); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]); fprintf(Outfile, "\tbl\t%s\n", Symtable[id].name); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r]); return (r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tlsl\t%s, %s, #%d\n", reglist[r], reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tstr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgstorglob:", Symtable[id].type); } return (r); } // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { if (ptrtype(type)) return (4); switch (type) { case P_CHAR: return (1); case P_INT: case P_LONG: return (4); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Symtable[id].type); fprintf(Outfile, "\t.data\n" "\t.globl\t%s\n", Symtable[id].name); switch (typesize) { case 1: fprintf(Outfile, "%s:\t.byte\t0\n", Symtable[id].name); break; case 4: fprintf(Outfile, "%s:\t.long\t0\n", Symtable[id].name); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "moveq", "movne", "movlt", "movgt", "movle", "movge" }; // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "movne", "moveq", "movge", "movle", "movgt", "movlt" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #1\n", cmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #0\n", invcmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\tuxtb\t%s, %s\n", reglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tb\tL%d\n", l); } // List of inverted branch instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *brlist[] = { "bne", "beq", "bge", "ble", "bgt", "blt" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", brlist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[reg]); cgjump(Symtable[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. Return a new register int cgaddress(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); fprintf(Outfile, "\tmov\t%s, r3\n", reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tldrb\t%s, [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [%s]\n", reglist[r1], reglist[r2]); break; case P_INT: case P_LONG: fprintf(Outfile, "\tstr\t%s, [%s]\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 54_Reg_Spills/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { no_seg, 
text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\tsection .text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\tsection .data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch (type) { case P_CHAR: return (offset); case P_INT: case P_LONG: break; default: if (!ptrtype(type)) fatald("Bad type in cg_align:", type); } // Here we have an int or a long. Align it on a 4-byte offset // I put the generic code here so it can be reused elsewhere. alignment = 4; offset = (offset + direction * (alignment - 1)) & ~(alignment - 1); return (offset); } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. int newlocaloffset(int type) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (cgprimsize(type) > 4) ? cgprimsize(type) : 4; return (-localOffset); } // List of available registers and their names. 
// We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "r10", "r11", "r12", "r13", "r9", "r8", "rcx", "rdx", "rsi", "rdi" }; static char *breglist[] = { "r10b", "r11b", "r12b", "r13b", "r9b", "r8b", "cl", "dl", "sil", "dil" }; static char *dreglist[] = { "r10d", "r11d", "r12d", "r13d", "r9d", "r8d", "ecx", "edx", "esi", "edi" }; // Push and pop a register on/off the stack static void pushreg(int r) { fprintf(Outfile, "\tpush\t%s\n", reglist[r]); } static void popreg(int r) { fprintf(Outfile, "\tpop\t%s\n", reglist[r]); } // Set all registers as available // But if reg is positive, don't free that one. void freeall_registers(int keepreg) { int i; for (i = 0; i < NUMFREEREGS; i++) if (i != keepreg) freereg[i] = 1; } // When we need to spill a register, we choose // the following register and then cycle through // the remaining registers. The spillreg increments // continually, so we need to take a modulo NUMFREEREGS // on it. static int spillreg=0; // Allocate a free register. Return the number of // the register. Die if no available registers. int alloc_register(void) { int reg; for (reg = 0; reg < NUMFREEREGS; reg++) { if (freereg[reg]) { freereg[reg] = 0; return (reg); } } // We have no registers, so we must spill one reg = (spillreg % NUMFREEREGS); spillreg++; fprintf(Outfile, "; spilling reg %d\n", reg); pushreg(reg); return (reg); } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) { fprintf(Outfile, "# error trying to free register %d\n", reg); fatald("Error trying to free register", reg); } // If this was a spilled register, get it back if (spillreg > 0) { spillreg--; reg= (spillreg % NUMFREEREGS); fprintf(Outfile, "; unspilling reg %d\n", reg); popreg(reg); } else { freereg[reg] = 1; } } // Spill all registers on the stack void spill_all_regs(void) { int i; for (i = 0; i < NUMFREEREGS; i++) pushreg(i); } // Unspill all registers from the stack static void unspill_all_regs(void) { int i; for (i = NUMFREEREGS-1; i >= 0; i--) popreg(i); } // Print out the assembly preamble void cgpreamble() { freeall_registers(NOREG); fputs("\textern\tprintint\n", Outfile); fputs("\textern\tprintchar\n", Outfile); fputs("\textern\topen\n", Outfile); fputs("\textern\tclose\n", Outfile); fputs("\textern\tread\n", Outfile); fputs("\textern\twrite\n", Outfile); fputs("\textern\tprintf\n", Outfile); cgtextseg(); fprintf(Outfile, "; internal switch(expr) routine\n" "; rsi = switch table, rax = expr\n" "; from SubC: http://www.t3x.org/subc/\n" "\n" "switch:\n" " push rsi\n" " mov rsi, rdx\n" " mov rbx, rax\n" " cld\n" " lodsq\n" " mov rcx, rax\n" "next:\n" " lodsq\n" " mov rdx, rax\n" " lodsq\n" " cmp rbx, rdx\n" " jnz no\n" " pop rsi\n" " jmp rax\n" "no:\n" " loop next\n" " lodsq\n" " pop rsi\n" " jmp rax\n" "\n"); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(struct symtable *sym) { char *name = sym->name; struct symtable *parm, *locvar; int cnt; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the rsp and rbp if (sym->class == C_GLOBAL) fprintf(Outfile, "\tglobal\t%s\n", name); fprintf(Outfile, "%s:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n", 
name); // Copy any in-register parameters to the stack, up to six of them // The remaining parameters are already on the stack for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) { if (cnt > 6) { parm->st_posn = paramOffset; paramOffset += 8; } else { parm->st_posn = newlocaloffset(parm->type); cgstorlocal(paramReg--, parm); } } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) { locvar->st_posn = newlocaloffset(locvar->type); } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\tadd\trsp, %d\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->st_endlabel); fprintf(Outfile, "\tadd\trsp, %d\n", stackOffset); fputs("\tpop rbp\n" "\tret\n", Outfile); freeall_registers(NOREG); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. 
int cgloadglob(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); } else // Print out the code to initialise it switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tdword [%s]\n", sym->name); fprintf(Outfile, "\tmovsx\t%s, word [%s]\n", dreglist[r], sym->name); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword [%s]\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. 
int cgloadlocal(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->st_posn); fprintf(Outfile, "\tmov\t%s, [rbp+%d]\n", reglist[r], sym->st_posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->st_posn); } else switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->st_posn); fprintf(Outfile, "\tmovzx\t%s, byte [rbp+%d]\n", reglist[r], sym->st_posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->st_posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", sym->st_posn); fprintf(Outfile, "\tmovsx\t%s, dword [rbp+%d]\n", reglist[r], sym->st_posn); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", sym->st_posn); break; default: fatald("Bad type in cgloadlocal:", sym->type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, L%d\n", reglist[r], label); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r1], reglist[r2]); 
free_register(r2); return (r1); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tand\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } int cgor(int r1, int r2) { fprintf(Outfile, "\tor\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txor\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshl\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshr\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tneg\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnot\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r], breglist[r]); return (r); } // Load a 
boolean value (only 0 or 1) // into the given register void cgloadboolean(int r, int val) { fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], val); } // Convert an integer value to a boolean value. Jump if // it's an IF, WHILE, LOGAND or LOGOR operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); switch(op) { case A_IF: case A_WHILE: case A_LOGAND: fprintf(Outfile, "\tje\tL%d\n", label); break; case A_LOGOR: fprintf(Outfile, "\tjne\tL%d\n", label); break; default: fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, byte %s\n", reglist[r], breglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { int outr; // Call the function fprintf(Outfile, "\tcall\t%s\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\tadd\trsp, %d\n", 8 * (numargs - 6)); // Unspill all the registers unspill_all_regs(); // Get a new register and copy the return value into it outr = alloc_register(); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpush\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmov\t%s, %s\n", reglist[FIRSTPARAMREG - argposn + 1], reglist[r]); } free_register(r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsal\t%s, %d\n", reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, dreglist[r]); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\tqword\t[rbp+%d], %s\n", sym->st_posn, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\tbyte\t[rbp+%d], %s\n", sym->st_posn, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\tdword\t[rbp+%d], %s\n", sym->st_posn, dreglist[r]); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size, type; int initvalue; int i; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the variable (or its elements if an array) // and the type of the variable if (node->stype == S_ARRAY) { size = typesize(value_at(node->type), node->ctype); type = value_at(node->type); } else { size = node->size; type = node->type; } // Generate the global identity and the label cgdataseg(); if (node->class == C_GLOBAL) fprintf(Outfile, 
"\tglobal\t%s\n", node->name); fprintf(Outfile, "%s:\n", node->name); // Output space for one or more elements for (i = 0; i < node->nelems; i++) { // Get any initial value initvalue = 0; if (node->initlist != NULL) initvalue = node->initlist[i]; // Generate the space for this type // original version switch(size) { case 1: fprintf(Outfile, "\tdb\t%d\n", initvalue); break; case 4: fprintf(Outfile, "\tdd\t%d\n", initvalue); break; case 8: // Generate the pointer to a string literal. Treat a zero value // as actually zero, not the label L0 if (node->initlist != NULL && type == pointer_to(P_CHAR) && initvalue != 0) fprintf(Outfile, "\tdq\tL%d\n", initvalue); else fprintf(Outfile, "\tdq\t%d\n", initvalue); break; default: for (i = 0; i < size; i++) fprintf(Outfile, "\tdb\t0\n"); /* compact version using times instead of loop fprintf(Outfile, "\ttimes\t%d\tdb\t0\n", size); */ } } } // Generate a global string and its start label // Don't output the label if append is true. void cgglobstr(int l, char *strvalue, int append) { char *cptr; if (!append) cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\tdb\t%d\n", *cptr); } } void cgglobstrend(void) { fprintf(Outfile, "\tdb\t0\n"); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(NOREG); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Only return a value if we have a value to return if (reg != NOREG) { // Deal with pointers here as we can't put them in // the switch statement if (ptrtype(sym->type)) fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); else { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzx\teax, %s\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmov\teax, %s\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } } 
} cgjump(sym->st_endlabel); } // Generate code to load the address of an // identifier into a variable. Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL || sym->class == C_STATIC) fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r], sym->name); else fprintf(Outfile, "\tlea\t%s, [rbp+%d]\n", reglist[r], sym->st_posn); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], reglist[r]); break; case 4: fprintf(Outfile, "\tmovsx\t%s, dword [%s]\n", reglist[r], reglist[r]); break; case 8: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { //Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmov\t[%s], byte %s\n", reglist[r2], breglist[r1]); break; case 2: case 4: case 8: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } // Generate a switch jump table and the code to // load the registers and call the switch() code void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; // Get a label for the switch table label = genlabel(); cglabel(label); // Heuristic. If we have no cases, create one case // which points to the default case if (casecount == 0) { caseval[0] = 0; caselabel[0] = defaultlabel; casecount = 1; } // Generate the switch jump table. 
fprintf(Outfile, "\tdq\t%d\n", casecount); for (i = 0; i < casecount; i++) fprintf(Outfile, "\tdq\t%d, L%d\n", caseval[i], caselabel[i]); fprintf(Outfile, "\tdq\tL%d\n", defaultlabel); // Load the specific registers cglabel(toplabel); fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); fprintf(Outfile, "\tmov\trdx, L%d\n", label); fprintf(Outfile, "\tjmp\tswitch\n"); } // Move value between registers void cgmove(int r1, int r2) { fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r2], reglist[r1]); } ================================================ FILE: 54_Reg_Spills/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Linestart; // True if at start of a line extern_ int Putback; // Character put back by scanner extern_ struct symtable *Functionid; // Symbol ptr of the current function extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ char *Infilename; // Name of file we are parsing extern_ char *Outfilename; // Name of file we opened as Outfile extern_ struct token Token; // Last token scanned extern_ struct token Peektoken; // A look-ahead token extern_ char Text[TEXTLEN + 1]; // Last identifier scanned extern_ int Looplevel; // Depth of nested loops extern_ int Switchlevel; // Depth of nested switches extern char *Tstring[]; // List of token strings // Symbol table lists extern_ struct symtable *Globhead, *Globtail; // Global variables and functions extern_ struct symtable *Loclhead, *Locltail; // Local variables extern_ struct symtable *Parmhead, *Parmtail; // Local parameters extern_ struct symtable *Membhead, *Membtail; // Temp list of struct/union members extern_ struct symtable *Structhead, *Structtail; // List of struct types extern_ struct symtable *Unionhead, *Uniontail; // List of union types extern_ struct symtable *Enumhead, *Enumtail; // List of enum types and values 
extern_ struct symtable *Typehead, *Typetail; // List of typedefs // Command-line flags extern_ int O_dumpAST; // If true, dump the AST trees extern_ int O_dumpsym; // If true, dump the symbol table extern_ int O_keepasm; // If true, keep any assembly files extern_ int O_assemble; // If true, assemble the assembly files extern_ int O_dolink; // If true, link the object files extern_ int O_verbose; // If true, print info on compilation stages ================================================ FILE: 54_Reg_Spills/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 static struct symtable *composite_declaration(int type); static int typedef_declaration(struct symtable **ctype); static int type_of_typedef(char *name, struct symtable **ctype); static void enum_declaration(void); // Parse the current token and return a primitive type enum value, // a pointer to any composite type and possibly modify // the class of the type. int parse_type(struct symtable **ctype, int *class) { int type, exstatic = 1; // See if the class has been changed to extern or static while (exstatic) { switch (Token.token) { case T_EXTERN: if (*class == C_STATIC) fatal("Illegal to have extern and static at the same time"); *class = C_EXTERN; scan(&Token); break; case T_STATIC: if (*class == C_LOCAL) fatal("Compiler doesn't support static local declarations"); if (*class == C_EXTERN) fatal("Illegal to have extern and static at the same time"); *class = C_STATIC; scan(&Token); break; default: exstatic = 0; } } // Now work on the actual type keyword switch (Token.token) { case T_VOID: type = P_VOID; scan(&Token); break; case T_CHAR: type = P_CHAR; scan(&Token); break; case T_INT: type = P_INT; scan(&Token); break; case T_LONG: type = P_LONG; scan(&Token); break; // For the following, if we have a ';' after the // parsing then there is no type, so return -1. 
// Example: struct x {int y; int z}; case T_STRUCT: type = P_STRUCT; *ctype = composite_declaration(P_STRUCT); if (Token.token == T_SEMI) type = -1; break; case T_UNION: type = P_UNION; *ctype = composite_declaration(P_UNION); if (Token.token == T_SEMI) type = -1; break; case T_ENUM: type = P_INT; // Enums are really ints enum_declaration(); if (Token.token == T_SEMI) type = -1; break; case T_TYPEDEF: type = typedef_declaration(ctype); if (Token.token == T_SEMI) type = -1; break; case T_IDENT: type = type_of_typedef(Text, ctype); break; default: fatals("Illegal type, token", Token.tokstr); } return (type); } // Given a type parsed by parse_type(), scan in any following // '*' tokens and return the new type int parse_stars(int type) { while (1) { if (Token.token != T_STAR) break; type = pointer_to(type); scan(&Token); } return (type); } // Parse a type which appears inside a cast int parse_cast(struct symtable **ctype) { int type, class = 0; // Get the type inside the parentheses type = parse_stars(parse_type(ctype, &class)); // Do some error checking. I'm sure more can be done if (type == P_STRUCT || type == P_UNION || type == P_VOID) fatal("Cannot cast to a struct, union or void type"); return (type); } // Given a type, parse an expression of literals and ensure // that the type of this expression matches the given type. // Parse any type cast that precedes the expression. // If an integer literal, return this value. // If a string literal, return the label number of the string. 
int parse_literal(int type) { struct ASTnode *tree; // Parse the expression and optimise the resulting AST tree tree = optimise(binexpr(0)); // If there's a cast, get the child and // mark it as having the type from the cast if (tree->op == A_CAST) { tree->left->type = tree->type; tree = tree->left; } // The tree must now have an integer or string literal if (tree->op != A_INTLIT && tree->op != A_STRLIT) fatal("Cannot initialise globals with a general expression"); // If the type is char * and if (type == pointer_to(P_CHAR)) { // We have a string literal, return the label number if (tree->op == A_STRLIT) return (tree->a_intvalue); // We have a zero int literal, so that's a NULL if (tree->op == A_INTLIT && tree->a_intvalue == 0) return (0); } // We only get here with an integer literal. The input type // is an integer type and is wide enough to hold the literal value if (inttype(type) && typesize(type, NULL) >= typesize(tree->type, NULL)) return (tree->a_intvalue); fatal("Type mismatch: literal vs. variable"); return (0); // Keep -Wall happy } // Given a pointer to a symbol that may already exist // return true if this symbol doesn't exist. 
We use // this function to convert externs into globals int is_new_symbol(struct symtable *sym, int class, int type, struct symtable *ctype) { // There is no existing symbol, thus is new if (sym==NULL) return(1); // global versus extern: if they match that it's not new // and we can convert the class to global if ((sym->class== C_GLOBAL && class== C_EXTERN) || (sym->class== C_EXTERN && class== C_GLOBAL)) { // If the types don't match, there's a problem if (type != sym->type) fatals("Type mismatch between global/extern", sym->name); // Struct/unions, also compare the ctype if (type >= P_STRUCT && ctype != sym->ctype) fatals("Type mismatch between global/extern", sym->name); // If we get to here, the types match, so mark the symbol // as global sym->class= C_GLOBAL; // Return that symbol is not new return(0); } // It must be a duplicate symbol if we get here fatals("Duplicate global variable declaration", sym->name); return(-1); // Keep -Wall happy } // Given the type, name and class of a scalar variable, // parse any initialisation value and allocate storage for it. // Return the variable's symbol table entry. 
static struct symtable *scalar_declaration(char *varname, int type, struct symtable *ctype, int class, struct ASTnode **tree) { struct symtable *sym = NULL; struct ASTnode *varnode, *exprnode; *tree = NULL; // Add this as a known scalar switch (class) { case C_STATIC: case C_EXTERN: case C_GLOBAL: // See if this variable is new or already exists sym= findglob(varname); if (is_new_symbol(sym, class, type, ctype)) sym = addglob(varname, type, ctype, S_VARIABLE, class, 1, 0); break; case C_LOCAL: sym = addlocl(varname, type, ctype, S_VARIABLE, 1); break; case C_PARAM: sym = addparm(varname, type, ctype, S_VARIABLE); break; case C_MEMBER: sym = addmemb(varname, type, ctype, S_VARIABLE, 1); break; } // The variable is being initialised if (Token.token == T_ASSIGN) { // Only possible for a global or local if (class != C_GLOBAL && class != C_LOCAL && class != C_STATIC) fatals("Variable can not be initialised", varname); scan(&Token); // Globals must be assigned a literal value if (class == C_GLOBAL || class == C_STATIC) { // Create one initial value for the variable and // parse this value sym->initlist = (int *) malloc(sizeof(int)); sym->initlist[0] = parse_literal(type); } if (class == C_LOCAL) { // Make an A_IDENT AST node with the variable varnode = mkastleaf(A_IDENT, sym->type, sym->ctype, sym, 0); // Get the expression for the assignment, make into a rvalue exprnode = binexpr(0); exprnode->rvalue = 1; // Ensure the expression's type matches the variable exprnode = modify_type(exprnode, varnode->type, varnode->ctype, 0); if (exprnode == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree *tree = mkastnode(A_ASSIGN, exprnode->type, exprnode->ctype, exprnode, NULL, varnode, NULL, 0); } } // Generate any global space if (class == C_GLOBAL || class == C_STATIC) genglobsym(sym); return (sym); } // Given the type, name and class of an variable, parse // the size of the array, if any. 
Then parse any initialisation // value and allocate storage for it. // Return the variable's symbol table entry. static struct symtable *array_declaration(char *varname, int type, struct symtable *ctype, int class) { struct symtable *sym; // New symbol table entry int nelems = -1; // Assume the number of elements won't be given int maxelems; // The maximum number of elements in the init list int *initlist; // The list of initial elements int i = 0, j; // Skip past the '[' scan(&Token); // See we have an array size if (Token.token != T_RBRACKET) { nelems = parse_literal(P_INT); if (nelems <= 0) fatald("Array size is illegal", nelems); } // Ensure we have a following ']' match(T_RBRACKET, "]"); // Add this as a known array. We treat the // array as a pointer to its elements' type switch (class) { case C_STATIC: case C_EXTERN: case C_GLOBAL: // See if this variable is new or already exists sym= findglob(varname); if (is_new_symbol(sym, class, pointer_to(type), ctype)) sym = addglob(varname, pointer_to(type), ctype, S_ARRAY, class, 0, 0); break; default: fatal("For now, declaration of non-global arrays is not implemented"); } // Array initialisation if (Token.token == T_ASSIGN) { if (class != C_GLOBAL && class != C_STATIC) fatals("Variable can not be initialised", varname); scan(&Token); // Get the following left curly bracket match(T_LBRACE, "{"); #define TABLE_INCREMENT 10 // If the array already has nelems, allocate that many elements // in the list. Otherwise, start with TABLE_INCREMENT. 
if (nelems != -1) maxelems = nelems; else maxelems = TABLE_INCREMENT; initlist = (int *) malloc(maxelems * sizeof(int)); // Loop getting a new literal value from the list while (1) { // Check we can add the next value, then parse and add it if (nelems != -1 && i == maxelems) fatal("Too many values in initialisation list"); initlist[i++] = parse_literal(type); // Increase the list size if the original size was // not set and we have hit the end of the current list if (nelems == -1 && i == maxelems) { maxelems += TABLE_INCREMENT; initlist = (int *) realloc(initlist, maxelems * sizeof(int)); } // Leave when we hit the right curly bracket if (Token.token == T_RBRACE) { scan(&Token); break; } // Next token must be a comma, then comma(); } // Zero any unused elements in the initlist. // Attach the list to the symbol table entry for (j = i; j < sym->nelems; j++) initlist[j] = 0; if (i > nelems) nelems = i; sym->initlist = initlist; } // Set the size of the array and the number of elements sym->nelems = nelems; sym->size = sym->nelems * typesize(type, ctype); // Generate any global space if (class == C_GLOBAL || class == C_STATIC) genglobsym(sym); return (sym); } // Given a pointer to the new function being declared and // a possibly NULL pointer to the function's previous declaration, // parse a list of parameters and cross-check them against the // previous declaration. Return the count of parameters static int param_declaration_list(struct symtable *oldfuncsym, struct symtable *newfuncsym) { int type, paramcnt = 0; struct symtable *ctype; struct symtable *protoptr = NULL; struct ASTnode *unused; // Get the pointer to the first prototype parameter if (oldfuncsym != NULL) protoptr = oldfuncsym->member; // Loop getting any parameters while (Token.token != T_RPAREN) { // If the first token is 'void' if (Token.token == T_VOID) { // Peek at the next token. If a ')', the function // has no parameters, so leave the loop. 
scan(&Peektoken); if (Peektoken.token == T_RPAREN) { // Move the Peektoken into the Token paramcnt = 0; scan(&Token); break; } } // Get the type of the next parameter type = declaration_list(&ctype, C_PARAM, T_COMMA, T_RPAREN, &unused); if (type == -1) fatal("Bad type in parameter list"); // Ensure the type of this parameter matches the prototype if (protoptr != NULL) { if (type != protoptr->type) fatald("Type doesn't match prototype for parameter", paramcnt + 1); protoptr = protoptr->next; } paramcnt++; // Stop when we hit the right parenthesis if (Token.token == T_RPAREN) break; // We need a comma as separator comma(); } if (oldfuncsym != NULL && paramcnt != oldfuncsym->nelems) fatals("Parameter count mismatch for function", oldfuncsym->name); // Return the count of parameters return (paramcnt); } // // function_declaration: type identifier '(' parameter_list ')' ; // | type identifier '(' parameter_list ')' compound_statement ; // // Parse the declaration of function. static struct symtable *function_declaration(char *funcname, int type, struct symtable *ctype, int class) { struct ASTnode *tree, *finalstmt; struct symtable *oldfuncsym, *newfuncsym = NULL; int endlabel, paramcnt; // Text has the identifier's name. If this exists and is a // function, get the id. Otherwise, set oldfuncsym to NULL. if ((oldfuncsym = findsymbol(funcname)) != NULL) if (oldfuncsym->stype != S_FUNCTION) oldfuncsym = NULL; // If this is a new function declaration, get a // label-id for the end label, and add the function // to the symbol table, if (oldfuncsym == NULL) { endlabel = genlabel(); // Assumtion: functions only return scalar types, so NULL below newfuncsym = addglob(funcname, type, NULL, S_FUNCTION, class, 0, endlabel); } // Scan in the '(', any parameters and the ')'. 
// Pass in any existing function prototype pointer lparen(); paramcnt = param_declaration_list(oldfuncsym, newfuncsym); rparen(); // If this is a new function declaration, update the // function symbol entry with the number of parameters. // Also copy the parameter list into the function's node. if (newfuncsym) { newfuncsym->nelems = paramcnt; newfuncsym->member = Parmhead; oldfuncsym = newfuncsym; } // Clear out the parameter list Parmhead = Parmtail = NULL; // Declaration ends in a semicolon, only a prototype. if (Token.token == T_SEMI) return (oldfuncsym); // This is not just a prototype. // Set the Functionid global to the function's symbol pointer Functionid = oldfuncsym; // Get the AST tree for the compound statement and mark // that we have parsed no loops or switches yet Looplevel = 0; Switchlevel = 0; lbrace(); tree = compound_statement(0); rbrace(); // If the function type isn't P_VOID .. if (type != P_VOID) { // Error if no statements in the function if (tree == NULL) fatal("No statements in function with non-void type"); // Check that the last AST operation in the // compound statement was a return statement finalstmt = (tree->op == A_GLUE) ? tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); } // Build the A_FUNCTION node which has the function's symbol pointer // and the compound statement sub-tree tree = mkastunary(A_FUNCTION, type, ctype, tree, oldfuncsym, endlabel); // Do optimisations on the AST tree tree = optimise(tree); // Dump the AST tree if requested if (O_dumpAST) { dumpAST(tree, NOLABEL, 0); fprintf(stdout, "\n\n"); } // Generate the assembly code for it genAST(tree, NOLABEL, NOLABEL, NOLABEL, 0); // Now free the symbols associated // with this function freeloclsyms(); return (oldfuncsym); } // Parse composite type declarations: structs or unions. // Either find an existing struct/union declaration, or build // a struct/union symbol table entry and return its pointer. 
static struct symtable *composite_declaration(int type) { struct symtable *ctype = NULL; struct symtable *m; struct ASTnode *unused; int offset; int t; // Skip the struct/union keyword scan(&Token); // See if there is a following struct/union name if (Token.token == T_IDENT) { // Find any matching composite type if (type == P_STRUCT) ctype = findstruct(Text); else ctype = findunion(Text); scan(&Token); } // If the next token isn't an LBRACE , this is // the usage of an existing struct/union type. // Return the pointer to the type. if (Token.token != T_LBRACE) { if (ctype == NULL) fatals("unknown struct/union type", Text); return (ctype); } // Ensure this struct/union type hasn't been // previously defined if (ctype) fatals("previously defined struct/union", Text); // Build the composite type and skip the left brace if (type == P_STRUCT) ctype = addstruct(Text); else ctype = addunion(Text); scan(&Token); // Scan in the list of members while (1) { // Get the next member. m is used as a dummy t = declaration_list(&m, C_MEMBER, T_SEMI, T_RBRACE, &unused); if (t == -1) fatal("Bad type in member list"); if (Token.token == T_SEMI) scan(&Token); if (Token.token == T_RBRACE) break; } // Attach to the struct type's node rbrace(); if (Membhead == NULL) fatals("No members in struct", ctype->name); ctype->member = Membhead; Membhead = Membtail = NULL; // Set the offset of the initial member // and find the first free byte after it m = ctype->member; m->st_posn = 0; offset = typesize(m->type, m->ctype); // Set the position of each successive member in the composite type // Unions are easy. 
For structs, align the member and find the next free byte for (m = m->next; m != NULL; m = m->next) { // Set the offset for this member if (type == P_STRUCT) m->st_posn = genalign(m->type, offset, 1); else m->st_posn = 0; // Get the offset of the next free byte after this member offset += typesize(m->type, m->ctype); } // Set the overall size of the composite type ctype->size = offset; return (ctype); } // Parse an enum declaration static void enum_declaration(void) { struct symtable *etype = NULL; char *name = NULL; int intval = 0; // Skip the enum keyword. scan(&Token); // If there's a following enum type name, get a // pointer to any existing enum type node. if (Token.token == T_IDENT) { etype = findenumtype(Text); name = strdup(Text); // As it gets tromped soon scan(&Token); } // If the next token isn't a LBRACE, check // that we have an enum type name, then return if (Token.token != T_LBRACE) { if (etype == NULL) fatals("undeclared enum type:", name); return; } // We do have an LBRACE. Skip it scan(&Token); // If we have an enum type name, ensure that it // hasn't been declared before. if (etype != NULL) fatals("enum type redeclared:", etype->name); else // Build an enum type node for this identifier etype = addenum(name, C_ENUMTYPE, 0); // Loop to get all the enum values while (1) { // Ensure we have an identifier // Copy it in case there's an int literal coming up ident(); name = strdup(Text); // Ensure this enum value hasn't been declared before etype = findenumval(name); if (etype != NULL) fatals("enum value redeclared:", Text); // If the next token is an '=', skip it and // get the following int literal if (Token.token == T_ASSIGN) { scan(&Token); if (Token.token != T_INTLIT) fatal("Expected int literal after '='"); intval = Token.intvalue; scan(&Token); } // Build an enum value node for this identifier. // Increment the value for the next enum identifier. 
etype = addenum(name, C_ENUMVAL, intval++); // Bail out on a right curly bracket, else get a comma if (Token.token == T_RBRACE) break; comma(); } scan(&Token); // Skip over the right curly bracket } // Parse a typedef declaration and return the type // and ctype that it represents static int typedef_declaration(struct symtable **ctype) { int type, class = 0; // Skip the typedef keyword. scan(&Token); // Get the actual type following the keyword type = parse_type(ctype, &class); if (class != 0) fatal("Can't have extern in a typedef declaration"); // See if the typedef identifier already exists if (findtypedef(Text) != NULL) fatals("redefinition of typedef", Text); // Get any following '*' tokens type = parse_stars(type); // It doesn't exist so add it to the typedef list addtypedef(Text, type, *ctype); scan(&Token); return (type); } // Given a typedef name, return the type it represents static int type_of_typedef(char *name, struct symtable **ctype) { struct symtable *t; // Look up the typedef in the list t = findtypedef(name); if (t == NULL) fatals("unknown type", name); scan(&Token); *ctype = t->ctype; return (t->type); } // Parse the declaration of a variable or function. // The type and any following '*'s have been scanned, and we // have the identifier in the Token variable. // The class argument is the variable's class. // Return a pointer to the symbol's entry in the symbol table static struct symtable *symbol_declaration(int type, struct symtable *ctype, int class, struct ASTnode **tree) { struct symtable *sym = NULL; char *varname = strdup(Text); // Ensure that we have an identifier. // We copied it above so we can scan more tokens in, e.g. // an assignment expression for a local variable. 
ident(); // Deal with function declarations if (Token.token == T_LPAREN) { return (function_declaration(varname, type, ctype, class)); } // See if this array or scalar variable has already been declared switch (class) { case C_EXTERN: case C_STATIC: case C_GLOBAL: case C_LOCAL: case C_PARAM: if (findlocl(varname) != NULL) fatals("Duplicate local variable declaration", varname); case C_MEMBER: if (findmember(varname) != NULL) fatals("Duplicate struct/union member declaration", varname); } // Add the array or scalar variable to the symbol table if (Token.token == T_LBRACKET) sym = array_declaration(varname, type, ctype, class); else sym = scalar_declaration(varname, type, ctype, class, tree); return (sym); } // Parse a list of symbols where there is an initial type. // Return the type of the symbols. et1 and et2 are end tokens. int declaration_list(struct symtable **ctype, int class, int et1, int et2, struct ASTnode **gluetree) { int inittype, type; struct symtable *sym; struct ASTnode *tree; *gluetree = NULL; // Get the initial type. 
If -1, it was // a composite type definition, return this if ((inittype = parse_type(ctype, &class)) == -1) return (inittype); // Now parse the list of symbols while (1) { // See if this symbol is a pointer type = parse_stars(inittype); // Parse this symbol sym = symbol_declaration(type, *ctype, class, &tree); // We parsed a function, there is no list so leave if (sym->stype == S_FUNCTION) { if (class != C_GLOBAL && class != C_STATIC) fatal("Function definition not at global level"); return (type); } // Glue any AST tree from a local declaration // to build a sequence of assignments to perform if (*gluetree == NULL) *gluetree = tree; else *gluetree = mkastnode(A_GLUE, P_NONE, NULL, *gluetree, NULL, tree, NULL, 0); // We are at the end of the list, leave if (Token.token == et1 || Token.token == et2) return (type); // Otherwise, we need a comma as separator comma(); } return(0); // Keep -Wall happy } // Parse one or more global declarations, either // variables, functions or structs void global_declarations(void) { struct symtable *ctype= NULL; struct ASTnode *unused; while (Token.token != T_EOF) { declaration_list(&ctype, C_GLOBAL, T_SEMI, T_EOF, &unused); // Skip any semicolons and right curly brackets if (Token.token == T_SEMI) scan(&Token); } } ================================================ FILE: 54_Reg_Spills/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c void reject_token(struct token *t); int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, int type, struct symtable *ctype, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue); struct ASTnode *mkastleaf(int op, int type, struct symtable *ctype, struct symtable *sym, int intvalue); struct ASTnode *mkastunary(int op, int type, struct symtable *ctype, struct ASTnode *left, struct symtable *sym, int intvalue); void dumpAST(struct ASTnode *n, 
int label, int level); // gen.c int genlabel(void); int genAST(struct ASTnode *n, int iflabel, int looptoplabel, int loopendlabel, int parentASTop); void genpreamble(); void genpostamble(); void genfreeregs(int keepreg); void genglobsym(struct symtable *node); int genglobstr(char *strvalue, int append); void genglobstrend(void); int genprimsize(int type); int genalign(int type, int offset, int direction); void genreturn(int reg, int id); // cg.c int cgprimsize(int type); int cgalign(int type, int offset, int direction); void cgtextseg(); void cgdataseg(); int alloc_register(void); void freeall_registers(int keepreg); void spill_all_regs(void); void cgpreamble(); void cgpostamble(); void cgfuncpreamble(struct symtable *sym); void cgfuncpostamble(struct symtable *sym); int cgloadint(int value, int type); int cgloadglob(struct symtable *sym, int op); int cgloadlocal(struct symtable *sym, int op); int cgloadglobstr(int label); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdiv(int r1, int r2); int cgshlconst(int r, int val); int cgcall(struct symtable *sym, int numargs); void cgcopyarg(int r, int argposn); int cgstorglob(int r, struct symtable *sym); int cgstorlocal(int r, struct symtable *sym); void cgglobsym(struct symtable *node); void cgglobstr(int l, char *strvalue, int append); void cgglobstrend(void); int cgcompare_and_set(int ASTop, int r1, int r2); int cgcompare_and_jump(int ASTop, int r1, int r2, int label); void cglabel(int l); void cgjump(int l); int cgwiden(int r, int oldtype, int newtype); void cgreturn(int reg, struct symtable *sym); int cgaddress(struct symtable *sym); int cgderef(int r, int type); int cgstorderef(int r1, int r2, int type); int cgnegate(int r); int cginvert(int r); int cglognot(int r); void cgloadboolean(int r, int val); int cgboolean(int r, int op, int label); int cgand(int r1, int r2); int cgor(int r1, int r2); int cgxor(int r1, int r2); int cgshl(int r1, int r2); int cgshr(int r1, int r2); void 
cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel); void cgmove(int r1, int r2); // expr.c struct ASTnode *expression_list(int endtoken); struct ASTnode *binexpr(int ptp); // stmt.c struct ASTnode *compound_statement(int inswitch); // misc.c void match(int t, char *what); void semi(void); void lbrace(void); void rbrace(void); void lparen(void); void rparen(void); void ident(void); void comma(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node); struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn); struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn); struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int nelems); struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype); struct symtable *addstruct(char *name); struct symtable *addunion(char *name); struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int nelems); struct symtable *addenum(char *name, int class, int value); struct symtable *addtypedef(char *name, int type, struct symtable *ctype); struct symtable *findglob(char *s); struct symtable *findlocl(char *s); struct symtable *findsymbol(char *s); struct symtable *findmember(char *s); struct symtable *findstruct(char *s); struct symtable *findunion(char *s); struct symtable *findenumtype(char *s); struct symtable *findenumval(char *s); struct symtable *findtypedef(char *s); void clear_symtable(void); void freeloclsyms(void); void freestaticsyms(void); void dumptable(struct symtable *head, char *name, int indent); void dumpsymtables(void); // decl.c int parse_type(struct symtable **ctype, int *class); int parse_stars(int type); int parse_cast(struct 
symtable **ctype); int declaration_list(struct symtable **ctype, int class, int et1, int et2, struct ASTnode **gluetree); void global_declarations(void); // types.c int inttype(int type); int ptrtype(int type); int pointer_to(int type); int value_at(int type); int typesize(int type, struct symtable *ctype); struct ASTnode *modify_type(struct ASTnode *tree, int rtype, struct symtable *rctype, int op); // opt.c struct ASTnode *optimise(struct ASTnode *n); ================================================ FILE: 54_Reg_Spills/defs.h ================================================ #include #include #include #include // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 enum { TEXTLEN = 512 // Length of identifiers in input }; // Commands and default filenames #define AOUT "a.out" #ifdef __NASM__ #define ASCMD "nasm -f elf64 -w-ptr -o " #define LDCMD "cc -no-pie -fno-plt -Wall -o " #else #define ASCMD "as -o " #define LDCMD "cc -o " #endif #define CPPCMD "cpp -nostdinc -isystem " // Token types enum { T_EOF, // Binary operators T_ASSIGN, T_ASPLUS, T_ASMINUS, T_ASSTAR, T_ASSLASH, T_QUESTION, T_LOGOR, T_LOGAND, T_OR, T_XOR, T_AMPER, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, T_LSHIFT, T_RSHIFT, T_PLUS, T_MINUS, T_STAR, T_SLASH, // Other operators T_INC, T_DEC, T_INVERT, T_LOGNOT, // Type keywords T_VOID, T_CHAR, T_INT, T_LONG, // Other keywords T_IF, T_ELSE, T_WHILE, T_FOR, T_RETURN, T_STRUCT, T_UNION, T_ENUM, T_TYPEDEF, T_EXTERN, T_BREAK, T_CONTINUE, T_SWITCH, T_CASE, T_DEFAULT, T_SIZEOF, T_STATIC, // Structural tokens T_INTLIT, T_STRLIT, T_SEMI, T_IDENT, T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, T_LBRACKET, T_RBRACKET, T_COMMA, T_DOT, T_ARROW, T_COLON }; // Token structure struct token { int token; // Token type, from the enum list above char *tokstr; // String version of the token int intvalue; // For T_INTLIT, the integer value }; // AST node types. 
// The first few line up
// with the related tokens: A_ASSIGN .. A_DIVIDE carry the same
// values as T_ASSIGN .. T_SLASH, so do not reorder either list.
// The trailing number on each row is the value of its first entry.
enum {
  A_ASSIGN = 1, A_ASPLUS, A_ASMINUS, A_ASSTAR, A_ASSLASH,	// 1
  A_TERNARY, A_LOGOR, A_LOGAND, A_OR, A_XOR, A_AND,		// 6
  A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_LSHIFT, A_RSHIFT,	// 12
  A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE,			// 20
  A_INTLIT, A_STRLIT, A_IDENT, A_GLUE,				// 24
  A_IF, A_WHILE, A_FUNCTION, A_WIDEN, A_RETURN,			// 28
  A_FUNCCALL, A_DEREF, A_ADDR, A_SCALE,				// 33
  A_PREINC, A_PREDEC, A_POSTINC, A_POSTDEC,			// 37
  A_NEGATE, A_INVERT, A_LOGNOT, A_TOBOOL, A_BREAK,		// 41
  A_CONTINUE, A_SWITCH, A_CASE, A_DEFAULT, A_CAST		// 46
};

// Primitive types. The bottom 4 bits is an integer
// value that represents the level of indirection,
// e.g. 0= no pointer, 1= pointer, 2= pointer pointer etc.
// The base types are therefore spaced 16 apart.
enum {
  P_NONE, P_VOID = 16, P_CHAR = 32, P_INT = 48, P_LONG = 64,
  P_STRUCT=80, P_UNION=96
};

// Structural types
enum {
  S_VARIABLE, S_FUNCTION, S_ARRAY
};

// Storage classes
enum {
  C_GLOBAL = 1,			// Globally visible symbol
  C_LOCAL,			// Locally visible symbol
  C_PARAM,			// Locally visible function parameter
  C_EXTERN,			// External globally visible symbol
  C_STATIC,			// Static symbol, visible in one file
  C_STRUCT,			// A struct
  C_UNION,			// A union
  C_MEMBER,			// Member of a struct or union
  C_ENUMTYPE,			// A named enumeration type
  C_ENUMVAL,			// A named enumeration value
  C_TYPEDEF			// A named typedef
};

// Symbol table structure. Nodes are chained through 'next' into
// singly-linked lists.
struct symtable {
  char *name;			// Name of a symbol
  int type;			// Primitive type for the symbol
  struct symtable *ctype;	// If struct/union, ptr to that type
  int stype;			// Structural type for the symbol
  int class;			// Storage class for the symbol
  int size;			// Total size in bytes of this symbol
  int nelems;			// Functions: # params. Arrays: # elements
#define st_endlabel st_posn	// For functions, the end label
  int st_posn;			// For locals, the negative offset
				// from the stack base pointer.
				// The parser also reads it as a struct/union
				// member's offset and as an enum value's
				// integer value.
  int *initlist;		// List of initial values
  struct symtable *next;	// Next symbol in one list
  struct symtable *member;	// First member of a function, struct,
};				// union or enum

// Abstract Syntax Tree structure
struct ASTnode {
  int op;			// "Operation" to be performed on this tree
  int type;			// Type of any expression this tree generates
  struct symtable *ctype;	// If struct/union, ptr to that type
  int rvalue;			// True if the node is an rvalue
  struct ASTnode *left;		// Left, middle and right child trees
  struct ASTnode *mid;
  struct ASTnode *right;
  struct symtable *sym;		// For many AST nodes, the pointer to
				// the symbol in the symbol table
#define a_intvalue a_size	// For A_INTLIT, the integer value
  int a_size;			// For A_SCALE, the size to scale by
};

enum {
  NOREG = -1,			// Use NOREG when the AST generation
				// functions have no register to return
  NOLABEL = 0			// Use NOLABEL when we have no label to
				// pass to genAST()
};

================================================
FILE: 54_Reg_Spills/expr.c
================================================
#include "defs.h"
#include "data.h"
#include "decl.h"

// Parsing of expressions
// Copyright (c) 2019 Warren Toomey, GPL3

// expression_list: /* empty */
//        | expression
//        | expression ',' expression_list
//        ;

// Parse a list of zero or more comma-separated expressions and
// return an AST composed of A_GLUE nodes with the left-hand child
// being the sub-tree of previous expressions (or NULL) and the right-hand
// child being the next expression. Each A_GLUE node will have size field
// set to the number of expressions in the tree at this point.
If no // expressions are parsed, NULL is returned struct ASTnode *expression_list(int endtoken) { struct ASTnode *tree = NULL; struct ASTnode *child = NULL; int exprcount = 0; // Loop until the end token while (Token.token != endtoken) { // Parse the next expression and increment the expression count child = binexpr(0); exprcount++; // Build an A_GLUE AST node with the previous tree as the left child // and the new expression as the right child. Store the expression count. tree = mkastnode(A_GLUE, P_NONE, NULL, tree, NULL, child, NULL, exprcount); // Stop when we reach the end token if (Token.token == endtoken) break; // Must have a ',' at this point match(T_COMMA, ","); } // Return the tree of expressions return (tree); } // Parse a function call and return its AST static struct ASTnode *funccall(void) { struct ASTnode *tree; struct symtable *funcptr; // Check that the identifier has been defined as a function, // then make a leaf node for it. if ((funcptr = findsymbol(Text)) == NULL || funcptr->stype != S_FUNCTION) { fatals("Undeclared function", Text); } // Get the '(' lparen(); // Parse the argument expression list tree = expression_list(T_RPAREN); // XXX Check type of each argument against the function's prototype // Build the function call AST node. Store the // function's return type as this node's type. 
// Also record the function's symbol-id tree = mkastunary(A_FUNCCALL, funcptr->type, funcptr->ctype, tree, funcptr, 0); // Get the ')' rparen(); return (tree); } // Parse the index into an array and return an AST tree for it static struct ASTnode *array_access(struct ASTnode *left) { struct ASTnode *right; // Check that the sub-tree is a pointer if (!ptrtype(left->type)) fatal("Not an array or pointer"); // Get the '[' scan(&Token); // Parse the following expression right = binexpr(0); // Get the ']' match(T_RBRACKET, "]"); // Ensure that this is of int type if (!inttype(right->type)) fatal("Array index is not of integer type"); // Make the left tree an rvalue left->rvalue = 1; // Scale the index by the size of the element's type right = modify_type(right, left->type, left->ctype, A_ADD); // Return an AST tree where the array's base has the offset added to it, // and dereference the element. Still an lvalue at this point. left = mkastnode(A_ADD, left->type, left->ctype, left, NULL, right, NULL, 0); left = mkastunary(A_DEREF, value_at(left->type), left->ctype, left, NULL, 0); return (left); } // Parse the member reference of a struct or union // and return an AST tree for it. If withpointer is true, // the access is through a pointer to the member. static struct ASTnode *member_access(struct ASTnode *left, int withpointer) { struct ASTnode *right; struct symtable *typeptr; struct symtable *m; // Check that the left AST tree is a pointer to struct or union if (withpointer && left->type != pointer_to(P_STRUCT) && left->type != pointer_to(P_UNION)) fatal("Expression is not a pointer to a struct/union"); // Or, check that the left AST tree is a struct or union. // If so, change it from an A_IDENT to an A_ADDR so that // we get the base address, not the value at this address. 
if (!withpointer) { if (left->type == P_STRUCT || left->type == P_UNION) left->op = A_ADDR; else fatal("Expression is not a struct/union"); } // Get the details of the composite type typeptr = left->ctype; // Skip the '.' or '->' token and get the member's name scan(&Token); ident(); // Find the matching member's name in the type // Die if we can't find it for (m = typeptr->member; m != NULL; m = m->next) if (!strcmp(m->name, Text)) break; if (m == NULL) fatals("No member found in struct/union: ", Text); // Make the left tree an rvalue left->rvalue = 1; // Build an A_INTLIT node with the offset right = mkastleaf(A_INTLIT, P_INT, NULL, NULL, m->st_posn); // Add the member's offset to the base of the struct/union // and dereference it. Still an lvalue at this point left = mkastnode(A_ADD, pointer_to(m->type), m->ctype, left, NULL, right, NULL, 0); left = mkastunary(A_DEREF, m->type, m->ctype, left, NULL, 0); return (left); } // Parse a parenthesised expression and // return an AST node representing it. static struct ASTnode *paren_expression(void) { struct ASTnode *n; int type = 0; struct symtable *ctype = NULL; // Beginning of a parenthesised expression, skip the '('. scan(&Token); // If the token after is a type identifier, this is a cast expression switch (Token.token) { case T_IDENT: // We have to see if the identifier matches a typedef. // If not, treat it as an expression. if (findtypedef(Text) == NULL) { n = binexpr(0); break; } case T_VOID: case T_CHAR: case T_INT: case T_LONG: case T_STRUCT: case T_UNION: case T_ENUM: // Get the type inside the parentheses type = parse_cast(&ctype); // Skip the closing ')' and then parse the following expression rparen(); default: n = binexpr(0); // Scan in the expression } // We now have at least an expression in n, and possibly a non-zero type // in type if there was a cast. Skip the closing ')' if there was no cast. 
if (type == 0) rparen(); else // Otherwise, make a unary AST node for the cast n = mkastunary(A_CAST, type, ctype, n, NULL, 0); return (n); } // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(void) { struct ASTnode *n; struct symtable *enumptr; struct symtable *varptr; int id; int type = 0; int size, class; struct symtable *ctype; switch (Token.token) { case T_STATIC: case T_EXTERN: fatal("Compiler doesn't support static or extern local declarations"); case T_SIZEOF: // Skip the T_SIZEOF and ensure we have a left parenthesis scan(&Token); if (Token.token != T_LPAREN) fatal("Left parenthesis expected after sizeof"); scan(&Token); // Get the type inside the parentheses type = parse_stars(parse_type(&ctype, &class)); // Get the type's size size = typesize(type, ctype); rparen(); // Make a leaf node int literal with the size return (mkastleaf(A_INTLIT, P_INT, NULL, NULL, size)); case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. // Make it a P_CHAR if it's within the P_CHAR range if (Token.intvalue >= 0 && Token.intvalue < 256) n = mkastleaf(A_INTLIT, P_CHAR, NULL, NULL, Token.intvalue); else n = mkastleaf(A_INTLIT, P_INT, NULL, NULL, Token.intvalue); break; case T_STRLIT: // For a STRLIT token, generate the assembly for it. id = genglobstr(Text, 0); // For successive STRLIT tokens, append their contents // to this one while (1) { scan(&Peektoken); if (Peektoken.token != T_STRLIT) break; genglobstr(Text, 1); scan(&Token); // To skip it properly } // Now make a leaf AST node for it. id is the string's label. genglobstrend(); n = mkastleaf(A_STRLIT, pointer_to(P_CHAR), NULL, NULL, id); break; case T_IDENT: // If the identifier matches an enum value, // return an A_INTLIT node if ((enumptr = findenumval(Text)) != NULL) { n = mkastleaf(A_INTLIT, P_INT, NULL, NULL, enumptr->st_posn); break; } // See if this identifier exists as a symbol. For arrays, set rvalue to 1. 
if ((varptr = findsymbol(Text)) == NULL) fatals("Unknown variable or function", Text); switch (varptr->stype) { case S_VARIABLE: n = mkastleaf(A_IDENT, varptr->type, varptr->ctype, varptr, 0); break; case S_ARRAY: n = mkastleaf(A_ADDR, varptr->type, varptr->ctype, varptr, 0); n->rvalue = 1; break; case S_FUNCTION: // Function call, see if the next token is a left parenthesis scan(&Token); if (Token.token != T_LPAREN) fatals("Function name used without parentheses", Text); return (funccall()); default: fatals("Identifier not a scalar or array variable", Text); } break; case T_LPAREN: return (paren_expression()); default: fatals("Expecting a primary expression, got token", Token.tokstr); } // Scan in the next token and return the leaf node scan(&Token); return (n); } // Parse a postfix expression and return // an AST node representing it. The // identifier is already in Text. static struct ASTnode *postfix(void) { struct ASTnode *n; // Get the primary expression n = primary(); // Loop until there are no more postfix operators while (1) { switch (Token.token) { case T_LBRACKET: // An array reference n = array_access(n); break; case T_DOT: // Access into a struct or union n = member_access(n, 0); break; case T_ARROW: // Pointer access into a struct or union n = member_access(n, 1); break; case T_INC: // Post-increment: skip over the token if (n->rvalue == 1) fatal("Cannot ++ on rvalue"); scan(&Token); // Can't do it twice if (n->op == A_POSTINC || n->op == A_POSTDEC) fatal("Cannot ++ and/or -- more than once"); // and change the AST operation n->op = A_POSTINC; break; case T_DEC: // Post-decrement: skip over the token if (n->rvalue == 1) fatal("Cannot -- on rvalue"); scan(&Token); // Can't do it twice if (n->op == A_POSTINC || n->op == A_POSTDEC) fatal("Cannot ++ and/or -- more than once"); // and change the AST operation n->op = A_POSTDEC; break; default: return (n); } } return (NULL); // Keep -Wall happy } // Convert a binary operator token into a binary AST 
operation. // We rely on a 1:1 mapping from token to AST operation static int binastop(int tokentype) { if (tokentype > T_EOF && tokentype <= T_SLASH) return (tokentype); fatals("Syntax error, token", Tstring[tokentype]); return (0); // Keep -Wall happy } // Return true if a token is right-associative, // false otherwise. static int rightassoc(int tokentype) { if (tokentype >= T_ASSIGN && tokentype <= T_ASSLASH) return (1); return (0); } // Operator precedence for each token. Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 10, // T_EOF, T_ASSIGN, T_ASPLUS, 10, 10, 10, // T_ASMINUS, T_ASSTAR, T_ASSLASH, 15, // T_QUESTION, 20, 30, // T_LOGOR, T_LOGAND 40, 50, 60, // T_OR, T_XOR, T_AMPER 70, 70, // T_EQ, T_NE 80, 80, 80, 80, // T_LT, T_GT, T_LE, T_GE 90, 90, // T_LSHIFT, T_RSHIFT 100, 100, // T_PLUS, T_MINUS 110, 110 // T_STAR, T_SLASH }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec; if (tokentype > T_SLASH) fatals("Token with no precedence in op_precedence:", Tstring[tokentype]); prec = OpPrec[tokentype]; if (prec == 0) fatals("Syntax error, token", Tstring[tokentype]); return (prec); } // prefix_expression: postfix_expression // | '*' prefix_expression // | '&' prefix_expression // | '-' prefix_expression // | '++' prefix_expression // | '--' prefix_expression // ; // Parse a prefix expression and return // a sub-tree representing it. 
struct ASTnode *prefix(void) { struct ASTnode *tree; switch (Token.token) { case T_AMPER: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Ensure that it's an identifier if (tree->op != A_IDENT) fatal("& operator must be followed by an identifier"); // Prevent '&' being performed on an array if (tree->sym->stype == S_ARRAY) fatal("& operator cannot be performed on an array"); // Now change the operator to A_ADDR and the type to // a pointer to the original type tree->op = A_ADDR; tree->type = pointer_to(tree->type); break; case T_STAR: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's either another deref or an // identifier if (tree->op != A_IDENT && tree->op != A_DEREF) fatal("* operator must be followed by an identifier or *"); // Prepend an A_DEREF operation to the tree tree = mkastunary(A_DEREF, value_at(tree->type), tree->ctype, tree, NULL, 0); break; case T_MINUS: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_NEGATE operation to the tree and // make the child an rvalue. Because chars are unsigned, // also widen this if needed to int so that it's signed tree->rvalue = 1; if (tree->type == P_CHAR) tree->type = P_INT; tree = mkastunary(A_NEGATE, tree->type, tree->ctype, tree, NULL, 0); break; case T_INVERT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_INVERT operation to the tree and // make the child an rvalue. tree->rvalue = 1; tree = mkastunary(A_INVERT, tree->type, tree->ctype, tree, NULL, 0); break; case T_LOGNOT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // Prepend a A_LOGNOT operation to the tree and // make the child an rvalue. 
tree->rvalue = 1; tree = mkastunary(A_LOGNOT, tree->type, tree->ctype, tree, NULL, 0); break; case T_INC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("++ operator must be followed by an identifier"); // Prepend an A_PREINC operation to the tree tree = mkastunary(A_PREINC, tree->type, tree->ctype, tree, NULL, 0); break; case T_DEC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("-- operator must be followed by an identifier"); // Prepend an A_PREDEC operation to the tree tree = mkastunary(A_PREDEC, tree->type, tree->ctype, tree, NULL, 0); break; default: tree = postfix(); } return (tree); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; struct ASTnode *ltemp, *rtemp; int ASTop; int tokentype; // Get the tree on the left. // Fetch the next token at the same time. 
left = prefix(); // If we hit one of several terminating tokens, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON || tokentype == T_RBRACE) { left->rvalue = 1; return (left); } // While the precedence of this token is more than that of the // previous token precedence, or it's right associative and // equal to the previous token's precedence while ((op_precedence(tokentype) > ptp) || (rightassoc(tokentype) && op_precedence(tokentype) == ptp)) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Determine the operation to be performed on the sub-trees ASTop = binastop(tokentype); switch (ASTop) { case A_TERNARY: // Ensure we have a ':' token, scan in the expression after it match(T_COLON, ":"); ltemp = binexpr(0); // Build and return the AST for this statement. Use the middle // expression's type as the return type. XXX We should also // consider the third expression's type. return (mkastnode (A_TERNARY, right->type, right->ctype, left, right, ltemp, NULL, 0)); case A_ASSIGN: // Assignment // Make the right tree into an rvalue right->rvalue = 1; // Ensure the right's type matches the left right = modify_type(right, left->type, left->ctype, 0); if (right == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree. However, switch // left and right around, so that the right expression's // code will be generated before the left expression ltemp = left; left = right; right = ltemp; break; default: // We are not doing a ternary or assignment, so both trees should // be rvalues. Convert both trees into rvalue if they are lvalue trees left->rvalue = 1; right->rvalue = 1; // Ensure the two types are compatible by trying // to modify each tree to match the other's type. 
ltemp = modify_type(left, right->type, right->ctype, ASTop); rtemp = modify_type(right, left->type, left->ctype, ASTop); if (ltemp == NULL && rtemp == NULL) fatal("Incompatible types in binary expression"); if (ltemp != NULL) left = ltemp; if (rtemp != NULL) right = rtemp; } // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. left = mkastnode(binastop(tokentype), left->type, left->ctype, left, NULL, right, NULL, 0); // Some operators produce an int result regardless of their operands switch (binastop(tokentype)) { case A_LOGOR: case A_LOGAND: case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: left->type = P_INT; } // Update the details of the current token. // If we hit a terminating token, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON || tokentype == T_RBRACE) { left->rvalue = 1; return (left); } } // Return the tree we have when the precedence // is the same or lower left->rvalue = 1; return (left); } ================================================ FILE: 54_Reg_Spills/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number static int labelid = 1; int genlabel(void) { return (labelid++); } // Generate the code for an IF statement // and an optional ELSE clause. static int genIF(struct ASTnode *n, int looptoplabel, int loopendlabel) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. // When there is no ELSE clause, Lfalse _is_ // the ending label! Lfalse = genlabel(); if (n->right) Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. 
genAST(n->left, Lfalse, NOLABEL, NOLABEL, n->op); genfreeregs(NOREG); // Generate the true compound statement genAST(n->mid, NOLABEL, looptoplabel, loopendlabel, n->op); genfreeregs(NOREG); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); genfreeregs(NOREG); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = genlabel(); Lend = genlabel(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. genAST(n->left, Lend, Lstart, Lend, n->op); genfreeregs(NOREG); // Generate the compound statement for the body genAST(n->right, NOLABEL, Lstart, Lend, n->op); genfreeregs(NOREG); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Generate the code for a SWITCH statement static int genSWITCH(struct ASTnode *n) { int *caseval, *caselabel; int Ljumptop, Lend; int i, reg, defaultlabel = 0, casecount = 0; struct ASTnode *c; // Create arrays for the case values and associated labels. // Ensure that we have at least one position in each array. caseval = (int *) malloc((n->a_intvalue + 1) * sizeof(int)); caselabel = (int *) malloc((n->a_intvalue + 1) * sizeof(int)); // Generate labels for the top of the jump table, and the // end of the switch statement. Set a default label for // the end of the switch, in case we don't have a default. 
Ljumptop = genlabel(); Lend = genlabel(); defaultlabel = Lend; // Output the code to calculate the switch condition reg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, 0); cgjump(Ljumptop); genfreeregs(reg); // Walk the right-child linked list to // generate the code for each case for (i = 0, c = n->right; c != NULL; i++, c = c->right) { // Get a label for this case. Store it // and the case value in the arrays. // Record if it is the default case. caselabel[i] = genlabel(); caseval[i] = c->a_intvalue; cglabel(caselabel[i]); if (c->op == A_DEFAULT) defaultlabel = caselabel[i]; else casecount++; // Generate the case code. Pass in the end label for the breaks. // If case has no body, we will fall into the following body. if (c->left) genAST(c->left, NOLABEL, NOLABEL, Lend, 0); genfreeregs(NOREG); } // Ensure the last case jumps past the switch table cgjump(Lend); // Now output the switch table and the end label. cgswitch(reg, casecount, Ljumptop, caselabel, caseval, defaultlabel); cglabel(Lend); return (NOREG); } // Generate the code for an // A_LOGAND or A_LOGOR operation static int gen_logandor(struct ASTnode *n) { // Generate two labels int Lfalse = genlabel(); int Lend = genlabel(); int reg; // Generate the code for the left expression // followed by the jump to the false label reg= genAST(n->left, NOLABEL, NOLABEL, NOLABEL, 0); cgboolean(reg, n->op, Lfalse); genfreeregs(NOREG); // Generate the code for the right expression // followed by the jump to the false label reg= genAST(n->right, NOLABEL, NOLABEL, NOLABEL, 0); cgboolean(reg, n->op, Lfalse); genfreeregs(reg); // We didn't jump so set the right boolean value if (n->op== A_LOGAND) { cgloadboolean(reg, 1); cgjump(Lend); cglabel(Lfalse); cgloadboolean(reg, 0); } else { cgloadboolean(reg, 0); cgjump(Lend); cglabel(Lfalse); cgloadboolean(reg, 1); } cglabel(Lend); return(reg); } // Generate the code to copy the arguments of a // function call to its parameters, then call the // function itself. 
// Return the register that holds
// the function's return value.
static int gen_funccall(struct ASTnode *n) {
  struct ASTnode *gluetree = n->left;
  int reg;
  int numargs = 0;

  // Save the registers before we copy the arguments
  spill_all_regs();

  // If there is a list of arguments, walk this list
  // from the last argument (right-hand child) to the
  // first
  while (gluetree) {
    // Calculate the expression's value
    reg = genAST(gluetree->right, NOLABEL, NOLABEL, NOLABEL, gluetree->op);
    // Copy this into the n'th function parameter: size is 1, 2, 3, ...
    cgcopyarg(reg, gluetree->a_size);
    // Keep the first (highest) number of arguments
    if (numargs == 0)
      numargs = gluetree->a_size;
    gluetree = gluetree->left;
  }

  // Call the function, clean up the stack (based on numargs),
  // and return its result
  return (cgcall(n->sym, numargs));
}

// Generate code for a ternary expression
// n->left is the condition, n->mid the value when it is true and
// n->right the value when it is false. Both values are moved into
// one register, which is returned.
static int gen_ternary(struct ASTnode *n) {
  int Lfalse, Lend;
  int reg, expreg;

  // Generate two labels: one for the
  // false expression, and one for the
  // end of the overall expression
  Lfalse = genlabel();
  Lend = genlabel();

  // Generate the condition code followed
  // by a jump to the false label.
  genAST(n->left, Lfalse, NOLABEL, NOLABEL, n->op);
  genfreeregs(NOREG);

  // Get a register to hold the result of the two expressions
  reg = alloc_register();

  // Generate the true expression and the false label.
  // Move the expression result into the known register.
  expreg = genAST(n->mid, NOLABEL, NOLABEL, NOLABEL, n->op);
  cgmove(expreg, reg);
  // Don't free the register holding the result, though!
  genfreeregs(reg);
  cgjump(Lend);
  cglabel(Lfalse);

  // Generate the false expression and the end label.
  // Move the expression result into the known register.
  expreg = genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op);
  cgmove(expreg, reg);
  // Don't free the register holding the result, though!
  genfreeregs(reg);
  cglabel(Lend);
  return (reg);
}

// Given an AST, an optional label, and the AST op
// of the parent, generate assembly code recursively.
// Return the register id with the tree's final value.
//
//   n             the tree to generate code for (may be NULL)
//   iflabel       label to jump to when a condition is false
//   looptoplabel  label that an A_CONTINUE jumps to
//   loopendlabel  label that an A_BREAK jumps to
//   parentASTop   the operation of the node above n; it selects
//                 between "compare and jump" and "compare and set"
//                 and controls when an identifier is loaded
//
// NOREG is returned by the nodes that produce no value.
int genAST(struct ASTnode *n, int iflabel, int looptoplabel,
           int loopendlabel, int parentASTop) {
  int leftreg= NOREG, rightreg= NOREG;

  // Empty tree, do nothing
  if (n==NULL) return(NOREG);

  // We have some specific AST node handling at the top
  // so that we don't evaluate the child sub-trees immediately
  switch (n->op) {
    case A_IF:
      return (genIF(n, looptoplabel, loopendlabel));
    case A_WHILE:
      return (genWHILE(n));
    case A_SWITCH:
      return (genSWITCH(n));
    case A_FUNCCALL:
      return (gen_funccall(n));
    case A_TERNARY:
      return (gen_ternary(n));
    case A_LOGOR:
      return (gen_logandor(n));
    case A_LOGAND:
      return (gen_logandor(n));
    case A_GLUE:
      // Do each child statement, and free the
      // registers after each child
      if (n->left != NULL) genAST(n->left, iflabel, looptoplabel, loopendlabel, n->op);
      genfreeregs(NOREG);
      if (n->right != NULL) genAST(n->right, iflabel, looptoplabel, loopendlabel, n->op);
      genfreeregs(NOREG);
      return (NOREG);
    case A_FUNCTION:
      // Generate the function's preamble before the code
      // in the child sub-tree
      cgfuncpreamble(n->sym);
      genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op);
      cgfuncpostamble(n->sym);
      return (NOREG);
  }

  // General AST node handling below

  // Get the left and right sub-tree values
  if (n->left)
    leftreg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op);
  if (n->right)
    rightreg = genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op);

  switch (n->op) {
    case A_ADD:
      return (cgadd(leftreg, rightreg));
    case A_SUBTRACT:
      return (cgsub(leftreg, rightreg));
    case A_MULTIPLY:
      return (cgmul(leftreg, rightreg));
    case A_DIVIDE:
      return (cgdiv(leftreg, rightreg));
    case A_AND:
      return (cgand(leftreg, rightreg));
    case A_OR:
      return (cgor(leftreg, rightreg));
    case A_XOR:
      return (cgxor(leftreg, rightreg));
    case A_LSHIFT:
      return (cgshl(leftreg, rightreg));
    case A_RSHIFT:
      return (cgshr(leftreg, rightreg));
    case A_EQ:
    case A_NE:
    case A_LT:
    case A_GT:
    case A_LE:
    case A_GE:
      // If the parent AST node is an A_IF, A_WHILE or A_TERNARY,
      // generate a compare followed by a jump. Otherwise, compare
      // registers and set one to 1 or 0 based on the comparison.
      if (parentASTop == A_IF || parentASTop == A_WHILE ||
          parentASTop == A_TERNARY)
        return (cgcompare_and_jump(n->op, leftreg, rightreg, iflabel));
      else
        return (cgcompare_and_set(n->op, leftreg, rightreg));
    case A_INTLIT:
      return (cgloadint(n->a_intvalue, n->type));
    case A_STRLIT:
      return (cgloadglobstr(n->a_intvalue));
    case A_IDENT:
      // Load our value if we are an rvalue
      // or we are being dereferenced
      if (n->rvalue || parentASTop == A_DEREF) {
        if (n->sym->class == C_GLOBAL || n->sym->class == C_STATIC) {
          return (cgloadglob(n->sym, n->op));
        } else {
          return (cgloadlocal(n->sym, n->op));
        }
      } else
        return (NOREG);
    case A_ASPLUS:
    case A_ASMINUS:
    case A_ASSTAR:
    case A_ASSLASH:
    case A_ASSIGN:

      // For the '+=' and friends operators, generate suitable code
      // and get the register with the result. Then take the left child,
      // make it the right child so that we can fall into the assignment code.
      // Note that this rewrites n->right in the AST itself.
      switch (n->op) {
        case A_ASPLUS:
          leftreg = cgadd(leftreg, rightreg);
          n->right = n->left;
          break;
        case A_ASMINUS:
          leftreg = cgsub(leftreg, rightreg);
          n->right = n->left;
          break;
        case A_ASSTAR:
          leftreg = cgmul(leftreg, rightreg);
          n->right = n->left;
          break;
        case A_ASSLASH:
          leftreg = cgdiv(leftreg, rightreg);
          n->right = n->left;
          break;
      }

      // Now into the assignment code
      // Are we assigning to an identifier or through a pointer?
      switch (n->right->op) {
        case A_IDENT:
          if (n->right->sym->class == C_GLOBAL ||
              n->right->sym->class == C_STATIC)
            return (cgstorglob(leftreg, n->right->sym));
          else
            return (cgstorlocal(leftreg, n->right->sym));
        case A_DEREF:
          return (cgstorderef(leftreg, rightreg, n->right->type));
        default:
          // fatald() exits (see misc.c), so control never
          // falls through into the A_WIDEN case below
          fatald("Can't A_ASSIGN in genAST(), op", n->op);
      }
    case A_WIDEN:
      // Widen the child's type to the parent's type
      return (cgwiden(leftreg, n->left->type, n->type));
    case A_RETURN:
      cgreturn(leftreg, Functionid);
      return (NOREG);
    case A_ADDR:
      return (cgaddress(n->sym));
    case A_DEREF:
      // If we are an rvalue, dereference to get the value we point at,
      // otherwise leave it for A_ASSIGN to store through the pointer
      if (n->rvalue)
        return (cgderef(leftreg, n->left->type));
      else
        return (leftreg);
    case A_SCALE:
      // Small optimisation: use shift if the
      // scale value is a known power of two
      switch (n->a_size) {
        case 2:
          return (cgshlconst(leftreg, 1));
        case 4:
          return (cgshlconst(leftreg, 2));
        case 8:
          return (cgshlconst(leftreg, 3));
        default:
          // Load a register with the size and
          // multiply the leftreg by this size
          rightreg = cgloadint(n->a_size, P_INT);
          return (cgmul(leftreg, rightreg));
      }
    case A_POSTINC:
    case A_POSTDEC:
      // Load the variable's value into a register. n->op is passed
      // to the load function so that it can apply the
      // post-increment or post-decrement to the variable
      if (n->sym->class == C_GLOBAL || n->sym->class == C_STATIC)
        return (cgloadglob(n->sym, n->op));
      else
        return (cgloadlocal(n->sym, n->op));
    case A_PREINC:
    case A_PREDEC:
      // As above, but the variable is the left child's symbol and
      // the operation passed on is a pre-increment or pre-decrement
      if (n->left->sym->class == C_GLOBAL || n->left->sym->class == C_STATIC)
        return (cgloadglob(n->left->sym, n->op));
      else
        return (cgloadlocal(n->left->sym, n->op));
    case A_NEGATE:
      return (cgnegate(leftreg));
    case A_INVERT:
      return (cginvert(leftreg));
    case A_LOGNOT:
      return (cglognot(leftreg));
    case A_TOBOOL:
      // If the parent AST node is an A_IF or A_WHILE, generate
      // a compare followed by a jump. Otherwise, set the register
      // to 0 or 1 based on its zeroness or non-zeroness
      return (cgboolean(leftreg, parentASTop, iflabel));
    case A_BREAK:
      cgjump(loopendlabel);
      return (NOREG);
    case A_CONTINUE:
      cgjump(looptoplabel);
      return (NOREG);
    case A_CAST:
      return (leftreg);         // Not much to do
    default:
      fatald("Unknown AST operator", n->op);
  }
  return (NOREG);               // Keep -Wall happy
}

// The functions below are thin wrappers: each one simply
// forwards to a function with a cg or register-allocator name.

// Output the assembly preamble
void genpreamble() {
  cgpreamble();
}
// Output the assembly postamble
void genpostamble() {
  cgpostamble();
}
// Free the registers; keepreg is handed on to freeall_registers()
void genfreeregs(int keepreg) {
  freeall_registers(keepreg);
}
// Generate the given global symbol
void genglobsym(struct symtable *node) {
  cgglobsym(node);
}

// Generate a global string.
// If append is true, append to
// previous genglobstr() call.
// Returns the new label number that was passed to cgglobstr().
int genglobstr(char *strvalue, int append) {
  int l = genlabel();
  cgglobstr(l, strvalue, append);
  return (l);
}
void genglobstrend(void) {
  cgglobstrend();
}
// Return whatever cgprimsize() reports for this type
int genprimsize(int type) {
  return (cgprimsize(type));
}
// Return whatever cgalign() reports for these arguments
int genalign(int type, int offset, int direction) {
  return (cgalign(type, offset, direction));
}

================================================
FILE: 54_Reg_Spills/include/ctype.h
================================================
#ifndef _CTYPE_H_
# define _CTYPE_H_

int isalnum(int c);
int isalpha(int c);
int iscntrl(int c);
int isdigit(int c);
int isgraph(int c);
int islower(int c);
int isprint(int c);
int ispunct(int c);
int isspace(int c);
int isupper(int c);
int isxdigit(int c);
int isascii(int c);
int isblank(int c);

int toupper(int c);
int tolower(int c);

#endif  // _CTYPE_H_

================================================
FILE: 54_Reg_Spills/include/errno.h
================================================
#ifndef _ERRNO_H_
# define _ERRNO_H_

#endif  // _ERRNO_H_

================================================
FILE: 54_Reg_Spills/include/fcntl.h
================================================
#ifndef _FCNTL_H_
# define _FCNTL_H_

#define O_RDONLY 00
#define O_WRONLY 01
#define O_RDWR 02

int open(char *pathname, int flags);

#endif  // _FCNTL_H_

================================================
FILE: 54_Reg_Spills/include/stddef.h ================================================ #ifndef _STDDEF_H_ # define _STDDEF_H_ #ifndef NULL # define NULL (void *)0 #endif typedef long size_t; #endif //_STDDEF_H_ ================================================ FILE: 54_Reg_Spills/include/stdio.h ================================================ #ifndef _STDIO_H_ # define _STDIO_H_ #include #ifndef NULL # define NULL (void *)0 #endif #ifndef EOF # define EOF (-1) #endif // This FILE definition will do for now typedef char * FILE; FILE *fopen(char *pathname, char *mode); size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream); size_t fwrite(void *ptr, size_t size, size_t nmemb, FILE *stream); int fclose(FILE *stream); int printf(char *format); int fprintf(FILE *stream, char *format); int fgetc(FILE *stream); int fputc(int c, FILE *stream); int fputs(char *s, FILE *stream); int putc(int c, FILE *stream); int putchar(int c); int puts(char *s); extern FILE *stdin; extern FILE *stdout; extern FILE *stderr; #endif // _STDIO_H_ ================================================ FILE: 54_Reg_Spills/include/stdlib.h ================================================ #ifndef _STDLIB_H_ # define _STDLIB_H_ void exit(int status); void _Exit(int status); void *malloc(int size); void free(void *ptr); void *calloc(int nmemb, int size); void *realloc(void *ptr, int size); #endif // _STDLIB_H_ ================================================ FILE: 54_Reg_Spills/include/string.h ================================================ #ifndef _STRING_H_ # define _STRING_H_ char *strdup(char *s); char *strchr(char *s, int c); char *strrchr(char *s, int c); int strcmp(char *s1, char *s2); int strncmp(char *s1, char *s2, size_t n); #endif // _STRING_H_ ================================================ FILE: 54_Reg_Spills/include/unistd.h ================================================ #ifndef _UNISTD_H_ # define _UNISTD_H_ void _exit(int status); int unlink(char *pathname); #endif // 
_UNISTD_H_ ================================================ FILE: 54_Reg_Spills/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Given a string with a '.' and at least a 1-character suffix // after the '.', change the suffix to be the given character. // Return the new string or NULL if the original string could // not be modified char *alter_suffix(char *str, char suffix) { char *posn; char *newstr; // Clone the string if ((newstr = strdup(str)) == NULL) return (NULL); // Find the '.' if ((posn = strrchr(newstr, '.')) == NULL) return (NULL); // Ensure there is a suffix posn++; if (*posn == '\0') return (NULL); // Change the suffix and NUL-terminate the string *posn = suffix; posn++; *posn = '\0'; return (newstr); } // Given an input filename, compile that file // down to assembly code. Return the new file's name static char *do_compile(char *filename) { char cmd[TEXTLEN]; // Change the input file's suffix to .s Outfilename = alter_suffix(filename, 's'); if (Outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .c on the end\n", filename); exit(1); } // Generate the pre-processor command snprintf(cmd, TEXTLEN, "%s %s %s", CPPCMD, INCDIR, filename); // Open up the pre-processor pipe if ((Infile = popen(cmd, "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", filename, strerror(errno)); exit(1); } Infilename = filename; // Create the output file if ((Outfile = fopen(Outfilename, "w")) == NULL) { fprintf(stderr, "Unable to create %s: %s\n", Outfilename, strerror(errno)); exit(1); } Line = 1; // Reset the scanner Linestart = 1; Putback = '\n'; clear_symtable(); // Clear the symbol table if (O_verbose) printf("compiling %s\n", filename); scan(&Token); // Get the first token from the input Peektoken.token = 0; // and set there is no lookahead token 
genpreamble(); // Output the preamble global_declarations(); // Parse the global declarations genpostamble(); // Output the postamble fclose(Outfile); // Close the output file // Dump the symbol table if requested if (O_dumpsym) { printf("Symbols for %s\n", filename); dumpsymtables(); fprintf(stdout, "\n\n"); } freestaticsyms(); // Free any static symbols in the file return (Outfilename); } // Given an input filename, assemble that file // down to object code. Return the object filename char *do_assemble(char *filename) { char cmd[TEXTLEN]; int err; char *outfilename = alter_suffix(filename, 'o'); if (outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .s on the end\n", filename); exit(1); } // Build the assembly command and run it snprintf(cmd, TEXTLEN, "%s %s %s", ASCMD, outfilename, filename); if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Assembly of %s failed\n", filename); exit(1); } return (outfilename); } // Given a list of object files and an output filename, // link all of the object filenames together. 
void do_link(char *outfilename, char *objlist[]) { int cnt, size = TEXTLEN; char cmd[TEXTLEN], *cptr; int err; // Start with the linker command and the output file cptr = cmd; cnt = snprintf(cptr, size, "%s %s ", LDCMD, outfilename); cptr += cnt; size -= cnt; // Now append each object file while (*objlist != NULL) { cnt = snprintf(cptr, size, "%s ", *objlist); cptr += cnt; size -= cnt; objlist++; } if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Linking failed\n"); exit(1); } } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s [-vcSTM] [-o outfile] file [file ...]\n", prog); fprintf(stderr, " -v give verbose output of the compilation stages\n"); fprintf(stderr, " -c generate object files but don't link them\n"); fprintf(stderr, " -S generate assembly files but don't link them\n"); fprintf(stderr, " -T dump the AST trees for each input file\n"); fprintf(stderr, " -M dump the symbol table for each input file\n"); fprintf(stderr, " -o outfile, produce the outfile executable file\n"); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. 
enum { MAXOBJ = 100 }; int main(int argc, char *argv[]) { char *outfilename = AOUT; char *asmfile, *objfile; char *objlist[MAXOBJ]; int i, objcnt = 0; // Initialise our variables O_dumpAST = 0; O_dumpsym = 0; O_keepasm = 0; O_assemble = 0; O_verbose = 0; O_dolink = 1; // Scan for command-line options for (i = 1; i < argc; i++) { // No leading '-', stop scanning for options if (*argv[i] != '-') break; // For each option in this argument for (int j = 1; (*argv[i] == '-') && argv[i][j]; j++) { switch (argv[i][j]) { case 'o': outfilename = argv[++i]; // Save & skip to next argument break; case 'T': O_dumpAST = 1; break; case 'M': O_dumpsym = 1; break; case 'c': O_assemble = 1; O_keepasm = 0; O_dolink = 0; break; case 'S': O_keepasm = 1; O_assemble = 0; O_dolink = 0; break; case 'v': O_verbose = 1; break; default: usage(argv[0]); } } } // Ensure we have at lease one input file argument if (i >= argc) usage(argv[0]); // Work on each input file in turn while (i < argc) { asmfile = do_compile(argv[i]); // Compile the source file if (O_dolink || O_assemble) { objfile = do_assemble(asmfile); // Assemble it to object forma if (objcnt == (MAXOBJ - 2)) { fprintf(stderr, "Too many object files for the compiler to handle\n"); exit(1); } objlist[objcnt++] = objfile; // Add the object file's name objlist[objcnt] = NULL; // to the list of object files } if (!O_keepasm) // Remove the assembly file if unlink(asmfile); // we don't need to keep it i++; } // Now link all the object files together if (O_dolink) { do_link(outfilename, objlist); // If we don't need to keep the object // files, then remove them if (!O_assemble) { for (i = 0; objlist[i] != NULL; i++) unlink(objlist[i]); } } return (0); } ================================================ FILE: 54_Reg_Spills/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" #include #include // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current 
token is t, // and fetch the next token. Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Match a comma and fetch the next token void comma(void) { match(T_COMMA, "comma"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d of %s\n", s, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d of %s\n", s1, s2, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d of %s\n", s, d, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d of %s\n", s, c, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } ================================================ FILE: 54_Reg_Spills/opt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST Tree Optimisation Code // Copyright (c) 2019 Warren Toomey, GPL3 // Fold an AST tree with a binary operator // and two A_INTLIT children. Return either // the original tree or a new leaf node. 
static struct ASTnode *fold2(struct ASTnode *n) { int val, leftval, rightval; // Get the values from each child leftval = n->left->a_intvalue; rightval = n->right->a_intvalue; // Perform some of the binary operations. // For any AST op we can't do, return // the original tree. switch (n->op) { case A_ADD: val = leftval + rightval; break; case A_SUBTRACT: val = leftval - rightval; break; case A_MULTIPLY: val = leftval * rightval; break; case A_DIVIDE: // Don't try to divide by zero. if (rightval == 0) return (n); val = leftval / rightval; break; default: return (n); } // Return a leaf node with the new value return (mkastleaf(A_INTLIT, n->type, NULL, NULL, val)); } // Fold an AST tree with a unary operator // and one INTLIT children. Return either // the original tree or a new leaf node. static struct ASTnode *fold1(struct ASTnode *n) { int val; // Get the child value. Do the // operation if recognised. // Return the new leaf node. val = n->left->a_intvalue; switch (n->op) { case A_WIDEN: break; case A_INVERT: val = ~val; break; case A_LOGNOT: val = !val; break; default: return (n); } // Return a leaf node with the new value return (mkastleaf(A_INTLIT, n->type, NULL, NULL, val)); } // Attempt to do constant folding on // the AST tree with the root node n static struct ASTnode *fold(struct ASTnode *n) { if (n == NULL) return (NULL); // Fold on the left child, then // do the same on the right child n->left = fold(n->left); n->right = fold(n->right); // If both children are A_INTLITs, do a fold2() if (n->left && n->left->op == A_INTLIT) { if (n->right && n->right->op == A_INTLIT) n = fold2(n); else // If only the left is A_INTLIT, do a fold1() n = fold1(n); } // Return the possibly modified tree return (n); } // Optimise an AST tree by // constant folding in all sub-trees struct ASTnode *optimise(struct ASTnode *n) { n = fold(n); return (n); } ================================================ FILE: 54_Reg_Spills/scan.c ================================================ 
#include "defs.h"
#include "data.h"
#include "decl.h"

// Lexical scanning
// Copyright (c) 2019 Warren Toomey, GPL3

// Return the position of character c
// in string s, or -1 if c not found
static int chrpos(char *s, int c) {
  int i;
  for (i = 0; s[i] != '\0'; i++)
    if (s[i] == (char) c)
      return (i);
  return (-1);
}

// Get the next character from the input file.
// A pending put-back character is returned first.
// Pre-processor "# <line> <file>" lines seen at the start
// of a line are consumed here and used to update Line and
// Infilename. Returns EOF at the end of the input.
static int next(void) {
  int c, l;

  if (Putback) {		// Use the character put
    c = Putback;		// back if there is one
    Putback = 0;
    return (c);
  }

  c = fgetc(Infile);		// Read from input file

  while (Linestart && c == '#') {	// We've hit a pre-processor statement
    Linestart = 0;		// No longer at the start of the line
    scan(&Token);		// Get the line number into l
    if (Token.token != T_INTLIT)
      fatals("Expecting pre-processor line number, got:", Text);
    l = Token.intvalue;

    scan(&Token);		// Get the filename in Text
    if (Token.token != T_STRLIT)
      fatals("Expecting pre-processor file name, got:", Text);

    if (Text[0] != '<') {	// If this is a real filename
      if (strcmp(Text, Infilename))	// and not the one we have now
	Infilename = strdup(Text);	// save it. Then update the line num
      Line = l;
    }

    // Skip to the end of the line. Also stop at EOF, otherwise a
    // pre-processor line without a trailing newline loops forever.
    while ((c = fgetc(Infile)) != '\n' && c != EOF)
      ;
    c = fgetc(Infile);		// and get the next character
    Linestart = 1;		// Now back at the start of the line
  }

  Linestart = 0;		// No longer at the start of the line
  if ('\n' == c) {
    Line++;			// Increment line count
    Linestart = 1;		// Now back at the start of the line
  }
  return (c);
}

// Put back an unwanted character
static void putback(int c) {
  Putback = c;
}

// Skip past input that we don't need to deal with,
// i.e. whitespace, newlines. Return the first
// character we do need to deal with.
static int skip(void) { int c; c = next(); while (' ' == c || '\t' == c || '\n' == c || '\r' == c || '\f' == c) { c = next(); } return (c); } // Read in a hexadecimal constant from the input static int hexchar(void) { int c, h, n = 0, f = 0; // Loop getting characters while (isxdigit(c = next())) { // Convert from char to int value h = chrpos("0123456789abcdef", tolower(c)); // Add to running hex value n = n * 16 + h; f = 1; } // We hit a non-hex character, put it back putback(c); // Flag tells us we never saw any hex characters if (!f) fatal("missing digits after '\\x'"); if (n > 255) fatal("value out of range after '\\x'"); return (n); } // Return the next character from a character // or string literal static int scanch(void) { int i, c, c2; // Get the next input character and interpret // metacharacters that start with a backslash c = next(); if (c == '\\') { switch (c = next()) { case 'a': return ('\a'); case 'b': return ('\b'); case 'f': return ('\f'); case 'n': return ('\n'); case 'r': return ('\r'); case 't': return ('\t'); case 'v': return ('\v'); case '\\': return ('\\'); case '"': return ('"'); case '\'': return ('\''); // Deal with octal constants by reading in // characters until we hit a non-octal digit. // Build up the octal value in c2 and count // # digits in i. Permit only 3 octal digits. case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': for (i = c2 = 0; isdigit(c) && c < '8'; c = next()) { if (++i > 3) break; c2 = c2 * 8 + (c - '0'); } putback(c); // Put back the first non-octal char return (c2); case 'x': return (hexchar()); default: fatalc("unknown escape sequence", c); } } return (c); // Just an ordinary old character! } // Scan and return an integer literal // value from the input file. 
static int scanint(int c) { int k, val = 0, radix = 10; // Assume the radix is 10, but if it starts with 0 if (c == '0') { // and the next character is 'x', it's radix 16 if ((c = next()) == 'x') { radix = 16; c = next(); } else // Otherwise, it's radix 8 radix = 8; } // Convert each character into an int value while ((k = chrpos("0123456789abcdef", tolower(c))) >= 0) { if (k >= radix) fatalc("invalid digit in integer literal", c); val = val * radix + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan in a string literal from the input file, // and store it in buf[]. Return the length of // the string. static int scanstr(char *buf) { int i, c; // Loop while we have enough buffer space for (i = 0; i < TEXTLEN - 1; i++) { // Get the next char and append to buf // Return when we hit the ending double quote if ((c = scanch()) == '"') { buf[i] = 0; return (i); } buf[i] = (char)c; } // Ran out of buf[] space fatal("String literal too long"); return (0); } // Scan an identifier from the input file and // store it in buf[]. Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { fatal("Identifier too long"); } else if (i < lim - 1) { buf[i++] = (char)c; } c = next(); } // We hit a non-valid character, put it back. // NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. 
static int keyword(char *s) { switch (*s) { case 'b': if (!strcmp(s, "break")) return (T_BREAK); break; case 'c': if (!strcmp(s, "case")) return (T_CASE); if (!strcmp(s, "char")) return (T_CHAR); if (!strcmp(s, "continue")) return (T_CONTINUE); break; case 'd': if (!strcmp(s, "default")) return (T_DEFAULT); break; case 'e': if (!strcmp(s, "else")) return (T_ELSE); if (!strcmp(s, "enum")) return (T_ENUM); if (!strcmp(s, "extern")) return (T_EXTERN); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'l': if (!strcmp(s, "long")) return (T_LONG); break; case 'r': if (!strcmp(s, "return")) return (T_RETURN); break; case 's': if (!strcmp(s, "sizeof")) return (T_SIZEOF); if (!strcmp(s, "static")) return (T_STATIC); if (!strcmp(s, "struct")) return (T_STRUCT); if (!strcmp(s, "switch")) return (T_SWITCH); break; case 't': if (!strcmp(s, "typedef")) return (T_TYPEDEF); break; case 'u': if (!strcmp(s, "union")) return (T_UNION); break; case 'v': if (!strcmp(s, "void")) return (T_VOID); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; } return (0); } // List of token strings, for debugging purposes char *Tstring[] = { "EOF", "=", "+=", "-=", "*=", "/=", "?", "||", "&&", "|", "^", "&", "==", "!=", ",", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", "default", "sizeof", "static", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":" }; // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. 
int scan(struct token *t) { int c, tokentype; // If we have a lookahead token, return this token if (Peektoken.token != 0) { t->token = Peektoken.token; t->tokstr = Peektoken.tokstr; t->intvalue = Peektoken.intvalue; Peektoken.token = 0; return (1); } // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': if ((c = next()) == '+') { t->token = T_INC; } else if (c == '=') { t->token = T_ASPLUS; } else { putback(c); t->token = T_PLUS; } break; case '-': if ((c = next()) == '-') { t->token = T_DEC; } else if (c == '>') { t->token = T_ARROW; } else if (c == '=') { t->token = T_ASMINUS; } else if (isdigit(c)) { // Negative int literal t->intvalue = -scanint(c); t->token = T_INTLIT; } else { putback(c); t->token = T_MINUS; } break; case '*': if ((c = next()) == '=') { t->token = T_ASSTAR; } else { putback(c); t->token = T_STAR; } break; case '/': if ((c = next()) == '=') { t->token = T_ASSLASH; } else { putback(c); t->token = T_SLASH; } break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case '[': t->token = T_LBRACKET; break; case ']': t->token = T_RBRACKET; break; case '~': t->token = T_INVERT; break; case '^': t->token = T_XOR; break; case ',': t->token = T_COMMA; break; case '.': t->token = T_DOT; break; case ':': t->token = T_COLON; break; case '?': t->token = T_QUESTION; break; case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { putback(c); t->token = T_LOGNOT; } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else if (c == '<') { t->token = T_LSHIFT; } else { putback(c); t->token = T_LT; } break; case '>': if ((c = next()) == '=') { t->token = T_GE; } else if (c == '>') { t->token = T_RSHIFT; } else { 
putback(c); t->token = T_GT; } break; case '&': if ((c = next()) == '&') { t->token = T_LOGAND; } else { putback(c); t->token = T_AMPER; } break; case '|': if ((c = next()) == '|') { t->token = T_LOGOR; } else { putback(c); t->token = T_OR; } break; case '\'': // If it's a quote, scan in the // literal character value and // the trailing quote t->intvalue = scanch(); t->token = T_INTLIT; if (next() != '\'') fatal("Expected '\\'' at end of char literal"); break; case '"': // Scan in a literal string scanstr(Text); t->token = T_STRLIT; break; default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if ((tokentype = keyword(Text)) != 0) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token t->tokstr = Tstring[t->token]; return (1); } ================================================ FILE: 54_Reg_Spills/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // Prototypes static struct ASTnode *single_statement(void); // compound_statement: // empty, i.e. 
no statement // | statement // | statement statements // ; // // statement: declaration // | expression_statement // | function_call // | if_statement // | while_statement // | for_statement // | return_statement // ; // if_statement: if_head // | if_head 'else' statement // ; // // if_head: 'if' '(' true_false_expression ')' statement ; // // Parse an IF statement including any // optional ELSE clause and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST->ctype, condAST, NULL, 0); rparen(); // Get the AST for the statement trueAST = single_statement(); // If we have an 'else', skip it // and get the AST for the statement if (Token.token == T_ELSE) { scan(&Token); falseAST = single_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, P_NONE, NULL, condAST, trueAST, falseAST, NULL, 0)); } // while_statement: 'while' '(' true_false_expression ')' statement ; // // Parse a WHILE statement and return its AST static struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST->ctype, condAST, NULL, 0); rparen(); // Get the AST for the statement. 
// Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Build and return the AST for this statement return (mkastnode(A_WHILE, P_NONE, NULL, condAST, NULL, bodyAST, NULL, 0)); } // for_statement: 'for' '(' expression_list ';' // true_false_expression ';' // expression_list ')' statement ; // // Parse a FOR statement and return its AST static struct ASTnode *for_statement(void) { struct ASTnode *condAST, *bodyAST; struct ASTnode *preopAST, *postopAST; struct ASTnode *tree; // Ensure we have 'for' '(' match(T_FOR, "for"); lparen(); // Get the pre_op expression and the ';' preopAST = expression_list(T_SEMI); semi(); // Get the condition and the ';'. // Force a non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST->ctype, condAST, NULL, 0); semi(); // Get the post_op expression and the ')' postopAST = expression_list(T_RPAREN); rparen(); // Get the statement which is the body // Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Glue the statement and the postop tree tree = mkastnode(A_GLUE, P_NONE, NULL, bodyAST, NULL, postopAST, NULL, 0); // Make a WHILE loop with the condition and this new body tree = mkastnode(A_WHILE, P_NONE, NULL, condAST, NULL, tree, NULL, 0); // And glue the preop tree to the A_WHILE tree return (mkastnode(A_GLUE, P_NONE, NULL, preopAST, NULL, tree, NULL, 0)); } // return_statement: 'return' '(' expression ')' ; // // Parse a return statement and return its AST static struct ASTnode *return_statement(void) { struct ASTnode *tree= NULL; // Ensure we have 'return' match(T_RETURN, "return"); // See if we have a return value if (Token.token == T_LPAREN) { // Can't return a value if function returns P_VOID if (Functionid->type == P_VOID) fatal("Can't return from a void function"); // Skip the left parenthesis lparen(); // Parse 
the following expression tree = binexpr(0); // Ensure this is compatible with the function's type tree = modify_type(tree, Functionid->type, Functionid->ctype, 0); if (tree == NULL) fatal("Incompatible type to return"); // Get the ')' rparen(); } // Add on the A_RETURN node tree = mkastunary(A_RETURN, P_NONE, NULL, tree, NULL, 0); // Get the ';' semi(); return (tree); } // break_statement: 'break' ; // // Parse a break statement and return its AST static struct ASTnode *break_statement(void) { if (Looplevel == 0 && Switchlevel == 0) fatal("no loop or switch to break out from"); scan(&Token); semi(); return (mkastleaf(A_BREAK, P_NONE, NULL, NULL, 0)); } // continue_statement: 'continue' ; // // Parse a continue statement and return its AST static struct ASTnode *continue_statement(void) { if (Looplevel == 0) fatal("no loop to continue to"); scan(&Token); semi(); return (mkastleaf(A_CONTINUE, P_NONE, NULL, NULL, 0)); } // Parse a switch statement and return its AST static struct ASTnode *switch_statement(void) { struct ASTnode *left, *body, *n, *c; struct ASTnode *casetree = NULL, *casetail; int inloop = 1, casecount = 0; int seendefault = 0; int ASTop, casevalue; // Skip the 'switch' and '(' scan(&Token); lparen(); // Get the switch expression, the ')' and the '{' left = binexpr(0); rparen(); lbrace(); // Ensure that this is of int type if (!inttype(left->type)) fatal("Switch expression is not of integer type"); // Build an A_SWITCH subtree with the expression as // the child n = mkastunary(A_SWITCH, P_NONE, NULL, left, NULL, 0); // Now parse the cases Switchlevel++; while (inloop) { switch (Token.token) { // Leave the loop when we hit a '}' case T_RBRACE: if (casecount == 0) fatal("No cases in switch"); inloop = 0; break; case T_CASE: case T_DEFAULT: // Ensure this isn't after a previous 'default' if (seendefault) fatal("case or default after existing default"); // Set the AST operation. 
Scan the case value if required if (Token.token == T_DEFAULT) { ASTop = A_DEFAULT; seendefault = 1; scan(&Token); } else { ASTop = A_CASE; scan(&Token); left = binexpr(0); // Ensure the case value is an integer literal if (left->op != A_INTLIT) fatal("Expecting integer literal for case value"); casevalue = left->a_intvalue; // Walk the list of existing case values to ensure // that there isn't a duplicate case value for (c = casetree; c != NULL; c = c->right) if (casevalue == c->a_intvalue) fatal("Duplicate case value"); } // Scan the ':' and increment the casecount match(T_COLON, ":"); casecount++; // If the next token is a T_CASE, the existing case will fall // into the next case. Otherwise, parse the case body. if (Token.token == T_CASE) body = NULL; else body = compound_statement(1); // Build a sub-tree with any compound statement as the left child // and link it in to the growing A_CASE tree if (casetree == NULL) { casetree = casetail = mkastunary(ASTop, P_NONE, NULL, body, NULL, casevalue); } else { casetail->right = mkastunary(ASTop, P_NONE, NULL, body, NULL, casevalue); casetail = casetail->right; } break; default: fatals("Unexpected token in switch", Token.tokstr); } } Switchlevel--; // We have a sub-tree with the cases and any default. Put the // case count into the A_SWITCH node and attach the case tree. n->a_intvalue = casecount; n->right = casetree; rbrace(); return (n); } // Parse a single statement and return its AST. static struct ASTnode *single_statement(void) { struct ASTnode *stmt; struct symtable *ctype; switch (Token.token) { case T_SEMI: // An empty statement semi(); break; case T_LBRACE: // We have a '{', so this is a compound statement lbrace(); stmt = compound_statement(0); rbrace(); return (stmt); case T_IDENT: // We have to see if the identifier matches a typedef. // If not, treat it as an expression. // Otherwise, fall down to the parse_type() call. 
if (findtypedef(Text) == NULL) { stmt = binexpr(0); semi(); return (stmt); } case T_CHAR: case T_INT: case T_LONG: case T_STRUCT: case T_UNION: case T_ENUM: case T_TYPEDEF: // The beginning of a variable declaration list. declaration_list(&ctype, C_LOCAL, T_SEMI, T_EOF, &stmt); semi(); return (stmt); // Any assignments from the declarations case T_IF: return (if_statement()); case T_WHILE: return (while_statement()); case T_FOR: return (for_statement()); case T_RETURN: return (return_statement()); case T_BREAK: return (break_statement()); case T_CONTINUE: return (continue_statement()); case T_SWITCH: return (switch_statement()); default: // For now, see if this is an expression. // This catches assignment statements. stmt = binexpr(0); semi(); return (stmt); } return (NULL); // Keep -Wall happy } // Parse a compound statement // and return its AST. If inswitch is true, // we look for a '}', 'case' or 'default' token // to end the parsing. Otherwise, look for // just a '}' to end the parsing. struct ASTnode *compound_statement(int inswitch) { struct ASTnode *left = NULL; struct ASTnode *tree; while (1) { // Leave if we've hit the end token. 
We do this first to allow // an empty compound statement if (Token.token == T_RBRACE) return (left); if (inswitch && (Token.token == T_CASE || Token.token == T_DEFAULT)) return (left); // Parse a single statement tree = single_statement(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, P_NONE, NULL, left, NULL, tree, NULL, 0); } } return (NULL); // Keep -Wall happy } ================================================ FILE: 54_Reg_Spills/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 // Append a node to the singly-linked list pointed to by head or tail void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node) { // Check for valid pointers if (head == NULL || tail == NULL || node == NULL) fatal("Either head, tail or node is NULL in appendsym"); // Append to the list if (*tail) { (*tail)->next = node; *tail = node; } else *head = *tail = node; node->next = NULL; } // Create a symbol node to be added to a symbol table list. // Set up the node's: // + type: char, int etc. // + ctype: composite type pointer for struct/union // + structural type: var, function, array etc. 
// + size: number of elements, or endlabel: end label for a function // + posn: Position information for local symbols // Return a pointer to the new node struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn) { // Get a new node struct symtable *node = (struct symtable *) malloc(sizeof(struct symtable)); if (node == NULL) fatal("Unable to malloc a symbol table node in newsym"); // Fill in the values if (name == NULL) node->name = NULL; else node->name = strdup(name); node->type = type; node->ctype = ctype; node->stype = stype; node->class = class; node->nelems = nelems; // For pointers and integer types, set the size // of the symbol. structs and union declarations // manually set this up themselves. if (ptrtype(type) || inttype(type)) node->size = nelems * typesize(type, ctype); node->st_posn = posn; node->next = NULL; node->member = NULL; node->initlist = NULL; return (node); } // Add a symbol to the global symbol list struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn) { struct symtable *sym = newsym(name, type, ctype, stype, class, nelems, posn); // For structs and unions, copy the size from the type node if (type == P_STRUCT || type == P_UNION) sym->size = ctype->size; appendsym(&Globhead, &Globtail, sym); return (sym); } // Add a symbol to the local symbol list struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int nelems) { struct symtable *sym = newsym(name, type, ctype, stype, C_LOCAL, nelems, 0); // For structs and unions, copy the size from the type node if (type == P_STRUCT || type == P_UNION) sym->size = ctype->size; appendsym(&Loclhead, &Locltail, sym); return (sym); } // Add a symbol to the parameter list struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype) { struct symtable *sym = newsym(name, type, ctype, stype, C_PARAM, 1, 0); appendsym(&Parmhead, &Parmtail, sym); 
return (sym); } // Add a symbol to the temporary member list struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int nelems) { struct symtable *sym = newsym(name, type, ctype, stype, C_MEMBER, nelems, 0); // For structs and unions, copy the size from the type node if (type == P_STRUCT || type == P_UNION) sym->size = ctype->size; appendsym(&Membhead, &Membtail, sym); return (sym); } // Add a struct to the struct list struct symtable *addstruct(char *name) { struct symtable *sym = newsym(name, P_STRUCT, NULL, 0, C_STRUCT, 0, 0); appendsym(&Structhead, &Structtail, sym); return (sym); } // Add a struct to the union list struct symtable *addunion(char *name) { struct symtable *sym = newsym(name, P_UNION, NULL, 0, C_UNION, 0, 0); appendsym(&Unionhead, &Uniontail, sym); return (sym); } // Add an enum type or value to the enum list. // Class is C_ENUMTYPE or C_ENUMVAL. // Use posn to store the int value. struct symtable *addenum(char *name, int class, int value) { struct symtable *sym = newsym(name, P_INT, NULL, 0, class, 0, value); appendsym(&Enumhead, &Enumtail, sym); return (sym); } // Add a typedef to the typedef list struct symtable *addtypedef(char *name, int type, struct symtable *ctype) { struct symtable *sym = newsym(name, type, ctype, 0, C_TYPEDEF, 0, 0); appendsym(&Typehead, &Typetail, sym); return (sym); } // Search for a symbol in a specific list. // Return a pointer to the found node or NULL if not found. // If class is not zero, also match on the given class static struct symtable *findsyminlist(char *s, struct symtable *list, int class) { for (; list != NULL; list = list->next) if ((list->name != NULL) && !strcmp(s, list->name)) if (class == 0 || class == list->class) return (list); return (NULL); } // Determine if the symbol s is in the global symbol table. // Return a pointer to the found node or NULL if not found. 
struct symtable *findglob(char *s) { return (findsyminlist(s, Globhead, 0)); } // Determine if the symbol s is in the local symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findlocl(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } return (findsyminlist(s, Loclhead, 0)); } // Determine if the symbol s is in the symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findsymbol(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } // Otherwise, try the local and global symbol lists node = findsyminlist(s, Loclhead, 0); if (node) return (node); return (findsyminlist(s, Globhead, 0)); } // Find a member in the member list // Return a pointer to the found node or NULL if not found. struct symtable *findmember(char *s) { return (findsyminlist(s, Membhead, 0)); } // Find a struct in the struct list // Return a pointer to the found node or NULL if not found. struct symtable *findstruct(char *s) { return (findsyminlist(s, Structhead, 0)); } // Find a struct in the union list // Return a pointer to the found node or NULL if not found. struct symtable *findunion(char *s) { return (findsyminlist(s, Unionhead, 0)); } // Find an enum type in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumtype(char *s) { return (findsyminlist(s, Enumhead, C_ENUMTYPE)); } // Find an enum value in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumval(char *s) { return (findsyminlist(s, Enumhead, C_ENUMVAL)); } // Find a type in the tyedef list // Return a pointer to the found node or NULL if not found. 
struct symtable *findtypedef(char *s) { return (findsyminlist(s, Typehead, 0)); } // Reset the contents of the symbol table void clear_symtable(void) { Globhead = Globtail = NULL; Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Membhead = Membtail = NULL; Structhead = Structtail = NULL; Unionhead = Uniontail = NULL; Enumhead = Enumtail = NULL; Typehead = Typetail = NULL; } // Clear all the entries in the local symbol table void freeloclsyms(void) { Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Functionid = NULL; } // Remove all static symbols from the global symbol table void freestaticsyms(void) { // g points at current node, prev at the previous one struct symtable *g, *prev = NULL; // Walk the global table looking for static entries for (g = Globhead; g != NULL; g = g->next) { if (g->class == C_STATIC) { // If there's a previous node, rearrange the prev pointer // to skip over the current node. If not, g is the head, // so do the same to Globhead if (prev != NULL) prev->next = g->next; else Globhead->next = g->next; // If g is the tail, point Globtail at the previous node // (if there is one), or Globhead if (g == Globtail) { if (prev != NULL) Globtail = prev; else Globtail = Globhead; } } } // Point prev at g before we move up to the next node prev = g; } // Dump a single symbol static void dumpsym(struct symtable *sym, int indent) { int i; for (i = 0; i < indent; i++) printf(" "); switch (sym->type & (~0xf)) { case P_VOID: printf("void "); break; case P_CHAR: printf("char "); break; case P_INT: printf("int "); break; case P_LONG: printf("long "); break; case P_STRUCT: if (sym->ctype != NULL) printf("struct %s ", sym->ctype->name); else printf("struct %s ", sym->name); break; case P_UNION: if (sym->ctype != NULL) printf("union %s ", sym->ctype->name); else printf("union %s ", sym->name); break; default: printf("unknown type "); } for (i = 0; i < (sym->type & 0xf); i++) printf("*"); printf("%s", sym->name); switch (sym->stype) { case 
S_VARIABLE: break; case S_FUNCTION: printf("()"); break; case S_ARRAY: printf("[]"); break; default: printf(" unknown stype"); } switch (sym->class) { case C_GLOBAL: printf(": global"); break; case C_LOCAL: printf(": local"); break; case C_PARAM: printf(": param"); break; case C_EXTERN: printf(": extern"); break; case C_STATIC: printf(": static"); break; case C_STRUCT: printf(": struct"); break; case C_UNION: printf(": union"); break; case C_MEMBER: printf(": member"); break; case C_ENUMTYPE: printf(": enumtype"); break; case C_ENUMVAL: printf(": enumval"); break; case C_TYPEDEF: printf(": typedef"); break; default: printf(": unknown class"); } switch (sym->stype) { case S_VARIABLE: if (sym->class == C_ENUMVAL) printf(", value %d\n", sym->st_posn); else printf(", size %d\n", sym->size); break; case S_FUNCTION: printf(", %d params\n", sym->nelems); break; case S_ARRAY: printf(", %d elems, size %d\n", sym->nelems, sym->size); break; } switch (sym->type & (~0xf)) { case P_STRUCT: case P_UNION: dumptable(sym->member, NULL, 4); } switch (sym->stype) { case S_FUNCTION: dumptable(sym->member, NULL, 4); } } // Dump one symbol table void dumptable(struct symtable *head, char *name, int indent) { struct symtable *sym; if (head != NULL && name != NULL) printf("%s\n--------\n", name); for (sym = head; sym != NULL; sym = sym->next) dumpsym(sym, indent); } void dumpsymtables(void) { dumptable(Globhead, "Global", 0); printf("\n"); dumptable(Enumhead, "Enums", 0); printf("\n"); dumptable(Typehead, "Typedefs", 0); } ================================================ FILE: 54_Reg_Spills/tests/err.input031.c ================================================ Expecting a primary expression, got token:+ on line 5 of input031.c ================================================ FILE: 54_Reg_Spills/tests/err.input032.c ================================================ Unknown variable or function:pizza on line 4 of input032.c ================================================ FILE: 
54_Reg_Spills/tests/err.input033.c ================================================ Incompatible type to return on line 4 of input033.c ================================================ FILE: 54_Reg_Spills/tests/err.input034.c ================================================ For now, declaration of non-global arrays is not implemented on line 4 of input034.c ================================================ FILE: 54_Reg_Spills/tests/err.input035.c ================================================ Duplicate local variable declaration:a on line 4 of input035.c ================================================ FILE: 54_Reg_Spills/tests/err.input036.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input036.c ================================================ FILE: 54_Reg_Spills/tests/err.input037.c ================================================ Expected:comma on line 3 of input037.c ================================================ FILE: 54_Reg_Spills/tests/err.input038.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input038.c ================================================ FILE: 54_Reg_Spills/tests/err.input039.c ================================================ No statements in function with non-void type on line 4 of input039.c ================================================ FILE: 54_Reg_Spills/tests/err.input040.c ================================================ No return for function with non-void type on line 4 of input040.c ================================================ FILE: 54_Reg_Spills/tests/err.input041.c ================================================ Can't return from a void function on line 3 of input041.c ================================================ FILE: 54_Reg_Spills/tests/err.input042.c ================================================ Unknown variable or function:fred on line 3 of input042.c 
================================================ FILE: 54_Reg_Spills/tests/err.input043.c ================================================ Unknown variable or function:b on line 3 of input043.c ================================================ FILE: 54_Reg_Spills/tests/err.input044.c ================================================ Unknown variable or function:z on line 3 of input044.c ================================================ FILE: 54_Reg_Spills/tests/err.input045.c ================================================ & operator must be followed by an identifier on line 3 of input045.c ================================================ FILE: 54_Reg_Spills/tests/err.input046.c ================================================ * operator must be followed by an identifier or * on line 3 of input046.c ================================================ FILE: 54_Reg_Spills/tests/err.input047.c ================================================ ++ operator must be followed by an identifier on line 3 of input047.c ================================================ FILE: 54_Reg_Spills/tests/err.input048.c ================================================ -- operator must be followed by an identifier on line 3 of input048.c ================================================ FILE: 54_Reg_Spills/tests/err.input049.c ================================================ Incompatible expression in assignment on line 6 of input049.c ================================================ FILE: 54_Reg_Spills/tests/err.input050.c ================================================ Incompatible types in binary expression on line 6 of input050.c ================================================ FILE: 54_Reg_Spills/tests/err.input051.c ================================================ Expected '\'' at end of char literal on line 4 of input051.c ================================================ FILE: 54_Reg_Spills/tests/err.input052.c ================================================ Unrecognised character:$ on 
line 5 of input052.c ================================================ FILE: 54_Reg_Spills/tests/err.input056.c ================================================ unknown struct/union type:var1 on line 2 of input056.c ================================================ FILE: 54_Reg_Spills/tests/err.input057.c ================================================ previously defined struct/union:fred on line 2 of input057.c ================================================ FILE: 54_Reg_Spills/tests/err.input059.c ================================================ Unknown variable or function:y on line 3 of input059.c ================================================ FILE: 54_Reg_Spills/tests/err.input060.c ================================================ Expression is not a struct/union on line 3 of input060.c ================================================ FILE: 54_Reg_Spills/tests/err.input061.c ================================================ Expression is not a pointer to a struct/union on line 3 of input061.c ================================================ FILE: 54_Reg_Spills/tests/err.input064.c ================================================ undeclared enum type::fred on line 1 of input064.c ================================================ FILE: 54_Reg_Spills/tests/err.input065.c ================================================ enum type redeclared::fred on line 2 of input065.c ================================================ FILE: 54_Reg_Spills/tests/err.input066.c ================================================ enum value redeclared::z on line 2 of input066.c ================================================ FILE: 54_Reg_Spills/tests/err.input068.c ================================================ redefinition of typedef:FOO on line 2 of input068.c ================================================ FILE: 54_Reg_Spills/tests/err.input069.c ================================================ unknown type:FLOO on line 2 of input069.c 
================================================ FILE: 54_Reg_Spills/tests/err.input072.c ================================================ no loop or switch to break out from on line 1 of input072.c ================================================ FILE: 54_Reg_Spills/tests/err.input073.c ================================================ no loop to continue to on line 1 of input073.c ================================================ FILE: 54_Reg_Spills/tests/err.input075.c ================================================ Unexpected token in switch:if on line 4 of input075.c ================================================ FILE: 54_Reg_Spills/tests/err.input076.c ================================================ No cases in switch on line 3 of input076.c ================================================ FILE: 54_Reg_Spills/tests/err.input077.c ================================================ case or default after existing default on line 6 of input077.c ================================================ FILE: 54_Reg_Spills/tests/err.input078.c ================================================ case or default after existing default on line 6 of input078.c ================================================ FILE: 54_Reg_Spills/tests/err.input079.c ================================================ Duplicate case value on line 6 of input079.c ================================================ FILE: 54_Reg_Spills/tests/err.input085.c ================================================ Bad type in parameter list on line 1 of input085.c ================================================ FILE: 54_Reg_Spills/tests/err.input086.c ================================================ Function definition not at global level on line 3 of input086.c ================================================ FILE: 54_Reg_Spills/tests/err.input087.c ================================================ Bad type in member list on line 4 of input087.c ================================================ FILE: 
54_Reg_Spills/tests/err.input092.c ================================================ Type mismatch: literal vs. variable on line 1 of input092.c ================================================ FILE: 54_Reg_Spills/tests/err.input093.c ================================================ Unknown variable or function:fred on line 1 of input093.c ================================================ FILE: 54_Reg_Spills/tests/err.input094.c ================================================ Type mismatch: literal vs. variable on line 1 of input094.c ================================================ FILE: 54_Reg_Spills/tests/err.input095.c ================================================ Variable can not be initialised:x on line 1 of input095.c ================================================ FILE: 54_Reg_Spills/tests/err.input096.c ================================================ Array size is illegal:0 on line 1 of input096.c ================================================ FILE: 54_Reg_Spills/tests/err.input097.c ================================================ For now, declaration of non-global arrays is not implemented on line 2 of input097.c ================================================ FILE: 54_Reg_Spills/tests/err.input098.c ================================================ Too many values in initialisation list on line 1 of input098.c ================================================ FILE: 54_Reg_Spills/tests/err.input102.c ================================================ Cannot cast to a struct, union or void type on line 3 of input102.c ================================================ FILE: 54_Reg_Spills/tests/err.input103.c ================================================ Cannot cast to a struct, union or void type on line 3 of input103.c ================================================ FILE: 54_Reg_Spills/tests/err.input104.c ================================================ Cannot cast to a struct, union or void type on line 2 of input104.c 
================================================ FILE: 54_Reg_Spills/tests/err.input105.c ================================================ Incompatible expression in assignment on line 4 of input105.c ================================================ FILE: 54_Reg_Spills/tests/err.input118.c ================================================ Compiler doesn't support static or extern local declarations on line 2 of input118.c ================================================ FILE: 54_Reg_Spills/tests/err.input124.c ================================================ Cannot ++ on rvalue on line 6 of input124.c ================================================ FILE: 54_Reg_Spills/tests/err.input126.c ================================================ Unknown variable or function:ptr on line 7 of input126.c ================================================ FILE: 54_Reg_Spills/tests/err.input129.c ================================================ Cannot ++ and/or -- more than once on line 6 of input129.c ================================================ FILE: 54_Reg_Spills/tests/input001.c ================================================ int printf(char *fmt); void main() { printf("%d\n", 12 * 3); printf("%d\n", 18 - 2 * 4); printf("%d\n", 1 + 2 + 9 - 5/2 + 3*5); } ================================================ FILE: 54_Reg_Spills/tests/input002.c ================================================ int printf(char *fmt); void main() { int fred; int jim; fred= 5; jim= 12; printf("%d\n", fred + jim); } ================================================ FILE: 54_Reg_Spills/tests/input003.c ================================================ int printf(char *fmt); void main() { int x; x= 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); } ================================================ FILE: 54_Reg_Spills/tests/input004.c ================================================ int printf(char *fmt); void main() { 
int x; x= 7 < 9; printf("%d\n", x); x= 7 <= 9; printf("%d\n", x); x= 7 != 9; printf("%d\n", x); x= 7 == 7; printf("%d\n", x); x= 7 >= 7; printf("%d\n", x); x= 7 <= 7; printf("%d\n", x); x= 9 > 7; printf("%d\n", x); x= 9 >= 7; printf("%d\n", x); x= 9 != 7; printf("%d\n", x); } ================================================ FILE: 54_Reg_Spills/tests/input005.c ================================================ int printf(char *fmt); void main() { int i; int j; i=6; j=12; if (i < j) { printf("%d\n", i); } else { printf("%d\n", j); } } ================================================ FILE: 54_Reg_Spills/tests/input006.c ================================================ int printf(char *fmt); void main() { int i; i=1; while (i <= 10) { printf("%d\n", i); i= i + 1; } } ================================================ FILE: 54_Reg_Spills/tests/input007.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 54_Reg_Spills/tests/input008.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 54_Reg_Spills/tests/input009.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { printf("%d\n", 2 * b - a); } } ================================================ FILE: 54_Reg_Spills/tests/input010.c ================================================ int printf(char *fmt); void main() { int i; char j; j= 20; printf("%d\n", j); i= 10; printf("%d\n", i); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 2; j= j + 1) { printf("%d\n", j); } } ================================================ FILE: 54_Reg_Spills/tests/input011.c 
================================================ int printf(char *fmt); int main() { int i; char j; long k; i= 10; printf("%d\n", i); j= 20; printf("%d\n", j); k= 30; printf("%d\n", k); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 4; j= j + 1) { printf("%d\n", j); } for (k= 1; k <= 5; k= k + 1) { printf("%d\n", k); } return(i); printf("%d\n", 12345); return(3); } ================================================ FILE: 54_Reg_Spills/tests/input012.c ================================================ int printf(char *fmt); int fred() { return(5); } void main() { int x; x= fred(2); printf("%d\n", x); } ================================================ FILE: 54_Reg_Spills/tests/input013.c ================================================ int printf(char *fmt); int fred() { return(56); } void main() { int dummy; int result; dummy= printf("%d\n", 23); result= fred(10); dummy= printf("%d\n", result); } ================================================ FILE: 54_Reg_Spills/tests/input014.c ================================================ int printf(char *fmt); int fred() { return(20); } int main() { int result; printf("%d\n", 10); result= fred(15); printf("%d\n", result); printf("%d\n", fred(15)+10); return(0); } ================================================ FILE: 54_Reg_Spills/tests/input015.c ================================================ int printf(char *fmt); int main() { char a; char *b; char c; int d; int *e; int f; a= 18; printf("%d\n", a); b= &a; c= *b; printf("%d\n", c); d= 12; printf("%d\n", d); e= &d; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 54_Reg_Spills/tests/input016.c ================================================ int printf(char *fmt); int c; int d; int *e; int f; int main() { c= 12; d=18; printf("%d\n", c); e= &c + 1; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 54_Reg_Spills/tests/input017.c 
================================================ int printf(char *fmt); int main() { char a; char *b; int d; int *e; b= &a; *b= 19; printf("%d\n", a); e= &d; *e= 12; printf("%d\n", d); return(0); } ================================================ FILE: 54_Reg_Spills/tests/input018.c ================================================ int printf(char *fmt); int main() { int a; int b; a= b= 34; printf("%d\n", a); printf("%d\n", b); return(0); } ================================================ FILE: 54_Reg_Spills/tests/input018a.c ================================================ int printf(char *fmt); int a; int *b; char c; char *d; int main() { b= &a; *b= 15; printf("%d\n", a); d= &c; *d= 16; printf("%d\n", c); return(0); } ================================================ FILE: 54_Reg_Spills/tests/input019.c ================================================ int printf(char *fmt); int a; int b; int c; int d; int e; int main() { a= 2; b= 4; c= 3; d= 2; e= (a+b) * (c+d); printf("%d\n", e); return(0); } ================================================ FILE: 54_Reg_Spills/tests/input020.c ================================================ int printf(char *fmt); int a; int b[25]; int main() { b[3]= 12; a= b[3]; printf("%d\n", a); return(0); } ================================================ FILE: 54_Reg_Spills/tests/input021.c ================================================ int printf(char *fmt); char c; char *str; int main() { c= '\n'; printf("%d\n", c); for (str= "Hello world\n"; *str != 0; str= str + 1) { printf("%c", *str); } return(0); } ================================================ FILE: 54_Reg_Spills/tests/input022.c ================================================ int printf(char *fmt); char a; char b; char c; int d; int e; int f; long g; long h; long i; int main() { b= 5; c= 7; a= b + c++; printf("%d\n", a); e= 5; f= 7; d= e + f++; printf("%d\n", d); h= 5; i= 7; g= h + i++; printf("%d\n", g); a= b-- + c; printf("%d\n", a); d= e-- + f; printf("%d\n", d); g= h-- + i; 
printf("%d\n", g); a= ++b + c; printf("%d\n", a); d= ++e + f; printf("%d\n", d); g= ++h + i; printf("%d\n", g); a= b * --c; printf("%d\n", a); d= e * --f; printf("%d\n", d); g= h * --i; printf("%d\n", g); return(0); } ================================================ FILE: 54_Reg_Spills/tests/input023.c ================================================ int printf(char *fmt); char *str; int x; int main() { x= -23; printf("%d\n", x); printf("%d\n", -10 * -10); x= 1; x= ~x; printf("%d\n", x); x= 2 > 5; printf("%d\n", x); x= !x; printf("%d\n", x); x= !x; printf("%d\n", x); x= 13; if (x) { printf("%d\n", 13); } x= 0; if (!x) { printf("%d\n", 14); } for (str= "Hello world\n"; *str; str++) { printf("%c", *str); } return(0); } ================================================ FILE: 54_Reg_Spills/tests/input024.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { a= 42; b= 19; printf("%d\n", a & b); printf("%d\n", a | b); printf("%d\n", a ^ b); printf("%d\n", 1 << 3); printf("%d\n", 63 >> 3); return(0); } ================================================ FILE: 54_Reg_Spills/tests/input025.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { char z; int y; int x; x= 10; y= 20; z= 30; printf("%d\n", x); printf("%d\n", y); printf("%d\n", z); a= 5; b= 15; c= 25; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); return(0); } ================================================ FILE: 54_Reg_Spills/tests/input026.c ================================================ int printf(char *fmt); int main(int a, char b, long c, int d, int e, int f, int g, int h) { int i; int j; int k; a= 13; printf("%d\n", a); b= 23; printf("%d\n", b); c= 34; printf("%d\n", c); d= 44; printf("%d\n", d); e= 54; printf("%d\n", e); f= 64; printf("%d\n", f); g= 74; printf("%d\n", g); h= 84; printf("%d\n", h); i= 94; printf("%d\n", i); j= 95; printf("%d\n", j); k= 96; printf("%d\n", k); return(0); } 
================================================ FILE: 54_Reg_Spills/tests/input027.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int param5(int a, int b, int c, int d, int e) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param2(int a, int b) { int c; int d; int e; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param0() { int a; int b; int c; int d; int e; a= 1; b= 2; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int main() { param8(1,2,3,4,5,6,7,8); param5(1,2,3,4,5); param2(1,2); param0(); return(0); } ================================================ FILE: 54_Reg_Spills/tests/input028.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 54_Reg_Spills/tests/input029.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h); int fred(int a, int b, int c); int main(); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", 
f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 54_Reg_Spills/tests/input030.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input030.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 54_Reg_Spills/tests/input031.c ================================================ int printf(char *fmt); int main() { int x; x= 2 + + 3 - * / ; } ================================================ FILE: 54_Reg_Spills/tests/input032.c ================================================ int printf(char *fmt); int main() { pizza cow llama sausage; } ================================================ FILE: 54_Reg_Spills/tests/input033.c ================================================ int printf(char *fmt); int main() { char *z; return(z); } ================================================ FILE: 54_Reg_Spills/tests/input034.c ================================================ int printf(char *fmt); int main() { int a[12]; return(0); } ================================================ FILE: 54_Reg_Spills/tests/input035.c ================================================ int printf(char *fmt); int fred(int a, int b) { int a; return(a); } ================================================ FILE: 54_Reg_Spills/tests/input036.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c); ================================================ FILE: 
54_Reg_Spills/tests/input037.c ================================================ int printf(char *fmt); int fred(int a, char b +, int z); ================================================ FILE: 54_Reg_Spills/tests/input038.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c, int g); ================================================ FILE: 54_Reg_Spills/tests/input039.c ================================================ int printf(char *fmt); int main() { int a; } ================================================ FILE: 54_Reg_Spills/tests/input040.c ================================================ int printf(char *fmt); int main() { int a; a= 5; } ================================================ FILE: 54_Reg_Spills/tests/input041.c ================================================ int printf(char *fmt); void fred() { return(5); } ================================================ FILE: 54_Reg_Spills/tests/input042.c ================================================ int printf(char *fmt); int main() { fred(5); } ================================================ FILE: 54_Reg_Spills/tests/input043.c ================================================ int printf(char *fmt); int main() { int a; a= b[4]; } ================================================ FILE: 54_Reg_Spills/tests/input044.c ================================================ int printf(char *fmt); int main() { int a; a= z; } ================================================ FILE: 54_Reg_Spills/tests/input045.c ================================================ int printf(char *fmt); int main() { int a; a= &5; } ================================================ FILE: 54_Reg_Spills/tests/input046.c ================================================ int printf(char *fmt); int main() { int a; a= *5; } ================================================ FILE: 54_Reg_Spills/tests/input047.c ================================================ int printf(char 
*fmt); int main() { int a; a= ++5; } ================================================ FILE: 54_Reg_Spills/tests/input048.c ================================================ int printf(char *fmt); int main() { int a; a= --5; } ================================================ FILE: 54_Reg_Spills/tests/input049.c ================================================ int printf(char *fmt); int main() { int x; char y; y= x; } ================================================ FILE: 54_Reg_Spills/tests/input050.c ================================================ int printf(char *fmt); int main() { char *a; char *b; a= a + b; } ================================================ FILE: 54_Reg_Spills/tests/input051.c ================================================ int printf(char *fmt); int main() { char a; a= 'fred'; } ================================================ FILE: 54_Reg_Spills/tests/input052.c ================================================ int printf(char *fmt); int main() { int a; a= $5.00; } ================================================ FILE: 54_Reg_Spills/tests/input053.c ================================================ int printf(char *fmt); int main() { printf("Hello world, %d\n", 23); return(0); } ================================================ FILE: 54_Reg_Spills/tests/input054.c ================================================ int printf(char *fmt); int main() { int i; for (i=0; i < 20; i++) { printf("Hello world, %d\n", i); } return(0); } ================================================ FILE: 54_Reg_Spills/tests/input055.c ================================================ int printf(char *fmt); int main(int argc, char **argv) { int i; char *argument; printf("Hello world\n"); for (i=0; i < argc; i++) { argument= *argv; argv= argv + 1; printf("Argument %d is %s\n", i, argument); } return(0); } ================================================ FILE: 54_Reg_Spills/tests/input056.c ================================================ struct foo { int x; }; struct mary 
var1; ================================================ FILE: 54_Reg_Spills/tests/input057.c ================================================ struct fred { int x; } ; struct fred { char y; } ; ================================================ FILE: 54_Reg_Spills/tests/input058.c ================================================ int printf(char *fmt); struct fred { int x; char y; long z; }; struct fred var2; struct fred *varptr; int main() { long result; var2.x= 12; printf("%d\n", var2.x); var2.y= 'c'; printf("%d\n", var2.y); var2.z= 4005; printf("%d\n", var2.z); result= var2.x + var2.y + var2.z; printf("%d\n", result); varptr= &var2; result= varptr->x + varptr->y + varptr->z; printf("%d\n", result); return(0); } ================================================ FILE: 54_Reg_Spills/tests/input059.c ================================================ int main() { int x; x= y.foo; } ================================================ FILE: 54_Reg_Spills/tests/input060.c ================================================ int main() { int x; x= x.foo; } ================================================ FILE: 54_Reg_Spills/tests/input061.c ================================================ int main() { int x; x= x->foo; } ================================================ FILE: 54_Reg_Spills/tests/input062.c ================================================ int printf(char *fmt); union fred { char w; int x; int y; long z; }; union fred var1; union fred *varptr; int main() { var1.x= 65; printf("%d\n", var1.x); var1.x= 66; printf("%d\n", var1.x); printf("%d\n", var1.y); printf("The next two depend on the endian of the platform\n"); printf("%d\n", var1.w); printf("%d\n", var1.z); varptr= &var1; varptr->x= 67; printf("%d\n", varptr->x); printf("%d\n", varptr->y); return(0); } ================================================ FILE: 54_Reg_Spills/tests/input063.c ================================================ int printf(char *fmt); enum fred { apple=1, banana, carrot, pear=10, peach, mango, 
papaya }; enum jane { aple=1, bnana, crrot, par=10, pech, mago, paaya }; enum fred var1; enum jane var2; enum fred var3; int main() { var1= carrot + pear + mango; printf("%d\n", var1); return(0); } ================================================ FILE: 54_Reg_Spills/tests/input064.c ================================================ enum fred var3; ================================================ FILE: 54_Reg_Spills/tests/input065.c ================================================ enum fred { x, y, z }; enum fred { a, b }; ================================================ FILE: 54_Reg_Spills/tests/input066.c ================================================ enum fred { x, y, z }; enum mary { a, b, z }; ================================================ FILE: 54_Reg_Spills/tests/input067.c ================================================ int printf(char *fmt); typedef int FOO; FOO var1; struct bar { int x; int y} ; typedef struct bar BAR; BAR var2; int main() { var1= 5; printf("%d\n", var1); var2.x= 7; var2.y= 10; printf("%d\n", var2.x + var2.y); return(0); } ================================================ FILE: 54_Reg_Spills/tests/input068.c ================================================ typedef int FOO; typedef char FOO; ================================================ FILE: 54_Reg_Spills/tests/input069.c ================================================ typedef int FOO; FLOO y; ================================================ FILE: 54_Reg_Spills/tests/input070.c ================================================ #include typedef int FOO; int main() { FOO x; x= 56; printf("%d\n", x); return(0); } ================================================ FILE: 54_Reg_Spills/tests/input071.c ================================================ #include int main() { int x; x = 0; while (x < 100) { if (x == 5) { x = x + 2; continue; } printf("%d\n", x); if (x == 14) { break; } x = x + 1; } printf("Done\n"); return (0); } ================================================ FILE: 
54_Reg_Spills/tests/input072.c ================================================ int main() { break; } ================================================ FILE: 54_Reg_Spills/tests/input073.c ================================================ int main() { continue; } ================================================ FILE: 54_Reg_Spills/tests/input074.c ================================================ #include int main() { int x; int y; y= 0; for (x=0; x < 5; x++) { switch(x) { case 1: { y= 5; break; } case 2: { y= 7; break; } case 3: { y= 9; } default: { y= 100; } } printf("%d\n", y); } return(0); } ================================================ FILE: 54_Reg_Spills/tests/input075.c ================================================ int main() { int x; switch(x) { if (x<5); } } ================================================ FILE: 54_Reg_Spills/tests/input076.c ================================================ int main() { int x; switch(x) { } } ================================================ FILE: 54_Reg_Spills/tests/input077.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } case 4: { x= 2; } } } ================================================ FILE: 54_Reg_Spills/tests/input078.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } default: { x= 2; } } } ================================================ FILE: 54_Reg_Spills/tests/input079.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } case 2: { x= 2; } case 1: { x= 2; } default: { x= 2; } } } ================================================ FILE: 54_Reg_Spills/tests/input080.c ================================================ #include int x; int y; int main() { for (x=0, y=1; x < 6; x++, y=y+2) { printf("%d %d\n", x, y); } return(0); } ================================================ FILE: 54_Reg_Spills/tests/input081.c 
================================================ #include int x; int y; int main() { x= 0; y=1; for (;x<5;) { printf("%d %d\n", x, y); x=x+1; y=y+2; } return(0); } ================================================ FILE: 54_Reg_Spills/tests/input082.c ================================================ #include int main() { int x; x= 10; // Dangling else test if (x > 5) if (x > 15) printf("x > 15\n"); else printf("15 >= x > 5\n"); else printf("5 >= x\n"); return(0); } ================================================ FILE: 54_Reg_Spills/tests/input083.c ================================================ #include int main() { int x; // Dangling else test. // We should not print anything for x<= 5 for (x=0; x < 12; x++) if (x > 5) if (x > 10) printf("10 < %2d\n", x); else printf(" 5 < %2d <= 10\n", x); return(0); } ================================================ FILE: 54_Reg_Spills/tests/input084.c ================================================ #include int main() { int x, y; x=2; y=3; printf("%d %d\n", x, y); char a, *b; a= 'f'; b= &a; printf("%c %c\n", a, *b); return(0); } ================================================ FILE: 54_Reg_Spills/tests/input085.c ================================================ int main(struct foo { int x; }; , int a) { return(0); } ================================================ FILE: 54_Reg_Spills/tests/input086.c ================================================ int main() { int fred() { return(5); } int x; x=2; return(x); } ================================================ FILE: 54_Reg_Spills/tests/input087.c ================================================ struct foo { int x; int y; struct blah { int g; }; }; ================================================ FILE: 54_Reg_Spills/tests/input088.c ================================================ #include struct foo { int x; int y; } fred, mary; struct foo james; int main() { fred.x= 5; mary.y= 6; printf("%d %d\n", fred.x, mary.y); return(0); } ================================================ 
FILE: 54_Reg_Spills/tests/input089.c ================================================ #include int x= 23; char y= 'H'; char *z= "Hello world"; int main() { printf("%d %c %s\n", x, y, z); return(0); } ================================================ FILE: 54_Reg_Spills/tests/input090.c ================================================ #include int a = 23, b = 100; char y = 'H', *z = "Hello world"; int main() { printf("%d %d %c %s\n", a, b, y, z); return (0); } ================================================ FILE: 54_Reg_Spills/tests/input091.c ================================================ #include int fred[] = { 1, 2, 3, 4, 5 }; int jim[10] = { 1, 2, 3, 4, 5 }; int main() { int i; for (i=0; i < 5; i++) printf("%d\n", fred[i]); for (i=0; i < 10; i++) printf("%d\n", jim[i]); return(0); } ================================================ FILE: 54_Reg_Spills/tests/input092.c ================================================ char x= 3000; ================================================ FILE: 54_Reg_Spills/tests/input093.c ================================================ char x= fred; ================================================ FILE: 54_Reg_Spills/tests/input094.c ================================================ char *s= 54; ================================================ FILE: 54_Reg_Spills/tests/input095.c ================================================ int fred(int x=2) { return(x); } ================================================ FILE: 54_Reg_Spills/tests/input096.c ================================================ int fred[0]; ================================================ FILE: 54_Reg_Spills/tests/input097.c ================================================ int main() { int x[45]; return(0); } ================================================ FILE: 54_Reg_Spills/tests/input098.c ================================================ int fred[3]= { 1, 2, 3, 4, 5 }; ================================================ FILE: 54_Reg_Spills/tests/input099.c 
================================================ #include // List of token strings, for debugging purposes. // As yet, we can't store a NULL into the list char *Tstring[] = { "EOF", "=", "||", "&&", "|", "^", "&", "==", "!=", ",", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", "default", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":", "" }; int main() { int i; char *str; i=0; while (1) { str= Tstring[i]; if (*str == 0) break; printf("%s\n", str); i++; } return(0); } ================================================ FILE: 54_Reg_Spills/tests/input100.c ================================================ #include int main() { int x= 3, y=14; int z= 2 * x + y; char *str= "Hello world"; printf("%s %d %d\n", str, x+y, z); return(0); } ================================================ FILE: 54_Reg_Spills/tests/input101.c ================================================ #include int main() { int x= 65535; char y; char *str; y= (char )x; printf("0x%x\n", y); str= (char *)0; printf("0x%lx\n", (long)str); return(0); } ================================================ FILE: 54_Reg_Spills/tests/input102.c ================================================ int main() { struct foo { int p; }; int y= (struct foo) x; } ================================================ FILE: 54_Reg_Spills/tests/input103.c ================================================ int main() { union foo { int p; }; int y= (union foo) x; } ================================================ FILE: 54_Reg_Spills/tests/input104.c ================================================ int main() { int y= (void) x; } ================================================ FILE: 54_Reg_Spills/tests/input105.c ================================================ int main() { int x; char *y; y= (char) x; } 
================================================ FILE: 54_Reg_Spills/tests/input106.c ================================================ #include char *y= (char *)0; int main() { printf("0x%lx\n", (long)y); return(0); } ================================================ FILE: 54_Reg_Spills/tests/input107.c ================================================ #include char *y[] = { "fish", "cow", NULL }; char *z= NULL; int main() { int i; char *ptr; for (i=0; i < 3; i++) { ptr= y[i]; if (ptr != (char *)0) printf("%s\n", y[i]); else printf("NULL\n"); } return(0); } ================================================ FILE: 54_Reg_Spills/tests/input108.c ================================================ int main() { char *str= (void *)0; return(0); } ================================================ FILE: 54_Reg_Spills/tests/input109.c ================================================ #include int main() { int x= 17 - 1; printf("%d\n", x); return(0); } ================================================ FILE: 54_Reg_Spills/tests/input110.c ================================================ #include int x; int y; int main() { x= 3; y= 15; y += x; printf("%d\n", y); x= 3; y= 15; y -= x; printf("%d\n", y); x= 3; y= 15; y *= x; printf("%d\n", y); x= 3; y= 15; y /= x; printf("%d\n", y); return(0); } ================================================ FILE: 54_Reg_Spills/tests/input111.c ================================================ #include int main() { int x= 2000 + 3 + 4 * 5 + 6; printf("%d\n", x); return(0); } ================================================ FILE: 54_Reg_Spills/tests/input112.c ================================================ #include char* y = NULL; int x= 10 + 6; int fred [ 2 + 3 ]; int main() { fred[2]= x; printf("%d\n", fred[2]); return(0); } ================================================ FILE: 54_Reg_Spills/tests/input113.c ================================================ #include void fred(void); void fred(void) { printf("fred says hello\n"); } int main(void) { 
fred(); return(0); } ================================================ FILE: 54_Reg_Spills/tests/input114.c ================================================ #include int main() { int x= 0x4a; printf("%c\n", x); return(0); } ================================================ FILE: 54_Reg_Spills/tests/input115.c ================================================ #include struct foo { int x; char y; long z; }; typedef struct foo blah; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol struct symtable *ctype; // If struct/union, ptr to that type int stype; // Structural type for the symbol int class; // Storage class for the symbol int size; // Total size in bytes of this symbol int nelems; // Functions: # params. Arrays: # elements #define st_endlabel st_posn // For functions, the end label int st_posn; // For locals, the negative offset // from the stack base pointer int *initlist; // List of initial values struct symtable *next; // Next symbol in one list struct symtable *member; // First member of a function, struct, }; // union or enum // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates int rvalue; // True if the node is an rvalue struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; struct symtable *sym; // For many AST nodes, the pointer to // the symbol in the symbol table #define a_intvalue a_size // For A_INTLIT, the integer value int a_size; // For A_SCALE, the size to scale by }; int main() { printf("%ld\n", sizeof(char)); printf("%ld\n", sizeof(int)); printf("%ld\n", sizeof(long)); printf("%ld\n", sizeof(char *)); printf("%ld\n", sizeof(blah)); printf("%ld\n", sizeof(struct symtable)); printf("%ld\n", sizeof(struct ASTnode)); return(0); } ================================================ FILE: 54_Reg_Spills/tests/input116.c 
================================================ #include static int counter=0; static int fred(void) { return(counter++); } int main(void) { int i; for (i=0; i < 5; i++) printf("%d\n", fred()); return(0); } ================================================ FILE: 54_Reg_Spills/tests/input117.c ================================================ #include static char *fred(void) { return("Hello"); } int main(void) { printf("%s\n", fred()); return(0); } ================================================ FILE: 54_Reg_Spills/tests/input118.c ================================================ int main(void) { static int x; } ================================================ FILE: 54_Reg_Spills/tests/input119.c ================================================ #include int x; int y= 3; int main() { x= y != 3 ? 6 : 8; printf("%d\n", x); x= (y == 3) ? 6 : 8; printf("%d\n", x); return(0); } ================================================ FILE: 54_Reg_Spills/tests/input120.c ================================================ #include int x; int y= 3; int main() { for (y= 0; y < 10; y++) { x= y > 4 ? y + 2 : y + 9; printf("%d\n", x); } return(0); } ================================================ FILE: 54_Reg_Spills/tests/input121.c ================================================ #include int x; int y= 3; int main() { for (y= 0; y < 10; y++) { x= (y < 4) ? y + 2 : (y > 7) ? 
1000 : y + 9; printf("%d\n", x); } return(0); } ================================================ FILE: 54_Reg_Spills/tests/input122.c ================================================ #include int x, y, z1, z2; int main() { for (x= 0; x <= 1; x++) { for (y= 0; y <= 1; y++) { z1= x || y; z2= x && y; printf("x %d, y %d, x || y %d, x && y %d\n", x, y, z1, z2); } } //z= x || y; return(0); } ================================================ FILE: 54_Reg_Spills/tests/input123.c ================================================ #include int main() { int x; for (x=0; x < 20; x++) switch(x) { case 2: case 3: case 5: case 7: case 11: printf("%2d infant prime\n", x); break; case 13: case 17: case 19: printf("%2d teen prime\n", x); break; case 0: case 1: case 4: case 6: case 8: case 9: case 10: case 12: printf("%2d infant composite\n", x); break; default: printf("%2d teen composite\n", x); break; } return(0); } ================================================ FILE: 54_Reg_Spills/tests/input124.c ================================================ #include int ary[5]; int main() { ary++; return(0); } ================================================ FILE: 54_Reg_Spills/tests/input125.c ================================================ #include int ary[5]; int *ptr; int x; int main() { ary[3]= 2008; ptr= ary; // Load ary's address into ptr x= ary[3]; printf("%d\n", x); x= ptr[3]; printf("%d\n", x); // Treat ptr as an array return(0); } ================================================ FILE: 54_Reg_Spills/tests/input126.c ================================================ #include int ary[5]; int main() { ary[3]= 2008; ptr= &ary; return(0); } ================================================ FILE: 54_Reg_Spills/tests/input127.c ================================================ #include int ary[5]; void fred(int *ptr) { // Receive a pointer printf("%d\n", ptr[3]); } int main() { ary[3]= 2008; printf("%d\n", ary[3]); fred(ary); // Pass ary as a pointer return(0); } 
================================================ FILE: 54_Reg_Spills/tests/input128.c ================================================ #include struct foo { int val; struct foo *next; }; struct foo head, mid, tail; int main() { struct foo *ptr; tail.val= 20; tail.next= NULL; mid.val= 15; mid.next= &tail; head.val= 10; head.next= &mid; ptr= &head; printf("%d %d\n", head.val, ptr->val); printf("%d %d\n", mid.val, ptr->next->val); printf("%d %d\n", tail.val, ptr->next->next->val); return(0); } ================================================ FILE: 54_Reg_Spills/tests/input129.c ================================================ #include int x= 6; int main() { printf("%d\n", x++ ++); return(0); } ================================================ FILE: 54_Reg_Spills/tests/input130.c ================================================ #include char *x= "foo"; int main() { printf("Hello " "world" "\n"); return(0); } ================================================ FILE: 54_Reg_Spills/tests/input131.c ================================================ #include void donothing() { } int main() { int x=0; printf("Doing nothing... 
"); donothing(); printf("nothing done\n"); while (++x < 100) ; printf("x is now %d\n", x); return(0); } ================================================ FILE: 54_Reg_Spills/tests/input132.c ================================================ extern int fred; int fred; int mary; extern int mary; int main() { return(0); } ================================================ FILE: 54_Reg_Spills/tests/input133.c ================================================ #include extern int fred[]; int fred[23]; char mary[100]; extern char mary[]; void main() { printf("OK\n"); } ================================================ FILE: 54_Reg_Spills/tests/input134.c ================================================ #include char y = 'a'; char *x; int main() { x= &y; if (x && y == 'a') printf("1st match\n"); x= NULL; if (x && y == 'a') printf("2nd match\n"); x= &y; y='b'; if (x && y == 'a') printf("3rd match\n"); return(0); } ================================================ FILE: 54_Reg_Spills/tests/input135.c ================================================ #include void fred() { int x= 5; printf("testing x\n"); if (x > 4) return; printf("x below 5\n"); } int main() { fred(); return(0); } ================================================ FILE: 54_Reg_Spills/tests/input136.c ================================================ #include int add(int x, int y) { return(x+y); } int main() { int result; result= 3 * add(2,3) - 5 * add(4,6); printf("%d\n", result); return(0); } ================================================ FILE: 54_Reg_Spills/tests/input137.c ================================================ #include int a=1, b=2, c=3, d=4, e=5, f=6, g=7, h=8; int main() { int x; x= ((((((a + b) + c) + d) + e) + f) + g) + h; x= a + (b + (c + (d + (e + (f + (g + h)))))); printf("x is %d\n", x); return(0); } ================================================ FILE: 54_Reg_Spills/tests/input138.c ================================================ #include int x, y, z; int a=1; int *aptr; int main() { // See 
if generic AND works for (x=0; x <= 1; x++) for (y=0; y <= 1; y++) { z= x && y; printf("%d %d | %d\n", x, y, z); } // See if generic OR works for (x=0; x <= 1; x++) for (y=0; y <= 1; y++) { z= x || y; printf("%d %d | %d\n", x, y, z); } // Now some lazy evaluation aptr= NULL; if (aptr && *aptr == 1) printf("aptr points at 1\n"); else printf("aptr is NULL or doesn't point at 1\n"); aptr= &a; if (aptr && *aptr == 1) printf("aptr points at 1\n"); else printf("aptr is NULL or doesn't point at 1\n"); return(0); } ================================================ FILE: 54_Reg_Spills/tests/input139.c ================================================ #include int same(int x) { return(x); } int main() { int a= 3; if (same(a) && same(a) >= same(a)) printf("same apparently\n"); return(0); } ================================================ FILE: 54_Reg_Spills/tests/mktests ================================================ #!/bin/sh # Make the output files for each test # Build our compiler if needed if [ ! -f ../cwj ] then (cd ..; make install) fi for i in input*c do if [ ! -f "out.$i" -a ! -f "err.$i" ] then ../cwj -o out $i 2> "err.$i" # If the err file is empty if [ ! 
-s "err.$i" ] then rm -f "err.$i" cc -o out $i ./out > "out.$i" fi fi rm -f out out.s done ================================================ FILE: 54_Reg_Spills/tests/out.input001.c ================================================ 36 10 25 ================================================ FILE: 54_Reg_Spills/tests/out.input002.c ================================================ 17 ================================================ FILE: 54_Reg_Spills/tests/out.input003.c ================================================ 1 2 3 4 5 ================================================ FILE: 54_Reg_Spills/tests/out.input004.c ================================================ 1 1 1 1 1 1 1 1 1 ================================================ FILE: 54_Reg_Spills/tests/out.input005.c ================================================ 6 ================================================ FILE: 54_Reg_Spills/tests/out.input006.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 54_Reg_Spills/tests/out.input007.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 54_Reg_Spills/tests/out.input008.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 54_Reg_Spills/tests/out.input009.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 54_Reg_Spills/tests/out.input010.c ================================================ 20 10 1 2 3 4 5 253 254 255 0 1 ================================================ FILE: 54_Reg_Spills/tests/out.input011.c ================================================ 10 20 30 1 2 3 4 5 253 254 255 0 1 2 3 1 2 3 4 5 ================================================ FILE: 54_Reg_Spills/tests/out.input012.c ================================================ 5 
================================================ FILE: 54_Reg_Spills/tests/out.input013.c ================================================ 23 56 ================================================ FILE: 54_Reg_Spills/tests/out.input014.c ================================================ 10 20 30 ================================================ FILE: 54_Reg_Spills/tests/out.input015.c ================================================ 18 18 12 12 ================================================ FILE: 54_Reg_Spills/tests/out.input016.c ================================================ 12 18 ================================================ FILE: 54_Reg_Spills/tests/out.input017.c ================================================ 19 12 ================================================ FILE: 54_Reg_Spills/tests/out.input018.c ================================================ 34 34 ================================================ FILE: 54_Reg_Spills/tests/out.input018a.c ================================================ 15 16 ================================================ FILE: 54_Reg_Spills/tests/out.input019.c ================================================ 30 ================================================ FILE: 54_Reg_Spills/tests/out.input020.c ================================================ 12 ================================================ FILE: 54_Reg_Spills/tests/out.input021.c ================================================ 10 Hello world ================================================ FILE: 54_Reg_Spills/tests/out.input022.c ================================================ 12 12 12 13 13 13 13 13 13 35 35 35 ================================================ FILE: 54_Reg_Spills/tests/out.input023.c ================================================ -23 100 -2 0 1 0 13 14 Hello world ================================================ FILE: 54_Reg_Spills/tests/out.input024.c ================================================ 2 59 57 8 7 
================================================ FILE: 54_Reg_Spills/tests/out.input025.c ================================================ 10 20 30 5 15 25 ================================================ FILE: 54_Reg_Spills/tests/out.input026.c ================================================ 13 23 34 44 54 64 74 84 94 95 96 ================================================ FILE: 54_Reg_Spills/tests/out.input027.c ================================================ 1 2 3 4 5 6 7 8 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 ================================================ FILE: 54_Reg_Spills/tests/out.input028.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 54_Reg_Spills/tests/out.input029.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 54_Reg_Spills/tests/out.input030.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input030.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 54_Reg_Spills/tests/out.input053.c ================================================ Hello world, 23 ================================================ FILE: 54_Reg_Spills/tests/out.input054.c ================================================ Hello world, 0 Hello world, 1 Hello world, 2 Hello world, 3 Hello world, 4 Hello world, 5 Hello world, 6 Hello world, 7 Hello world, 8 Hello world, 9 Hello world, 10 Hello world, 11 Hello world, 12 Hello world, 13 Hello world, 14 Hello world, 15 Hello world, 16 Hello world, 17 Hello world, 18 Hello world, 19 ================================================ FILE: 
54_Reg_Spills/tests/out.input055.c ================================================ Hello world Argument 0 is ./out ================================================ FILE: 54_Reg_Spills/tests/out.input058.c ================================================ 12 99 4005 4116 4116 ================================================ FILE: 54_Reg_Spills/tests/out.input062.c ================================================ 65 66 66 The next two depend on the endian of the platform 66 66 67 67 ================================================ FILE: 54_Reg_Spills/tests/out.input063.c ================================================ 25 ================================================ FILE: 54_Reg_Spills/tests/out.input067.c ================================================ 5 17 ================================================ FILE: 54_Reg_Spills/tests/out.input070.c ================================================ 56 ================================================ FILE: 54_Reg_Spills/tests/out.input071.c ================================================ 0 1 2 3 4 7 8 9 10 11 12 13 14 Done ================================================ FILE: 54_Reg_Spills/tests/out.input074.c ================================================ 100 5 7 100 100 ================================================ FILE: 54_Reg_Spills/tests/out.input080.c ================================================ 0 1 1 3 2 5 3 7 4 9 5 11 ================================================ FILE: 54_Reg_Spills/tests/out.input081.c ================================================ 0 1 1 3 2 5 3 7 4 9 ================================================ FILE: 54_Reg_Spills/tests/out.input082.c ================================================ 15 >= x > 5 ================================================ FILE: 54_Reg_Spills/tests/out.input083.c ================================================ 5 < 6 <= 10 5 < 7 <= 10 5 < 8 <= 10 5 < 9 <= 10 5 < 10 <= 10 10 < 11 ================================================ FILE: 
54_Reg_Spills/tests/out.input084.c ================================================ 2 3 f f ================================================ FILE: 54_Reg_Spills/tests/out.input088.c ================================================ 5 6 ================================================ FILE: 54_Reg_Spills/tests/out.input089.c ================================================ 23 H Hello world ================================================ FILE: 54_Reg_Spills/tests/out.input090.c ================================================ 23 100 H Hello world ================================================ FILE: 54_Reg_Spills/tests/out.input091.c ================================================ 1 2 3 4 5 1 2 3 4 5 0 0 0 0 0 ================================================ FILE: 54_Reg_Spills/tests/out.input099.c ================================================ EOF = || && | ^ & == != , > <= >= << >> + - * / ++ -- ~ ! void char int long if else while for return struct union enum typedef extern break continue switch case default intlit strlit ; identifier { } ( ) [ ] , . 
-> : ================================================ FILE: 54_Reg_Spills/tests/out.input100.c ================================================ Hello world 17 20 ================================================ FILE: 54_Reg_Spills/tests/out.input101.c ================================================ 0xff 0x0 ================================================ FILE: 54_Reg_Spills/tests/out.input106.c ================================================ 0x0 ================================================ FILE: 54_Reg_Spills/tests/out.input107.c ================================================ fish cow NULL ================================================ FILE: 54_Reg_Spills/tests/out.input108.c ================================================ ================================================ FILE: 54_Reg_Spills/tests/out.input109.c ================================================ 16 ================================================ FILE: 54_Reg_Spills/tests/out.input110.c ================================================ 18 12 45 5 ================================================ FILE: 54_Reg_Spills/tests/out.input111.c ================================================ 2029 ================================================ FILE: 54_Reg_Spills/tests/out.input112.c ================================================ 16 ================================================ FILE: 54_Reg_Spills/tests/out.input113.c ================================================ fred says hello ================================================ FILE: 54_Reg_Spills/tests/out.input114.c ================================================ J ================================================ FILE: 54_Reg_Spills/tests/out.input115.c ================================================ 1 4 8 8 13 64 48 ================================================ FILE: 54_Reg_Spills/tests/out.input116.c ================================================ 0 1 2 3 4 ================================================ FILE: 
54_Reg_Spills/tests/out.input117.c ================================================ Hello ================================================ FILE: 54_Reg_Spills/tests/out.input119.c ================================================ 8 6 ================================================ FILE: 54_Reg_Spills/tests/out.input120.c ================================================ 9 10 11 12 13 7 8 9 10 11 ================================================ FILE: 54_Reg_Spills/tests/out.input121.c ================================================ 2 3 4 5 13 14 15 16 1000 1000 ================================================ FILE: 54_Reg_Spills/tests/out.input122.c ================================================ x 0, y 0, x || y 0, x && y 0 x 0, y 1, x || y 1, x && y 0 x 1, y 0, x || y 1, x && y 0 x 1, y 1, x || y 1, x && y 1 ================================================ FILE: 54_Reg_Spills/tests/out.input123.c ================================================ 0 infant composite 1 infant composite 2 infant prime 3 infant prime 4 infant composite 5 infant prime 6 infant composite 7 infant prime 8 infant composite 9 infant composite 10 infant composite 11 infant prime 12 infant composite 13 teen prime 14 teen composite 15 teen composite 16 teen composite 17 teen prime 18 teen composite 19 teen prime ================================================ FILE: 54_Reg_Spills/tests/out.input125.c ================================================ 2008 2008 ================================================ FILE: 54_Reg_Spills/tests/out.input127.c ================================================ 2008 2008 ================================================ FILE: 54_Reg_Spills/tests/out.input128.c ================================================ 10 10 15 15 20 20 ================================================ FILE: 54_Reg_Spills/tests/out.input130.c ================================================ Hello world ================================================ FILE: 
54_Reg_Spills/tests/out.input131.c ================================================ Doing nothing... nothing done x is now 100 ================================================ FILE: 54_Reg_Spills/tests/out.input132.c ================================================ ================================================ FILE: 54_Reg_Spills/tests/out.input133.c ================================================ OK ================================================ FILE: 54_Reg_Spills/tests/out.input134.c ================================================ 1st match ================================================ FILE: 54_Reg_Spills/tests/out.input135.c ================================================ testing x ================================================ FILE: 54_Reg_Spills/tests/out.input136.c ================================================ -35 ================================================ FILE: 54_Reg_Spills/tests/out.input137.c ================================================ x is 36 ================================================ FILE: 54_Reg_Spills/tests/out.input138.c ================================================ 0 0 | 0 0 1 | 0 1 0 | 0 1 1 | 1 0 0 | 0 0 1 | 1 1 0 | 1 1 1 | 1 aptr is NULL or doesn't point at 1 aptr points at 1 ================================================ FILE: 54_Reg_Spills/tests/out.input139.c ================================================ same apparently ================================================ FILE: 54_Reg_Spills/tests/runtests ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! -f ../cwj ] then (cd ..; make install) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" 
# Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../cwj -o out $i ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../cwj $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.s "trial.$i" done ================================================ FILE: 54_Reg_Spills/tests/runtestsn ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! -f ../compn ] then (cd ..; make install) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" # Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../compn -o out $i ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. 
Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../compn $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.o out.s "trial.$i" done ================================================ FILE: 54_Reg_Spills/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct symtable *ctype, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->type = type; n->ctype = ctype; n->left = left; n->mid = mid; n->right = right; n->sym = sym; n->a_intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, struct symtable *ctype, struct symtable *sym, int intvalue) { return (mkastnode(op, type, ctype, NULL, NULL, NULL, sym, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, int type, struct symtable *ctype, struct ASTnode *left, struct symtable *sym, int intvalue) { return (mkastnode(op, type, ctype, left, NULL, NULL, sym, intvalue)); } // Generate and return a new label number // just for AST dumping purposes static int dumpid = 1; static int gendumplabel(void) { return (dumpid++); } // Given an AST tree, print it out and follow the // traversal of the tree that genAST() follows void dumpAST(struct ASTnode *n, int label, int level) { int Lfalse, Lstart, Lend; int i; switch (n->op) { case A_IF: Lfalse = gendumplabel(); for (i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_IF"); if (n->right) { Lend = 
gendumplabel(); fprintf(stdout, ", end L%d", Lend); } fprintf(stdout, "\n"); dumpAST(n->left, Lfalse, level + 2); dumpAST(n->mid, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); return; case A_WHILE: Lstart = gendumplabel(); for (i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_WHILE, start L%d\n", Lstart); Lend = gendumplabel(); dumpAST(n->left, Lend, level + 2); dumpAST(n->right, NOLABEL, level + 2); return; } // Reset level to -2 for A_GLUE if (n->op == A_GLUE) level = -2; // General AST node handling if (n->left) dumpAST(n->left, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); for (i = 0; i < level; i++) fprintf(stdout, " "); switch (n->op) { case A_GLUE: fprintf(stdout, "\n\n"); return; case A_FUNCTION: fprintf(stdout, "A_FUNCTION %s\n", n->sym->name); return; case A_ADD: fprintf(stdout, "A_ADD\n"); return; case A_SUBTRACT: fprintf(stdout, "A_SUBTRACT\n"); return; case A_MULTIPLY: fprintf(stdout, "A_MULTIPLY\n"); return; case A_DIVIDE: fprintf(stdout, "A_DIVIDE\n"); return; case A_EQ: fprintf(stdout, "A_EQ\n"); return; case A_NE: fprintf(stdout, "A_NE\n"); return; case A_LT: fprintf(stdout, "A_LE\n"); return; case A_GT: fprintf(stdout, "A_GT\n"); return; case A_LE: fprintf(stdout, "A_LE\n"); return; case A_GE: fprintf(stdout, "A_GE\n"); return; case A_INTLIT: fprintf(stdout, "A_INTLIT %d\n", n->a_intvalue); return; case A_STRLIT: fprintf(stdout, "A_STRLIT rval label L%d\n", n->a_intvalue); return; case A_IDENT: if (n->rvalue) fprintf(stdout, "A_IDENT rval %s\n", n->sym->name); else fprintf(stdout, "A_IDENT %s\n", n->sym->name); return; case A_ASSIGN: fprintf(stdout, "A_ASSIGN\n"); return; case A_WIDEN: fprintf(stdout, "A_WIDEN\n"); return; case A_RETURN: fprintf(stdout, "A_RETURN\n"); return; case A_FUNCCALL: fprintf(stdout, "A_FUNCCALL %s\n", n->sym->name); return; case A_ADDR: fprintf(stdout, "A_ADDR %s\n", n->sym->name); return; case A_DEREF: if (n->rvalue) fprintf(stdout, "A_DEREF rval\n"); 
else fprintf(stdout, "A_DEREF\n"); return; case A_SCALE: fprintf(stdout, "A_SCALE %d\n", n->a_size); return; case A_PREINC: fprintf(stdout, "A_PREINC %s\n", n->sym->name); return; case A_PREDEC: fprintf(stdout, "A_PREDEC %s\n", n->sym->name); return; case A_POSTINC: fprintf(stdout, "A_POSTINC\n"); return; case A_POSTDEC: fprintf(stdout, "A_POSTDEC\n"); return; case A_NEGATE: fprintf(stdout, "A_NEGATE\n"); return; case A_BREAK: fprintf(stdout, "A_BREAK\n"); return; case A_CONTINUE: fprintf(stdout, "A_CONTINUE\n"); return; case A_CASE: fprintf(stdout, "A_CASE %d\n", n->a_intvalue); return; case A_DEFAULT: fprintf(stdout, "A_DEFAULT\n"); return; case A_SWITCH: fprintf(stdout, "A_SWITCH\n"); return; case A_CAST: fprintf(stdout, "A_CAST %d\n", n->type); return; case A_ASPLUS: fprintf(stdout, "A_ASPLUS\n"); return; case A_ASMINUS: fprintf(stdout, "A_ASMINUS\n"); return; case A_ASSTAR: fprintf(stdout, "A_ASSTAR\n"); return; case A_ASSLASH: fprintf(stdout, "A_ASSLASH\n"); return; case A_TOBOOL: fprintf(stdout, "A_TOBOOL\n"); return; case A_LOGOR: fprintf(stdout, "A_LOGOR\n"); return; case A_LOGAND: fprintf(stdout, "A_LOGAND\n"); return; default: fatald("Unknown dumpAST operator", n->op); } } ================================================ FILE: 54_Reg_Spills/types.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Types and type handling // Copyright (c) 2019 Warren Toomey, GPL3 // Return true if a type is an int type // of any size, false otherwise int inttype(int type) { return (((type & 0xf) == 0) && (type >= P_CHAR && type <= P_LONG)); } // Return true if a type is of pointer type int ptrtype(int type) { return ((type & 0xf) != 0); } // Given a primitive type, return // the type which is a pointer to it int pointer_to(int type) { if ((type & 0xf) == 0xf) fatald("Unrecognised in pointer_to: type", type); return (type + 1); } // Given a primitive pointer type, return // the type which it points to int value_at(int 
type) { if ((type & 0xf) == 0x0) fatald("Unrecognised in value_at: type", type); return (type - 1); } // Given a type and a composite type pointer, return // the size of this type in bytes int typesize(int type, struct symtable *ctype) { if (type == P_STRUCT || type == P_UNION) return (ctype->size); return (genprimsize(type)); } // Given an AST tree and a type which we want it to become, // possibly modify the tree by widening or scaling so that // it is compatible with this type. Return the original tree // if no changes occurred, a modified tree, or NULL if the // tree is not compatible with the given type. // If this will be part of a binary operation, the AST op is not zero. struct ASTnode *modify_type(struct ASTnode *tree, int rtype, struct symtable *rctype, int op) { int ltype; int lsize, rsize; ltype = tree->type; // For A_LOGOR and A_LOGAND, both types have to be int or pointer types if (op==A_LOGOR || op==A_LOGAND) { if (!inttype(ltype) && !ptrtype(ltype)) return(NULL); if (!inttype(ltype) && !ptrtype(rtype)) return(NULL); return (tree); } // XXX No idea on these yet if (ltype == P_STRUCT || ltype == P_UNION) fatal("Don't know how to do this yet"); if (rtype == P_STRUCT || rtype == P_UNION) fatal("Don't know how to do this yet"); // Compare scalar int types if (inttype(ltype) && inttype(rtype)) { // Both types same, nothing to do if (ltype == rtype) return (tree); // Get the sizes for each type lsize = typesize(ltype, NULL); rsize = typesize(rtype, NULL); // Tree's size is too big if (lsize > rsize) return (NULL); // Widen to the right if (rsize > lsize) return (mkastunary(A_WIDEN, rtype, NULL, tree, NULL, 0)); } // For pointers if (ptrtype(ltype) && ptrtype(rtype)) { // We can compare them if (op >= A_EQ && op <= A_GE) return (tree); // A comparison of the same type for a non-binary operation is OK, // or when the left tree is of `void *` type. 
if (op == 0 && (ltype == rtype || ltype == pointer_to(P_VOID))) return (tree); } // We can scale only on A_ADD or A_SUBTRACT operation if (op == A_ADD || op == A_SUBTRACT) { // Left is int type, right is pointer type and the size // of the original type is >1: scale the left if (inttype(ltype) && ptrtype(rtype)) { rsize = genprimsize(value_at(rtype)); if (rsize > 1) return (mkastunary(A_SCALE, rtype, rctype, tree, NULL, rsize)); else return (tree); // Size 1, no need to scale } } // If we get here, the types are not compatible return (NULL); } ================================================ FILE: 54_Reg_Spills/zresults ================================================ For now, declaration of non-global arrays is not implemented on line 43 of main.c ================================================ FILE: 55_Lazy_Evaluation/Readme.md ================================================ # Part 55: Lazy Evaluation I decided to move the coverage of fixing `&&` and `||` to here instead of in the previous part of our compiler writing journey as the previous part was already big enough. So why was our original implementation of `&&` and `||` flawed? C programmers expect that these operators will perform [lazy evaluation](https://en.wikipedia.org/wiki/Lazy_evaluation). In other words, the right-hand operand of `&&` and `||` is evaluated only if the left-hand operand's value is not enough to determine the result. A common use of lazy evaluation is to see if a pointer is pointing at a specific value, but only if the pointer is actually pointing at something. The `test/input138.c` has an example of this: ```c int *aptr; ... if (aptr && *aptr == 1) printf("aptr points at 1\n"); else printf("aptr is NULL or doesn't point at 1\n"); ``` We don't want to evaluate both operands to the `&&` operator: if `aptr` is NULL, then the `*aptr == 1` expression will cause a NULL dereference and crash the program. 
## The Problem The problem is that our current implementation of `&&` and `||` *does* evaluate both operands. In `genAST()` in `gen.c`: ```c // Get the left and right sub-tree values leftreg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op); rightreg = genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); switch (n->op) { ... case A_LOGOR: return (cglogor(leftreg, rightreg)); case A_LOGAND: return (cglogand(leftreg, rightreg)); ... } ``` We have to rewrite this to *not* evaluate both operands. Instead, we have to evaluate the left-hand one first. If it is enough to give the result, we can jump to the code to set the result. If not, now we evaluate the right-hand operand. Again, we jump to the code to set the result. And if we didn't jump, we must have the opposite result. This is very much like the code generator for the IF statement, but it is different enough that I've written a new code generator in `gen.c`. It gets called *before* we run `genAST()` on the left- and right-hand operands. The code is (in stages): ```c // Generate the code for an // A_LOGAND or A_LOGOR operation static int gen_logandor(struct ASTnode *n) { // Generate two labels int Lfalse = genlabel(); int Lend = genlabel(); int reg; // Generate the code for the left expression // followed by the jump to the false label reg= genAST(n->left, NOLABEL, NOLABEL, NOLABEL, 0); cgboolean(reg, n->op, Lfalse); genfreeregs(NOREG); ``` The left operand is evaluated. Let's assume that we are doing the `&&` operation. If this result is zero, we can jump down to `Lfalse` and set the result to zero (false). Also, once the expression has been evaluated we can free all the registers. This also helps to ease the pressure on register allocation. ```c // Generate the code for the right expression // followed by the jump to the false label reg= genAST(n->right, NOLABEL, NOLABEL, NOLABEL, 0); cgboolean(reg, n->op, Lfalse); genfreeregs(reg); ``` We do exactly the same for the right-hand operand. 
If it was false, we jump to the `Lfalse` label. If we don't jump, the `&&`
result must be true. For `&&`, we now do:

```c
  cgloadboolean(reg, 1);
  cgjump(Lend);
  cglabel(Lfalse);
  cgloadboolean(reg, 0);
  cglabel(Lend);
  return(reg);
}
```

The `cgloadboolean()` sets the register to true (if 1 is the argument)
or false (if 0 is the argument). For the x86-64 this is 1 and 0, but I've
coded it this way in case other architectures have different register
values for true and false. The above produces this output for the
expression `(aptr && *aptr == 1)`:

```
        movq    aptr(%rip), %r10
        test    %r10, %r10              # Test if aptr is not NULL
        je      L38                     # No, jump to L38
        movq    aptr(%rip), %r10
        movslq  (%r10), %r10            # Get *aptr in %r10
        movq    $1, %r11
        cmpq    %r11, %r10              # Is *aptr == 1?
        sete    %r11b
        movzbq  %r11b, %r11
        test    %r11, %r11
        je      L38                     # No, jump to L38
        movq    $1, %r11                # Both true, true is the result
        jmp     L39                     # Skip the false code
L38:
        movq    $0, %r11                # One or both false, false is the result
L39:                                    # Continue on with the rest
```

I haven't given the C code to evaluate the `||` operation. Essentially,
we jump if either the left or right is true and set true as the result. If
we don't jump, we fall into the code which sets false as the result and
which jumps over the true setting code.

## Testing the Changes

`test/input138.c` also has code to print out an AND and an OR truth table:

```c
  // See if generic AND works
  for (x=0; x <= 1; x++)
    for (y=0; y <= 1; y++) {
      z= x && y;
      printf("%d %d | %d\n", x, y, z);
    }

  // See if generic OR works
  for (x=0; x <= 1; x++)
    for (y=0; y <= 1; y++) {
      z= x || y;
      printf("%d %d | %d\n", x, y, z);
    }
```

and this produces the output (with a space added):

```
0 0 | 0
0 1 | 0
1 0 | 0
1 1 | 1

0 0 | 0
0 1 | 1
1 0 | 1
1 1 | 1
```

## Conclusion and What's Next

Now we have lazy evaluation in the compiler for `&&` and `||`, which we
definitely need for the compiler to compile itself.
In fact, at this point, the only thing the compiler can't parse (in its own source code) is the declaration and use of local arrays. So, guess what... In the next part of our compiler writing journey, I'll try to work out how to declare and use local arrays. [Next step](../56_Local_Arrays/Readme.md) ================================================ FILE: 56_Local_Arrays/Makefile ================================================ # Define the location of the include directory # and the location to install the compiler binary INCDIR=/tmp/include BINDIR=/tmp HSRCS= data.h decl.h defs.h SRCS= cg.c decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c ARMSRCS= cg_arm.c decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c cwj: $(SRCS) $(HSRCS) cc -o cwj -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCS) compn: $(SRCN) $(HSRCS) cc -D__NASM__ -o compn -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCN) cwjarm: $(ARMSRCS) $(HSRCS) cc -o cwjarm -g -Wall $(ARMSRCS) cp cwjarm cwj install: cwj mkdir -p $(INCDIR) rsync -a include/. $(INCDIR) cp cwj $(BINDIR) chmod +x $(BINDIR)/cwj installn: compn mkdir -p $(INCDIR) rsync -a include/. $(INCDIR) cp compn $(BINDIR) chmod +x $(BINDIR)/compn clean: rm -f cwj cwjarm compn *.o *.s out a.out test: install tests/runtests (cd tests; chmod +x runtests; ./runtests) armtest: cwjarm tests/runtests (cd tests; chmod +x runtests; ./runtests) testn: installn tests/runtestsn (cd tests; chmod +x runtestsn; ./runtestsn) ================================================ FILE: 56_Local_Arrays/Readme.md ================================================ # Part 56: Local Arrays Well, colour me surprised. It wasn't hard to get local arrays implemented at all. It turns out that we had all the pieces in the compiler already, we just had to wire them together. ## Local Array Parsing Let's start on the parsing side. 
I want to allow local array declaration but only with a number of elements, no assignment of values. The declaration side is easy, we just add these lines to `array_declaration()` in `decl.c`: ```c // Add this as a known array. We treat the // array as a pointer to its elements' type switch (class) { ... case C_LOCAL: sym = addlocl(varname, pointer_to(type), ctype, S_ARRAY, 0); break; ... } ``` Now, we must prevent assignment to local arrays: ```c // Array initialisation if (Token.token == T_ASSIGN) { if (class != C_GLOBAL && class != C_STATIC) fatals("Variable can not be initialised", varname); ``` I also added some more error checking: ```c // Set the size of the array and the number of elements // Only externs can have no elements. if (class != C_EXTERN && nelems<=0) fatals("Array must have non-zero elements", sym->name); ``` And that's it on the declaration side for local arrays. ## Code Generation In `cg.c`, we have a function `newlocaloffset()` that calculates the offset of a local variable relative to the top of the stack frame. Its argument was a primitive type because the compiler only allowed int and pointer types as local variables. Now that each symbol has its size (which `sizeof()` uses), we can change the code in this function to use the symbol's size: ```c // Create the position of a new local variable. static int newlocaloffset(int size) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (size > 4) ? 
size : 4; return (-localOffset); } ``` And in the code that generates the function's preamble, `cgfuncpreamble()`, we only have to make these changes: ```c // Copy any in-register parameters to the stack, up to six of them // The remaining parameters are already on the stack for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) { if (cnt > 6) { parm->st_posn = paramOffset; paramOffset += 8; } else { parm->st_posn = newlocaloffset(parm->size); // Here cgstorlocal(paramReg--, parm); } } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) { locvar->st_posn = newlocaloffset(locvar->size); // Here } ``` That's it! It possibly means that we can also allow structs and unions as local variables. I haven't worried about this yet, but it is something to explore later. ## Testing the Changes `test/input140.c` declares: ```c int main() { int i; int ary[5]; char z; ... ``` The array is filled with a FOR loop, `i` being the index. The `z` local is also initialised. This checks to see if any of the variables will tromp over the other variables. It also checks that we can assign all elements of the array and get their values back. Files `test/input141.c` and `test/input142.c` check that the compiler spots and rejects arrays as parameters and array declarations with no elements. ## Conclusion and What's Next In the next part of our compiler writing journey, I'll return to mopping up duties. 
[Next step](../57_Mop_up_pt3/Readme.md) ================================================ FILE: 56_Local_Arrays/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\t.text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\t.data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch (type) { case P_CHAR: return (offset); case P_INT: case P_LONG: break; default: if (!ptrtype(type)) fatald("Bad type in cg_align:", type); } // Here we have an int or a long. Align it on a 4-byte offset // I put the generic code here so it can be reused elsewhere. alignment = 4; offset = (offset + direction * (alignment - 1)) & ~(alignment - 1); return (offset); } // Position of next local variable relative to stack base pointer. 
// We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. static int newlocaloffset(int size) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (size > 4) ? size : 4; return (-localOffset); } // List of available registers and their names. // We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "%r10", "%r11", "%r12", "%r13", "%r9", "%r8", "%rcx", "%rdx", "%rsi", "%rdi" }; static char *breglist[] = { "%r10b", "%r11b", "%r12b", "%r13b", "%r9b", "%r8b", "%cl", "%dl", "%sil", "%dil" }; static char *dreglist[] = { "%r10d", "%r11d", "%r12d", "%r13d", "%r9d", "%r8d", "%ecx", "%edx", "%esi", "%edi" }; // Push and pop a register on/off the stack static void pushreg(int r) { fprintf(Outfile, "\tpushq\t%s\n", reglist[r]); } static void popreg(int r) { fprintf(Outfile, "\tpopq\t%s\n", reglist[r]); } // Set all registers as available. // But if reg is positive, don't free that one. void freeall_registers(int keepreg) { int i; for (i = 0; i < NUMFREEREGS; i++) if (i != keepreg) freereg[i] = 1; } // When we need to spill a register, we choose // the following register and then cycle through // the remaining registers. The spillreg increments // continually, so we need to take a modulo NUMFREEREGS // on it. static int spillreg=0; // Allocate a free register. Return the number of // the register. Die if no available registers. 
int alloc_register(void) {
  int reg;

  // Hand out the first register still marked as free
  for (reg = 0; reg < NUMFREEREGS; reg++) {
    if (freereg[reg]) {
      freereg[reg] = 0;
      return (reg);
    }
  }
  // We have no registers, so we must spill one:
  // push its current value and reuse the register
  reg= (spillreg % NUMFREEREGS);
  spillreg++;
  fprintf(Outfile, "# spilling reg %d\n", reg);
  pushreg(reg);
  return (reg);
}

// Return a register to the list of available registers.
// Check to see if it's not already there.
static void free_register(int reg) {
  if (freereg[reg] != 0) {
    fprintf(Outfile, "# error trying to free register %d\n", reg);
    fatald("Error trying to free register", reg);
  }
  // If this was a spilled register, get it back:
  // pop the most recently spilled value and keep the
  // register marked as in use
  if (spillreg > 0) {
    spillreg--;
    reg= (spillreg % NUMFREEREGS);
    fprintf(Outfile, "# unspilling reg %d\n", reg);
    popreg(reg);
  } else {
    freereg[reg] = 1;
  }
}

// Spill all registers on the stack
void spill_all_regs(void) {
  int i;

  for (i = 0; i < NUMFREEREGS; i++)
    pushreg(i);
}

// Unspill all registers from the stack,
// in the reverse order of spill_all_regs()
static void unspill_all_regs(void) {
  int i;

  for (i = NUMFREEREGS-1; i >= 0; i--)
    popreg(i);
}

// Print out the assembly preamble: mark every register
// as free and emit the switch() helper routine
void cgpreamble() {
  freeall_registers(NOREG);
  cgtextseg();
  fprintf(Outfile,
          "# internal switch(expr) routine\n"
          "# %%rsi = switch table, %%rax = expr\n"
          "# from SubC: http://www.t3x.org/subc/\n"
          "\n"
          "switch:\n"
          " pushq %%rsi\n"
          " movq %%rdx, %%rsi\n"
          " movq %%rax, %%rbx\n"
          " cld\n"
          " lodsq\n"
          " movq %%rax, %%rcx\n"
          "next:\n"
          " lodsq\n"
          " movq %%rax, %%rdx\n"
          " lodsq\n"
          " cmpq %%rdx, %%rbx\n"
          " jnz no\n"
          " popq %%rsi\n"
          " jmp *%%rax\n"
          "no:\n"
          " loop next\n"
          " lodsq\n"
          " popq %%rsi\n"
          " jmp *%%rax\n"
          "\n");
}

// Nothing to do
void cgpostamble() {
}

// Print out a function preamble: the label, the frame setup,
// stack positions for parameters and locals, and the stack adjustment
void cgfuncpreamble(struct symtable *sym) {
  char *name = sym->name;
  struct symtable *parm, *locvar;
  int cnt;
  int paramOffset = 16;         // Any pushed params start at this stack offset
  int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists

  // Output in the text segment, reset local offset
  cgtextseg();
  localOffset = 0;

  // Output the function start, save the %rbp and set it from %rsp
  if (sym->class == C_GLOBAL)
    fprintf(Outfile, "\t.globl\t%s\n" "\t.type\t%s, @function\n", name, name);
  fprintf(Outfile, "%s:\n" "\tpushq\t%%rbp\n" "\tmovq\t%%rsp, %%rbp\n", name);

  // Copy any in-register parameters to the stack, up to six of them
  // The remaining parameters are already on the stack
  for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) {
    if (cnt > 6) {
      parm->st_posn = paramOffset;
      paramOffset += 8;
    } else {
      parm->st_posn = newlocaloffset(parm->size);
      cgstorlocal(paramReg--, parm);
    }
  }

  // For the remainder, if they are a parameter then they are
  // already on the stack. If only a local, make a stack position.
  for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) {
    locvar->st_posn = newlocaloffset(locvar->size);
  }

  // Align the stack pointer to be a multiple of 16
  // less than its previous value
  stackOffset = (localOffset + 15) & ~15;
  fprintf(Outfile, "\taddq\t$%d,%%rsp\n", -stackOffset);
}

// Print out a function postamble: the end label, the stack
// restore and the return. All registers are then marked free.
void cgfuncpostamble(struct symtable *sym) {
  cglabel(sym->st_endlabel);
  fprintf(Outfile, "\taddq\t$%d,%%rsp\n", stackOffset);
  fputs("\tpopq %rbp\n" "\tret\n", Outfile);
  freeall_registers(NOREG);
}

// Load an integer literal value into a register.
// Return the number of the register.
// For x86-64, we don't need to worry about the type.
int cgloadint(int value, int type) {
  // Get a new register
  int r = alloc_register();

  fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]);
  return (r);
}

// Load a value from a variable into a register.
// Return the number of the register. If the
// operation is pre- or post-increment/decrement,
// also perform this action.
int cgloadglob(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name); } else // Print out the code to initialise it switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovzbq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovslq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. 
int cgloadlocal(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->st_posn); fprintf(Outfile, "\tmovq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->st_posn); } else switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->st_posn); fprintf(Outfile, "\tmovzbq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->st_posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->st_posn); fprintf(Outfile, "\tmovslq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->st_posn); break; default: fatald("Bad type in cgloadlocal:", sym->type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tleaq\tL%d(%%rip), %s\n", label, reglist[r]); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } // Subtract the second register from the first and // return the number of the register with the result int 
cgsub(int r1, int r2) { fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tandq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } int cgor(int r1, int r2) { fprintf(Outfile, "\torq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txorq\t%s, %s\n", reglist[r2], reglist[r1]); free_register(r2); return (r1); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshlq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshrq\t%%cl, %s\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tnegq\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnotq\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); return (r); } // Load a boolean value (only 0 or 1) // into the given register void cgloadboolean(int r, int val) { fprintf(Outfile, 
"\tmovq\t$%d, %s\n", val, reglist[r]); } // Convert an integer value to a boolean value. Jump if // it's an IF, WHILE, LOGAND or LOGOR operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); switch(op) { case A_IF: case A_WHILE: case A_LOGAND: fprintf(Outfile, "\tje\tL%d\n", label); break; case A_LOGOR: fprintf(Outfile, "\tjne\tL%d\n", label); break; default: fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { int outr; // Call the function fprintf(Outfile, "\tcall\t%s@PLT\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\taddq\t$%d, %%rsp\n", 8 * (numargs - 6)); // Unspill all the registers unspill_all_regs(); // Get a new register and copy the return value into it outr = alloc_register(); fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpushq\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmovq\t%s, %s\n", reglist[r], reglist[FIRSTPARAMREG - argposn + 1]); } free_register(r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsalq\t$%d, %s\n", val, reglist[r]); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %s(%%rip)\n", reglist[r], sym->name); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %s(%%rip)\n", breglist[r], sym->name); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %s(%%rip)\n", dreglist[r], sym->name); break; default: fatald("Bad type in cgstorglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %d(%%rbp)\n", reglist[r], sym->st_posn); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %d(%%rbp)\n", breglist[r], sym->st_posn); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %d(%%rbp)\n", dreglist[r], sym->st_posn); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size, type; int initvalue; int i; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the variable (or its elements if an array) // and the type of the variable if (node->stype == S_ARRAY) { size = typesize(value_at(node->type), node->ctype); type = value_at(node->type); } else { size = node->size; type = node->type; } // Generate the global identity and the label cgdataseg(); if (node->class == C_GLOBAL) 
fprintf(Outfile, "\t.globl\t%s\n", node->name); fprintf(Outfile, "%s:\n", node->name); // Output space for one or more elements for (i = 0; i < node->nelems; i++) { // Get any initial value initvalue = 0; if (node->initlist != NULL) initvalue = node->initlist[i]; // Generate the space for this type switch (size) { case 1: fprintf(Outfile, "\t.byte\t%d\n", initvalue); break; case 4: fprintf(Outfile, "\t.long\t%d\n", initvalue); break; case 8: // Generate the pointer to a string literal. Treat a zero value // as actually zero, not the label L0 if (node->initlist != NULL && type == pointer_to(P_CHAR) && initvalue != 0) fprintf(Outfile, "\t.quad\tL%d\n", initvalue); else fprintf(Outfile, "\t.quad\t%d\n", initvalue); break; default: for (i = 0; i < size; i++) fprintf(Outfile, "\t.byte\t0\n"); } } } // Generate a global string and its start label // Don't output the label if append is true. void cgglobstr(int l, char *strvalue, int append) { char *cptr; if (!append) cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\t.byte\t%d\n", *cptr); } } void cgglobstrend(void) { fprintf(Outfile, "\t.byte\t0\n"); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(NOREG); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Only return a value if we have a value to return if (reg != NOREG) { // Deal with pointers here as we can't put them in // the switch statement if (ptrtype(sym->type)) fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); else { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzbl\t%s, %%eax\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %%eax\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", 
sym->type); } } } cgjump(sym->st_endlabel); } // Generate code to load the address of an // identifier into a variable. Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL || sym->class == C_STATIC) fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", sym->name, reglist[r]); else fprintf(Outfile, "\tleaq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzbq\t(%s), %s\n", reglist[r], reglist[r]); break; case 4: fprintf(Outfile, "\tmovslq\t(%s), %s\n", reglist[r], reglist[r]); break; case 8: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { // Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmovb\t%s, (%s)\n", breglist[r1], reglist[r2]); break; case 4: fprintf(Outfile, "\tmovl\t%s, (%s)\n", dreglist[r1], reglist[r2]); break; case 8: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } // Generate a switch jump table and the code to // load the registers and call the switch() code void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; // Get a label for the switch table label = genlabel(); cglabel(label); // Heuristic. If we have no cases, create one case // which points to the default case if (casecount == 0) { caseval[0] = 0; caselabel[0] = defaultlabel; casecount = 1; } // Generate the switch jump table. 
fprintf(Outfile, "\t.quad\t%d\n", casecount); for (i = 0; i < casecount; i++) fprintf(Outfile, "\t.quad\t%d, L%d\n", caseval[i], caselabel[i]); fprintf(Outfile, "\t.quad\tL%d\n", defaultlabel); // Load the specific registers cglabel(toplabel); fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); fprintf(Outfile, "\tleaq\tL%d(%%rip), %%rdx\n", label); fprintf(Outfile, "\tjmp\tswitch\n"); } // Move value between registers void cgmove(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s, %s\n", reglist[r1], reglist[r2]); } ================================================ FILE: 56_Local_Arrays/cg_arm.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for ARMv6 on Raspberry Pi // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. static int freereg[4]; static char *reglist[4] = { "r4", "r5", "r6", "r7" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // We have to store large integer literal values in memory. // Keep a list of them which will be output in the postamble #define MAXINTS 1024 int Intlist[MAXINTS]; static int Intslot = 0; // Determine the offset of a large integer // literal from the .L3 label. If the integer // isn't in the list, add it. 
static void set_int_offset(int val) { int offset = -1; // See if it is already there for (int i = 0; i < Intslot; i++) { if (Intlist[i] == val) { offset = 4 * i; break; } } // Not in the list, so add it if (offset == -1) { offset = 4 * Intslot; if (Intslot == MAXINTS) fatal("Out of int slots in set_int_offset()"); Intlist[Intslot++] = val; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L3+%d\n", offset); } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Print out the assembly postamble void cgpostamble() { // Print out the global variables fprintf(Outfile, ".L2:\n"); for (int i = 0; i < Globs; i++) { if (Symtable[i].stype == S_VARIABLE) fprintf(Outfile, "\t.word %s\n", Symtable[i].name); } // Print out the integer literals fprintf(Outfile, ".L3:\n"); for (int i = 0; i < Intslot; i++) { fprintf(Outfile, "\t.word %d\n", Intlist[i]); } } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, \%%function\n" "%s:\n" "\tpush\t{fp, lr}\n" "\tadd\tfp, sp, #4\n" "\tsub\tsp, sp, #8\n" "\tstr\tr0, [fp, #-8]\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Symtable[id].endlabel); fputs("\tsub\tsp, fp, #4\n" "\tpop\t{fp, pc}\n" "\t.align\t2\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); // If the literal value is small, do it with one instruction if (value <= 1000) fprintf(Outfile, "\tmov\t%s, #%d\n", reglist[r], value); else { set_int_offset(value); fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); } return (r); } // Determine the offset of a variable from the .L2 // label. Yes, this is inefficient code. static void set_var_offset(int id) { int offset = 0; // Walk the symbol table up to id. 
// Find S_VARIABLEs and add on 4 until // we get to our variable for (int i = 0; i < id; i++) { if (Symtable[i].stype == S_VARIABLE) offset += 4; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L2+%d\n", offset); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tldrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s, %s\n", reglist[r1], reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\tmul\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { // To do a divide: r0 holds the dividend, r1 holds the divisor. // The quotient is in r0. 
fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r1]); fprintf(Outfile, "\tmov\tr1, %s\n", reglist[r2]); fprintf(Outfile, "\tbl\t__aeabi_idiv\n"); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r1]); free_register(r2); return (r1); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]); fprintf(Outfile, "\tbl\t%s\n", Symtable[id].name); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r]); return (r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tlsl\t%s, %s, #%d\n", reglist[r], reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tstr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgstorglob:", Symtable[id].type); } return (r); } // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { if (ptrtype(type)) return (4); switch (type) { case P_CHAR: return (1); case P_INT: case P_LONG: return (4); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Symtable[id].type); fprintf(Outfile, "\t.data\n" "\t.globl\t%s\n", Symtable[id].name); switch (typesize) { case 1: fprintf(Outfile, "%s:\t.byte\t0\n", Symtable[id].name); break; case 4: fprintf(Outfile, "%s:\t.long\t0\n", Symtable[id].name); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "moveq", "movne", "movlt", "movgt", "movle", "movge" }; // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "movne", "moveq", "movge", "movle", "movgt", "movlt" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #1\n", cmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #0\n", invcmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\tuxtb\t%s, %s\n", reglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tb\tL%d\n", l); } // List of inverted branch instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *brlist[] = { "bne", "beq", "bge", "ble", "bgt", "blt" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", brlist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[reg]); cgjump(Symtable[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. Return a new register int cgaddress(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); fprintf(Outfile, "\tmov\t%s, r3\n", reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tldrb\t%s, [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [%s]\n", reglist[r1], reglist[r2]); break; case P_INT: case P_LONG: fprintf(Outfile, "\tstr\t%s, [%s]\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 56_Local_Arrays/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { 
no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\tsection .text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\tsection .data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch (type) { case P_CHAR: return (offset); case P_INT: case P_LONG: break; default: if (!ptrtype(type)) fatald("Bad type in cg_align:", type); } // Here we have an int or a long. Align it on a 4-byte offset // I put the generic code here so it can be reused elsewhere. alignment = 4; offset = (offset + direction * (alignment - 1)) & ~(alignment - 1); return (offset); } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. int newlocaloffset(int size) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (size > 4) ? size : 4; return (-localOffset); } // List of available registers and their names. 
// We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "r10", "r11", "r12", "r13", "r9", "r8", "rcx", "rdx", "rsi", "rdi" }; static char *breglist[] = { "r10b", "r11b", "r12b", "r13b", "r9b", "r8b", "cl", "dl", "sil", "dil" }; static char *dreglist[] = { "r10d", "r11d", "r12d", "r13d", "r9d", "r8d", "ecx", "edx", "esi", "edi" }; // Push and pop a register on/off the stack static void pushreg(int r) { fprintf(Outfile, "\tpush\t%s\n", reglist[r]); } static void popreg(int r) { fprintf(Outfile, "\tpop\t%s\n", reglist[r]); } // Set all registers as available // But if reg is positive, don't free that one. void freeall_registers(int keepreg) { int i; for (i = 0; i < NUMFREEREGS; i++) if (i != keepreg) freereg[i] = 1; } // When we need to spill a register, we choose // the following register and then cycle through // the remaining registers. The spillreg increments // continually, so we need to take a modulo NUMFREEREGS // on it. static int spillreg=0; // Allocate a free register. Return the number of // the register. Die if no available registers. int alloc_register(void) { int reg; for (reg = 0; reg < NUMFREEREGS; reg++) { if (freereg[reg]) { freereg[reg] = 0; return (reg); } } // We have no registers, so we must spill one reg = (spillreg % NUMFREEREGS); spillreg++; fprintf(Outfile, "; spilling reg %d\n", reg); pushreg(reg); return (reg); } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) {
  // Freeing a register that is already marked free is a fatal error
  if (freereg[reg] != 0) {
    fprintf(Outfile, "# error trying to free register %d\n", reg);
    fatald("Error trying to free register", reg);
  }
  // If this was a spilled register, get it back
  // NOTE(review): the register popped is chosen from spillreg, not from
  // the reg argument; confirm callers always free in reverse spill order.
  if (spillreg > 0) {
    spillreg--;
    reg= (spillreg % NUMFREEREGS);
    fprintf(Outfile, "; unspilling reg %d\n", reg);
    popreg(reg);
  } else {
    // Nothing is spilled: simply mark the register as available
    freereg[reg] = 1;
  }
}

// Spill all registers on the stack
// (pushes registers 0 .. NUMFREEREGS-1 in ascending order)
void spill_all_regs(void) {
  int i;

  for (i = 0; i < NUMFREEREGS; i++)
    pushreg(i);
}

// Unspill all registers from the stack
// (pops in descending order, the reverse of spill_all_regs)
static void unspill_all_regs(void) {
  int i;

  for (i = NUMFREEREGS-1; i >= 0; i--)
    popreg(i);
}

// Print out the assembly preamble: the extern declarations
// and the helper routine that cgswitch()-style code jumps to
void cgpreamble() {
  freeall_registers(NOREG);
  fputs("\textern\tprintint\n", Outfile);
  fputs("\textern\tprintchar\n", Outfile);
  fputs("\textern\topen\n", Outfile);
  fputs("\textern\tclose\n", Outfile);
  fputs("\textern\tread\n", Outfile);
  fputs("\textern\twrite\n", Outfile);
  fputs("\textern\tprintf\n", Outfile);
  cgtextseg();
  // NOTE(review): the emitted comment says rsi holds the switch table,
  // but the routine copies the table address from rdx into rsi.
  fprintf(Outfile,
          "; internal switch(expr) routine\n"
          "; rsi = switch table, rax = expr\n"
          "; from SubC: http://www.t3x.org/subc/\n"
          "\n"
          "switch:\n"
          " push rsi\n"
          " mov rsi, rdx\n"
          " mov rbx, rax\n"
          " cld\n"
          " lodsq\n"
          " mov rcx, rax\n"
          "next:\n"
          " lodsq\n"
          " mov rdx, rax\n"
          " lodsq\n"
          " cmp rbx, rdx\n"
          " jnz no\n"
          " pop rsi\n"
          " jmp rax\n"
          "no:\n"
          " loop next\n"
          " lodsq\n"
          " pop rsi\n"
          " jmp rax\n"
          "\n");
}

// Nothing to do
void cgpostamble() {
}

// Print out a function preamble
void cgfuncpreamble(struct symtable *sym) {
  char *name = sym->name;
  struct symtable *parm, *locvar;
  int cnt;
  int paramOffset = 16;		// Any pushed params start at this stack offset
  int paramReg = FIRSTPARAMREG;	// Index to the first param register in above reg lists

  // Output in the text segment, reset local offset
  cgtextseg();
  localOffset = 0;

  // Output the function start, save the rsp and rbp
  if (sym->class == C_GLOBAL)
    fprintf(Outfile, "\tglobal\t%s\n", name);
  fprintf(Outfile,
          "%s:\n" "\tpush\trbp\n"
          "\tmov\trbp, rsp\n", name);

  // Copy any in-register parameters to the stack, up to six of them
  // The remaining parameters are already on the stack
  for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) {
    if (cnt > 6) {
      // Seventh and later parameters: record their positive offsets
      parm->st_posn = paramOffset;
      paramOffset += 8;
    } else {
      // First six: give each a local slot and store its register there.
      // paramReg walks down the register lists one entry per parameter.
      parm->st_posn = newlocaloffset(parm->size);
      cgstorlocal(paramReg--, parm);
    }
  }

  // For the remainder, if they are a parameter then they are
  // already on the stack. If only a local, make a stack position.
  for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) {
    locvar->st_posn = newlocaloffset(locvar->size);
  }

  // Align the stack pointer to be a multiple of 16
  // less than its previous value
  stackOffset = (localOffset + 15) & ~15;
  fprintf(Outfile, "\tadd\trsp, %d\n", -stackOffset);
}

// Print out a function postamble
void cgfuncpostamble(struct symtable *sym) {
  cglabel(sym->st_endlabel);
  // Undo the stack adjustment made by cgfuncpreamble()
  fprintf(Outfile, "\tadd\trsp, %d\n", stackOffset);
  fputs("\tpop rbp\n" "\tret\n", Outfile);
  freeall_registers(NOREG);
}

// Load an integer literal value into a register.
// Return the number of the register.
// For x86-64, we don't need to worry about the type.
int cgloadint(int value, int type) {
  // Get a new register
  int r = alloc_register();

  fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value);
  return (r);
}

// Load a value from a variable into a register.
// Return the number of the register. If the
// operation is pre- or post-increment/decrement,
// also perform this action.
int cgloadglob(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); } else // Print out the code to initialise it switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tdword [%s]\n", sym->name); fprintf(Outfile, "\tmovsx\t%s, word [%s]\n", dreglist[r], sym->name); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword [%s]\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. 
int cgloadlocal(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->st_posn); fprintf(Outfile, "\tmov\t%s, [rbp+%d]\n", reglist[r], sym->st_posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->st_posn); } else switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->st_posn); fprintf(Outfile, "\tmovzx\t%s, byte [rbp+%d]\n", reglist[r], sym->st_posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->st_posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", sym->st_posn); fprintf(Outfile, "\tmovsx\t%s, dword [rbp+%d]\n", reglist[r], sym->st_posn); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", sym->st_posn); break; default: fatald("Bad type in cgloadlocal:", sym->type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, L%d\n", reglist[r], label); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r1], reglist[r2]); 
free_register(r2); return (r1); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tand\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } int cgor(int r1, int r2) { fprintf(Outfile, "\tor\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txor\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshl\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshr\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tneg\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnot\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r], breglist[r]); return (r); } // Load a 
boolean value (only 0 or 1) // into the given register void cgloadboolean(int r, int val) { fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], val); } // Convert an integer value to a boolean value. Jump if // it's an IF, WHILE, LOGAND or LOGOR operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); switch(op) { case A_IF: case A_WHILE: case A_LOGAND: fprintf(Outfile, "\tje\tL%d\n", label); break; case A_LOGOR: fprintf(Outfile, "\tjne\tL%d\n", label); break; default: fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, byte %s\n", reglist[r], breglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { int outr; // Call the function fprintf(Outfile, "\tcall\t%s\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\tadd\trsp, %d\n", 8 * (numargs - 6)); // Unspill all the registers unspill_all_regs(); // Get a new register and copy the return value into it outr = alloc_register(); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpush\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmov\t%s, %s\n", reglist[FIRSTPARAMREG - argposn + 1], reglist[r]); } free_register(r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsal\t%s, %d\n", reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, dreglist[r]); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\tqword\t[rbp+%d], %s\n", sym->st_posn, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\tbyte\t[rbp+%d], %s\n", sym->st_posn, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\tdword\t[rbp+%d], %s\n", sym->st_posn, dreglist[r]); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size, type; int initvalue; int i; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the variable (or its elements if an array) // and the type of the variable if (node->stype == S_ARRAY) { size = typesize(value_at(node->type), node->ctype); type = value_at(node->type); } else { size = node->size; type = node->type; } // Generate the global identity and the label cgdataseg(); if (node->class == C_GLOBAL) fprintf(Outfile, 
"\tglobal\t%s\n", node->name); fprintf(Outfile, "%s:\n", node->name); // Output space for one or more elements for (i = 0; i < node->nelems; i++) { // Get any initial value initvalue = 0; if (node->initlist != NULL) initvalue = node->initlist[i]; // Generate the space for this type // original version switch(size) { case 1: fprintf(Outfile, "\tdb\t%d\n", initvalue); break; case 4: fprintf(Outfile, "\tdd\t%d\n", initvalue); break; case 8: // Generate the pointer to a string literal. Treat a zero value // as actually zero, not the label L0 if (node->initlist != NULL && type == pointer_to(P_CHAR) && initvalue != 0) fprintf(Outfile, "\tdq\tL%d\n", initvalue); else fprintf(Outfile, "\tdq\t%d\n", initvalue); break; default: for (i = 0; i < size; i++) fprintf(Outfile, "\tdb\t0\n"); /* compact version using times instead of loop fprintf(Outfile, "\ttimes\t%d\tdb\t0\n", size); */ } } } // Generate a global string and its start label // Don't output the label if append is true. void cgglobstr(int l, char *strvalue, int append) { char *cptr; if (!append) cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\tdb\t%d\n", *cptr); } } void cgglobstrend(void) { fprintf(Outfile, "\tdb\t0\n"); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(NOREG); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Only return a value if we have a value to return if (reg != NOREG) { // Deal with pointers here as we can't put them in // the switch statement if (ptrtype(sym->type)) fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); else { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzx\teax, %s\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmov\teax, %s\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } } 
} cgjump(sym->st_endlabel); } // Generate code to load the address of an // identifier into a variable. Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL || sym->class == C_STATIC) fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r], sym->name); else fprintf(Outfile, "\tlea\t%s, [rbp+%d]\n", reglist[r], sym->st_posn); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], reglist[r]); break; case 4: fprintf(Outfile, "\tmovsx\t%s, dword [%s]\n", reglist[r], reglist[r]); break; case 8: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { //Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmov\t[%s], byte %s\n", reglist[r2], breglist[r1]); break; case 4: fprintf(Outfile, "\tmov\t[%s], dword %s\n", reglist[r2], dreglist[r1]); break; case 8: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } // Generate a switch jump table and the code to // load the registers and call the switch() code void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; // Get a label for the switch table label = genlabel(); cglabel(label); // Heuristic. If we have no cases, create one case // which points to the default case if (casecount == 0) { caseval[0] = 0; caselabel[0] = defaultlabel; casecount = 1; } // Generate the switch jump table. 
fprintf(Outfile, "\tdq\t%d\n", casecount); for (i = 0; i < casecount; i++) fprintf(Outfile, "\tdq\t%d, L%d\n", caseval[i], caselabel[i]); fprintf(Outfile, "\tdq\tL%d\n", defaultlabel); // Load the specific registers cglabel(toplabel); fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); fprintf(Outfile, "\tmov\trdx, L%d\n", label); fprintf(Outfile, "\tjmp\tswitch\n"); } // Move value between registers void cgmove(int r1, int r2) { fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r2], reglist[r1]); } ================================================ FILE: 56_Local_Arrays/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Linestart; // True if at start of a line extern_ int Putback; // Character put back by scanner extern_ struct symtable *Functionid; // Symbol ptr of the current function extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ char *Infilename; // Name of file we are parsing extern_ char *Outfilename; // Name of file we opened as Outfile extern_ struct token Token; // Last token scanned extern_ struct token Peektoken; // A look-ahead token extern_ char Text[TEXTLEN + 1]; // Last identifier scanned extern_ int Looplevel; // Depth of nested loops extern_ int Switchlevel; // Depth of nested switches extern char *Tstring[]; // List of token strings // Symbol table lists extern_ struct symtable *Globhead, *Globtail; // Global variables and functions extern_ struct symtable *Loclhead, *Locltail; // Local variables extern_ struct symtable *Parmhead, *Parmtail; // Local parameters extern_ struct symtable *Membhead, *Membtail; // Temp list of struct/union members extern_ struct symtable *Structhead, *Structtail; // List of struct types extern_ struct symtable *Unionhead, *Uniontail; // List of union types extern_ struct symtable *Enumhead, *Enumtail; // List of enum types and 
values extern_ struct symtable *Typehead, *Typetail; // List of typedefs // Command-line flags extern_ int O_dumpAST; // If true, dump the AST trees extern_ int O_dumpsym; // If true, dump the symbol table extern_ int O_keepasm; // If true, keep any assembly files extern_ int O_assemble; // If true, assemble the assembly files extern_ int O_dolink; // If true, link the object files extern_ int O_verbose; // If true, print info on compilation stages ================================================ FILE: 56_Local_Arrays/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 static struct symtable *composite_declaration(int type); static int typedef_declaration(struct symtable **ctype); static int type_of_typedef(char *name, struct symtable **ctype); static void enum_declaration(void); // Parse the current token and return a primitive type enum value, // a pointer to any composite type and possibly modify // the class of the type. int parse_type(struct symtable **ctype, int *class) { int type, exstatic = 1; // See if the class has been changed to extern or static while (exstatic) { switch (Token.token) { case T_EXTERN: if (*class == C_STATIC) fatal("Illegal to have extern and static at the same time"); *class = C_EXTERN; scan(&Token); break; case T_STATIC: if (*class == C_LOCAL) fatal("Compiler doesn't support static local declarations"); if (*class == C_EXTERN) fatal("Illegal to have extern and static at the same time"); *class = C_STATIC; scan(&Token); break; default: exstatic = 0; } } // Now work on the actual type keyword switch (Token.token) { case T_VOID: type = P_VOID; scan(&Token); break; case T_CHAR: type = P_CHAR; scan(&Token); break; case T_INT: type = P_INT; scan(&Token); break; case T_LONG: type = P_LONG; scan(&Token); break; // For the following, if we have a ';' after the // parsing then there is no type, so return -1. 
// Example: struct x {int y; int z}; case T_STRUCT: type = P_STRUCT; *ctype = composite_declaration(P_STRUCT); if (Token.token == T_SEMI) type = -1; break; case T_UNION: type = P_UNION; *ctype = composite_declaration(P_UNION); if (Token.token == T_SEMI) type = -1; break; case T_ENUM: type = P_INT; // Enums are really ints enum_declaration(); if (Token.token == T_SEMI) type = -1; break; case T_TYPEDEF: type = typedef_declaration(ctype); if (Token.token == T_SEMI) type = -1; break; case T_IDENT: type = type_of_typedef(Text, ctype); break; default: fatals("Illegal type, token", Token.tokstr); } return (type); } // Given a type parsed by parse_type(), scan in any following // '*' tokens and return the new type int parse_stars(int type) { while (1) { if (Token.token != T_STAR) break; type = pointer_to(type); scan(&Token); } return (type); } // Parse a type which appears inside a cast int parse_cast(struct symtable **ctype) { int type, class = 0; // Get the type inside the parentheses type = parse_stars(parse_type(ctype, &class)); // Do some error checking. I'm sure more can be done if (type == P_STRUCT || type == P_UNION || type == P_VOID) fatal("Cannot cast to a struct, union or void type"); return (type); } // Given a type, parse an expression of literals and ensure // that the type of this expression matches the given type. // Parse any type cast that precedes the expression. // If an integer literal, return this value. // If a string literal, return the label number of the string. 
int parse_literal(int type) { struct ASTnode *tree; // Parse the expression and optimise the resulting AST tree tree = optimise(binexpr(0)); // If there's a cast, get the child and // mark it as having the type from the cast if (tree->op == A_CAST) { tree->left->type = tree->type; tree = tree->left; } // The tree must now have an integer or string literal if (tree->op != A_INTLIT && tree->op != A_STRLIT) fatal("Cannot initialise globals with a general expression"); // If the type is char * and if (type == pointer_to(P_CHAR)) { // We have a string literal, return the label number if (tree->op == A_STRLIT) return (tree->a_intvalue); // We have a zero int literal, so that's a NULL if (tree->op == A_INTLIT && tree->a_intvalue == 0) return (0); } // We only get here with an integer literal. The input type // is an integer type and is wide enough to hold the literal value if (inttype(type) && typesize(type, NULL) >= typesize(tree->type, NULL)) return (tree->a_intvalue); fatal("Type mismatch: literal vs. variable"); return (0); // Keep -Wall happy } // Given a pointer to a symbol that may already exist // return true if this symbol doesn't exist. 
We use // this function to convert externs into globals int is_new_symbol(struct symtable *sym, int class, int type, struct symtable *ctype) { // There is no existing symbol, thus is new if (sym==NULL) return(1); // global versus extern: if they match that it's not new // and we can convert the class to global if ((sym->class== C_GLOBAL && class== C_EXTERN) || (sym->class== C_EXTERN && class== C_GLOBAL)) { // If the types don't match, there's a problem if (type != sym->type) fatals("Type mismatch between global/extern", sym->name); // Struct/unions, also compare the ctype if (type >= P_STRUCT && ctype != sym->ctype) fatals("Type mismatch between global/extern", sym->name); // If we get to here, the types match, so mark the symbol // as global sym->class= C_GLOBAL; // Return that symbol is not new return(0); } // It must be a duplicate symbol if we get here fatals("Duplicate global variable declaration", sym->name); return(-1); // Keep -Wall happy } // Given the type, name and class of a scalar variable, // parse any initialisation value and allocate storage for it. // Return the variable's symbol table entry. 
static struct symtable *scalar_declaration(char *varname, int type, struct symtable *ctype, int class, struct ASTnode **tree) { struct symtable *sym = NULL; struct ASTnode *varnode, *exprnode; *tree = NULL; // Add this as a known scalar switch (class) { case C_STATIC: case C_EXTERN: case C_GLOBAL: // See if this variable is new or already exists sym= findglob(varname); if (is_new_symbol(sym, class, type, ctype)) sym = addglob(varname, type, ctype, S_VARIABLE, class, 1, 0); break; case C_LOCAL: sym = addlocl(varname, type, ctype, S_VARIABLE, 1); break; case C_PARAM: sym = addparm(varname, type, ctype, S_VARIABLE); break; case C_MEMBER: sym = addmemb(varname, type, ctype, S_VARIABLE, 1); break; } // The variable is being initialised if (Token.token == T_ASSIGN) { // Only possible for a global or local if (class != C_GLOBAL && class != C_LOCAL && class != C_STATIC) fatals("Variable can not be initialised", varname); scan(&Token); // Globals must be assigned a literal value if (class == C_GLOBAL || class == C_STATIC) { // Create one initial value for the variable and // parse this value sym->initlist = (int *) malloc(sizeof(int)); sym->initlist[0] = parse_literal(type); } if (class == C_LOCAL) { // Make an A_IDENT AST node with the variable varnode = mkastleaf(A_IDENT, sym->type, sym->ctype, sym, 0); // Get the expression for the assignment, make into a rvalue exprnode = binexpr(0); exprnode->rvalue = 1; // Ensure the expression's type matches the variable exprnode = modify_type(exprnode, varnode->type, varnode->ctype, 0); if (exprnode == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree *tree = mkastnode(A_ASSIGN, exprnode->type, exprnode->ctype, exprnode, NULL, varnode, NULL, 0); } } // Generate any global space if (class == C_GLOBAL || class == C_STATIC) genglobsym(sym); return (sym); } // Given the type, name and class of an variable, parse // the size of the array, if any. 
Then parse any initialisation // value and allocate storage for it. // Return the variable's symbol table entry. static struct symtable *array_declaration(char *varname, int type, struct symtable *ctype, int class) { struct symtable *sym; // New symbol table entry int nelems = -1; // Assume the number of elements won't be given int maxelems; // The maximum number of elements in the init list int *initlist; // The list of initial elements int i = 0, j; // Skip past the '[' scan(&Token); // See we have an array size if (Token.token != T_RBRACKET) { nelems = parse_literal(P_INT); if (nelems <= 0) fatald("Array size is illegal", nelems); } // Ensure we have a following ']' match(T_RBRACKET, "]"); // Add this as a known array. We treat the // array as a pointer to its elements' type switch (class) { case C_STATIC: case C_EXTERN: case C_GLOBAL: // See if this variable is new or already exists sym= findglob(varname); if (is_new_symbol(sym, class, pointer_to(type), ctype)) sym = addglob(varname, pointer_to(type), ctype, S_ARRAY, class, 0, 0); break; case C_LOCAL: sym = addlocl(varname, pointer_to(type), ctype, S_ARRAY, 0); break; default: fatal("Declaration of array parameters is not implemented"); } // Array initialisation if (Token.token == T_ASSIGN) { if (class != C_GLOBAL && class != C_STATIC) fatals("Variable can not be initialised", varname); scan(&Token); // Get the following left curly bracket match(T_LBRACE, "{"); #define TABLE_INCREMENT 10 // If the array already has nelems, allocate that many elements // in the list. Otherwise, start with TABLE_INCREMENT. 
if (nelems != -1) maxelems = nelems; else maxelems = TABLE_INCREMENT; initlist = (int *) malloc(maxelems * sizeof(int)); // Loop getting a new literal value from the list while (1) { // Check we can add the next value, then parse and add it if (nelems != -1 && i == maxelems) fatal("Too many values in initialisation list"); initlist[i++] = parse_literal(type); // Increase the list size if the original size was // not set and we have hit the end of the current list if (nelems == -1 && i == maxelems) { maxelems += TABLE_INCREMENT; initlist = (int *) realloc(initlist, maxelems * sizeof(int)); } // Leave when we hit the right curly bracket if (Token.token == T_RBRACE) { scan(&Token); break; } // Next token must be a comma, then comma(); } // Zero any unused elements in the initlist. // Attach the list to the symbol table entry for (j = i; j < sym->nelems; j++) initlist[j] = 0; if (i > nelems) nelems = i; sym->initlist = initlist; } // Set the size of the array and the number of elements // Only externs can have no elements. if (class != C_EXTERN && nelems<=0) fatals("Array must have non-zero elements", sym->name); sym->nelems = nelems; sym->size = sym->nelems * typesize(type, ctype); // Generate any global space if (class == C_GLOBAL || class == C_STATIC) genglobsym(sym); return (sym); } // Given a pointer to the new function being declared and // a possibly NULL pointer to the function's previous declaration, // parse a list of parameters and cross-check them against the // previous declaration. 
Return the count of parameters static int param_declaration_list(struct symtable *oldfuncsym, struct symtable *newfuncsym) { int type, paramcnt = 0; struct symtable *ctype; struct symtable *protoptr = NULL; struct ASTnode *unused; // Get the pointer to the first prototype parameter if (oldfuncsym != NULL) protoptr = oldfuncsym->member; // Loop getting any parameters while (Token.token != T_RPAREN) { // If the first token is 'void' if (Token.token == T_VOID) { // Peek at the next token. If a ')', the function // has no parameters, so leave the loop. scan(&Peektoken); if (Peektoken.token == T_RPAREN) { // Move the Peektoken into the Token paramcnt = 0; scan(&Token); break; } } // Get the type of the next parameter type = declaration_list(&ctype, C_PARAM, T_COMMA, T_RPAREN, &unused); if (type == -1) fatal("Bad type in parameter list"); // Ensure the type of this parameter matches the prototype if (protoptr != NULL) { if (type != protoptr->type) fatald("Type doesn't match prototype for parameter", paramcnt + 1); protoptr = protoptr->next; } paramcnt++; // Stop when we hit the right parenthesis if (Token.token == T_RPAREN) break; // We need a comma as separator comma(); } if (oldfuncsym != NULL && paramcnt != oldfuncsym->nelems) fatals("Parameter count mismatch for function", oldfuncsym->name); // Return the count of parameters return (paramcnt); } // // function_declaration: type identifier '(' parameter_list ')' ; // | type identifier '(' parameter_list ')' compound_statement ; // // Parse the declaration of function. static struct symtable *function_declaration(char *funcname, int type, struct symtable *ctype, int class) { struct ASTnode *tree, *finalstmt; struct symtable *oldfuncsym, *newfuncsym = NULL; int endlabel, paramcnt; // Text has the identifier's name. If this exists and is a // function, get the id. Otherwise, set oldfuncsym to NULL. 
if ((oldfuncsym = findsymbol(funcname)) != NULL) if (oldfuncsym->stype != S_FUNCTION) oldfuncsym = NULL; // If this is a new function declaration, get a // label-id for the end label, and add the function // to the symbol table, if (oldfuncsym == NULL) { endlabel = genlabel(); // Assumtion: functions only return scalar types, so NULL below newfuncsym = addglob(funcname, type, NULL, S_FUNCTION, class, 0, endlabel); } // Scan in the '(', any parameters and the ')'. // Pass in any existing function prototype pointer lparen(); paramcnt = param_declaration_list(oldfuncsym, newfuncsym); rparen(); // If this is a new function declaration, update the // function symbol entry with the number of parameters. // Also copy the parameter list into the function's node. if (newfuncsym) { newfuncsym->nelems = paramcnt; newfuncsym->member = Parmhead; oldfuncsym = newfuncsym; } // Clear out the parameter list Parmhead = Parmtail = NULL; // Declaration ends in a semicolon, only a prototype. if (Token.token == T_SEMI) return (oldfuncsym); // This is not just a prototype. // Set the Functionid global to the function's symbol pointer Functionid = oldfuncsym; // Get the AST tree for the compound statement and mark // that we have parsed no loops or switches yet Looplevel = 0; Switchlevel = 0; lbrace(); tree = compound_statement(0); rbrace(); // If the function type isn't P_VOID .. if (type != P_VOID) { // Error if no statements in the function if (tree == NULL) fatal("No statements in function with non-void type"); // Check that the last AST operation in the // compound statement was a return statement finalstmt = (tree->op == A_GLUE) ? 
tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); } // Build the A_FUNCTION node which has the function's symbol pointer // and the compound statement sub-tree tree = mkastunary(A_FUNCTION, type, ctype, tree, oldfuncsym, endlabel); // Do optimisations on the AST tree tree = optimise(tree); // Dump the AST tree if requested if (O_dumpAST) { dumpAST(tree, NOLABEL, 0); fprintf(stdout, "\n\n"); } // Generate the assembly code for it genAST(tree, NOLABEL, NOLABEL, NOLABEL, 0); // Now free the symbols associated // with this function freeloclsyms(); return (oldfuncsym); } // Parse composite type declarations: structs or unions. // Either find an existing struct/union declaration, or build // a struct/union symbol table entry and return its pointer. static struct symtable *composite_declaration(int type) { struct symtable *ctype = NULL; struct symtable *m; struct ASTnode *unused; int offset; int t; // Skip the struct/union keyword scan(&Token); // See if there is a following struct/union name if (Token.token == T_IDENT) { // Find any matching composite type if (type == P_STRUCT) ctype = findstruct(Text); else ctype = findunion(Text); scan(&Token); } // If the next token isn't an LBRACE , this is // the usage of an existing struct/union type. // Return the pointer to the type. if (Token.token != T_LBRACE) { if (ctype == NULL) fatals("unknown struct/union type", Text); return (ctype); } // Ensure this struct/union type hasn't been // previously defined if (ctype) fatals("previously defined struct/union", Text); // Build the composite type and skip the left brace if (type == P_STRUCT) ctype = addstruct(Text); else ctype = addunion(Text); scan(&Token); // Scan in the list of members while (1) { // Get the next member. 
m is used as a dummy t = declaration_list(&m, C_MEMBER, T_SEMI, T_RBRACE, &unused); if (t == -1) fatal("Bad type in member list"); if (Token.token == T_SEMI) scan(&Token); if (Token.token == T_RBRACE) break; } // Attach to the struct type's node rbrace(); if (Membhead == NULL) fatals("No members in struct", ctype->name); ctype->member = Membhead; Membhead = Membtail = NULL; // Set the offset of the initial member // and find the first free byte after it m = ctype->member; m->st_posn = 0; offset = typesize(m->type, m->ctype); // Set the position of each successive member in the composite type // Unions are easy. For structs, align the member and find the next free byte for (m = m->next; m != NULL; m = m->next) { // Set the offset for this member if (type == P_STRUCT) m->st_posn = genalign(m->type, offset, 1); else m->st_posn = 0; // Get the offset of the next free byte after this member offset += typesize(m->type, m->ctype); } // Set the overall size of the composite type ctype->size = offset; return (ctype); } // Parse an enum declaration static void enum_declaration(void) { struct symtable *etype = NULL; char *name = NULL; int intval = 0; // Skip the enum keyword. scan(&Token); // If there's a following enum type name, get a // pointer to any existing enum type node. if (Token.token == T_IDENT) { etype = findenumtype(Text); name = strdup(Text); // As it gets tromped soon scan(&Token); } // If the next token isn't a LBRACE, check // that we have an enum type name, then return if (Token.token != T_LBRACE) { if (etype == NULL) fatals("undeclared enum type:", name); return; } // We do have an LBRACE. Skip it scan(&Token); // If we have an enum type name, ensure that it // hasn't been declared before. 
if (etype != NULL) fatals("enum type redeclared:", etype->name); else // Build an enum type node for this identifier etype = addenum(name, C_ENUMTYPE, 0); // Loop to get all the enum values while (1) { // Ensure we have an identifier // Copy it in case there's an int literal coming up ident(); name = strdup(Text); // Ensure this enum value hasn't been declared before etype = findenumval(name); if (etype != NULL) fatals("enum value redeclared:", Text); // If the next token is an '=', skip it and // get the following int literal if (Token.token == T_ASSIGN) { scan(&Token); if (Token.token != T_INTLIT) fatal("Expected int literal after '='"); intval = Token.intvalue; scan(&Token); } // Build an enum value node for this identifier. // Increment the value for the next enum identifier. etype = addenum(name, C_ENUMVAL, intval++); // Bail out on a right curly bracket, else get a comma if (Token.token == T_RBRACE) break; comma(); } scan(&Token); // Skip over the right curly bracket } // Parse a typedef declaration and return the type // and ctype that it represents static int typedef_declaration(struct symtable **ctype) { int type, class = 0; // Skip the typedef keyword. scan(&Token); // Get the actual type following the keyword type = parse_type(ctype, &class); if (class != 0) fatal("Can't have extern in a typedef declaration"); // See if the typedef identifier already exists if (findtypedef(Text) != NULL) fatals("redefinition of typedef", Text); // Get any following '*' tokens type = parse_stars(type); // It doesn't exist so add it to the typedef list addtypedef(Text, type, *ctype); scan(&Token); return (type); } // Given a typedef name, return the type it represents static int type_of_typedef(char *name, struct symtable **ctype) { struct symtable *t; // Look up the typedef in the list t = findtypedef(name); if (t == NULL) fatals("unknown type", name); scan(&Token); *ctype = t->ctype; return (t->type); } // Parse the declaration of a variable or function. 
// The type and any following '*'s have been scanned, and we // have the identifier in the Token variable. // The class argument is the variable's class. // Return a pointer to the symbol's entry in the symbol table static struct symtable *symbol_declaration(int type, struct symtable *ctype, int class, struct ASTnode **tree) { struct symtable *sym = NULL; char *varname = strdup(Text); // Ensure that we have an identifier. // We copied it above so we can scan more tokens in, e.g. // an assignment expression for a local variable. ident(); // Deal with function declarations if (Token.token == T_LPAREN) { return (function_declaration(varname, type, ctype, class)); } // See if this array or scalar variable has already been declared switch (class) { case C_EXTERN: case C_STATIC: case C_GLOBAL: case C_LOCAL: case C_PARAM: if (findlocl(varname) != NULL) fatals("Duplicate local variable declaration", varname); case C_MEMBER: if (findmember(varname) != NULL) fatals("Duplicate struct/union member declaration", varname); } // Add the array or scalar variable to the symbol table if (Token.token == T_LBRACKET) { sym = array_declaration(varname, type, ctype, class); *tree= NULL; // Local arrays are not initialised } else sym = scalar_declaration(varname, type, ctype, class, tree); return (sym); } // Parse a list of symbols where there is an initial type. // Return the type of the symbols. et1 and et2 are end tokens. int declaration_list(struct symtable **ctype, int class, int et1, int et2, struct ASTnode **gluetree) { int inittype, type; struct symtable *sym; struct ASTnode *tree; *gluetree = NULL; // Get the initial type. 
If -1, it was // a composite type definition, return this if ((inittype = parse_type(ctype, &class)) == -1) return (inittype); // Now parse the list of symbols while (1) { // See if this symbol is a pointer type = parse_stars(inittype); // Parse this symbol sym = symbol_declaration(type, *ctype, class, &tree); // We parsed a function, there is no list so leave if (sym->stype == S_FUNCTION) { if (class != C_GLOBAL && class != C_STATIC) fatal("Function definition not at global level"); return (type); } // Glue any AST tree from a local declaration // to build a sequence of assignments to perform if (*gluetree == NULL) *gluetree = tree; else *gluetree = mkastnode(A_GLUE, P_NONE, NULL, *gluetree, NULL, tree, NULL, 0); // We are at the end of the list, leave if (Token.token == et1 || Token.token == et2) return (type); // Otherwise, we need a comma as separator comma(); } return(0); // Keep -Wall happy } // Parse one or more global declarations, either // variables, functions or structs void global_declarations(void) { struct symtable *ctype= NULL; struct ASTnode *unused; while (Token.token != T_EOF) { declaration_list(&ctype, C_GLOBAL, T_SEMI, T_EOF, &unused); // Skip any semicolons and right curly brackets if (Token.token == T_SEMI) scan(&Token); } } ================================================ FILE: 56_Local_Arrays/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c void reject_token(struct token *t); int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, int type, struct symtable *ctype, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue); struct ASTnode *mkastleaf(int op, int type, struct symtable *ctype, struct symtable *sym, int intvalue); struct ASTnode *mkastunary(int op, int type, struct symtable *ctype, struct ASTnode *left, struct symtable *sym, int intvalue); void dumpAST(struct ASTnode 
*n, int label, int level); // gen.c int genlabel(void); int genAST(struct ASTnode *n, int iflabel, int looptoplabel, int loopendlabel, int parentASTop); void genpreamble(); void genpostamble(); void genfreeregs(int keepreg); void genglobsym(struct symtable *node); int genglobstr(char *strvalue, int append); void genglobstrend(void); int genprimsize(int type); int genalign(int type, int offset, int direction); void genreturn(int reg, int id); // cg.c int cgprimsize(int type); int cgalign(int type, int offset, int direction); void cgtextseg(); void cgdataseg(); int alloc_register(void); void freeall_registers(int keepreg); void spill_all_regs(void); void cgpreamble(); void cgpostamble(); void cgfuncpreamble(struct symtable *sym); void cgfuncpostamble(struct symtable *sym); int cgloadint(int value, int type); int cgloadglob(struct symtable *sym, int op); int cgloadlocal(struct symtable *sym, int op); int cgloadglobstr(int label); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdiv(int r1, int r2); int cgshlconst(int r, int val); int cgcall(struct symtable *sym, int numargs); void cgcopyarg(int r, int argposn); int cgstorglob(int r, struct symtable *sym); int cgstorlocal(int r, struct symtable *sym); void cgglobsym(struct symtable *node); void cgglobstr(int l, char *strvalue, int append); void cgglobstrend(void); int cgcompare_and_set(int ASTop, int r1, int r2); int cgcompare_and_jump(int ASTop, int r1, int r2, int label); void cglabel(int l); void cgjump(int l); int cgwiden(int r, int oldtype, int newtype); void cgreturn(int reg, struct symtable *sym); int cgaddress(struct symtable *sym); int cgderef(int r, int type); int cgstorderef(int r1, int r2, int type); int cgnegate(int r); int cginvert(int r); int cglognot(int r); void cgloadboolean(int r, int val); int cgboolean(int r, int op, int label); int cgand(int r1, int r2); int cgor(int r1, int r2); int cgxor(int r1, int r2); int cgshl(int r1, int r2); int cgshr(int r1, int r2); 
void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel); void cgmove(int r1, int r2); // expr.c struct ASTnode *expression_list(int endtoken); struct ASTnode *binexpr(int ptp); // stmt.c struct ASTnode *compound_statement(int inswitch); // misc.c void match(int t, char *what); void semi(void); void lbrace(void); void rbrace(void); void lparen(void); void rparen(void); void ident(void); void comma(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node); struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn); struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn); struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int nelems); struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype); struct symtable *addstruct(char *name); struct symtable *addunion(char *name); struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int nelems); struct symtable *addenum(char *name, int class, int value); struct symtable *addtypedef(char *name, int type, struct symtable *ctype); struct symtable *findglob(char *s); struct symtable *findlocl(char *s); struct symtable *findsymbol(char *s); struct symtable *findmember(char *s); struct symtable *findstruct(char *s); struct symtable *findunion(char *s); struct symtable *findenumtype(char *s); struct symtable *findenumval(char *s); struct symtable *findtypedef(char *s); void clear_symtable(void); void freeloclsyms(void); void freestaticsyms(void); void dumptable(struct symtable *head, char *name, int indent); void dumpsymtables(void); // decl.c int parse_type(struct symtable **ctype, int *class); int parse_stars(int type); int parse_cast(struct 
symtable **ctype); int declaration_list(struct symtable **ctype, int class, int et1, int et2, struct ASTnode **gluetree); void global_declarations(void); // types.c int inttype(int type); int ptrtype(int type); int pointer_to(int type); int value_at(int type); int typesize(int type, struct symtable *ctype); struct ASTnode *modify_type(struct ASTnode *tree, int rtype, struct symtable *rctype, int op); // opt.c struct ASTnode *optimise(struct ASTnode *n); ================================================ FILE: 56_Local_Arrays/defs.h ================================================ #include #include #include #include // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 enum { TEXTLEN = 512 // Length of identifiers in input }; // Commands and default filenames #define AOUT "a.out" #ifdef __NASM__ #define ASCMD "nasm -f elf64 -w-ptr -o " #define LDCMD "cc -no-pie -fno-plt -Wall -o " #else #define ASCMD "as -o " #define LDCMD "cc -o " #endif #define CPPCMD "cpp -nostdinc -isystem " // Token types enum { T_EOF, // Binary operators T_ASSIGN, T_ASPLUS, T_ASMINUS, T_ASSTAR, T_ASSLASH, T_QUESTION, T_LOGOR, T_LOGAND, T_OR, T_XOR, T_AMPER, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, T_LSHIFT, T_RSHIFT, T_PLUS, T_MINUS, T_STAR, T_SLASH, // Other operators T_INC, T_DEC, T_INVERT, T_LOGNOT, // Type keywords T_VOID, T_CHAR, T_INT, T_LONG, // Other keywords T_IF, T_ELSE, T_WHILE, T_FOR, T_RETURN, T_STRUCT, T_UNION, T_ENUM, T_TYPEDEF, T_EXTERN, T_BREAK, T_CONTINUE, T_SWITCH, T_CASE, T_DEFAULT, T_SIZEOF, T_STATIC, // Structural tokens T_INTLIT, T_STRLIT, T_SEMI, T_IDENT, T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, T_LBRACKET, T_RBRACKET, T_COMMA, T_DOT, T_ARROW, T_COLON }; // Token structure struct token { int token; // Token type, from the enum list above char *tokstr; // String version of the token int intvalue; // For T_INTLIT, the integer value }; // AST node types. 
The first few line up // with the related tokens enum { A_ASSIGN = 1, A_ASPLUS, A_ASMINUS, A_ASSTAR, A_ASSLASH, // 1 A_TERNARY, A_LOGOR, A_LOGAND, A_OR, A_XOR, A_AND, // 6 A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_LSHIFT, A_RSHIFT, // 12 A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, // 20 A_INTLIT, A_STRLIT, A_IDENT, A_GLUE, // 24 A_IF, A_WHILE, A_FUNCTION, A_WIDEN, A_RETURN, // 28 A_FUNCCALL, A_DEREF, A_ADDR, A_SCALE, // 33 A_PREINC, A_PREDEC, A_POSTINC, A_POSTDEC, // 37 A_NEGATE, A_INVERT, A_LOGNOT, A_TOBOOL, A_BREAK, // 41 A_CONTINUE, A_SWITCH, A_CASE, A_DEFAULT, A_CAST // 46 }; // Primitive types. The bottom 4 bits is an integer // value that represents the level of indirection, // e.g. 0= no pointer, 1= pointer, 2= pointer pointer etc. enum { P_NONE, P_VOID = 16, P_CHAR = 32, P_INT = 48, P_LONG = 64, P_STRUCT=80, P_UNION=96 }; // Structural types enum { S_VARIABLE, S_FUNCTION, S_ARRAY }; // Storage classes enum { C_GLOBAL = 1, // Globally visible symbol C_LOCAL, // Locally visible symbol C_PARAM, // Locally visible function parameter C_EXTERN, // External globally visible symbol C_STATIC, // Static symbol, visible in one file C_STRUCT, // A struct C_UNION, // A union C_MEMBER, // Member of a struct or union C_ENUMTYPE, // A named enumeration type C_ENUMVAL, // A named enumeration value C_TYPEDEF // A named typedef }; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol struct symtable *ctype; // If struct/union, ptr to that type int stype; // Structural type for the symbol int class; // Storage class for the symbol int size; // Total size in bytes of this symbol int nelems; // Functions: # params. 
Arrays: # elements #define st_endlabel st_posn // For functions, the end label int st_posn; // For locals, the negative offset // from the stack base pointer int *initlist; // List of initial values struct symtable *next; // Next symbol in one list struct symtable *member; // First member of a function, struct, }; // union or enum // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates struct symtable *ctype; // If struct/union, ptr to that type int rvalue; // True if the node is an rvalue struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; struct symtable *sym; // For many AST nodes, the pointer to // the symbol in the symbol table #define a_intvalue a_size // For A_INTLIT, the integer value int a_size; // For A_SCALE, the size to scale by }; enum { NOREG = -1, // Use NOREG when the AST generation // functions have no register to return NOLABEL = 0 // Use NOLABEL when we have no label to // pass to genAST() }; ================================================ FILE: 56_Local_Arrays/expr.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of expressions // Copyright (c) 2019 Warren Toomey, GPL3 // expression_list: // | expression // | expression ',' expression_list // ; // Parse a list of zero or more comma-separated expressions and // return an AST composed of A_GLUE nodes with the left-hand child // being the sub-tree of previous expressions (or NULL) and the right-hand // child being the next expression. Each A_GLUE node will have size field // set to the number of expressions in the tree at this point. 
// If no
// expressions are parsed, NULL is returned
//
// endtoken is the token that terminates the list; it is left as the
// current token for the caller to consume.
struct ASTnode *expression_list(int endtoken) {
  struct ASTnode *tree = NULL;
  struct ASTnode *child = NULL;
  int exprcount = 0;

  // Loop until the end token
  while (Token.token != endtoken) {

    // Parse the next expression and increment the expression count
    child = binexpr(0);
    exprcount++;

    // Build an A_GLUE AST node with the previous tree as the left child
    // and the new expression as the right child. Store the expression count.
    tree =
      mkastnode(A_GLUE, P_NONE, NULL, tree, NULL, child, NULL, exprcount);

    // Stop when we reach the end token
    if (Token.token == endtoken)
      break;

    // Must have a ',' at this point
    match(T_COMMA, ",");
  }

  // Return the tree of expressions
  return (tree);
}

// Parse a function call and return its AST.
// The function's name is expected in Text and the current token
// is expected to be the '('.
static struct ASTnode *funccall(void) {
  struct ASTnode *tree;
  struct symtable *funcptr;

  // Check that the identifier has been defined as a function,
  // then make a leaf node for it.
  if ((funcptr = findsymbol(Text)) == NULL || funcptr->stype != S_FUNCTION) {
    fatals("Undeclared function", Text);
  }

  // Get the '('
  lparen();

  // Parse the argument expression list
  tree = expression_list(T_RPAREN);

  // XXX Check type of each argument against the function's prototype

  // Build the function call AST node. Store the
  // function's return type as this node's type.
  // Also record the function's symbol-id
  tree =
    mkastunary(A_FUNCCALL, funcptr->type, funcptr->ctype, tree, funcptr, 0);

  // Get the ')'
  rparen();
  return (tree);
}

// Parse the index into an array and return an AST tree for it.
// left is the already-parsed base expression, which must have a pointer
// type; the current token is the '['. The result is an A_DEREF over
// (base + scaled index).
static struct ASTnode *array_access(struct ASTnode *left) {
  struct ASTnode *right;

  // Check that the sub-tree is a pointer
  if (!ptrtype(left->type))
    fatal("Not an array or pointer");

  // Get the '['
  scan(&Token);

  // Parse the following expression
  right = binexpr(0);

  // Get the ']'
  match(T_RBRACKET, "]");

  // Ensure that this is of int type
  if (!inttype(right->type))
    fatal("Array index is not of integer type");

  // Make the left tree an rvalue
  left->rvalue = 1;

  // Scale the index by the size of the element's type
  right = modify_type(right, left->type, left->ctype, A_ADD);

  // Return an AST tree where the array's base has the offset added to it,
  // and dereference the element. Still an lvalue at this point.
  left =
    mkastnode(A_ADD, left->type, left->ctype, left, NULL, right, NULL, 0);
  left =
    mkastunary(A_DEREF, value_at(left->type), left->ctype, left, NULL, 0);
  return (left);
}

// Parse the member reference of a struct or union
// and return an AST tree for it. If withpointer is true,
// the access is through a pointer to the member.
// The current token is the '.' or '->'. The result is an A_DEREF over
// (base address + member offset), typed as the member.
static struct ASTnode *member_access(struct ASTnode *left, int withpointer) {
  struct ASTnode *right;
  struct symtable *typeptr;
  struct symtable *m;

  // Check that the left AST tree is a pointer to struct or union
  if (withpointer && left->type != pointer_to(P_STRUCT)
      && left->type != pointer_to(P_UNION))
    fatal("Expression is not a pointer to a struct/union");

  // Or, check that the left AST tree is a struct or union.
  // If so, change it from an A_IDENT to an A_ADDR so that
  // we get the base address, not the value at this address.
  if (!withpointer) {
    if (left->type == P_STRUCT || left->type == P_UNION)
      left->op = A_ADDR;
    else
      fatal("Expression is not a struct/union");
  }

  // Get the details of the composite type
  typeptr = left->ctype;

  // Skip the '.' or '->' token and get the member's name
  scan(&Token);
  ident();

  // Find the matching member's name in the type
  // Die if we can't find it
  for (m = typeptr->member; m != NULL; m = m->next)
    if (!strcmp(m->name, Text))
      break;
  if (m == NULL)
    fatals("No member found in struct/union: ", Text);

  // Make the left tree an rvalue
  left->rvalue = 1;

  // Build an A_INTLIT node with the offset
  right = mkastleaf(A_INTLIT, P_INT, NULL, NULL, m->st_posn);

  // Add the member's offset to the base of the struct/union
  // and dereference it. Still an lvalue at this point
  left =
    mkastnode(A_ADD, pointer_to(m->type), m->ctype, left, NULL, right, NULL,
	      0);
  left = mkastunary(A_DEREF, m->type, m->ctype, left, NULL, 0);
  return (left);
}

// Parse a parenthesised expression and
// return an AST node representing it.
// This is either a plain "(expression)" or a cast "(type) expression";
// a cast is returned as an A_CAST node wrapped around the expression.
static struct ASTnode *paren_expression(void) {
  struct ASTnode *n;
  int type = 0;
  struct symtable *ctype = NULL;

  // Beginning of a parenthesised expression, skip the '('.
  scan(&Token);

  // If the token after is a type identifier, this is a cast expression.
  // The missing breaks below are deliberate: a typedef name falls into the
  // type keywords, and a parsed cast falls into default to parse the
  // expression being cast.
  switch (Token.token) {
  case T_IDENT:
    // We have to see if the identifier matches a typedef.
    // If not, treat it as an expression.
    if (findtypedef(Text) == NULL) {
      n = binexpr(0);
      break;
    }
  case T_VOID:
  case T_CHAR:
  case T_INT:
  case T_LONG:
  case T_STRUCT:
  case T_UNION:
  case T_ENUM:
    // Get the type inside the parentheses
    type = parse_cast(&ctype);

    // Skip the closing ')' and then parse the following expression
    rparen();

  default:
    n = binexpr(0);		// Scan in the expression
  }

  // We now have at least an expression in n, and possibly a non-zero type
  // in type if there was a cast. Skip the closing ')' if there was no cast.
  if (type == 0)
    rparen();
  else
    // Otherwise, make a unary AST node for the cast
    n = mkastunary(A_CAST, type, ctype, n, NULL, 0);
  return (n);
}

// Parse a primary factor and return an
// AST node representing it.
// Handles sizeof(type), integer and string literals, enum values,
// variables, arrays, function calls and parenthesised expressions.
// Leaf nodes have the following token scanned in before returning;
// the sizeof, function call and parenthesis paths return directly.
static struct ASTnode *primary(void) {
  struct ASTnode *n;
  struct symtable *enumptr;
  struct symtable *varptr;
  int id;
  int type = 0;
  int size, class;
  struct symtable *ctype;

  switch (Token.token) {
  case T_STATIC:
  case T_EXTERN:
    fatal("Compiler doesn't support static or extern local declarations");
  case T_SIZEOF:
    // Skip the T_SIZEOF and ensure we have a left parenthesis
    scan(&Token);
    if (Token.token != T_LPAREN)
      fatal("Left parenthesis expected after sizeof");
    scan(&Token);

    // Get the type inside the parentheses
    // NOTE(review): class and ctype are passed to parse_type() without
    // being initialised here -- confirm parse_type() sets both before
    // reading them.
    type = parse_stars(parse_type(&ctype, &class));

    // Get the type's size
    size = typesize(type, ctype);
    rparen();

    // Make a leaf node int literal with the size
    return (mkastleaf(A_INTLIT, P_INT, NULL, NULL, size));

  case T_INTLIT:
    // For an INTLIT token, make a leaf AST node for it.
    // Make it a P_CHAR if it's within the P_CHAR range
    if (Token.intvalue >= 0 && Token.intvalue < 256)
      n = mkastleaf(A_INTLIT, P_CHAR, NULL, NULL, Token.intvalue);
    else
      n = mkastleaf(A_INTLIT, P_INT, NULL, NULL, Token.intvalue);
    break;

  case T_STRLIT:
    // For a STRLIT token, generate the assembly for it.
    id = genglobstr(Text, 0);

    // For successive STRLIT tokens, append their contents
    // to this one
    while (1) {
      scan(&Peektoken);
      if (Peektoken.token != T_STRLIT)
	break;
      genglobstr(Text, 1);
      scan(&Token);		// To skip it properly
    }

    // Now make a leaf AST node for it. id is the string's label.
    genglobstrend();
    n = mkastleaf(A_STRLIT, pointer_to(P_CHAR), NULL, NULL, id);
    break;

  case T_IDENT:
    // If the identifier matches an enum value,
    // return an A_INTLIT node
    if ((enumptr = findenumval(Text)) != NULL) {
      n = mkastleaf(A_INTLIT, P_INT, NULL, NULL, enumptr->st_posn);
      break;
    }

    // See if this identifier exists as a symbol. For arrays, set rvalue to 1.
    if ((varptr = findsymbol(Text)) == NULL)
      fatals("Unknown variable or function", Text);
    switch (varptr->stype) {
    case S_VARIABLE:
      n = mkastleaf(A_IDENT, varptr->type, varptr->ctype, varptr, 0);
      break;
    case S_ARRAY:
      n = mkastleaf(A_ADDR, varptr->type, varptr->ctype, varptr, 0);
      n->rvalue = 1;
      break;
    case S_FUNCTION:
      // Function call, see if the next token is a left parenthesis
      scan(&Token);
      if (Token.token != T_LPAREN)
	fatals("Function name used without parentheses", Text);
      return (funccall());
    default:
      fatals("Identifier not a scalar or array variable", Text);
    }

    break;

  case T_LPAREN:
    return (paren_expression());

  default:
    fatals("Expecting a primary expression, got token", Token.tokstr);
  }

  // Scan in the next token and return the leaf node
  scan(&Token);
  return (n);
}

// Parse a postfix expression and return
// an AST node representing it. The
// identifier is already in Text.
// After the primary expression, any run of '[...]', '.member',
// '->member', '++' and '--' operators is applied left to right.
static struct ASTnode *postfix(void) {
  struct ASTnode *n;

  // Get the primary expression
  n = primary();

  // Loop until there are no more postfix operators
  while (1) {
    switch (Token.token) {
    case T_LBRACKET:
      // An array reference
      n = array_access(n);
      break;

    case T_DOT:
      // Access into a struct or union
      n = member_access(n, 0);
      break;

    case T_ARROW:
      // Pointer access into a struct or union
      n = member_access(n, 1);
      break;

    case T_INC:
      // Post-increment: skip over the token
      if (n->rvalue == 1)
	fatal("Cannot ++ on rvalue");
      scan(&Token);

      // Can't do it twice
      if (n->op == A_POSTINC || n->op == A_POSTDEC)
	fatal("Cannot ++ and/or -- more than once");

      // and change the AST operation
      n->op = A_POSTINC;
      break;

    case T_DEC:
      // Post-decrement: skip over the token
      if (n->rvalue == 1)
	fatal("Cannot -- on rvalue");
      scan(&Token);

      // Can't do it twice
      if (n->op == A_POSTINC || n->op == A_POSTDEC)
	fatal("Cannot ++ and/or -- more than once");

      // and change the AST operation
      n->op = A_POSTDEC;
      break;

    default:
      return (n);
    }
  }

  return (NULL);		// Keep -Wall happy
}


// Convert a binary operator token into a binary AST
operation. // We rely on a 1:1 mapping from token to AST operation static int binastop(int tokentype) { if (tokentype > T_EOF && tokentype <= T_SLASH) return (tokentype); fatals("Syntax error, token", Tstring[tokentype]); return (0); // Keep -Wall happy } // Return true if a token is right-associative, // false otherwise. static int rightassoc(int tokentype) { if (tokentype >= T_ASSIGN && tokentype <= T_ASSLASH) return (1); return (0); } // Operator precedence for each token. Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 10, // T_EOF, T_ASSIGN, T_ASPLUS, 10, 10, 10, // T_ASMINUS, T_ASSTAR, T_ASSLASH, 15, // T_QUESTION, 20, 30, // T_LOGOR, T_LOGAND 40, 50, 60, // T_OR, T_XOR, T_AMPER 70, 70, // T_EQ, T_NE 80, 80, 80, 80, // T_LT, T_GT, T_LE, T_GE 90, 90, // T_LSHIFT, T_RSHIFT 100, 100, // T_PLUS, T_MINUS 110, 110 // T_STAR, T_SLASH }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec; if (tokentype > T_SLASH) fatals("Token with no precedence in op_precedence:", Tstring[tokentype]); prec = OpPrec[tokentype]; if (prec == 0) fatals("Syntax error, token", Tstring[tokentype]); return (prec); } // prefix_expression: postfix_expression // | '*' prefix_expression // | '&' prefix_expression // | '-' prefix_expression // | '++' prefix_expression // | '--' prefix_expression // ; // Parse a prefix expression and return // a sub-tree representing it. 
struct ASTnode *prefix(void) {
  // Every prefix operator is handled the same way: skip the operator,
  // parse the operand recursively, then rewrite or wrap the operand's tree.
  // Anything that is not a prefix operator is parsed by postfix().
  struct ASTnode *tree;
  switch (Token.token) {
  case T_AMPER:
    // Get the next token and parse it
    // recursively as a prefix expression
    scan(&Token);
    tree = prefix();

    // Ensure that it's an identifier
    if (tree->op != A_IDENT)
      fatal("& operator must be followed by an identifier");

    // Prevent '&' being performed on an array
    if (tree->sym->stype == S_ARRAY)
      fatal("& operator cannot be performed on an array");

    // Now change the operator to A_ADDR and the type to
    // a pointer to the original type
    tree->op = A_ADDR;
    tree->type = pointer_to(tree->type);
    break;
  case T_STAR:
    // Get the next token and parse it
    // recursively as a prefix expression
    scan(&Token);
    tree = prefix();

    // For now, ensure it's either another deref or an
    // identifier
    if (tree->op != A_IDENT && tree->op != A_DEREF)
      fatal("* operator must be followed by an identifier or *");

    // Prepend an A_DEREF operation to the tree
    tree =
      mkastunary(A_DEREF, value_at(tree->type), tree->ctype, tree, NULL, 0);
    break;
  case T_MINUS:
    // Get the next token and parse it
    // recursively as a prefix expression
    scan(&Token);
    tree = prefix();

    // Prepend a A_NEGATE operation to the tree and
    // make the child an rvalue. Because chars are unsigned,
    // also widen this if needed to int so that it's signed
    tree->rvalue = 1;
    if (tree->type == P_CHAR)
      tree->type = P_INT;
    tree = mkastunary(A_NEGATE, tree->type, tree->ctype, tree, NULL, 0);
    break;
  case T_INVERT:
    // Get the next token and parse it
    // recursively as a prefix expression
    scan(&Token);
    tree = prefix();

    // Prepend a A_INVERT operation to the tree and
    // make the child an rvalue.
    tree->rvalue = 1;
    tree = mkastunary(A_INVERT, tree->type, tree->ctype, tree, NULL, 0);
    break;
  case T_LOGNOT:
    // Get the next token and parse it
    // recursively as a prefix expression
    scan(&Token);
    tree = prefix();

    // Prepend a A_LOGNOT operation to the tree and
    // make the child an rvalue.
    tree->rvalue = 1;
    tree = mkastunary(A_LOGNOT, tree->type, tree->ctype, tree, NULL, 0);
    break;
  case T_INC:
    // Get the next token and parse it
    // recursively as a prefix expression
    scan(&Token);
    tree = prefix();

    // For now, ensure it's an identifier
    if (tree->op != A_IDENT)
      fatal("++ operator must be followed by an identifier");

    // Prepend an A_PREINC operation to the tree
    tree = mkastunary(A_PREINC, tree->type, tree->ctype, tree, NULL, 0);
    break;
  case T_DEC:
    // Get the next token and parse it
    // recursively as a prefix expression
    scan(&Token);
    tree = prefix();

    // For now, ensure it's an identifier
    if (tree->op != A_IDENT)
      fatal("-- operator must be followed by an identifier");

    // Prepend an A_PREDEC operation to the tree
    tree = mkastunary(A_PREDEC, tree->type, tree->ctype, tree, NULL, 0);
    break;
  default:
    tree = postfix();
  }
  return (tree);
}

// Return an AST tree whose root is a binary operator.
// Parameter ptp is the previous token's precedence.
// Parsing stops at ';', ')', ']', ',', ':' or '}', or at an operator that
// does not bind more tightly than ptp (right-associative operators also
// continue on equal precedence). The returned tree is marked as an rvalue,
// except on the ternary path, which returns its node directly.
struct ASTnode *binexpr(int ptp) {
  struct ASTnode *left, *right;
  struct ASTnode *ltemp, *rtemp;
  int ASTop;
  int tokentype;

  // Get the tree on the left.
  // Fetch the next token at the same time.
  left = prefix();

  // If we hit one of several terminating tokens, return just the left node
  tokentype = Token.token;
  if (tokentype == T_SEMI || tokentype == T_RPAREN ||
      tokentype == T_RBRACKET || tokentype == T_COMMA ||
      tokentype == T_COLON || tokentype == T_RBRACE) {
    left->rvalue = 1;
    return (left);
  }

  // While the precedence of this token is more than that of the
  // previous token precedence, or it's right associative and
  // equal to the previous token's precedence
  while ((op_precedence(tokentype) > ptp) ||
	 (rightassoc(tokentype) && op_precedence(tokentype) == ptp)) {
    // Fetch in the next integer literal
    scan(&Token);

    // Recursively call binexpr() with the
    // precedence of our token to build a sub-tree
    right = binexpr(OpPrec[tokentype]);

    // Determine the operation to be performed on the sub-trees
    ASTop = binastop(tokentype);

    switch (ASTop) {
    case A_TERNARY:
      // Ensure we have a ':' token, scan in the expression after it
      match(T_COLON, ":");
      ltemp = binexpr(0);

      // Build and return the AST for this statement. Use the middle
      // expression's type as the return type. XXX We should also
      // consider the third expression's type.
      return (mkastnode
	      (A_TERNARY, right->type, right->ctype, left, right, ltemp,
	       NULL, 0));

    case A_ASSIGN:
      // Assignment
      // Make the right tree into an rvalue
      right->rvalue = 1;

      // Ensure the right's type matches the left
      right = modify_type(right, left->type, left->ctype, 0);
      if (right == NULL)
	fatal("Incompatible expression in assignment");

      // Make an assignment AST tree. However, switch
      // left and right around, so that the right expression's
      // code will be generated before the left expression
      ltemp = left;
      left = right;
      right = ltemp;
      break;

    default:
      // We are not doing a ternary or assignment, so both trees should
      // be rvalues. Convert both trees into rvalue if they are lvalue trees
      left->rvalue = 1;
      right->rvalue = 1;

      // Ensure the two types are compatible by trying
      // to modify each tree to match the other's type.
      ltemp = modify_type(left, right->type, right->ctype, ASTop);
      rtemp = modify_type(right, left->type, left->ctype, ASTop);
      if (ltemp == NULL && rtemp == NULL)
	fatal("Incompatible types in binary expression");
      if (ltemp != NULL)
	left = ltemp;
      if (rtemp != NULL)
	right = rtemp;
    }

    // Join that sub-tree with ours. Convert the token
    // into an AST operation at the same time.
    left =
      mkastnode(binastop(tokentype), left->type, left->ctype, left, NULL,
		right, NULL, 0);

    // Some operators produce an int result regardless of their operands
    switch (binastop(tokentype)) {
    case A_LOGOR:
    case A_LOGAND:
    case A_EQ:
    case A_NE:
    case A_LT:
    case A_GT:
    case A_LE:
    case A_GE:
      left->type = P_INT;
    }

    // Update the details of the current token.
    // If we hit a terminating token, return just the left node
    tokentype = Token.token;
    if (tokentype == T_SEMI || tokentype == T_RPAREN ||
	tokentype == T_RBRACKET || tokentype == T_COMMA ||
	tokentype == T_COLON || tokentype == T_RBRACE) {
      left->rvalue = 1;
      return (left);
    }
  }

  // Return the tree we have when the precedence
  // is the same or lower
  left->rvalue = 1;
  return (left);
}

================================================
FILE: 56_Local_Arrays/gen.c
================================================
#include "defs.h"
#include "data.h"
#include "decl.h"

// Generic code generator
// Copyright (c) 2019 Warren Toomey, GPL3

// Generate and return a new label number.
// Numbers start at 1 and increase by one on every call.
static int labelid = 1;
int genlabel(void) {
  return (labelid++);
}

// Generate the code for an IF statement
// and an optional ELSE clause.
// n->left is the condition, n->mid the true body and n->right the
// optional else body. looptoplabel and loopendlabel are the labels of
// the enclosing loop and are passed on to genAST() for the true body.
static int genIF(struct ASTnode *n, int looptoplabel, int loopendlabel) {
  int Lfalse, Lend;

  // Generate two labels: one for the
  // false compound statement, and one
  // for the end of the overall IF statement.
  // When there is no ELSE clause, Lfalse _is_
  // the ending label!
  Lfalse = genlabel();
  if (n->right)
    Lend = genlabel();

  // Generate the condition code followed
  // by a jump to the false label.
genAST(n->left, Lfalse, NOLABEL, NOLABEL, n->op); genfreeregs(NOREG); // Generate the true compound statement genAST(n->mid, NOLABEL, looptoplabel, loopendlabel, n->op); genfreeregs(NOREG); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); genfreeregs(NOREG); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = genlabel(); Lend = genlabel(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. genAST(n->left, Lend, Lstart, Lend, n->op); genfreeregs(NOREG); // Generate the compound statement for the body genAST(n->right, NOLABEL, Lstart, Lend, n->op); genfreeregs(NOREG); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Generate the code for a SWITCH statement static int genSWITCH(struct ASTnode *n) { int *caseval, *caselabel; int Ljumptop, Lend; int i, reg, defaultlabel = 0, casecount = 0; struct ASTnode *c; // Create arrays for the case values and associated labels. // Ensure that we have at least one position in each array. caseval = (int *) malloc((n->a_intvalue + 1) * sizeof(int)); caselabel = (int *) malloc((n->a_intvalue + 1) * sizeof(int)); // Generate labels for the top of the jump table, and the // end of the switch statement. Set a default label for // the end of the switch, in case we don't have a default. 
Ljumptop = genlabel(); Lend = genlabel(); defaultlabel = Lend; // Output the code to calculate the switch condition reg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, 0); cgjump(Ljumptop); genfreeregs(reg); // Walk the right-child linked list to // generate the code for each case for (i = 0, c = n->right; c != NULL; i++, c = c->right) { // Get a label for this case. Store it // and the case value in the arrays. // Record if it is the default case. caselabel[i] = genlabel(); caseval[i] = c->a_intvalue; cglabel(caselabel[i]); if (c->op == A_DEFAULT) defaultlabel = caselabel[i]; else casecount++; // Generate the case code. Pass in the end label for the breaks. // If case has no body, we will fall into the following body. if (c->left) genAST(c->left, NOLABEL, NOLABEL, Lend, 0); genfreeregs(NOREG); } // Ensure the last case jumps past the switch table cgjump(Lend); // Now output the switch table and the end label. cgswitch(reg, casecount, Ljumptop, caselabel, caseval, defaultlabel); cglabel(Lend); return (NOREG); } // Generate the code for an // A_LOGAND or A_LOGOR operation static int gen_logandor(struct ASTnode *n) { // Generate two labels int Lfalse = genlabel(); int Lend = genlabel(); int reg; // Generate the code for the left expression // followed by the jump to the false label reg= genAST(n->left, NOLABEL, NOLABEL, NOLABEL, 0); cgboolean(reg, n->op, Lfalse); genfreeregs(NOREG); // Generate the code for the right expression // followed by the jump to the false label reg= genAST(n->right, NOLABEL, NOLABEL, NOLABEL, 0); cgboolean(reg, n->op, Lfalse); genfreeregs(reg); // We didn't jump so set the right boolean value if (n->op== A_LOGAND) { cgloadboolean(reg, 1); cgjump(Lend); cglabel(Lfalse); cgloadboolean(reg, 0); } else { cgloadboolean(reg, 0); cgjump(Lend); cglabel(Lfalse); cgloadboolean(reg, 1); } cglabel(Lend); return(reg); } // Generate the code to copy the arguments of a // function call to its parameters, then call the // function itself. 
// Return the register that holds
// the function's return value.
// n is the A_FUNCCALL node; n->left is the head of the argument
// list, walked through each glue node's left pointer.
static int gen_funccall(struct ASTnode *n) {
  struct ASTnode *gluetree = n->left;
  int reg;
  int numargs = 0;

  // Save the registers before we copy the arguments
  spill_all_regs();

  // If there is a list of arguments, walk this list
  // from the last argument (right-hand child) to the
  // first
  while (gluetree) {
    // Calculate the expression's value
    reg = genAST(gluetree->right, NOLABEL, NOLABEL, NOLABEL, gluetree->op);
    // Copy this into the n'th function parameter: size is 1, 2, 3, ...
    cgcopyarg(reg, gluetree->a_size);
    // Keep the first (highest) number of arguments
    if (numargs == 0)
      numargs = gluetree->a_size;
    gluetree = gluetree->left;
  }

  // Call the function, clean up the stack (based on numargs),
  // and return its result
  return (cgcall(n->sym, numargs));
}

// Generate code for a ternary expression.
// n->left is the condition, n->mid the true expression and
// n->right the false expression. Both results are moved into
// one freshly allocated register, which is returned.
static int gen_ternary(struct ASTnode *n) {
  int Lfalse, Lend;
  int reg, expreg;

  // Generate two labels: one for the
  // false expression, and one for the
  // end of the overall expression
  Lfalse = genlabel();
  Lend = genlabel();

  // Generate the condition code followed
  // by a jump to the false label.
  genAST(n->left, Lfalse, NOLABEL, NOLABEL, n->op);
  genfreeregs(NOREG);

  // Get a register to hold the result of the two expressions
  reg = alloc_register();

  // Generate the true expression and the false label.
  // Move the expression result into the known register.
  expreg = genAST(n->mid, NOLABEL, NOLABEL, NOLABEL, n->op);
  cgmove(expreg, reg);
  // Don't free the register holding the result, though!
  genfreeregs(reg);
  cgjump(Lend);
  cglabel(Lfalse);

  // Generate the false expression and the end label.
  // Move the expression result into the known register.
  expreg = genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op);
  cgmove(expreg, reg);
  // Don't free the register holding the result, though!
  genfreeregs(reg);
  cglabel(Lend);
  return (reg);
}

// Given an AST, an optional label, and the AST op
// of the parent, generate assembly code recursively.
// Return the register id with the tree's final value.
//
// iflabel is the label handed to compare-and-jump and A_TOBOOL
// code; looptoplabel and loopendlabel are the targets used by
// A_CONTINUE and A_BREAK. NOREG is returned for an empty tree
// and for nodes that produce no value.
int genAST(struct ASTnode *n, int iflabel, int looptoplabel,
           int loopendlabel, int parentASTop) {
  int leftreg= NOREG, rightreg= NOREG;

  // Empty tree, do nothing
  if (n==NULL) return(NOREG);

  // We have some specific AST node handling at the top
  // so that we don't evaluate the child sub-trees immediately
  switch (n->op) {
    case A_IF:
      return (genIF(n, looptoplabel, loopendlabel));
    case A_WHILE:
      return (genWHILE(n));
    case A_SWITCH:
      return (genSWITCH(n));
    case A_FUNCCALL:
      return (gen_funccall(n));
    case A_TERNARY:
      return (gen_ternary(n));
    case A_LOGOR:
      return (gen_logandor(n));
    case A_LOGAND:
      return (gen_logandor(n));
    case A_GLUE:
      // Do each child statement, and free the
      // registers after each child
      if (n->left != NULL) genAST(n->left, iflabel, looptoplabel, loopendlabel, n->op);
      genfreeregs(NOREG);
      if (n->right != NULL) genAST(n->right, iflabel, looptoplabel, loopendlabel, n->op);
      genfreeregs(NOREG);
      return (NOREG);
    case A_FUNCTION:
      // Generate the function's preamble before the code
      // in the child sub-tree
      cgfuncpreamble(n->sym);
      genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op);
      cgfuncpostamble(n->sym);
      return (NOREG);
  }

  // General AST node handling below

  // Get the left and right sub-tree values
  if (n->left) leftreg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op);
  if (n->right) rightreg = genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op);

  switch (n->op) {
    case A_ADD:
      return (cgadd(leftreg, rightreg));
    case A_SUBTRACT:
      return (cgsub(leftreg, rightreg));
    case A_MULTIPLY:
      return (cgmul(leftreg, rightreg));
    case A_DIVIDE:
      return (cgdiv(leftreg, rightreg));
    case A_AND:
      return (cgand(leftreg, rightreg));
    case A_OR:
      return (cgor(leftreg, rightreg));
    case A_XOR:
      return (cgxor(leftreg, rightreg));
    case A_LSHIFT:
      return (cgshl(leftreg, rightreg));
    case A_RSHIFT:
      return (cgshr(leftreg, rightreg));
    case A_EQ:
    case A_NE:
    case A_LT:
    case A_GT:
    case A_LE:
    case A_GE:
      // If the parent AST node is an A_IF, A_WHILE or A_TERNARY,
      // generate a compare followed by a jump. Otherwise, compare
      // registers and set one to 1 or 0 based on the comparison.
      if (parentASTop == A_IF || parentASTop == A_WHILE ||
          parentASTop == A_TERNARY)
        return (cgcompare_and_jump(n->op, leftreg, rightreg, iflabel));
      else
        return (cgcompare_and_set(n->op, leftreg, rightreg));
    case A_INTLIT:
      return (cgloadint(n->a_intvalue, n->type));
    case A_STRLIT:
      return (cgloadglobstr(n->a_intvalue));
    case A_IDENT:
      // Load our value if we are an rvalue
      // or we are being dereferenced
      if (n->rvalue || parentASTop == A_DEREF) {
        if (n->sym->class == C_GLOBAL || n->sym->class == C_STATIC) {
          return (cgloadglob(n->sym, n->op));
        } else {
          return (cgloadlocal(n->sym, n->op));
        }
      } else
        return (NOREG);
    case A_ASPLUS:
    case A_ASMINUS:
    case A_ASSTAR:
    case A_ASSLASH:
    case A_ASSIGN:

      // For the '+=' and friends operators, generate suitable code
      // and get the register with the result. Then take the left child,
      // make it the right child so that we can fall into the assignment code.
      // Note that this rewrites n->right in the AST itself.
      switch (n->op) {
        case A_ASPLUS:
          leftreg = cgadd(leftreg, rightreg);
          n->right = n->left;
          break;
        case A_ASMINUS:
          leftreg = cgsub(leftreg, rightreg);
          n->right = n->left;
          break;
        case A_ASSTAR:
          leftreg = cgmul(leftreg, rightreg);
          n->right = n->left;
          break;
        case A_ASSLASH:
          leftreg = cgdiv(leftreg, rightreg);
          n->right = n->left;
          break;
      }

      // Now into the assignment code
      // Are we assigning to an identifier or through a pointer?
      switch (n->right->op) {
        case A_IDENT:
          if (n->right->sym->class == C_GLOBAL ||
              n->right->sym->class == C_STATIC)
            return (cgstorglob(leftreg, n->right->sym));
          else
            return (cgstorlocal(leftreg, n->right->sym));
        case A_DEREF:
          return (cgstorderef(leftreg, rightreg, n->right->type));
        default:
          fatald("Can't A_ASSIGN in genAST(), op", n->op);
      }
    case A_WIDEN:
      // Widen the child's type to the parent's type
      return (cgwiden(leftreg, n->left->type, n->type));
    case A_RETURN:
      cgreturn(leftreg, Functionid);
      return (NOREG);
    case A_ADDR:
      return (cgaddress(n->sym));
    case A_DEREF:
      // If we are an rvalue, dereference to get the value we point at,
      // otherwise leave it for A_ASSIGN to store through the pointer
      if (n->rvalue)
        return (cgderef(leftreg, n->left->type));
      else
        return (leftreg);
    case A_SCALE:
      // Small optimisation: use shift if the
      // scale value is a known power of two
      switch (n->a_size) {
        case 2:
          return (cgshlconst(leftreg, 1));
        case 4:
          return (cgshlconst(leftreg, 2));
        case 8:
          return (cgshlconst(leftreg, 3));
        default:
          // Load a register with the size and
          // multiply the leftreg by this size
          rightreg = cgloadint(n->a_size, P_INT);
          return (cgmul(leftreg, rightreg));
      }
    case A_POSTINC:
    case A_POSTDEC:
      // Load the variable's value into a register
      // and post increment/decrement it
      if (n->sym->class == C_GLOBAL || n->sym->class == C_STATIC)
        return (cgloadglob(n->sym, n->op));
      else
        return (cgloadlocal(n->sym, n->op));
    case A_PREINC:
    case A_PREDEC:
      // Load the variable's value into a register
      // and pre increment/decrement it
      if (n->left->sym->class == C_GLOBAL || n->left->sym->class == C_STATIC)
        return (cgloadglob(n->left->sym, n->op));
      else
        return (cgloadlocal(n->left->sym, n->op));
    case A_NEGATE:
      return (cgnegate(leftreg));
    case A_INVERT:
      return (cginvert(leftreg));
    case A_LOGNOT:
      return (cglognot(leftreg));
    case A_TOBOOL:
      // If the parent AST node is an A_IF or A_WHILE, generate
      // a compare followed by a jump. Otherwise, set the register
      // to 0 or 1 based on its zeroness or non-zeroness
      return (cgboolean(leftreg, parentASTop, iflabel));
    case A_BREAK:
      cgjump(loopendlabel);
      return (NOREG);
    case A_CONTINUE:
      cgjump(looptoplabel);
      return (NOREG);
    case A_CAST:
      return (leftreg);		// Not much to do
    default:
      fatald("Unknown AST operator", n->op);
  }
  return (NOREG);		// Keep -Wall happy
}

// Thin wrappers that forward to the cg*() functions
// and to the register allocator.
void genpreamble() {
  cgpreamble();
}
void genpostamble() {
  cgpostamble();
}
void genfreeregs(int keepreg) {
  freeall_registers(keepreg);
}
void genglobsym(struct symtable *node) {
  cgglobsym(node);
}

// Generate a global string.
// If append is true, append to
// previous genglobstr() call.
// Returns the label number generated for this call.
int genglobstr(char *strvalue, int append) {
  int l = genlabel();
  cgglobstr(l, strvalue, append);
  return (l);
}
void genglobstrend(void) {
  cgglobstrend();
}
int genprimsize(int type) {
  return (cgprimsize(type));
}
int genalign(int type, int offset, int direction) {
  return (cgalign(type, offset, direction));
}

================================================
FILE: 56_Local_Arrays/include/ctype.h
================================================
#ifndef _CTYPE_H_
# define _CTYPE_H_

// Prototypes for the character classification
// and case conversion functions
int isalnum(int c);
int isalpha(int c);
int iscntrl(int c);
int isdigit(int c);
int isgraph(int c);
int islower(int c);
int isprint(int c);
int ispunct(int c);
int isspace(int c);
int isupper(int c);
int isxdigit(int c);
int isascii(int c);
int isblank(int c);

int toupper(int c);
int tolower(int c);

#endif	// _CTYPE_H_

================================================
FILE: 56_Local_Arrays/include/errno.h
================================================
#ifndef _ERRNO_H_
# define _ERRNO_H_

#endif // _ERRNO_H_

================================================
FILE: 56_Local_Arrays/include/fcntl.h
================================================
#ifndef _FCNTL_H_
# define _FCNTL_H_

// Access mode flags for open()
#define O_RDONLY 00
#define O_WRONLY 01
#define O_RDWR 02

int open(char *pathname, int flags);

#endif // _FCNTL_H_
================================================ FILE: 56_Local_Arrays/include/stddef.h ================================================ #ifndef _STDDEF_H_ # define _STDDEF_H_ #ifndef NULL # define NULL (void *)0 #endif typedef long size_t; #endif //_STDDEF_H_ ================================================ FILE: 56_Local_Arrays/include/stdio.h ================================================ #ifndef _STDIO_H_ # define _STDIO_H_ #include #ifndef NULL # define NULL (void *)0 #endif #ifndef EOF # define EOF (-1) #endif // This FILE definition will do for now typedef char * FILE; FILE *fopen(char *pathname, char *mode); size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream); size_t fwrite(void *ptr, size_t size, size_t nmemb, FILE *stream); int fclose(FILE *stream); int printf(char *format); int fprintf(FILE *stream, char *format); int fgetc(FILE *stream); int fputc(int c, FILE *stream); int fputs(char *s, FILE *stream); int putc(int c, FILE *stream); int putchar(int c); int puts(char *s); extern FILE *stdin; extern FILE *stdout; extern FILE *stderr; #endif // _STDIO_H_ ================================================ FILE: 56_Local_Arrays/include/stdlib.h ================================================ #ifndef _STDLIB_H_ # define _STDLIB_H_ void exit(int status); void _Exit(int status); void *malloc(int size); void free(void *ptr); void *calloc(int nmemb, int size); void *realloc(void *ptr, int size); #endif // _STDLIB_H_ ================================================ FILE: 56_Local_Arrays/include/string.h ================================================ #ifndef _STRING_H_ # define _STRING_H_ char *strdup(char *s); char *strchr(char *s, int c); char *strrchr(char *s, int c); int strcmp(char *s1, char *s2); int strncmp(char *s1, char *s2, size_t n); #endif // _STRING_H_ ================================================ FILE: 56_Local_Arrays/include/unistd.h ================================================ #ifndef _UNISTD_H_ # define _UNISTD_H_ void 
_exit(int status); int unlink(char *pathname); #endif // _UNISTD_H_ ================================================ FILE: 56_Local_Arrays/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Given a string with a '.' and at least a 1-character suffix // after the '.', change the suffix to be the given character. // Return the new string or NULL if the original string could // not be modified char *alter_suffix(char *str, char suffix) { char *posn; char *newstr; // Clone the string if ((newstr = strdup(str)) == NULL) return (NULL); // Find the '.' if ((posn = strrchr(newstr, '.')) == NULL) return (NULL); // Ensure there is a suffix posn++; if (*posn == '\0') return (NULL); // Change the suffix and NUL-terminate the string *posn = suffix; posn++; *posn = '\0'; return (newstr); } // Given an input filename, compile that file // down to assembly code. 
Return the new file's name static char *do_compile(char *filename) { char cmd[TEXTLEN]; // Change the input file's suffix to .s Outfilename = alter_suffix(filename, 's'); if (Outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .c on the end\n", filename); exit(1); } // Generate the pre-processor command snprintf(cmd, TEXTLEN, "%s %s %s", CPPCMD, INCDIR, filename); // Open up the pre-processor pipe if ((Infile = popen(cmd, "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", filename, strerror(errno)); exit(1); } Infilename = filename; // Create the output file if ((Outfile = fopen(Outfilename, "w")) == NULL) { fprintf(stderr, "Unable to create %s: %s\n", Outfilename, strerror(errno)); exit(1); } Line = 1; // Reset the scanner Linestart = 1; Putback = '\n'; clear_symtable(); // Clear the symbol table if (O_verbose) printf("compiling %s\n", filename); scan(&Token); // Get the first token from the input Peektoken.token = 0; // and set there is no lookahead token genpreamble(); // Output the preamble global_declarations(); // Parse the global declarations genpostamble(); // Output the postamble fclose(Outfile); // Close the output file // Dump the symbol table if requested if (O_dumpsym) { printf("Symbols for %s\n", filename); dumpsymtables(); fprintf(stdout, "\n\n"); } freestaticsyms(); // Free any static symbols in the file return (Outfilename); } // Given an input filename, assemble that file // down to object code. 
Return the object filename char *do_assemble(char *filename) { char cmd[TEXTLEN]; int err; char *outfilename = alter_suffix(filename, 'o'); if (outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .s on the end\n", filename); exit(1); } // Build the assembly command and run it snprintf(cmd, TEXTLEN, "%s %s %s", ASCMD, outfilename, filename); if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Assembly of %s failed\n", filename); exit(1); } return (outfilename); } // Given a list of object files and an output filename, // link all of the object filenames together. void do_link(char *outfilename, char *objlist[]) { int cnt, size = TEXTLEN; char cmd[TEXTLEN], *cptr; int err; // Start with the linker command and the output file cptr = cmd; cnt = snprintf(cptr, size, "%s %s ", LDCMD, outfilename); cptr += cnt; size -= cnt; // Now append each object file while (*objlist != NULL) { cnt = snprintf(cptr, size, "%s ", *objlist); cptr += cnt; size -= cnt; objlist++; } if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Linking failed\n"); exit(1); } } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s [-vcSTM] [-o outfile] file [file ...]\n", prog); fprintf(stderr, " -v give verbose output of the compilation stages\n"); fprintf(stderr, " -c generate object files but don't link them\n"); fprintf(stderr, " -S generate assembly files but don't link them\n"); fprintf(stderr, " -T dump the AST trees for each input file\n"); fprintf(stderr, " -M dump the symbol table for each input file\n"); fprintf(stderr, " -o outfile, produce the outfile executable file\n"); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. 
enum { MAXOBJ = 100 }; int main(int argc, char *argv[]) { char *outfilename = AOUT; char *asmfile, *objfile; char *objlist[MAXOBJ]; int i, objcnt = 0; // Initialise our variables O_dumpAST = 0; O_dumpsym = 0; O_keepasm = 0; O_assemble = 0; O_verbose = 0; O_dolink = 1; // Scan for command-line options for (i = 1; i < argc; i++) { // No leading '-', stop scanning for options if (*argv[i] != '-') break; // For each option in this argument for (int j = 1; (*argv[i] == '-') && argv[i][j]; j++) { switch (argv[i][j]) { case 'o': outfilename = argv[++i]; // Save & skip to next argument break; case 'T': O_dumpAST = 1; break; case 'M': O_dumpsym = 1; break; case 'c': O_assemble = 1; O_keepasm = 0; O_dolink = 0; break; case 'S': O_keepasm = 1; O_assemble = 0; O_dolink = 0; break; case 'v': O_verbose = 1; break; default: usage(argv[0]); } } } // Ensure we have at lease one input file argument if (i >= argc) usage(argv[0]); // Work on each input file in turn while (i < argc) { asmfile = do_compile(argv[i]); // Compile the source file if (O_dolink || O_assemble) { objfile = do_assemble(asmfile); // Assemble it to object forma if (objcnt == (MAXOBJ - 2)) { fprintf(stderr, "Too many object files for the compiler to handle\n"); exit(1); } objlist[objcnt++] = objfile; // Add the object file's name objlist[objcnt] = NULL; // to the list of object files } if (!O_keepasm) // Remove the assembly file if unlink(asmfile); // we don't need to keep it i++; } // Now link all the object files together if (O_dolink) { do_link(outfilename, objlist); // If we don't need to keep the object // files, then remove them if (!O_assemble) { for (i = 0; objlist[i] != NULL; i++) unlink(objlist[i]); } } return (0); } ================================================ FILE: 56_Local_Arrays/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" #include #include // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the 
current token is t, // and fetch the next token. Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Match a comma and fetch the next token void comma(void) { match(T_COMMA, "comma"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d of %s\n", s, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d of %s\n", s1, s2, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d of %s\n", s, d, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d of %s\n", s, c, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } ================================================ FILE: 56_Local_Arrays/opt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST Tree Optimisation Code // Copyright (c) 2019 Warren Toomey, GPL3 // Fold an AST tree with a binary operator // and two A_INTLIT children. Return either // the original tree or a new leaf node. 
static struct ASTnode *fold2(struct ASTnode *n) { int val, leftval, rightval; // Get the values from each child leftval = n->left->a_intvalue; rightval = n->right->a_intvalue; // Perform some of the binary operations. // For any AST op we can't do, return // the original tree. switch (n->op) { case A_ADD: val = leftval + rightval; break; case A_SUBTRACT: val = leftval - rightval; break; case A_MULTIPLY: val = leftval * rightval; break; case A_DIVIDE: // Don't try to divide by zero. if (rightval == 0) return (n); val = leftval / rightval; break; default: return (n); } // Return a leaf node with the new value return (mkastleaf(A_INTLIT, n->type, NULL, NULL, val)); } // Fold an AST tree with a unary operator // and one INTLIT children. Return either // the original tree or a new leaf node. static struct ASTnode *fold1(struct ASTnode *n) { int val; // Get the child value. Do the // operation if recognised. // Return the new leaf node. val = n->left->a_intvalue; switch (n->op) { case A_WIDEN: break; case A_INVERT: val = ~val; break; case A_LOGNOT: val = !val; break; default: return (n); } // Return a leaf node with the new value return (mkastleaf(A_INTLIT, n->type, NULL, NULL, val)); } // Attempt to do constant folding on // the AST tree with the root node n static struct ASTnode *fold(struct ASTnode *n) { if (n == NULL) return (NULL); // Fold on the left child, then // do the same on the right child n->left = fold(n->left); n->right = fold(n->right); // If both children are A_INTLITs, do a fold2() if (n->left && n->left->op == A_INTLIT) { if (n->right && n->right->op == A_INTLIT) n = fold2(n); else // If only the left is A_INTLIT, do a fold1() n = fold1(n); } // Return the possibly modified tree return (n); } // Optimise an AST tree by // constant folding in all sub-trees struct ASTnode *optimise(struct ASTnode *n) { n = fold(n); return (n); } ================================================ FILE: 56_Local_Arrays/scan.c ================================================ 
#include "defs.h"
#include "data.h"
#include "decl.h"

// Lexical scanning
// Copyright (c) 2019 Warren Toomey, GPL3

// Return the position of character c
// in string s, or -1 if c not found
static int chrpos(char *s, int c) {
  int i;
  for (i = 0; s[i] != '\0'; i++)
    if (s[i] == (char) c)
      return (i);
  return (-1);
}

// Get the next character from the input file.
// A character saved by putback() is returned first. A '#' at the
// start of a line is treated as a pre-processor line marker: its
// line number and filename update Line and Infilename, and the
// rest of that line is skipped. Line and Linestart are kept up to
// date as newlines go past. Returns EOF at the end of the input.
static int next(void) {
  int c, l;

  if (Putback) {		// Use the character put
    c = Putback;		// back if there is one
    Putback = 0;
    return (c);
  }

  c = fgetc(Infile);		// Read from input file

  while (Linestart && c == '#') {	// We've hit a pre-processor statement
    Linestart = 0;		// No longer at the start of the line
    scan(&Token);		// Get the line number into l
    if (Token.token != T_INTLIT)
      fatals("Expecting pre-processor line number, got:", Text);
    l = Token.intvalue;

    scan(&Token);		// Get the filename in Text
    if (Token.token != T_STRLIT)
      fatals("Expecting pre-processor file name, got:", Text);

    if (Text[0] != '<') {	// If this is a real filename
      if (strcmp(Text, Infilename))	// and not the one we have now
        Infilename = strdup(Text);	// save it. Then update the line num
      Line = l;
    }

    // Skip to the end of the line. Stop at EOF too, otherwise a
    // final marker line with no newline would loop forever.
    while ((c = fgetc(Infile)) != '\n' && c != EOF)
      ;
    c = fgetc(Infile);		// and get the next character
    Linestart = 1;		// Now back at the start of the line
  }

  Linestart = 0;		// No longer at the start of the line
  if ('\n' == c) {
    Line++;			// Increment line count
    Linestart = 1;		// Now back at the start of the line
  }
  return (c);
}

// Put back an unwanted character.
// Only one character can be held at a time.
static void putback(int c) {
  Putback = c;
}

// Skip past input that we don't need to deal with,
// i.e. whitespace, newlines. Return the first
// character we do need to deal with.
static int skip(void) {
  int c;

  c = next();
  while (' ' == c || '\t' == c || '\n' == c || '\r' == c || '\f' == c) {
    c = next();
  }
  return (c);
}

// Read in a hexadecimal constant from the input.
// Called after "\x" has been consumed. Returns a value from 0
// to 255; it is fatal if no hex digit follows or if the value
// goes above 255.
static int hexchar(void) {
  int c, h, n = 0, f = 0;

  // Loop getting characters
  while (isxdigit(c = next())) {
    // Convert from char to int value
    h = chrpos("0123456789abcdef", tolower(c));

    // Add to running hex value. Check the range on every digit
    // so that a long run of digits cannot overflow n before the
    // check is made.
    n = n * 16 + h;
    if (n > 255)
      fatal("value out of range after '\\x'");
    f = 1;
  }

  // We hit a non-hex character, put it back
  putback(c);

  // Flag tells us we never saw any hex characters
  if (!f)
    fatal("missing digits after '\\x'");
  return (n);
}

// Return the next character from a character
// or string literal.
// A backslash introduces an escape: the single-letter escapes,
// up to three octal digits, or "\x" followed by hex digits.
// Any other escape is fatal.
static int scanch(void) {
  int i, c, c2;

  // Get the next input character and interpret
  // metacharacters that start with a backslash
  c = next();
  if (c == '\\') {
    switch (c = next()) {
      case 'a':
        return ('\a');
      case 'b':
        return ('\b');
      case 'f':
        return ('\f');
      case 'n':
        return ('\n');
      case 'r':
        return ('\r');
      case 't':
        return ('\t');
      case 'v':
        return ('\v');
      case '\\':
        return ('\\');
      case '"':
        return ('"');
      case '\'':
        return ('\'');

      // Deal with octal constants by reading in
      // characters until we hit a non-octal digit.
      // Build up the octal value in c2 and count
      // # digits in i. Permit only 3 octal digits.
      case '0':
      case '1':
      case '2':
      case '3':
      case '4':
      case '5':
      case '6':
      case '7':
        for (i = c2 = 0; isdigit(c) && c < '8'; c = next()) {
          if (++i > 3)
            break;
          c2 = c2 * 8 + (c - '0');
        }

        putback(c);		// Put back the first non-octal char
        return (c2);
      case 'x':
        return (hexchar());
      default:
        fatalc("unknown escape sequence", c);
    }
  }
  return (c);			// Just an ordinary old character!
}

// Scan and return an integer literal
// value from the input file.
static int scanint(int c) { int k, val = 0, radix = 10; // Assume the radix is 10, but if it starts with 0 if (c == '0') { // and the next character is 'x', it's radix 16 if ((c = next()) == 'x') { radix = 16; c = next(); } else // Otherwise, it's radix 8 radix = 8; } // Convert each character into an int value while ((k = chrpos("0123456789abcdef", tolower(c))) >= 0) { if (k >= radix) fatalc("invalid digit in integer literal", c); val = val * radix + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan in a string literal from the input file, // and store it in buf[]. Return the length of // the string. static int scanstr(char *buf) { int i, c; // Loop while we have enough buffer space for (i = 0; i < TEXTLEN - 1; i++) { // Get the next char and append to buf // Return when we hit the ending double quote if ((c = scanch()) == '"') { buf[i] = 0; return (i); } buf[i] = (char)c; } // Ran out of buf[] space fatal("String literal too long"); return (0); } // Scan an identifier from the input file and // store it in buf[]. Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { fatal("Identifier too long"); } else if (i < lim - 1) { buf[i++] = (char)c; } c = next(); } // We hit a non-valid character, put it back. // NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. 
static int keyword(char *s) { switch (*s) { case 'b': if (!strcmp(s, "break")) return (T_BREAK); break; case 'c': if (!strcmp(s, "case")) return (T_CASE); if (!strcmp(s, "char")) return (T_CHAR); if (!strcmp(s, "continue")) return (T_CONTINUE); break; case 'd': if (!strcmp(s, "default")) return (T_DEFAULT); break; case 'e': if (!strcmp(s, "else")) return (T_ELSE); if (!strcmp(s, "enum")) return (T_ENUM); if (!strcmp(s, "extern")) return (T_EXTERN); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'l': if (!strcmp(s, "long")) return (T_LONG); break; case 'r': if (!strcmp(s, "return")) return (T_RETURN); break; case 's': if (!strcmp(s, "sizeof")) return (T_SIZEOF); if (!strcmp(s, "static")) return (T_STATIC); if (!strcmp(s, "struct")) return (T_STRUCT); if (!strcmp(s, "switch")) return (T_SWITCH); break; case 't': if (!strcmp(s, "typedef")) return (T_TYPEDEF); break; case 'u': if (!strcmp(s, "union")) return (T_UNION); break; case 'v': if (!strcmp(s, "void")) return (T_VOID); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; } return (0); } // List of token strings, for debugging purposes char *Tstring[] = { "EOF", "=", "+=", "-=", "*=", "/=", "?", "||", "&&", "|", "^", "&", "==", "!=", ",", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", "default", "sizeof", "static", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":" }; // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. 
int scan(struct token *t) { int c, tokentype; // If we have a lookahead token, return this token if (Peektoken.token != 0) { t->token = Peektoken.token; t->tokstr = Peektoken.tokstr; t->intvalue = Peektoken.intvalue; Peektoken.token = 0; return (1); } // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': if ((c = next()) == '+') { t->token = T_INC; } else if (c == '=') { t->token = T_ASPLUS; } else { putback(c); t->token = T_PLUS; } break; case '-': if ((c = next()) == '-') { t->token = T_DEC; } else if (c == '>') { t->token = T_ARROW; } else if (c == '=') { t->token = T_ASMINUS; } else if (isdigit(c)) { // Negative int literal t->intvalue = -scanint(c); t->token = T_INTLIT; } else { putback(c); t->token = T_MINUS; } break; case '*': if ((c = next()) == '=') { t->token = T_ASSTAR; } else { putback(c); t->token = T_STAR; } break; case '/': if ((c = next()) == '=') { t->token = T_ASSLASH; } else { putback(c); t->token = T_SLASH; } break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case '[': t->token = T_LBRACKET; break; case ']': t->token = T_RBRACKET; break; case '~': t->token = T_INVERT; break; case '^': t->token = T_XOR; break; case ',': t->token = T_COMMA; break; case '.': t->token = T_DOT; break; case ':': t->token = T_COLON; break; case '?': t->token = T_QUESTION; break; case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { putback(c); t->token = T_LOGNOT; } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else if (c == '<') { t->token = T_LSHIFT; } else { putback(c); t->token = T_LT; } break; case '>': if ((c = next()) == '=') { t->token = T_GE; } else if (c == '>') { t->token = T_RSHIFT; } else { 
putback(c); t->token = T_GT; } break; case '&': if ((c = next()) == '&') { t->token = T_LOGAND; } else { putback(c); t->token = T_AMPER; } break; case '|': if ((c = next()) == '|') { t->token = T_LOGOR; } else { putback(c); t->token = T_OR; } break; case '\'': // If it's a quote, scan in the // literal character value and // the trailing quote t->intvalue = scanch(); t->token = T_INTLIT; if (next() != '\'') fatal("Expected '\\'' at end of char literal"); break; case '"': // Scan in a literal string scanstr(Text); t->token = T_STRLIT; break; default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if ((tokentype = keyword(Text)) != 0) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token t->tokstr = Tstring[t->token]; return (1); } ================================================ FILE: 56_Local_Arrays/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // Prototypes static struct ASTnode *single_statement(void); // compound_statement: // empty, i.e. 
no statement // | statement // | statement statements // ; // // statement: declaration // | expression_statement // | function_call // | if_statement // | while_statement // | for_statement // | return_statement // ; // if_statement: if_head // | if_head 'else' statement // ; // // if_head: 'if' '(' true_false_expression ')' statement ; // // Parse an IF statement including any // optional ELSE clause and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST->ctype, condAST, NULL, 0); rparen(); // Get the AST for the statement trueAST = single_statement(); // If we have an 'else', skip it // and get the AST for the statement if (Token.token == T_ELSE) { scan(&Token); falseAST = single_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, P_NONE, NULL, condAST, trueAST, falseAST, NULL, 0)); } // while_statement: 'while' '(' true_false_expression ')' statement ; // // Parse a WHILE statement and return its AST static struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST->ctype, condAST, NULL, 0); rparen(); // Get the AST for the statement. 
// Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Build and return the AST for this statement return (mkastnode(A_WHILE, P_NONE, NULL, condAST, NULL, bodyAST, NULL, 0)); } // for_statement: 'for' '(' expression_list ';' // true_false_expression ';' // expression_list ')' statement ; // // Parse a FOR statement and return its AST static struct ASTnode *for_statement(void) { struct ASTnode *condAST, *bodyAST; struct ASTnode *preopAST, *postopAST; struct ASTnode *tree; // Ensure we have 'for' '(' match(T_FOR, "for"); lparen(); // Get the pre_op expression and the ';' preopAST = expression_list(T_SEMI); semi(); // Get the condition and the ';'. // Force a non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST->ctype, condAST, NULL, 0); semi(); // Get the post_op expression and the ')' postopAST = expression_list(T_RPAREN); rparen(); // Get the statement which is the body // Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Glue the statement and the postop tree tree = mkastnode(A_GLUE, P_NONE, NULL, bodyAST, NULL, postopAST, NULL, 0); // Make a WHILE loop with the condition and this new body tree = mkastnode(A_WHILE, P_NONE, NULL, condAST, NULL, tree, NULL, 0); // And glue the preop tree to the A_WHILE tree return (mkastnode(A_GLUE, P_NONE, NULL, preopAST, NULL, tree, NULL, 0)); } // return_statement: 'return' '(' expression ')' ; // // Parse a return statement and return its AST static struct ASTnode *return_statement(void) { struct ASTnode *tree= NULL; // Ensure we have 'return' match(T_RETURN, "return"); // See if we have a return value if (Token.token == T_LPAREN) { // Can't return a value if function returns P_VOID if (Functionid->type == P_VOID) fatal("Can't return from a void function"); // Skip the left parenthesis lparen(); // Parse 
the following expression tree = binexpr(0); // Ensure this is compatible with the function's type tree = modify_type(tree, Functionid->type, Functionid->ctype, 0); if (tree == NULL) fatal("Incompatible type to return"); // Get the ')' rparen(); } // Add on the A_RETURN node tree = mkastunary(A_RETURN, P_NONE, NULL, tree, NULL, 0); // Get the ';' semi(); return (tree); } // break_statement: 'break' ; // // Parse a break statement and return its AST static struct ASTnode *break_statement(void) { if (Looplevel == 0 && Switchlevel == 0) fatal("no loop or switch to break out from"); scan(&Token); semi(); return (mkastleaf(A_BREAK, P_NONE, NULL, NULL, 0)); } // continue_statement: 'continue' ; // // Parse a continue statement and return its AST static struct ASTnode *continue_statement(void) { if (Looplevel == 0) fatal("no loop to continue to"); scan(&Token); semi(); return (mkastleaf(A_CONTINUE, P_NONE, NULL, NULL, 0)); } // Parse a switch statement and return its AST static struct ASTnode *switch_statement(void) { struct ASTnode *left, *body, *n, *c; struct ASTnode *casetree = NULL, *casetail; int inloop = 1, casecount = 0; int seendefault = 0; int ASTop, casevalue; // Skip the 'switch' and '(' scan(&Token); lparen(); // Get the switch expression, the ')' and the '{' left = binexpr(0); rparen(); lbrace(); // Ensure that this is of int type if (!inttype(left->type)) fatal("Switch expression is not of integer type"); // Build an A_SWITCH subtree with the expression as // the child n = mkastunary(A_SWITCH, P_NONE, NULL, left, NULL, 0); // Now parse the cases Switchlevel++; while (inloop) { switch (Token.token) { // Leave the loop when we hit a '}' case T_RBRACE: if (casecount == 0) fatal("No cases in switch"); inloop = 0; break; case T_CASE: case T_DEFAULT: // Ensure this isn't after a previous 'default' if (seendefault) fatal("case or default after existing default"); // Set the AST operation. 
Scan the case value if required if (Token.token == T_DEFAULT) { ASTop = A_DEFAULT; seendefault = 1; scan(&Token); } else { ASTop = A_CASE; scan(&Token); left = binexpr(0); // Ensure the case value is an integer literal if (left->op != A_INTLIT) fatal("Expecting integer literal for case value"); casevalue = left->a_intvalue; // Walk the list of existing case values to ensure // that there isn't a duplicate case value for (c = casetree; c != NULL; c = c->right) if (casevalue == c->a_intvalue) fatal("Duplicate case value"); } // Scan the ':' and increment the casecount match(T_COLON, ":"); casecount++; // If the next token is a T_CASE, the existing case will fall // into the next case. Otherwise, parse the case body. if (Token.token == T_CASE) body = NULL; else body = compound_statement(1); // Build a sub-tree with any compound statement as the left child // and link it in to the growing A_CASE tree if (casetree == NULL) { casetree = casetail = mkastunary(ASTop, P_NONE, NULL, body, NULL, casevalue); } else { casetail->right = mkastunary(ASTop, P_NONE, NULL, body, NULL, casevalue); casetail = casetail->right; } break; default: fatals("Unexpected token in switch", Token.tokstr); } } Switchlevel--; // We have a sub-tree with the cases and any default. Put the // case count into the A_SWITCH node and attach the case tree. n->a_intvalue = casecount; n->right = casetree; rbrace(); return (n); } // Parse a single statement and return its AST. static struct ASTnode *single_statement(void) { struct ASTnode *stmt; struct symtable *ctype; switch (Token.token) { case T_SEMI: // An empty statement semi(); break; case T_LBRACE: // We have a '{', so this is a compound statement lbrace(); stmt = compound_statement(0); rbrace(); return (stmt); case T_IDENT: // We have to see if the identifier matches a typedef. // If not, treat it as an expression. // Otherwise, fall down to the parse_type() call. 
if (findtypedef(Text) == NULL) { stmt = binexpr(0); semi(); return (stmt); } case T_CHAR: case T_INT: case T_LONG: case T_STRUCT: case T_UNION: case T_ENUM: case T_TYPEDEF: // The beginning of a variable declaration list. declaration_list(&ctype, C_LOCAL, T_SEMI, T_EOF, &stmt); semi(); return (stmt); // Any assignments from the declarations case T_IF: return (if_statement()); case T_WHILE: return (while_statement()); case T_FOR: return (for_statement()); case T_RETURN: return (return_statement()); case T_BREAK: return (break_statement()); case T_CONTINUE: return (continue_statement()); case T_SWITCH: return (switch_statement()); default: // For now, see if this is an expression. // This catches assignment statements. stmt = binexpr(0); semi(); return (stmt); } return (NULL); // Keep -Wall happy } // Parse a compound statement // and return its AST. If inswitch is true, // we look for a '}', 'case' or 'default' token // to end the parsing. Otherwise, look for // just a '}' to end the parsing. struct ASTnode *compound_statement(int inswitch) { struct ASTnode *left = NULL; struct ASTnode *tree; while (1) { // Leave if we've hit the end token. 
We do this first to allow // an empty compound statement if (Token.token == T_RBRACE) return (left); if (inswitch && (Token.token == T_CASE || Token.token == T_DEFAULT)) return (left); // Parse a single statement tree = single_statement(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, P_NONE, NULL, left, NULL, tree, NULL, 0); } } return (NULL); // Keep -Wall happy } ================================================ FILE: 56_Local_Arrays/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 // Append a node to the singly-linked list pointed to by head or tail void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node) { // Check for valid pointers if (head == NULL || tail == NULL || node == NULL) fatal("Either head, tail or node is NULL in appendsym"); // Append to the list if (*tail) { (*tail)->next = node; *tail = node; } else *head = *tail = node; node->next = NULL; } // Create a symbol node to be added to a symbol table list. // Set up the node's: // + type: char, int etc. // + ctype: composite type pointer for struct/union // + structural type: var, function, array etc. 
// + size: number of elements, or endlabel: end label for a function // + posn: Position information for local symbols // Return a pointer to the new node struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn) { // Get a new node struct symtable *node = (struct symtable *) malloc(sizeof(struct symtable)); if (node == NULL) fatal("Unable to malloc a symbol table node in newsym"); // Fill in the values if (name == NULL) node->name = NULL; else node->name = strdup(name); node->type = type; node->ctype = ctype; node->stype = stype; node->class = class; node->nelems = nelems; // For pointers and integer types, set the size // of the symbol. structs and union declarations // manually set this up themselves. if (ptrtype(type) || inttype(type)) node->size = nelems * typesize(type, ctype); node->st_posn = posn; node->next = NULL; node->member = NULL; node->initlist = NULL; return (node); } // Add a symbol to the global symbol list struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn) { struct symtable *sym = newsym(name, type, ctype, stype, class, nelems, posn); // For structs and unions, copy the size from the type node if (type == P_STRUCT || type == P_UNION) sym->size = ctype->size; appendsym(&Globhead, &Globtail, sym); return (sym); } // Add a symbol to the local symbol list struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int nelems) { struct symtable *sym = newsym(name, type, ctype, stype, C_LOCAL, nelems, 0); // For structs and unions, copy the size from the type node if (type == P_STRUCT || type == P_UNION) sym->size = ctype->size; appendsym(&Loclhead, &Locltail, sym); return (sym); } // Add a symbol to the parameter list struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype) { struct symtable *sym = newsym(name, type, ctype, stype, C_PARAM, 1, 0); appendsym(&Parmhead, &Parmtail, sym); 
return (sym); } // Add a symbol to the temporary member list struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int nelems) { struct symtable *sym = newsym(name, type, ctype, stype, C_MEMBER, nelems, 0); // For structs and unions, copy the size from the type node if (type == P_STRUCT || type == P_UNION) sym->size = ctype->size; appendsym(&Membhead, &Membtail, sym); return (sym); } // Add a struct to the struct list struct symtable *addstruct(char *name) { struct symtable *sym = newsym(name, P_STRUCT, NULL, 0, C_STRUCT, 0, 0); appendsym(&Structhead, &Structtail, sym); return (sym); } // Add a struct to the union list struct symtable *addunion(char *name) { struct symtable *sym = newsym(name, P_UNION, NULL, 0, C_UNION, 0, 0); appendsym(&Unionhead, &Uniontail, sym); return (sym); } // Add an enum type or value to the enum list. // Class is C_ENUMTYPE or C_ENUMVAL. // Use posn to store the int value. struct symtable *addenum(char *name, int class, int value) { struct symtable *sym = newsym(name, P_INT, NULL, 0, class, 0, value); appendsym(&Enumhead, &Enumtail, sym); return (sym); } // Add a typedef to the typedef list struct symtable *addtypedef(char *name, int type, struct symtable *ctype) { struct symtable *sym = newsym(name, type, ctype, 0, C_TYPEDEF, 0, 0); appendsym(&Typehead, &Typetail, sym); return (sym); } // Search for a symbol in a specific list. // Return a pointer to the found node or NULL if not found. // If class is not zero, also match on the given class static struct symtable *findsyminlist(char *s, struct symtable *list, int class) { for (; list != NULL; list = list->next) if ((list->name != NULL) && !strcmp(s, list->name)) if (class == 0 || class == list->class) return (list); return (NULL); } // Determine if the symbol s is in the global symbol table. // Return a pointer to the found node or NULL if not found. 
struct symtable *findglob(char *s) { return (findsyminlist(s, Globhead, 0)); } // Determine if the symbol s is in the local symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findlocl(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } return (findsyminlist(s, Loclhead, 0)); } // Determine if the symbol s is in the symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findsymbol(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } // Otherwise, try the local and global symbol lists node = findsyminlist(s, Loclhead, 0); if (node) return (node); return (findsyminlist(s, Globhead, 0)); } // Find a member in the member list // Return a pointer to the found node or NULL if not found. struct symtable *findmember(char *s) { return (findsyminlist(s, Membhead, 0)); } // Find a struct in the struct list // Return a pointer to the found node or NULL if not found. struct symtable *findstruct(char *s) { return (findsyminlist(s, Structhead, 0)); } // Find a struct in the union list // Return a pointer to the found node or NULL if not found. struct symtable *findunion(char *s) { return (findsyminlist(s, Unionhead, 0)); } // Find an enum type in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumtype(char *s) { return (findsyminlist(s, Enumhead, C_ENUMTYPE)); } // Find an enum value in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumval(char *s) { return (findsyminlist(s, Enumhead, C_ENUMVAL)); } // Find a type in the tyedef list // Return a pointer to the found node or NULL if not found. 
struct symtable *findtypedef(char *s) { return (findsyminlist(s, Typehead, 0)); } // Reset the contents of the symbol table void clear_symtable(void) { Globhead = Globtail = NULL; Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Membhead = Membtail = NULL; Structhead = Structtail = NULL; Unionhead = Uniontail = NULL; Enumhead = Enumtail = NULL; Typehead = Typetail = NULL; } // Clear all the entries in the local symbol table void freeloclsyms(void) { Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Functionid = NULL; } // Remove all static symbols from the global symbol table void freestaticsyms(void) { // g points at current node, prev at the previous one struct symtable *g, *prev = NULL; // Walk the global table looking for static entries for (g = Globhead; g != NULL; g = g->next) { if (g->class == C_STATIC) { // If there's a previous node, rearrange the prev pointer // to skip over the current node. If not, g is the head, // so do the same to Globhead if (prev != NULL) prev->next = g->next; else Globhead->next = g->next; // If g is the tail, point Globtail at the previous node // (if there is one), or Globhead if (g == Globtail) { if (prev != NULL) Globtail = prev; else Globtail = Globhead; } } } // Point prev at g before we move up to the next node prev = g; } // Dump a single symbol static void dumpsym(struct symtable *sym, int indent) { int i; for (i = 0; i < indent; i++) printf(" "); switch (sym->type & (~0xf)) { case P_VOID: printf("void "); break; case P_CHAR: printf("char "); break; case P_INT: printf("int "); break; case P_LONG: printf("long "); break; case P_STRUCT: if (sym->ctype != NULL) printf("struct %s ", sym->ctype->name); else printf("struct %s ", sym->name); break; case P_UNION: if (sym->ctype != NULL) printf("union %s ", sym->ctype->name); else printf("union %s ", sym->name); break; default: printf("unknown type "); } for (i = 0; i < (sym->type & 0xf); i++) printf("*"); printf("%s", sym->name); switch (sym->stype) { case 
S_VARIABLE: break; case S_FUNCTION: printf("()"); break; case S_ARRAY: printf("[]"); break; default: printf(" unknown stype"); } switch (sym->class) { case C_GLOBAL: printf(": global"); break; case C_LOCAL: printf(": local"); break; case C_PARAM: printf(": param"); break; case C_EXTERN: printf(": extern"); break; case C_STATIC: printf(": static"); break; case C_STRUCT: printf(": struct"); break; case C_UNION: printf(": union"); break; case C_MEMBER: printf(": member"); break; case C_ENUMTYPE: printf(": enumtype"); break; case C_ENUMVAL: printf(": enumval"); break; case C_TYPEDEF: printf(": typedef"); break; default: printf(": unknown class"); } switch (sym->stype) { case S_VARIABLE: if (sym->class == C_ENUMVAL) printf(", value %d\n", sym->st_posn); else printf(", size %d\n", sym->size); break; case S_FUNCTION: printf(", %d params\n", sym->nelems); break; case S_ARRAY: printf(", %d elems, size %d\n", sym->nelems, sym->size); break; } switch (sym->type & (~0xf)) { case P_STRUCT: case P_UNION: dumptable(sym->member, NULL, 4); } switch (sym->stype) { case S_FUNCTION: dumptable(sym->member, NULL, 4); } } // Dump one symbol table void dumptable(struct symtable *head, char *name, int indent) { struct symtable *sym; if (head != NULL && name != NULL) printf("%s\n--------\n", name); for (sym = head; sym != NULL; sym = sym->next) dumpsym(sym, indent); } void dumpsymtables(void) { dumptable(Globhead, "Global", 0); printf("\n"); dumptable(Enumhead, "Enums", 0); printf("\n"); dumptable(Typehead, "Typedefs", 0); } ================================================ FILE: 56_Local_Arrays/tests/err.input031.c ================================================ Expecting a primary expression, got token:+ on line 5 of input031.c ================================================ FILE: 56_Local_Arrays/tests/err.input032.c ================================================ Unknown variable or function:pizza on line 4 of input032.c ================================================ FILE: 
56_Local_Arrays/tests/err.input033.c ================================================ Incompatible type to return on line 4 of input033.c ================================================ FILE: 56_Local_Arrays/tests/err.input034.c ================================================ For now, declaration of non-global arrays is not implemented on line 4 of input034.c ================================================ FILE: 56_Local_Arrays/tests/err.input035.c ================================================ Duplicate local variable declaration:a on line 4 of input035.c ================================================ FILE: 56_Local_Arrays/tests/err.input036.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input036.c ================================================ FILE: 56_Local_Arrays/tests/err.input037.c ================================================ Expected:comma on line 3 of input037.c ================================================ FILE: 56_Local_Arrays/tests/err.input038.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input038.c ================================================ FILE: 56_Local_Arrays/tests/err.input039.c ================================================ No statements in function with non-void type on line 4 of input039.c ================================================ FILE: 56_Local_Arrays/tests/err.input040.c ================================================ No return for function with non-void type on line 4 of input040.c ================================================ FILE: 56_Local_Arrays/tests/err.input041.c ================================================ Can't return from a void function on line 3 of input041.c ================================================ FILE: 56_Local_Arrays/tests/err.input042.c ================================================ Unknown variable or function:fred on line 3 of input042.c 
================================================ FILE: 56_Local_Arrays/tests/err.input043.c ================================================ Unknown variable or function:b on line 3 of input043.c ================================================ FILE: 56_Local_Arrays/tests/err.input044.c ================================================ Unknown variable or function:z on line 3 of input044.c ================================================ FILE: 56_Local_Arrays/tests/err.input045.c ================================================ & operator must be followed by an identifier on line 3 of input045.c ================================================ FILE: 56_Local_Arrays/tests/err.input046.c ================================================ * operator must be followed by an identifier or * on line 3 of input046.c ================================================ FILE: 56_Local_Arrays/tests/err.input047.c ================================================ ++ operator must be followed by an identifier on line 3 of input047.c ================================================ FILE: 56_Local_Arrays/tests/err.input048.c ================================================ -- operator must be followed by an identifier on line 3 of input048.c ================================================ FILE: 56_Local_Arrays/tests/err.input049.c ================================================ Incompatible expression in assignment on line 6 of input049.c ================================================ FILE: 56_Local_Arrays/tests/err.input050.c ================================================ Incompatible types in binary expression on line 6 of input050.c ================================================ FILE: 56_Local_Arrays/tests/err.input051.c ================================================ Expected '\'' at end of char literal on line 4 of input051.c ================================================ FILE: 56_Local_Arrays/tests/err.input052.c ================================================ 
Unrecognised character:$ on line 5 of input052.c ================================================ FILE: 56_Local_Arrays/tests/err.input056.c ================================================ unknown struct/union type:var1 on line 2 of input056.c ================================================ FILE: 56_Local_Arrays/tests/err.input057.c ================================================ previously defined struct/union:fred on line 2 of input057.c ================================================ FILE: 56_Local_Arrays/tests/err.input059.c ================================================ Unknown variable or function:y on line 3 of input059.c ================================================ FILE: 56_Local_Arrays/tests/err.input060.c ================================================ Expression is not a struct/union on line 3 of input060.c ================================================ FILE: 56_Local_Arrays/tests/err.input061.c ================================================ Expression is not a pointer to a struct/union on line 3 of input061.c ================================================ FILE: 56_Local_Arrays/tests/err.input064.c ================================================ undeclared enum type::fred on line 1 of input064.c ================================================ FILE: 56_Local_Arrays/tests/err.input065.c ================================================ enum type redeclared::fred on line 2 of input065.c ================================================ FILE: 56_Local_Arrays/tests/err.input066.c ================================================ enum value redeclared::z on line 2 of input066.c ================================================ FILE: 56_Local_Arrays/tests/err.input068.c ================================================ redefinition of typedef:FOO on line 2 of input068.c ================================================ FILE: 56_Local_Arrays/tests/err.input069.c ================================================ unknown type:FLOO on line 2 of 
input069.c ================================================ FILE: 56_Local_Arrays/tests/err.input072.c ================================================ no loop or switch to break out from on line 1 of input072.c ================================================ FILE: 56_Local_Arrays/tests/err.input073.c ================================================ no loop to continue to on line 1 of input073.c ================================================ FILE: 56_Local_Arrays/tests/err.input075.c ================================================ Unexpected token in switch:if on line 4 of input075.c ================================================ FILE: 56_Local_Arrays/tests/err.input076.c ================================================ No cases in switch on line 3 of input076.c ================================================ FILE: 56_Local_Arrays/tests/err.input077.c ================================================ case or default after existing default on line 6 of input077.c ================================================ FILE: 56_Local_Arrays/tests/err.input078.c ================================================ case or default after existing default on line 6 of input078.c ================================================ FILE: 56_Local_Arrays/tests/err.input079.c ================================================ Duplicate case value on line 6 of input079.c ================================================ FILE: 56_Local_Arrays/tests/err.input085.c ================================================ Bad type in parameter list on line 1 of input085.c ================================================ FILE: 56_Local_Arrays/tests/err.input086.c ================================================ Function definition not at global level on line 3 of input086.c ================================================ FILE: 56_Local_Arrays/tests/err.input087.c ================================================ Bad type in member list on line 4 of input087.c 
================================================ FILE: 56_Local_Arrays/tests/err.input092.c ================================================ Type mismatch: literal vs. variable on line 1 of input092.c ================================================ FILE: 56_Local_Arrays/tests/err.input093.c ================================================ Unknown variable or function:fred on line 1 of input093.c ================================================ FILE: 56_Local_Arrays/tests/err.input094.c ================================================ Type mismatch: literal vs. variable on line 1 of input094.c ================================================ FILE: 56_Local_Arrays/tests/err.input095.c ================================================ Variable can not be initialised:x on line 1 of input095.c ================================================ FILE: 56_Local_Arrays/tests/err.input096.c ================================================ Array size is illegal:0 on line 1 of input096.c ================================================ FILE: 56_Local_Arrays/tests/err.input097.c ================================================ For now, declaration of non-global arrays is not implemented on line 2 of input097.c ================================================ FILE: 56_Local_Arrays/tests/err.input098.c ================================================ Too many values in initialisation list on line 1 of input098.c ================================================ FILE: 56_Local_Arrays/tests/err.input102.c ================================================ Cannot cast to a struct, union or void type on line 3 of input102.c ================================================ FILE: 56_Local_Arrays/tests/err.input103.c ================================================ Cannot cast to a struct, union or void type on line 3 of input103.c ================================================ FILE: 56_Local_Arrays/tests/err.input104.c ================================================ Cannot cast to a 
struct, union or void type on line 2 of input104.c ================================================ FILE: 56_Local_Arrays/tests/err.input105.c ================================================ Incompatible expression in assignment on line 4 of input105.c ================================================ FILE: 56_Local_Arrays/tests/err.input118.c ================================================ Compiler doesn't support static or extern local declarations on line 2 of input118.c ================================================ FILE: 56_Local_Arrays/tests/err.input124.c ================================================ Cannot ++ on rvalue on line 6 of input124.c ================================================ FILE: 56_Local_Arrays/tests/err.input126.c ================================================ Unknown variable or function:ptr on line 7 of input126.c ================================================ FILE: 56_Local_Arrays/tests/err.input129.c ================================================ Cannot ++ and/or -- more than once on line 6 of input129.c ================================================ FILE: 56_Local_Arrays/tests/err.input141.c ================================================ Declaration of array parameters is not implemented on line 4 of input141.c ================================================ FILE: 56_Local_Arrays/tests/err.input142.c ================================================ Array must have non-zero elements:fred on line 1 of input142.c ================================================ FILE: 56_Local_Arrays/tests/input001.c ================================================ int printf(char *fmt); void main() { printf("%d\n", 12 * 3); printf("%d\n", 18 - 2 * 4); printf("%d\n", 1 + 2 + 9 - 5/2 + 3*5); } ================================================ FILE: 56_Local_Arrays/tests/input002.c ================================================ int printf(char *fmt); void main() { int fred; int jim; fred= 5; jim= 12; printf("%d\n", fred + jim); } 
================================================ FILE: 56_Local_Arrays/tests/input003.c ================================================ int printf(char *fmt); void main() { int x; x= 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); } ================================================ FILE: 56_Local_Arrays/tests/input004.c ================================================ int printf(char *fmt); void main() { int x; x= 7 < 9; printf("%d\n", x); x= 7 <= 9; printf("%d\n", x); x= 7 != 9; printf("%d\n", x); x= 7 == 7; printf("%d\n", x); x= 7 >= 7; printf("%d\n", x); x= 7 <= 7; printf("%d\n", x); x= 9 > 7; printf("%d\n", x); x= 9 >= 7; printf("%d\n", x); x= 9 != 7; printf("%d\n", x); } ================================================ FILE: 56_Local_Arrays/tests/input005.c ================================================ int printf(char *fmt); void main() { int i; int j; i=6; j=12; if (i < j) { printf("%d\n", i); } else { printf("%d\n", j); } } ================================================ FILE: 56_Local_Arrays/tests/input006.c ================================================ int printf(char *fmt); void main() { int i; i=1; while (i <= 10) { printf("%d\n", i); i= i + 1; } } ================================================ FILE: 56_Local_Arrays/tests/input007.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 56_Local_Arrays/tests/input008.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 56_Local_Arrays/tests/input009.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } void fred() { 
int a; int b; a= 12; b= 3 * a; if (a >= b) { printf("%d\n", 2 * b - a); } } ================================================ FILE: 56_Local_Arrays/tests/input010.c ================================================ int printf(char *fmt); void main() { int i; char j; j= 20; printf("%d\n", j); i= 10; printf("%d\n", i); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 2; j= j + 1) { printf("%d\n", j); } } ================================================ FILE: 56_Local_Arrays/tests/input011.c ================================================ int printf(char *fmt); int main() { int i; char j; long k; i= 10; printf("%d\n", i); j= 20; printf("%d\n", j); k= 30; printf("%d\n", k); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 4; j= j + 1) { printf("%d\n", j); } for (k= 1; k <= 5; k= k + 1) { printf("%d\n", k); } return(i); printf("%d\n", 12345); return(3); } ================================================ FILE: 56_Local_Arrays/tests/input012.c ================================================ int printf(char *fmt); int fred() { return(5); } void main() { int x; x= fred(2); printf("%d\n", x); } ================================================ FILE: 56_Local_Arrays/tests/input013.c ================================================ int printf(char *fmt); int fred() { return(56); } void main() { int dummy; int result; dummy= printf("%d\n", 23); result= fred(10); dummy= printf("%d\n", result); } ================================================ FILE: 56_Local_Arrays/tests/input014.c ================================================ int printf(char *fmt); int fred() { return(20); } int main() { int result; printf("%d\n", 10); result= fred(15); printf("%d\n", result); printf("%d\n", fred(15)+10); return(0); } ================================================ FILE: 56_Local_Arrays/tests/input015.c ================================================ int printf(char *fmt); int main() { char a; char *b; char c; int d; int *e; int f; a= 18; 
printf("%d\n", a); b= &a; c= *b; printf("%d\n", c); d= 12; printf("%d\n", d); e= &d; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 56_Local_Arrays/tests/input016.c ================================================ int printf(char *fmt); int c; int d; int *e; int f; int main() { c= 12; d=18; printf("%d\n", c); e= &c + 1; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 56_Local_Arrays/tests/input017.c ================================================ int printf(char *fmt); int main() { char a; char *b; int d; int *e; b= &a; *b= 19; printf("%d\n", a); e= &d; *e= 12; printf("%d\n", d); return(0); } ================================================ FILE: 56_Local_Arrays/tests/input018.c ================================================ int printf(char *fmt); int main() { int a; int b; a= b= 34; printf("%d\n", a); printf("%d\n", b); return(0); } ================================================ FILE: 56_Local_Arrays/tests/input018a.c ================================================ int printf(char *fmt); int a; int *b; char c; char *d; int main() { b= &a; *b= 15; printf("%d\n", a); d= &c; *d= 16; printf("%d\n", c); return(0); } ================================================ FILE: 56_Local_Arrays/tests/input019.c ================================================ int printf(char *fmt); int a; int b; int c; int d; int e; int main() { a= 2; b= 4; c= 3; d= 2; e= (a+b) * (c+d); printf("%d\n", e); return(0); } ================================================ FILE: 56_Local_Arrays/tests/input020.c ================================================ int printf(char *fmt); int a; int b[25]; int main() { b[3]= 12; a= b[3]; printf("%d\n", a); return(0); } ================================================ FILE: 56_Local_Arrays/tests/input021.c ================================================ int printf(char *fmt); char c; char *str; int main() { c= '\n'; printf("%d\n", c); for (str= "Hello world\n"; 
*str != 0; str= str + 1) { printf("%c", *str); } return(0); } ================================================ FILE: 56_Local_Arrays/tests/input022.c ================================================ int printf(char *fmt); char a; char b; char c; int d; int e; int f; long g; long h; long i; int main() { b= 5; c= 7; a= b + c++; printf("%d\n", a); e= 5; f= 7; d= e + f++; printf("%d\n", d); h= 5; i= 7; g= h + i++; printf("%d\n", g); a= b-- + c; printf("%d\n", a); d= e-- + f; printf("%d\n", d); g= h-- + i; printf("%d\n", g); a= ++b + c; printf("%d\n", a); d= ++e + f; printf("%d\n", d); g= ++h + i; printf("%d\n", g); a= b * --c; printf("%d\n", a); d= e * --f; printf("%d\n", d); g= h * --i; printf("%d\n", g); return(0); } ================================================ FILE: 56_Local_Arrays/tests/input023.c ================================================ int printf(char *fmt); char *str; int x; int main() { x= -23; printf("%d\n", x); printf("%d\n", -10 * -10); x= 1; x= ~x; printf("%d\n", x); x= 2 > 5; printf("%d\n", x); x= !x; printf("%d\n", x); x= !x; printf("%d\n", x); x= 13; if (x) { printf("%d\n", 13); } x= 0; if (!x) { printf("%d\n", 14); } for (str= "Hello world\n"; *str; str++) { printf("%c", *str); } return(0); } ================================================ FILE: 56_Local_Arrays/tests/input024.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { a= 42; b= 19; printf("%d\n", a & b); printf("%d\n", a | b); printf("%d\n", a ^ b); printf("%d\n", 1 << 3); printf("%d\n", 63 >> 3); return(0); } ================================================ FILE: 56_Local_Arrays/tests/input025.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { char z; int y; int x; x= 10; y= 20; z= 30; printf("%d\n", x); printf("%d\n", y); printf("%d\n", z); a= 5; b= 15; c= 25; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); return(0); } 
================================================ FILE: 56_Local_Arrays/tests/input026.c ================================================ int printf(char *fmt); int main(int a, char b, long c, int d, int e, int f, int g, int h) { int i; int j; int k; a= 13; printf("%d\n", a); b= 23; printf("%d\n", b); c= 34; printf("%d\n", c); d= 44; printf("%d\n", d); e= 54; printf("%d\n", e); f= 64; printf("%d\n", f); g= 74; printf("%d\n", g); h= 84; printf("%d\n", h); i= 94; printf("%d\n", i); j= 95; printf("%d\n", j); k= 96; printf("%d\n", k); return(0); } ================================================ FILE: 56_Local_Arrays/tests/input027.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int param5(int a, int b, int c, int d, int e) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param2(int a, int b) { int c; int d; int e; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param0() { int a; int b; int c; int d; int e; a= 1; b= 2; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int main() { param8(1,2,3,4,5,6,7,8); param5(1,2,3,4,5); param2(1,2); param0(); return(0); } ================================================ FILE: 56_Local_Arrays/tests/input028.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } 
int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 56_Local_Arrays/tests/input029.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h); int fred(int a, int b, int c); int main(); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 56_Local_Arrays/tests/input030.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input030.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 56_Local_Arrays/tests/input031.c ================================================ int printf(char *fmt); int main() { int x; x= 2 + + 3 - * / ; } ================================================ FILE: 56_Local_Arrays/tests/input032.c ================================================ int printf(char *fmt); int main() { pizza cow llama sausage; } ================================================ FILE: 56_Local_Arrays/tests/input033.c ================================================ int printf(char *fmt); int main() { char *z; return(z); } ================================================ FILE: 56_Local_Arrays/tests/input035.c ================================================ int 
printf(char *fmt); int fred(int a, int b) { int a; return(a); } ================================================ FILE: 56_Local_Arrays/tests/input036.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c); ================================================ FILE: 56_Local_Arrays/tests/input037.c ================================================ int printf(char *fmt); int fred(int a, char b +, int z); ================================================ FILE: 56_Local_Arrays/tests/input038.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c, int g); ================================================ FILE: 56_Local_Arrays/tests/input039.c ================================================ int printf(char *fmt); int main() { int a; } ================================================ FILE: 56_Local_Arrays/tests/input040.c ================================================ int printf(char *fmt); int main() { int a; a= 5; } ================================================ FILE: 56_Local_Arrays/tests/input041.c ================================================ int printf(char *fmt); void fred() { return(5); } ================================================ FILE: 56_Local_Arrays/tests/input042.c ================================================ int printf(char *fmt); int main() { fred(5); } ================================================ FILE: 56_Local_Arrays/tests/input043.c ================================================ int printf(char *fmt); int main() { int a; a= b[4]; } ================================================ FILE: 56_Local_Arrays/tests/input044.c ================================================ int printf(char *fmt); int main() { int a; a= z; } ================================================ FILE: 56_Local_Arrays/tests/input045.c ================================================ int printf(char *fmt); int 
main() { int a; a= &5; } ================================================ FILE: 56_Local_Arrays/tests/input046.c ================================================ int printf(char *fmt); int main() { int a; a= *5; } ================================================ FILE: 56_Local_Arrays/tests/input047.c ================================================ int printf(char *fmt); int main() { int a; a= ++5; } ================================================ FILE: 56_Local_Arrays/tests/input048.c ================================================ int printf(char *fmt); int main() { int a; a= --5; } ================================================ FILE: 56_Local_Arrays/tests/input049.c ================================================ int printf(char *fmt); int main() { int x; char y; y= x; } ================================================ FILE: 56_Local_Arrays/tests/input050.c ================================================ int printf(char *fmt); int main() { char *a; char *b; a= a + b; } ================================================ FILE: 56_Local_Arrays/tests/input051.c ================================================ int printf(char *fmt); int main() { char a; a= 'fred'; } ================================================ FILE: 56_Local_Arrays/tests/input052.c ================================================ int printf(char *fmt); int main() { int a; a= $5.00; } ================================================ FILE: 56_Local_Arrays/tests/input053.c ================================================ int printf(char *fmt); int main() { printf("Hello world, %d\n", 23); return(0); } ================================================ FILE: 56_Local_Arrays/tests/input054.c ================================================ int printf(char *fmt); int main() { int i; for (i=0; i < 20; i++) { printf("Hello world, %d\n", i); } return(0); } ================================================ FILE: 56_Local_Arrays/tests/input055.c ================================================ int 
printf(char *fmt); int main(int argc, char **argv) { int i; char *argument; printf("Hello world\n"); for (i=0; i < argc; i++) { argument= *argv; argv= argv + 1; printf("Argument %d is %s\n", i, argument); } return(0); } ================================================ FILE: 56_Local_Arrays/tests/input056.c ================================================ struct foo { int x; }; struct mary var1; ================================================ FILE: 56_Local_Arrays/tests/input057.c ================================================ struct fred { int x; } ; struct fred { char y; } ; ================================================ FILE: 56_Local_Arrays/tests/input058.c ================================================ int printf(char *fmt); struct fred { int x; char y; long z; }; struct fred var2; struct fred *varptr; int main() { long result; var2.x= 12; printf("%d\n", var2.x); var2.y= 'c'; printf("%d\n", var2.y); var2.z= 4005; printf("%d\n", var2.z); result= var2.x + var2.y + var2.z; printf("%d\n", result); varptr= &var2; result= varptr->x + varptr->y + varptr->z; printf("%d\n", result); return(0); } ================================================ FILE: 56_Local_Arrays/tests/input059.c ================================================ int main() { int x; x= y.foo; } ================================================ FILE: 56_Local_Arrays/tests/input060.c ================================================ int main() { int x; x= x.foo; } ================================================ FILE: 56_Local_Arrays/tests/input061.c ================================================ int main() { int x; x= x->foo; } ================================================ FILE: 56_Local_Arrays/tests/input062.c ================================================ int printf(char *fmt); union fred { char w; int x; int y; long z; }; union fred var1; union fred *varptr; int main() { var1.x= 65; printf("%d\n", var1.x); var1.x= 66; printf("%d\n", var1.x); printf("%d\n", var1.y); printf("The next two 
depend on the endian of the platform\n"); printf("%d\n", var1.w); printf("%d\n", var1.z); varptr= &var1; varptr->x= 67; printf("%d\n", varptr->x); printf("%d\n", varptr->y); return(0); } ================================================ FILE: 56_Local_Arrays/tests/input063.c ================================================ int printf(char *fmt); enum fred { apple=1, banana, carrot, pear=10, peach, mango, papaya }; enum jane { aple=1, bnana, crrot, par=10, pech, mago, paaya }; enum fred var1; enum jane var2; enum fred var3; int main() { var1= carrot + pear + mango; printf("%d\n", var1); return(0); } ================================================ FILE: 56_Local_Arrays/tests/input064.c ================================================ enum fred var3; ================================================ FILE: 56_Local_Arrays/tests/input065.c ================================================ enum fred { x, y, z }; enum fred { a, b }; ================================================ FILE: 56_Local_Arrays/tests/input066.c ================================================ enum fred { x, y, z }; enum mary { a, b, z }; ================================================ FILE: 56_Local_Arrays/tests/input067.c ================================================ int printf(char *fmt); typedef int FOO; FOO var1; struct bar { int x; int y} ; typedef struct bar BAR; BAR var2; int main() { var1= 5; printf("%d\n", var1); var2.x= 7; var2.y= 10; printf("%d\n", var2.x + var2.y); return(0); } ================================================ FILE: 56_Local_Arrays/tests/input068.c ================================================ typedef int FOO; typedef char FOO; ================================================ FILE: 56_Local_Arrays/tests/input069.c ================================================ typedef int FOO; FLOO y; ================================================ FILE: 56_Local_Arrays/tests/input070.c ================================================ #include typedef int FOO; int main() { FOO 
x; x= 56; printf("%d\n", x); return(0); } ================================================ FILE: 56_Local_Arrays/tests/input071.c ================================================ #include int main() { int x; x = 0; while (x < 100) { if (x == 5) { x = x + 2; continue; } printf("%d\n", x); if (x == 14) { break; } x = x + 1; } printf("Done\n"); return (0); } ================================================ FILE: 56_Local_Arrays/tests/input072.c ================================================ int main() { break; } ================================================ FILE: 56_Local_Arrays/tests/input073.c ================================================ int main() { continue; } ================================================ FILE: 56_Local_Arrays/tests/input074.c ================================================ #include int main() { int x; int y; y= 0; for (x=0; x < 5; x++) { switch(x) { case 1: { y= 5; break; } case 2: { y= 7; break; } case 3: { y= 9; } default: { y= 100; } } printf("%d\n", y); } return(0); } ================================================ FILE: 56_Local_Arrays/tests/input075.c ================================================ int main() { int x; switch(x) { if (x<5); } } ================================================ FILE: 56_Local_Arrays/tests/input076.c ================================================ int main() { int x; switch(x) { } } ================================================ FILE: 56_Local_Arrays/tests/input077.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } case 4: { x= 2; } } } ================================================ FILE: 56_Local_Arrays/tests/input078.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } default: { x= 2; } } } ================================================ FILE: 56_Local_Arrays/tests/input079.c ================================================ int main() { int x; switch(x) 
{ case 1: { x= 2; } case 2: { x= 2; } case 1: { x= 2; } default: { x= 2; } } } ================================================ FILE: 56_Local_Arrays/tests/input080.c ================================================ #include int x; int y; int main() { for (x=0, y=1; x < 6; x++, y=y+2) { printf("%d %d\n", x, y); } return(0); } ================================================ FILE: 56_Local_Arrays/tests/input081.c ================================================ #include int x; int y; int main() { x= 0; y=1; for (;x<5;) { printf("%d %d\n", x, y); x=x+1; y=y+2; } return(0); } ================================================ FILE: 56_Local_Arrays/tests/input082.c ================================================ #include int main() { int x; x= 10; // Dangling else test if (x > 5) if (x > 15) printf("x > 15\n"); else printf("15 >= x > 5\n"); else printf("5 >= x\n"); return(0); } ================================================ FILE: 56_Local_Arrays/tests/input083.c ================================================ #include int main() { int x; // Dangling else test. 
// We should not print anything for x<= 5 for (x=0; x < 12; x++) if (x > 5) if (x > 10) printf("10 < %2d\n", x); else printf(" 5 < %2d <= 10\n", x); return(0); } ================================================ FILE: 56_Local_Arrays/tests/input084.c ================================================ #include int main() { int x, y; x=2; y=3; printf("%d %d\n", x, y); char a, *b; a= 'f'; b= &a; printf("%c %c\n", a, *b); return(0); } ================================================ FILE: 56_Local_Arrays/tests/input085.c ================================================ int main(struct foo { int x; }; , int a) { return(0); } ================================================ FILE: 56_Local_Arrays/tests/input086.c ================================================ int main() { int fred() { return(5); } int x; x=2; return(x); } ================================================ FILE: 56_Local_Arrays/tests/input087.c ================================================ struct foo { int x; int y; struct blah { int g; }; }; ================================================ FILE: 56_Local_Arrays/tests/input088.c ================================================ #include struct foo { int x; int y; } fred, mary; struct foo james; int main() { fred.x= 5; mary.y= 6; printf("%d %d\n", fred.x, mary.y); return(0); } ================================================ FILE: 56_Local_Arrays/tests/input089.c ================================================ #include int x= 23; char y= 'H'; char *z= "Hello world"; int main() { printf("%d %c %s\n", x, y, z); return(0); } ================================================ FILE: 56_Local_Arrays/tests/input090.c ================================================ #include int a = 23, b = 100; char y = 'H', *z = "Hello world"; int main() { printf("%d %d %c %s\n", a, b, y, z); return (0); } ================================================ FILE: 56_Local_Arrays/tests/input091.c ================================================ #include int fred[] = { 1, 2, 3, 4, 5 }; 
int jim[10] = { 1, 2, 3, 4, 5 }; int main() { int i; for (i=0; i < 5; i++) printf("%d\n", fred[i]); for (i=0; i < 10; i++) printf("%d\n", jim[i]); return(0); } ================================================ FILE: 56_Local_Arrays/tests/input092.c ================================================ char x= 3000; ================================================ FILE: 56_Local_Arrays/tests/input093.c ================================================ char x= fred; ================================================ FILE: 56_Local_Arrays/tests/input094.c ================================================ char *s= 54; ================================================ FILE: 56_Local_Arrays/tests/input095.c ================================================ int fred(int x=2) { return(x); } ================================================ FILE: 56_Local_Arrays/tests/input096.c ================================================ int fred[0]; ================================================ FILE: 56_Local_Arrays/tests/input098.c ================================================ int fred[3]= { 1, 2, 3, 4, 5 }; ================================================ FILE: 56_Local_Arrays/tests/input099.c ================================================ #include // List of token strings, for debugging purposes. 
// As yet, we can't store a NULL into the list char *Tstring[] = { "EOF", "=", "||", "&&", "|", "^", "&", "==", "!=", ",", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", "default", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":", "" }; int main() { int i; char *str; i=0; while (1) { str= Tstring[i]; if (*str == 0) break; printf("%s\n", str); i++; } return(0); } ================================================ FILE: 56_Local_Arrays/tests/input100.c ================================================ #include int main() { int x= 3, y=14; int z= 2 * x + y; char *str= "Hello world"; printf("%s %d %d\n", str, x+y, z); return(0); } ================================================ FILE: 56_Local_Arrays/tests/input101.c ================================================ #include int main() { int x= 65535; char y; char *str; y= (char )x; printf("0x%x\n", y); str= (char *)0; printf("0x%lx\n", (long)str); return(0); } ================================================ FILE: 56_Local_Arrays/tests/input102.c ================================================ int main() { struct foo { int p; }; int y= (struct foo) x; } ================================================ FILE: 56_Local_Arrays/tests/input103.c ================================================ int main() { union foo { int p; }; int y= (union foo) x; } ================================================ FILE: 56_Local_Arrays/tests/input104.c ================================================ int main() { int y= (void) x; } ================================================ FILE: 56_Local_Arrays/tests/input105.c ================================================ int main() { int x; char *y; y= (char) x; } ================================================ FILE: 56_Local_Arrays/tests/input106.c 
================================================ #include char *y= (char *)0; int main() { printf("0x%lx\n", (long)y); return(0); } ================================================ FILE: 56_Local_Arrays/tests/input107.c ================================================ #include char *y[] = { "fish", "cow", NULL }; char *z= NULL; int main() { int i; char *ptr; for (i=0; i < 3; i++) { ptr= y[i]; if (ptr != (char *)0) printf("%s\n", y[i]); else printf("NULL\n"); } return(0); } ================================================ FILE: 56_Local_Arrays/tests/input108.c ================================================ int main() { char *str= (void *)0; return(0); } ================================================ FILE: 56_Local_Arrays/tests/input109.c ================================================ #include int main() { int x= 17 - 1; printf("%d\n", x); return(0); } ================================================ FILE: 56_Local_Arrays/tests/input110.c ================================================ #include int x; int y; int main() { x= 3; y= 15; y += x; printf("%d\n", y); x= 3; y= 15; y -= x; printf("%d\n", y); x= 3; y= 15; y *= x; printf("%d\n", y); x= 3; y= 15; y /= x; printf("%d\n", y); return(0); } ================================================ FILE: 56_Local_Arrays/tests/input111.c ================================================ #include int main() { int x= 2000 + 3 + 4 * 5 + 6; printf("%d\n", x); return(0); } ================================================ FILE: 56_Local_Arrays/tests/input112.c ================================================ #include char* y = NULL; int x= 10 + 6; int fred [ 2 + 3 ]; int main() { fred[2]= x; printf("%d\n", fred[2]); return(0); } ================================================ FILE: 56_Local_Arrays/tests/input113.c ================================================ #include void fred(void); void fred(void) { printf("fred says hello\n"); } int main(void) { fred(); return(0); } ================================================ FILE: 
56_Local_Arrays/tests/input114.c ================================================ #include int main() { int x= 0x4a; printf("%c\n", x); return(0); } ================================================ FILE: 56_Local_Arrays/tests/input115.c ================================================ #include struct foo { int x; char y; long z; }; typedef struct foo blah; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol struct symtable *ctype; // If struct/union, ptr to that type int stype; // Structural type for the symbol int class; // Storage class for the symbol int size; // Total size in bytes of this symbol int nelems; // Functions: # params. Arrays: # elements #define st_endlabel st_posn // For functions, the end label int st_posn; // For locals, the negative offset // from the stack base pointer int *initlist; // List of initial values struct symtable *next; // Next symbol in one list struct symtable *member; // First member of a function, struct, }; // union or enum // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates int rvalue; // True if the node is an rvalue struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; struct symtable *sym; // For many AST nodes, the pointer to // the symbol in the symbol table #define a_intvalue a_size // For A_INTLIT, the integer value int a_size; // For A_SCALE, the size to scale by }; int main() { printf("%ld\n", sizeof(char)); printf("%ld\n", sizeof(int)); printf("%ld\n", sizeof(long)); printf("%ld\n", sizeof(char *)); printf("%ld\n", sizeof(blah)); printf("%ld\n", sizeof(struct symtable)); printf("%ld\n", sizeof(struct ASTnode)); return(0); } ================================================ FILE: 56_Local_Arrays/tests/input116.c ================================================ #include static int counter=0; static 
int fred(void) { return(counter++); } int main(void) { int i; for (i=0; i < 5; i++) printf("%d\n", fred()); return(0); } ================================================ FILE: 56_Local_Arrays/tests/input117.c ================================================ #include static char *fred(void) { return("Hello"); } int main(void) { printf("%s\n", fred()); return(0); } ================================================ FILE: 56_Local_Arrays/tests/input118.c ================================================ int main(void) { static int x; } ================================================ FILE: 56_Local_Arrays/tests/input119.c ================================================ #include int x; int y= 3; int main() { x= y != 3 ? 6 : 8; printf("%d\n", x); x= (y == 3) ? 6 : 8; printf("%d\n", x); return(0); } ================================================ FILE: 56_Local_Arrays/tests/input120.c ================================================ #include int x; int y= 3; int main() { for (y= 0; y < 10; y++) { x= y > 4 ? y + 2 : y + 9; printf("%d\n", x); } return(0); } ================================================ FILE: 56_Local_Arrays/tests/input121.c ================================================ #include int x; int y= 3; int main() { for (y= 0; y < 10; y++) { x= (y < 4) ? y + 2 : (y > 7) ? 
1000 : y + 9; printf("%d\n", x); } return(0); } ================================================ FILE: 56_Local_Arrays/tests/input122.c ================================================ #include int x, y, z1, z2; int main() { for (x= 0; x <= 1; x++) { for (y= 0; y <= 1; y++) { z1= x || y; z2= x && y; printf("x %d, y %d, x || y %d, x && y %d\n", x, y, z1, z2); } } //z= x || y; return(0); } ================================================ FILE: 56_Local_Arrays/tests/input123.c ================================================ #include int main() { int x; for (x=0; x < 20; x++) switch(x) { case 2: case 3: case 5: case 7: case 11: printf("%2d infant prime\n", x); break; case 13: case 17: case 19: printf("%2d teen prime\n", x); break; case 0: case 1: case 4: case 6: case 8: case 9: case 10: case 12: printf("%2d infant composite\n", x); break; default: printf("%2d teen composite\n", x); break; } return(0); } ================================================ FILE: 56_Local_Arrays/tests/input124.c ================================================ #include int ary[5]; int main() { ary++; return(0); } ================================================ FILE: 56_Local_Arrays/tests/input125.c ================================================ #include int ary[5]; int *ptr; int x; int main() { ary[3]= 2008; ptr= ary; // Load ary's address into ptr x= ary[3]; printf("%d\n", x); x= ptr[3]; printf("%d\n", x); // Treat ptr as an array return(0); } ================================================ FILE: 56_Local_Arrays/tests/input126.c ================================================ #include int ary[5]; int main() { ary[3]= 2008; ptr= &ary; return(0); } ================================================ FILE: 56_Local_Arrays/tests/input127.c ================================================ #include int ary[5]; void fred(int *ptr) { // Receive a pointer printf("%d\n", ptr[3]); } int main() { ary[3]= 2008; printf("%d\n", ary[3]); fred(ary); // Pass ary as a pointer return(0); } 
================================================ FILE: 56_Local_Arrays/tests/input128.c ================================================ #include <stdio.h> struct foo { int val; struct foo *next; }; struct foo head, mid, tail; int main() { struct foo *ptr; tail.val= 20; tail.next= NULL; mid.val= 15; mid.next= &tail; head.val= 10; head.next= &mid; ptr= &head; printf("%d %d\n", head.val, ptr->val); printf("%d %d\n", mid.val, ptr->next->val); printf("%d %d\n", tail.val, ptr->next->next->val); return(0); } ================================================ FILE: 56_Local_Arrays/tests/input129.c ================================================ #include <stdio.h> int x= 6; int main() { printf("%d\n", x++ ++); return(0); } ================================================ FILE: 56_Local_Arrays/tests/input130.c ================================================ #include <stdio.h> char *x= "foo"; int main() { printf("Hello " "world" "\n"); return(0); } ================================================ FILE: 56_Local_Arrays/tests/input131.c ================================================ #include <stdio.h> void donothing() { } int main() { int x=0; printf("Doing nothing... 
"); donothing(); printf("nothing done\n"); while (++x < 100) ; printf("x is now %d\n", x); return(0); } ================================================ FILE: 56_Local_Arrays/tests/input132.c ================================================ extern int fred; int fred; int mary; extern int mary; int main() { return(0); } ================================================ FILE: 56_Local_Arrays/tests/input133.c ================================================ #include extern int fred[]; int fred[23]; char mary[100]; extern char mary[]; void main() { printf("OK\n"); } ================================================ FILE: 56_Local_Arrays/tests/input134.c ================================================ #include char y = 'a'; char *x; int main() { x= &y; if (x && y == 'a') printf("1st match\n"); x= NULL; if (x && y == 'a') printf("2nd match\n"); x= &y; y='b'; if (x && y == 'a') printf("3rd match\n"); return(0); } ================================================ FILE: 56_Local_Arrays/tests/input135.c ================================================ #include void fred() { int x= 5; printf("testing x\n"); if (x > 4) return; printf("x below 5\n"); } int main() { fred(); return(0); } ================================================ FILE: 56_Local_Arrays/tests/input136.c ================================================ #include int add(int x, int y) { return(x+y); } int main() { int result; result= 3 * add(2,3) - 5 * add(4,6); printf("%d\n", result); return(0); } ================================================ FILE: 56_Local_Arrays/tests/input137.c ================================================ #include int a=1, b=2, c=3, d=4, e=5, f=6, g=7, h=8; int main() { int x; x= ((((((a + b) + c) + d) + e) + f) + g) + h; x= a + (b + (c + (d + (e + (f + (g + h)))))); printf("x is %d\n", x); return(0); } ================================================ FILE: 56_Local_Arrays/tests/input138.c ================================================ #include int x, y, z; int a=1; int *aptr; int 
main() { // See if generic AND works for (x=0; x <= 1; x++) for (y=0; y <= 1; y++) { z= x && y; printf("%d %d | %d\n", x, y, z); } // See if generic OR works for (x=0; x <= 1; x++) for (y=0; y <= 1; y++) { z= x || y; printf("%d %d | %d\n", x, y, z); } // Now some lazy evaluation aptr= NULL; if (aptr && *aptr == 1) printf("aptr points at 1\n"); else printf("aptr is NULL or doesn't point at 1\n"); aptr= &a; if (aptr && *aptr == 1) printf("aptr points at 1\n"); else printf("aptr is NULL or doesn't point at 1\n"); return(0); } ================================================ FILE: 56_Local_Arrays/tests/input139.c ================================================ #include <stdio.h> int same(int x) { return(x); } int main() { int a= 3; if (same(a) && same(a) >= same(a)) printf("same apparently\n"); return(0); } ================================================ FILE: 56_Local_Arrays/tests/input140.c ================================================ #include <stdio.h> int main() { int i; int ary[5]; char z; // Write below the array z= 'H'; // Fill the array for (i=0; i < 5; i++) ary[i]= i * i; // Write above the array i=14; // Print out the array for (i=0; i < 5; i++) printf("%d\n", ary[i]); // See if either side is OK printf("%d %c\n", i, z); return(0); } ================================================ FILE: 56_Local_Arrays/tests/input141.c ================================================ static int fred[5]; int jim; int foo(int mary[6]) { return(5); } ================================================ FILE: 56_Local_Arrays/tests/input142.c ================================================ static int fred[]; int jim; ================================================ FILE: 56_Local_Arrays/tests/mktests ================================================ #!/bin/sh # Make the output files for each test # Build our compiler if needed if [ ! -f ../cwj ] then (cd ..; make install) fi for i in input*c do if [ ! -f "out.$i" -a ! 
-f "err.$i" ] then ../cwj -o out $i 2> "err.$i" # If the err file is empty if [ ! -s "err.$i" ] then rm -f "err.$i" cc -o out $i ./out > "out.$i" fi fi rm -f out out.s done ================================================ FILE: 56_Local_Arrays/tests/out.input001.c ================================================ 36 10 25 ================================================ FILE: 56_Local_Arrays/tests/out.input002.c ================================================ 17 ================================================ FILE: 56_Local_Arrays/tests/out.input003.c ================================================ 1 2 3 4 5 ================================================ FILE: 56_Local_Arrays/tests/out.input004.c ================================================ 1 1 1 1 1 1 1 1 1 ================================================ FILE: 56_Local_Arrays/tests/out.input005.c ================================================ 6 ================================================ FILE: 56_Local_Arrays/tests/out.input006.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 56_Local_Arrays/tests/out.input007.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 56_Local_Arrays/tests/out.input008.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 56_Local_Arrays/tests/out.input009.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 56_Local_Arrays/tests/out.input010.c ================================================ 20 10 1 2 3 4 5 253 254 255 0 1 ================================================ FILE: 56_Local_Arrays/tests/out.input011.c ================================================ 10 20 30 1 2 3 4 5 253 254 255 0 1 2 3 1 2 3 4 5 ================================================ FILE: 
56_Local_Arrays/tests/out.input012.c ================================================ 5 ================================================ FILE: 56_Local_Arrays/tests/out.input013.c ================================================ 23 56 ================================================ FILE: 56_Local_Arrays/tests/out.input014.c ================================================ 10 20 30 ================================================ FILE: 56_Local_Arrays/tests/out.input015.c ================================================ 18 18 12 12 ================================================ FILE: 56_Local_Arrays/tests/out.input016.c ================================================ 12 18 ================================================ FILE: 56_Local_Arrays/tests/out.input017.c ================================================ 19 12 ================================================ FILE: 56_Local_Arrays/tests/out.input018.c ================================================ 34 34 ================================================ FILE: 56_Local_Arrays/tests/out.input018a.c ================================================ 15 16 ================================================ FILE: 56_Local_Arrays/tests/out.input019.c ================================================ 30 ================================================ FILE: 56_Local_Arrays/tests/out.input020.c ================================================ 12 ================================================ FILE: 56_Local_Arrays/tests/out.input021.c ================================================ 10 Hello world ================================================ FILE: 56_Local_Arrays/tests/out.input022.c ================================================ 12 12 12 13 13 13 13 13 13 35 35 35 ================================================ FILE: 56_Local_Arrays/tests/out.input023.c ================================================ -23 100 -2 0 1 0 13 14 Hello world ================================================ FILE: 
56_Local_Arrays/tests/out.input024.c ================================================ 2 59 57 8 7 ================================================ FILE: 56_Local_Arrays/tests/out.input025.c ================================================ 10 20 30 5 15 25 ================================================ FILE: 56_Local_Arrays/tests/out.input026.c ================================================ 13 23 34 44 54 64 74 84 94 95 96 ================================================ FILE: 56_Local_Arrays/tests/out.input027.c ================================================ 1 2 3 4 5 6 7 8 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 ================================================ FILE: 56_Local_Arrays/tests/out.input028.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 56_Local_Arrays/tests/out.input029.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 56_Local_Arrays/tests/out.input030.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input030.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 56_Local_Arrays/tests/out.input053.c ================================================ Hello world, 23 ================================================ FILE: 56_Local_Arrays/tests/out.input054.c ================================================ Hello world, 0 Hello world, 1 Hello world, 2 Hello world, 3 Hello world, 4 Hello world, 5 Hello world, 6 Hello world, 7 Hello world, 8 Hello world, 9 Hello world, 10 Hello world, 11 Hello world, 12 Hello world, 13 Hello world, 14 Hello world, 15 Hello world, 
16 Hello world, 17 Hello world, 18 Hello world, 19 ================================================ FILE: 56_Local_Arrays/tests/out.input055.c ================================================ Hello world Argument 0 is ./out ================================================ FILE: 56_Local_Arrays/tests/out.input058.c ================================================ 12 99 4005 4116 4116 ================================================ FILE: 56_Local_Arrays/tests/out.input062.c ================================================ 65 66 66 The next two depend on the endian of the platform 66 66 67 67 ================================================ FILE: 56_Local_Arrays/tests/out.input063.c ================================================ 25 ================================================ FILE: 56_Local_Arrays/tests/out.input067.c ================================================ 5 17 ================================================ FILE: 56_Local_Arrays/tests/out.input070.c ================================================ 56 ================================================ FILE: 56_Local_Arrays/tests/out.input071.c ================================================ 0 1 2 3 4 7 8 9 10 11 12 13 14 Done ================================================ FILE: 56_Local_Arrays/tests/out.input074.c ================================================ 100 5 7 100 100 ================================================ FILE: 56_Local_Arrays/tests/out.input080.c ================================================ 0 1 1 3 2 5 3 7 4 9 5 11 ================================================ FILE: 56_Local_Arrays/tests/out.input081.c ================================================ 0 1 1 3 2 5 3 7 4 9 ================================================ FILE: 56_Local_Arrays/tests/out.input082.c ================================================ 15 >= x > 5 ================================================ FILE: 56_Local_Arrays/tests/out.input083.c ================================================ 5 < 6 <= 10 
5 < 7 <= 10 5 < 8 <= 10 5 < 9 <= 10 5 < 10 <= 10 10 < 11 ================================================ FILE: 56_Local_Arrays/tests/out.input084.c ================================================ 2 3 f f ================================================ FILE: 56_Local_Arrays/tests/out.input088.c ================================================ 5 6 ================================================ FILE: 56_Local_Arrays/tests/out.input089.c ================================================ 23 H Hello world ================================================ FILE: 56_Local_Arrays/tests/out.input090.c ================================================ 23 100 H Hello world ================================================ FILE: 56_Local_Arrays/tests/out.input091.c ================================================ 1 2 3 4 5 1 2 3 4 5 0 0 0 0 0 ================================================ FILE: 56_Local_Arrays/tests/out.input099.c ================================================ EOF = || && | ^ & == != , > <= >= << >> + - * / ++ -- ~ ! void char int long if else while for return struct union enum typedef extern break continue switch case default intlit strlit ; identifier { } ( ) [ ] , . 
-> : ================================================ FILE: 56_Local_Arrays/tests/out.input100.c ================================================ Hello world 17 20 ================================================ FILE: 56_Local_Arrays/tests/out.input101.c ================================================ 0xff 0x0 ================================================ FILE: 56_Local_Arrays/tests/out.input106.c ================================================ 0x0 ================================================ FILE: 56_Local_Arrays/tests/out.input107.c ================================================ fish cow NULL ================================================ FILE: 56_Local_Arrays/tests/out.input108.c ================================================ ================================================ FILE: 56_Local_Arrays/tests/out.input109.c ================================================ 16 ================================================ FILE: 56_Local_Arrays/tests/out.input110.c ================================================ 18 12 45 5 ================================================ FILE: 56_Local_Arrays/tests/out.input111.c ================================================ 2029 ================================================ FILE: 56_Local_Arrays/tests/out.input112.c ================================================ 16 ================================================ FILE: 56_Local_Arrays/tests/out.input113.c ================================================ fred says hello ================================================ FILE: 56_Local_Arrays/tests/out.input114.c ================================================ J ================================================ FILE: 56_Local_Arrays/tests/out.input115.c ================================================ 1 4 8 8 13 64 48 ================================================ FILE: 56_Local_Arrays/tests/out.input116.c ================================================ 0 1 2 3 4 ================================================ 
FILE: 56_Local_Arrays/tests/out.input117.c ================================================ Hello ================================================ FILE: 56_Local_Arrays/tests/out.input119.c ================================================ 8 6 ================================================ FILE: 56_Local_Arrays/tests/out.input120.c ================================================ 9 10 11 12 13 7 8 9 10 11 ================================================ FILE: 56_Local_Arrays/tests/out.input121.c ================================================ 2 3 4 5 13 14 15 16 1000 1000 ================================================ FILE: 56_Local_Arrays/tests/out.input122.c ================================================ x 0, y 0, x || y 0, x && y 0 x 0, y 1, x || y 1, x && y 0 x 1, y 0, x || y 1, x && y 0 x 1, y 1, x || y 1, x && y 1 ================================================ FILE: 56_Local_Arrays/tests/out.input123.c ================================================ 0 infant composite 1 infant composite 2 infant prime 3 infant prime 4 infant composite 5 infant prime 6 infant composite 7 infant prime 8 infant composite 9 infant composite 10 infant composite 11 infant prime 12 infant composite 13 teen prime 14 teen composite 15 teen composite 16 teen composite 17 teen prime 18 teen composite 19 teen prime ================================================ FILE: 56_Local_Arrays/tests/out.input125.c ================================================ 2008 2008 ================================================ FILE: 56_Local_Arrays/tests/out.input127.c ================================================ 2008 2008 ================================================ FILE: 56_Local_Arrays/tests/out.input128.c ================================================ 10 10 15 15 20 20 ================================================ FILE: 56_Local_Arrays/tests/out.input130.c ================================================ Hello world ================================================ FILE: 
56_Local_Arrays/tests/out.input131.c ================================================ Doing nothing... nothing done x is now 100 ================================================ FILE: 56_Local_Arrays/tests/out.input132.c ================================================ ================================================ FILE: 56_Local_Arrays/tests/out.input133.c ================================================ OK ================================================ FILE: 56_Local_Arrays/tests/out.input134.c ================================================ 1st match ================================================ FILE: 56_Local_Arrays/tests/out.input135.c ================================================ testing x ================================================ FILE: 56_Local_Arrays/tests/out.input136.c ================================================ -35 ================================================ FILE: 56_Local_Arrays/tests/out.input137.c ================================================ x is 36 ================================================ FILE: 56_Local_Arrays/tests/out.input138.c ================================================ 0 0 | 0 0 1 | 0 1 0 | 0 1 1 | 1 0 0 | 0 0 1 | 1 1 0 | 1 1 1 | 1 aptr is NULL or doesn't point at 1 aptr points at 1 ================================================ FILE: 56_Local_Arrays/tests/out.input139.c ================================================ same apparently ================================================ FILE: 56_Local_Arrays/tests/out.input140.c ================================================ 0 1 4 9 16 5 H ================================================ FILE: 56_Local_Arrays/tests/runtests ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! -f ../cwj ] then (cd ..; make install) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! 
-f "err.$i" ] then echo "Can't run test on $i, no output file!" # Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../cwj -o out $i ./out > trial.$i # Compare this against the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../cwj $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.s "trial.$i" done ================================================ FILE: 56_Local_Arrays/tests/runtestsn ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our NASM compiler if needed: the installn # target is the one that builds and installs compn if [ ! -f ../compn ] then (cd ..; make installn) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" # Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../compn -o out $i ./out > trial.$i # Compare this against the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. 
Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../compn $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.o out.s "trial.$i" done ================================================ FILE: 56_Local_Arrays/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct symtable *ctype, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it. // malloc() does not zero memory, so start every // node as a non-rvalue until a caller marks it n->op = op; n->type = type; n->ctype = ctype; n->rvalue = 0; n->left = left; n->mid = mid; n->right = right; n->sym = sym; n->a_intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, struct symtable *ctype, struct symtable *sym, int intvalue) { return (mkastnode(op, type, ctype, NULL, NULL, NULL, sym, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, int type, struct symtable *ctype, struct ASTnode *left, struct symtable *sym, int intvalue) { return (mkastnode(op, type, ctype, left, NULL, NULL, sym, intvalue)); } // Generate and return a new label number // just for AST dumping purposes static int dumpid = 1; static int gendumplabel(void) { return (dumpid++); } // Given an AST tree, print it out and follow the // traversal of the tree that genAST() follows void dumpAST(struct ASTnode *n, int label, int level) { int Lfalse, Lstart, Lend; int i; switch (n->op) { case A_IF: Lfalse = gendumplabel(); for (i = 0; i < level; i++) fprintf(stdout, " "); 
fprintf(stdout, "A_IF"); if (n->right) { Lend = gendumplabel(); fprintf(stdout, ", end L%d", Lend); } fprintf(stdout, "\n"); dumpAST(n->left, Lfalse, level + 2); dumpAST(n->mid, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); return; case A_WHILE: Lstart = gendumplabel(); for (i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_WHILE, start L%d\n", Lstart); Lend = gendumplabel(); dumpAST(n->left, Lend, level + 2); dumpAST(n->right, NOLABEL, level + 2); return; } // Reset level to -2 for A_GLUE if (n->op == A_GLUE) level = -2; // General AST node handling if (n->left) dumpAST(n->left, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); for (i = 0; i < level; i++) fprintf(stdout, " "); switch (n->op) { case A_GLUE: fprintf(stdout, "\n\n"); return; case A_FUNCTION: fprintf(stdout, "A_FUNCTION %s\n", n->sym->name); return; case A_ADD: fprintf(stdout, "A_ADD\n"); return; case A_SUBTRACT: fprintf(stdout, "A_SUBTRACT\n"); return; case A_MULTIPLY: fprintf(stdout, "A_MULTIPLY\n"); return; case A_DIVIDE: fprintf(stdout, "A_DIVIDE\n"); return; case A_EQ: fprintf(stdout, "A_EQ\n"); return; case A_NE: fprintf(stdout, "A_NE\n"); return; case A_LT: fprintf(stdout, "A_LT\n"); return; case A_GT: fprintf(stdout, "A_GT\n"); return; case A_LE: fprintf(stdout, "A_LE\n"); return; case A_GE: fprintf(stdout, "A_GE\n"); return; case A_INTLIT: fprintf(stdout, "A_INTLIT %d\n", n->a_intvalue); return; case A_STRLIT: fprintf(stdout, "A_STRLIT rval label L%d\n", n->a_intvalue); return; case A_IDENT: if (n->rvalue) fprintf(stdout, "A_IDENT rval %s\n", n->sym->name); else fprintf(stdout, "A_IDENT %s\n", n->sym->name); return; case A_ASSIGN: fprintf(stdout, "A_ASSIGN\n"); return; case A_WIDEN: fprintf(stdout, "A_WIDEN\n"); return; case A_RETURN: fprintf(stdout, "A_RETURN\n"); return; case A_FUNCCALL: fprintf(stdout, "A_FUNCCALL %s\n", n->sym->name); return; case A_ADDR: fprintf(stdout, "A_ADDR %s\n", n->sym->name); return; case A_DEREF: 
if (n->rvalue) fprintf(stdout, "A_DEREF rval\n"); else fprintf(stdout, "A_DEREF\n"); return; case A_SCALE: fprintf(stdout, "A_SCALE %d\n", n->a_size); return; case A_PREINC: fprintf(stdout, "A_PREINC %s\n", n->sym->name); return; case A_PREDEC: fprintf(stdout, "A_PREDEC %s\n", n->sym->name); return; case A_POSTINC: fprintf(stdout, "A_POSTINC\n"); return; case A_POSTDEC: fprintf(stdout, "A_POSTDEC\n"); return; case A_NEGATE: fprintf(stdout, "A_NEGATE\n"); return; case A_BREAK: fprintf(stdout, "A_BREAK\n"); return; case A_CONTINUE: fprintf(stdout, "A_CONTINUE\n"); return; case A_CASE: fprintf(stdout, "A_CASE %d\n", n->a_intvalue); return; case A_DEFAULT: fprintf(stdout, "A_DEFAULT\n"); return; case A_SWITCH: fprintf(stdout, "A_SWITCH\n"); return; case A_CAST: fprintf(stdout, "A_CAST %d\n", n->type); return; case A_ASPLUS: fprintf(stdout, "A_ASPLUS\n"); return; case A_ASMINUS: fprintf(stdout, "A_ASMINUS\n"); return; case A_ASSTAR: fprintf(stdout, "A_ASSTAR\n"); return; case A_ASSLASH: fprintf(stdout, "A_ASSLASH\n"); return; case A_TOBOOL: fprintf(stdout, "A_TOBOOL\n"); return; case A_LOGOR: fprintf(stdout, "A_LOGOR\n"); return; case A_LOGAND: fprintf(stdout, "A_LOGAND\n"); return; default: fatald("Unknown dumpAST operator", n->op); } } ================================================ FILE: 56_Local_Arrays/types.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Types and type handling // Copyright (c) 2019 Warren Toomey, GPL3 // Return true if a type is an int type // of any size, false otherwise int inttype(int type) { return (((type & 0xf) == 0) && (type >= P_CHAR && type <= P_LONG)); } // Return true if a type is of pointer type int ptrtype(int type) { return ((type & 0xf) != 0); } // Given a primitive type, return // the type which is a pointer to it int pointer_to(int type) { if ((type & 0xf) == 0xf) fatald("Unrecognised in pointer_to: type", type); return (type + 1); } // Given a primitive pointer type, 
return // the type which it points to int value_at(int type) { if ((type & 0xf) == 0x0) fatald("Unrecognised in value_at: type", type); return (type - 1); } // Given a type and a composite type pointer, return // the size of this type in bytes int typesize(int type, struct symtable *ctype) { if (type == P_STRUCT || type == P_UNION) return (ctype->size); return (genprimsize(type)); } // Given an AST tree and a type which we want it to become, // possibly modify the tree by widening or scaling so that // it is compatible with this type. Return the original tree // if no changes occurred, a modified tree, or NULL if the // tree is not compatible with the given type. // If this will be part of a binary operation, the AST op is not zero. struct ASTnode *modify_type(struct ASTnode *tree, int rtype, struct symtable *rctype, int op) { int ltype; int lsize, rsize; ltype = tree->type; // For A_LOGOR and A_LOGAND, both types have to be int or pointer types: // check the tree's own type first, then the type on the other side if (op==A_LOGOR || op==A_LOGAND) { if (!inttype(ltype) && !ptrtype(ltype)) return(NULL); if (!inttype(rtype) && !ptrtype(rtype)) return(NULL); return (tree); } // XXX No idea on these yet if (ltype == P_STRUCT || ltype == P_UNION) fatal("Don't know how to do this yet"); if (rtype == P_STRUCT || rtype == P_UNION) fatal("Don't know how to do this yet"); // Compare scalar int types if (inttype(ltype) && inttype(rtype)) { // Both types same, nothing to do if (ltype == rtype) return (tree); // Get the sizes for each type lsize = typesize(ltype, NULL); rsize = typesize(rtype, NULL); // Tree's size is too big if (lsize > rsize) return (NULL); // Widen to the right if (rsize > lsize) return (mkastunary(A_WIDEN, rtype, NULL, tree, NULL, 0)); } // For pointers if (ptrtype(ltype) && ptrtype(rtype)) { // We can compare them if (op >= A_EQ && op <= A_GE) return (tree); // A comparison of the same type for a non-binary operation is OK, // or when the left tree is of `void *` type. 
if (op == 0 && (ltype == rtype || ltype == pointer_to(P_VOID))) return (tree); } // We can scale only on A_ADD or A_SUBTRACT operation if (op == A_ADD || op == A_SUBTRACT) { // Left is int type, right is pointer type and the size // of the original type is >1: scale the left if (inttype(ltype) && ptrtype(rtype)) { rsize = genprimsize(value_at(rtype)); if (rsize > 1) return (mkastunary(A_SCALE, rtype, rctype, tree, NULL, rsize)); else return (tree); // Size 1, no need to scale } } // If we get here, the types are not compatible return (NULL); } ================================================ FILE: 57_Mop_up_pt3/Makefile ================================================ # Define the location of the include directory # and the location to install the compiler binary INCDIR=/tmp/include BINDIR=/tmp HSRCS= data.h decl.h defs.h incdir.h SRCS= cg.c decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c ARMSRCS= cg_arm.c decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c cwj: $(SRCS) $(HSRCS) cc -o cwj -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCS) compn: $(SRCN) $(HSRCS) cc -D__NASM__ -o compn -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCN) cwjarm: $(ARMSRCS) $(HSRCS) cc -o cwjarm -g -Wall $(ARMSRCS) cp cwjarm cwj incdir.h: echo "#define INCDIR \"$(INCDIR)\"" > incdir.h install: cwj mkdir -p $(INCDIR) rsync -a include/. $(INCDIR) cp cwj $(BINDIR) chmod +x $(BINDIR)/cwj installn: compn mkdir -p $(INCDIR) rsync -a include/. 
$(INCDIR) cp compn $(BINDIR) chmod +x $(BINDIR)/compn clean: rm -f cwj cwjarm compn *.o *.s out a.out incdir.h test: install tests/runtests (cd tests; chmod +x runtests; ./runtests) armtest: cwjarm tests/runtests (cd tests; chmod +x runtests; ./runtests) testn: installn tests/runtestsn (cd tests; chmod +x runtestsn; ./runtestsn) ================================================ FILE: 57_Mop_up_pt3/Readme.md ================================================ # Part 57: Mopping Up, part 3 In this part of our compiler writing journey, I fix up a few more small issues with the compiler. ## No -D Flag Our compiler doesn't have a run-time `-D` flag to define a symbol to the pre-processor, and it would be somewhat complicated to add it in. But we use this in the `Makefile` to set the location of the directory where our header files are. I've rewritten the `Makefile` to write this location into a new header file: ``` # Define the location of the include directory INCDIR=/tmp/include ... incdir.h: echo "#define INCDIR \"$(INCDIR)\"" > incdir.h ``` and in `defs.h` we now have: ```c #include #include #include #include #include "incdir.h" ``` This ensures that the location of this directory is known to the source code. ## Loading Extern Variables I've added these three external variables in `include/stdio.h`: ```c extern FILE *stdin; extern FILE *stdout; extern FILE *stderr; ``` but when I tried to use them they were being treated as local variables! It turns out my logic to choose a global variable was wrong. In `genAST()` in `gen.c`, we now have: ```c case A_IDENT: // Load our value if we are an rvalue // or we are being dereferenced if (n->rvalue || parentASTop == A_DEREF) { if (n->sym->class == C_GLOBAL || n->sym->class == C_STATIC || n->sym->class == C_EXTERN) { return (cgloadglob(n->sym, n->op)); } else { return (cgloadlocal(n->sym, n->op)); } ``` with the `C_EXTERN` alternative being added. 
## Problems with the Pratt Parser

Way back in part 3 of this journey, I introduced the [Pratt parser](https://en.wikipedia.org/wiki/Pratt_parser) which has a table of precedence values associated with each token. We've been using it ever since as it works.

However, I've introduced tokens that don't get parsed by the Pratt parser: prefix operators, postfix operators, casts, array element access etc. And along the way I broke the chain that ensures the Pratt parser knows the precedence of the previous operator token.

Here is the basic Pratt algorithm again, as shown by the code in `binexpr()` in `expr.c`:

```c
  // Get the tree on the left.
  // Fetch the next token at the same time.
  left = prefix();
  tokentype = Token.token;

  // While the precedence of this token is more than that of the
  // previous token precedence, or it's right associative and
  // equal to the previous token's precedence
  while ((op_precedence(tokentype) > ptp) ||
         (rightassoc(tokentype) && op_precedence(tokentype) == ptp)) {
    // Fetch in the next integer literal
    scan(&Token);

    // Recursively call binexpr() with the
    // precedence of our token to build a sub-tree
    right = binexpr(OpPrec[tokentype]);

    // Join that sub-tree with ours (code not given)

    // Update the details of the current token.
    // Leave the loop if a terminating token (code not given)
    tokentype = Token.token;
  }

  // Return the tree we have when the precedence
  // is the same or lower
  return (left);
```

We must ensure that `binexpr()` gets called with the precedence of the previous token. Now let's look at how this got broken.
Consider this expression that checks if three pointers are valid:

```c
  if (a == NULL || b == NULL || c == NULL)
```

The `==` operator has higher precedence than the `||` operator, so the Pratt parser should treat this the same as:

```c
  if ((a == NULL) || (b == NULL) || (c == NULL))
```

Now, NULL is defined as this expression, and it includes a cast:

```c
#define NULL (void *)0
```

So let's look at the call chain of the IF line above:

 + `binexpr(0)` is called from `if_statement()`
 + `binexpr(0)` parses the `==` (which has precedence 40) and calls `binexpr(40)`
 + `binexpr(40)` calls `prefix()`
 + `prefix()` calls `postfix()`
 + `postfix()` calls `primary()`
 + `primary()` sees the left parenthesis at the start of the `(void *)0` and calls `paren_expression()`
 + `paren_expression()` sees the `void` token and calls `parse_cast()`. Once the cast is parsed, it calls `binexpr(0)` to parse the `0`.

And that's the problem. The value of NULL, i.e. `0` should still be at precedence level 40, but `paren_expression()` just reset it back to zero.

This means that we will now parse `NULL || b`, making an AST tree out of it instead of parsing `a == NULL` and building that AST tree.

The solution is to ensure that the previous token precedence is passed through the call chain all the way from `binexpr()` up to `paren_expression()`. This means that:

 + `prefix()`, `postfix()`, `primary()` and `paren_expression()` all now take an `int ptp` argument and this is passed on.

The program `tests/input143.c` checks that this change now works for `if (a==NULL || b==NULL || c==NULL)`.

## Pointers, `+=` and `-=`

A while back, I realised that if we were adding an integer value to a pointer, we needed to scale the integer by the type size that the pointer points at. For example:

```c
int list[]= {3, 5, 7, 9, 11, 13, 15};
int *lptr;

int main() {
  lptr= list;
  printf("%d\n", *lptr);
  lptr= lptr + 1;
  printf("%d\n", *lptr);
}
```

should print the value at the base of `list`, i.e. 3.
The `lptr` should be incremented by the *size* of `int`, i.e. 4, so that it now points at the next element in the `list`. Now, we do this for the `+` and `-` operators, but I forgot to implement it for the `+=` and `-=` operators. Fortunately this was easy to fix. At the bottom of `modify_type()` in `types.c`, we now have: ```c // We can scale only on add and subtract operations if (op == A_ADD || op == A_SUBTRACT || op == A_ASPLUS || op == A_ASMINUS) { // Left is int type, right is pointer type and the size // of the original type is >1: scale the left if (inttype(ltype) && ptrtype(rtype)) { rsize = genprimsize(value_at(rtype)); if (rsize > 1) return (mkastunary(A_SCALE, rtype, rctype, tree, NULL, rsize)); else return (tree); // Size 1, no need to scale } } ``` You can see I've added A_ASPLUS and A_ASMINUS to the list of operations where we can scale an int value. ## Conclusion and What's Next That's enough mopping up for now. When I fixed up the `+=` and `-=` problem, it highlighted a big issue with the `++` and `--` operators (prefix and postfix) as applied to pointers. In the next part of our compiler writing journey, I will tackle this issue. [Next step](../58_Ptr_Increments/Readme.md) ================================================ FILE: 57_Mop_up_pt3/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\t.text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\t.data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. 
int cgprimsize(int type) {
  // Value handed back after fatald() for an unknown type,
  // exactly as the plain "return 0" did before
  int nbytes = 0;

  // Pointers are tested first; the primitive types follow
  if (ptrtype(type))
    nbytes = 8;
  else if (type == P_CHAR)
    nbytes = 1;
  else if (type == P_INT)
    nbytes = 4;
  else if (type == P_LONG)
    nbytes = 8;
  else
    fatald("Bad type in cgprimsize:", type);

  return (nbytes);
}

// Work out a suitably aligned memory offset for a scalar type.
// type is the scalar type, offset is a memory offset that has
// not been handed out yet, and direction says which way to
// move it (1 is up, -1 is down). The result is either the
// original offset or one above/below it.
int cgalign(int type, int offset, int direction) {
  // x86-64 does not need this, but ints, longs and
  // pointers are still placed on a 4-byte boundary
  const int alignment = 4;

  // chars can live on any offset
  if (type == P_CHAR)
    return (offset);

  // Anything that is not an int, a long or a pointer is an error
  if (type != P_INT && type != P_LONG && !ptrtype(type))
    fatald("Bad type in cg_align:", type);

  // Generic rounding code, kept in this form so it can be reused
  return ((offset + direction * (alignment - 1)) & ~(alignment - 1));
}

// Position of next local variable relative to stack base pointer.
// We store the offset as positive to make aligning the stack pointer easier
static int localOffset;
static int stackOffset;

// Reserve room for a new local variable of the given size
// and return its (negative) position.
static int newlocaloffset(int size) {
  int step = size;

  // Every local takes up at least 4 bytes
  if (step <= 4)
    step = 4;

  localOffset += step;
  return (-localOffset);
}

// List of available registers and their names.
// We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "%r10", "%r11", "%r12", "%r13", "%r9", "%r8", "%rcx", "%rdx", "%rsi", "%rdi" }; static char *breglist[] = { "%r10b", "%r11b", "%r12b", "%r13b", "%r9b", "%r8b", "%cl", "%dl", "%sil", "%dil" }; static char *dreglist[] = { "%r10d", "%r11d", "%r12d", "%r13d", "%r9d", "%r8d", "%ecx", "%edx", "%esi", "%edi" }; // Push and pop a register on/off the stack static void pushreg(int r) { fprintf(Outfile, "\tpushq\t%s\n", reglist[r]); } static void popreg(int r) { fprintf(Outfile, "\tpopq\t%s\n", reglist[r]); } // Set all registers as available. // But if reg is positive, don't free that one. void freeall_registers(int keepreg) { int i; fprintf(Outfile, "# freeing all registers\n"); for (i = 0; i < NUMFREEREGS; i++) if (i != keepreg) freereg[i] = 1; } // When we need to spill a register, we choose // the following register and then cycle through // the remaining registers. The spillreg increments // continually, so we need to take a modulo NUMFREEREGS // on it. static int spillreg=0; // Allocate a free register. Return the number of // the register. Die if no available registers. int alloc_register(void) { int reg; for (reg = 0; reg < NUMFREEREGS; reg++) { if (freereg[reg]) { freereg[reg] = 0; fprintf(Outfile, "# allocated register %s\n", reglist[reg]); return (reg); } } // We have no registers, so we must spill one reg= (spillreg % NUMFREEREGS); spillreg++; fprintf(Outfile, "# spilling reg %s\n", reglist[reg]); pushreg(reg); return (reg); } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) {
  // Freeing a register that is already marked free is an error
  if (freereg[reg] != 0) {
    fprintf(Outfile, "# error trying to free register %s\n", reglist[reg]);
    fatald("Error trying to free register", reg);
  }
  // If this was a spilled register, get it back
  // Note: while any spill is outstanding, the register that gets
  // popped is the most recently spilled one (spillreg modulo
  // NUMFREEREGS), not necessarily the reg that was passed in,
  // and no register is marked free.
  if (spillreg > 0) {
    spillreg--;
    reg= (spillreg % NUMFREEREGS);
    fprintf(Outfile, "# unspilling reg %s\n", reglist[reg]);
    popreg(reg);
  } else {
    fprintf(Outfile, "# freeing reg %s\n", reglist[reg]);
    freereg[reg] = 1;
  }
}

// Spill all registers on the stack
// They are pushed in ascending register order
void spill_all_regs(void) {
  int i;

  for (i = 0; i < NUMFREEREGS; i++)
    pushreg(i);
}

// Unspill all registers from the stack
// They are popped in descending order, mirroring spill_all_regs()
static void unspill_all_regs(void) {
  int i;

  for (i = NUMFREEREGS-1; i >= 0; i--)
    popreg(i);
}

// Print out the assembly preamble
// All registers are marked free, the text segment is selected and
// the hand-written "switch" helper routine is emitted.
// NOTE(review): the emitted comment names %rsi as the switch table,
// but the routine copies the table address from %rdx into %rsi.
void cgpreamble() {
  freeall_registers(NOREG);
  cgtextseg();
  fprintf(Outfile,
          "# internal switch(expr) routine\n"
          "# %%rsi = switch table, %%rax = expr\n"
          "# from SubC: http://www.t3x.org/subc/\n"
          "\n"
          "switch:\n"
          " pushq %%rsi\n"
          " movq %%rdx, %%rsi\n"
          " movq %%rax, %%rbx\n"
          " cld\n"
          " lodsq\n"
          " movq %%rax, %%rcx\n"
          "next:\n"
          " lodsq\n"
          " movq %%rax, %%rdx\n"
          " lodsq\n"
          " cmpq %%rdx, %%rbx\n"
          " jnz no\n"
          " popq %%rsi\n"
          " jmp *%%rax\n"
          "no:\n"
          " loop next\n"
          " lodsq\n"
          " popq %%rsi\n"
          " jmp *%%rax\n"
          "\n");
}

// Nothing to do
void cgpostamble() {
}

// Print out a function preamble
// sym is the function's symbol: its member list is walked as the
// parameter list, and every entry on the Loclhead list gets a
// stack position as well.
void cgfuncpreamble(struct symtable *sym) {
  char *name = sym->name;
  struct symtable *parm, *locvar;
  int cnt;
  int paramOffset = 16;         // Any pushed params start at this stack offset
  int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists

  // Output in the text segment, reset local offset
  cgtextseg();
  localOffset = 0;

  // Output the function start, save the old %rbp and
  // copy %rsp into %rbp. Only C_GLOBAL functions get
  // the .globl and .type directives.
  if (sym->class == C_GLOBAL)
    fprintf(Outfile, "\t.globl\t%s\n" "\t.type\t%s, @function\n", name, name);
  fprintf(Outfile, "%s:\n" "\tpushq\t%%rbp\n" "\tmovq\t%%rsp, %%rbp\n", name);

  // Copy any in-register parameters to the stack, up to six of them
  // The remaining parameters are already on the stack
  for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) {
    if (cnt > 6) {
      // Seventh and later parameters: 8 bytes apart, starting at paramOffset
      parm->st_posn = paramOffset;
      paramOffset += 8;
    } else {
      // First six: give it a local slot and store the next parameter
      // register there; paramReg walks down through the register lists
      parm->st_posn = newlocaloffset(parm->size);
      cgstorlocal(paramReg--, parm);
    }
  }

  // For the remainder, if they are a parameter then they are
  // already on the stack. If only a local, make a stack position.
  for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) {
    locvar->st_posn = newlocaloffset(locvar->size);
  }

  // Align the stack pointer to be a multiple of 16
  // less than its previous value
  stackOffset = (localOffset + 15) & ~15;
  fprintf(Outfile, "\taddq\t$%d,%%rsp\n", -stackOffset);
}

// Print out a function postamble
// Emits the function's end label, undoes the stack adjustment made
// by the preamble, restores %rbp, returns, and frees all registers.
void cgfuncpostamble(struct symtable *sym) {
  cglabel(sym->st_endlabel);
  fprintf(Outfile, "\taddq\t$%d,%%rsp\n", stackOffset);
  fputs("\tpopq %rbp\n" "\tret\n", Outfile);
  freeall_registers(NOREG);
}

// Load an integer literal value into a register.
// Return the number of the register.
// For x86-64, we don't need to worry about the type.
int cgloadint(int value, int type) {
  // Get a new register
  int r = alloc_register();

  fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]);
  return (r);
}

// Load a value from a variable into a register.
// Return the number of the register. If the
// operation is pre- or post-increment/decrement,
// also perform this action.
int cgloadglob(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecq\t%s(%%rip)\n", sym->name); } else // Print out the code to initialise it switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovzbq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincb\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecb\t%s(%%rip)\n", sym->name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name); fprintf(Outfile, "\tmovslq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincl\t%s(%%rip)\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdecl\t%s(%%rip)\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. 
int cgloadlocal(struct symtable *sym, int op) {
  // Get a new register
  int r = alloc_register();

  // Print out the code to initialise it
  // The variable is addressed as st_posn(%rbp). 8-byte values
  // take the first branch, chars and ints the switch below.
  if (cgprimsize(sym->type) == 8) {
    if (op == A_PREINC)
      fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->st_posn);
    if (op == A_PREDEC)
      fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->st_posn);
    fprintf(Outfile, "\tmovq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]);
    if (op == A_POSTINC)
      fprintf(Outfile, "\tincq\t%d(%%rbp)\n", sym->st_posn);
    if (op == A_POSTDEC)
      fprintf(Outfile, "\tdecq\t%d(%%rbp)\n", sym->st_posn);
  } else
    switch (sym->type) {
      case P_CHAR:
        if (op == A_PREINC)
          fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->st_posn);
        if (op == A_PREDEC)
          fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->st_posn);
        fprintf(Outfile, "\tmovzbq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]);
        if (op == A_POSTINC)
          fprintf(Outfile, "\tincb\t%d(%%rbp)\n", sym->st_posn);
        if (op == A_POSTDEC)
          fprintf(Outfile, "\tdecb\t%d(%%rbp)\n", sym->st_posn);
        break;
      case P_INT:
        if (op == A_PREINC)
          fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->st_posn);
        if (op == A_PREDEC)
          fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->st_posn);
        fprintf(Outfile, "\tmovslq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]);
        if (op == A_POSTINC)
          fprintf(Outfile, "\tincl\t%d(%%rbp)\n", sym->st_posn);
        if (op == A_POSTDEC)
          fprintf(Outfile, "\tdecl\t%d(%%rbp)\n", sym->st_posn);
        break;
      default:
        fatald("Bad type in cgloadlocal:", sym->type);
    }
  return (r);
}

// Given the label number of a global string,
// load its address into a new register
int cgloadglobstr(int label) {
  // Get a new register
  int r = alloc_register();
  fprintf(Outfile, "\tleaq\tL%d(%%rip), %s\n", label, reglist[r]);
  return (r);
}

// Add two registers together and return
// the number of the register with the result
// The sum is left in r1; r2 is freed.
int cgadd(int r1, int r2) {
  fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r2], reglist[r1]);
  free_register(r2);
  return (r1);
}

// Subtract the second register from the first and
// return the number of the register with the result
int cgsub(int r1, int r2) {
  fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]);
  free_register(r2);
  return (r1);
}

// Multiply two registers together and return
// the number of the register with the result
int cgmul(int r1, int r2) {
  fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r2], reglist[r1]);
  free_register(r2);
  return (r1);
}

// Divide the first register by the second and
// return the number of the register with the result
// The dividend goes through %rax: it is sign-extended with cqo,
// divided with idivq, and the quotient is copied back into r1.
int cgdiv(int r1, int r2) {
  fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]);
  fprintf(Outfile, "\tcqo\n");
  fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]);
  fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]);
  free_register(r2);
  return (r1);
}

// Bitwise AND r2 into r1; r2 is freed and r1 returned
int cgand(int r1, int r2) {
  fprintf(Outfile, "\tandq\t%s, %s\n", reglist[r2], reglist[r1]);
  free_register(r2);
  return (r1);
}

// Bitwise OR r2 into r1; r2 is freed and r1 returned
int cgor(int r1, int r2) {
  fprintf(Outfile, "\torq\t%s, %s\n", reglist[r2], reglist[r1]);
  free_register(r2);
  return (r1);
}

// Bitwise XOR r2 into r1; r2 is freed and r1 returned
int cgxor(int r1, int r2) {
  fprintf(Outfile, "\txorq\t%s, %s\n", reglist[r2], reglist[r1]);
  free_register(r2);
  return (r1);
}

// Shift r1 left by the count held in r2. The low byte of r2 is
// copied into %cl first, as the shlq form used here takes its
// count from %cl.
int cgshl(int r1, int r2) {
  fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]);
  fprintf(Outfile, "\tshlq\t%%cl, %s\n", reglist[r1]);
  free_register(r2);
  return (r1);
}

// Shift r1 right by the count held in r2, using shrq with
// the count in %cl.
int cgshr(int r1, int r2) {
  fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]);
  fprintf(Outfile, "\tshrq\t%%cl, %s\n", reglist[r1]);
  free_register(r2);
  return (r1);
}

// Negate a register's value
int cgnegate(int r) {
  fprintf(Outfile, "\tnegq\t%s\n", reglist[r]);
  return (r);
}

// Invert a register's value
int cginvert(int r) {
  fprintf(Outfile, "\tnotq\t%s\n", reglist[r]);
  return (r);
}

// Logically negate a register's value
// The register ends up holding 1 if it was zero and 0 otherwise.
int cglognot(int r) {
  fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]);
  fprintf(Outfile, "\tsete\t%s\n", breglist[r]);
  fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]);
  return (r);
}

// Load a boolean value (only 0 or 1)
// into the given register
void cgloadboolean(int r, int val) {
  fprintf(Outfile, "\tmovq\t$%d, %s\n", val, reglist[r]);
}

// Convert an integer value to a boolean value. Jump if
// it's an IF, WHILE, LOGAND or LOGOR operation
// For A_IF, A_WHILE and A_LOGAND the jump to label is taken when
// the value is zero; for A_LOGOR when it is non-zero. Any other
// op leaves 0 or 1 in the register instead of jumping.
int cgboolean(int r, int op, int label) {
  fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]);
  switch(op) {
    case A_IF:
    case A_WHILE:
    case A_LOGAND:
      fprintf(Outfile, "\tje\tL%d\n", label);
      break;
    case A_LOGOR:
      fprintf(Outfile, "\tjne\tL%d\n", label);
      break;
    default:
      fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]);
      fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]);
  }
  return (r);
}

// Call a function with the given symbol id
// Pop off any arguments pushed on the stack
// Return the register with the result
// NOTE(review): the unspill_all_regs() here presumably pairs with a
// spill_all_regs() issued by the caller before the arguments were
// set up; that call is not visible in this part of the file.
int cgcall(struct symtable *sym, int numargs) {
  int outr;

  // Call the function
  fprintf(Outfile, "\tcall\t%s@PLT\n", sym->name);

  // Remove any arguments pushed on the stack
  if (numargs > 6)
    fprintf(Outfile, "\taddq\t$%d, %%rsp\n", 8 * (numargs - 6));

  // Unspill all the registers
  unspill_all_regs();

  // Get a new register and copy the return value into it
  outr = alloc_register();
  fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]);
  return (outr);
}

// Given a register with an argument value,
// copy this argument into the argposn'th
// parameter in preparation for a future function
// call. Note that argposn is 1, 2, 3, 4, ..., never zero.
void cgcopyarg(int r, int argposn) {

  // If this is above the sixth argument, simply push the
  // register on the stack.
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpushq\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmovq\t%s, %s\n", reglist[r], reglist[FIRSTPARAMREG - argposn + 1]); } free_register(r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsalq\t$%d, %s\n", val, reglist[r]); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %s(%%rip)\n", reglist[r], sym->name); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %s(%%rip)\n", breglist[r], sym->name); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %s(%%rip)\n", dreglist[r], sym->name); break; default: fatald("Bad type in cgstorglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %d(%%rbp)\n", reglist[r], sym->st_posn); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %d(%%rbp)\n", breglist[r], sym->st_posn); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %d(%%rbp)\n", dreglist[r], sym->st_posn); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size, type; int initvalue; int i; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the variable (or its elements if an array) // and the type of the variable if (node->stype == S_ARRAY) { size = typesize(value_at(node->type), node->ctype); type = value_at(node->type); } else { size = node->size; type = node->type; } // Generate the global identity and the label cgdataseg(); if (node->class == C_GLOBAL) 
fprintf(Outfile, "\t.globl\t%s\n", node->name); fprintf(Outfile, "%s:\n", node->name); // Output space for one or more elements for (i = 0; i < node->nelems; i++) { // Get any initial value initvalue = 0; if (node->initlist != NULL) initvalue = node->initlist[i]; // Generate the space for this type switch (size) { case 1: fprintf(Outfile, "\t.byte\t%d\n", initvalue); break; case 4: fprintf(Outfile, "\t.long\t%d\n", initvalue); break; case 8: // Generate the pointer to a string literal. Treat a zero value // as actually zero, not the label L0 if (node->initlist != NULL && type == pointer_to(P_CHAR) && initvalue != 0) fprintf(Outfile, "\t.quad\tL%d\n", initvalue); else fprintf(Outfile, "\t.quad\t%d\n", initvalue); break; default: for (i = 0; i < size; i++) fprintf(Outfile, "\t.byte\t0\n"); } } } // Generate a global string and its start label // Don't output the label if append is true. void cgglobstr(int l, char *strvalue, int append) { char *cptr; if (!append) cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\t.byte\t%d\n", *cptr); } } void cgglobstrend(void) { fprintf(Outfile, "\t.byte\t0\n"); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(NOREG); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Only return a value if we have a value to return if (reg != NOREG) { // Deal with pointers here as we can't put them in // the switch statement if (ptrtype(sym->type)) fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); else { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzbl\t%s, %%eax\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %%eax\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", 
sym->type); } } } cgjump(sym->st_endlabel); } // Generate code to load the address of an // identifier into a variable. Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL || sym->class == C_STATIC) fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", sym->name, reglist[r]); else fprintf(Outfile, "\tleaq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzbq\t(%s), %s\n", reglist[r], reglist[r]); break; case 4: fprintf(Outfile, "\tmovslq\t(%s), %s\n", reglist[r], reglist[r]); break; case 8: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { // Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmovb\t%s, (%s)\n", breglist[r1], reglist[r2]); break; case 4: fprintf(Outfile, "\tmovl\t%s, (%s)\n", dreglist[r1], reglist[r2]); break; case 8: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } // Generate a switch jump table and the code to // load the registers and call the switch() code void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; // Get a label for the switch table label = genlabel(); cglabel(label); // Heuristic. If we have no cases, create one case // which points to the default case if (casecount == 0) { caseval[0] = 0; caselabel[0] = defaultlabel; casecount = 1; } // Generate the switch jump table. 
fprintf(Outfile, "\t.quad\t%d\n", casecount); for (i = 0; i < casecount; i++) fprintf(Outfile, "\t.quad\t%d, L%d\n", caseval[i], caselabel[i]); fprintf(Outfile, "\t.quad\tL%d\n", defaultlabel); // Load the specific registers cglabel(toplabel); fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); fprintf(Outfile, "\tleaq\tL%d(%%rip), %%rdx\n", label); fprintf(Outfile, "\tjmp\tswitch\n"); } // Move value between registers void cgmove(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s, %s\n", reglist[r1], reglist[r2]); } ================================================ FILE: 57_Mop_up_pt3/cg_arm.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for ARMv6 on Raspberry Pi // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. static int freereg[4]; static char *reglist[4] = { "r4", "r5", "r6", "r7" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // We have to store large integer literal values in memory. // Keep a list of them which will be output in the postamble #define MAXINTS 1024 int Intlist[MAXINTS]; static int Intslot = 0; // Determine the offset of a large integer // literal from the .L3 label. If the integer // isn't in the list, add it. 
static void set_int_offset(int val) { int offset = -1; // See if it is already there for (int i = 0; i < Intslot; i++) { if (Intlist[i] == val) { offset = 4 * i; break; } } // Not in the list, so add it if (offset == -1) { offset = 4 * Intslot; if (Intslot == MAXINTS) fatal("Out of int slots in set_int_offset()"); Intlist[Intslot++] = val; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L3+%d\n", offset); } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Print out the assembly postamble void cgpostamble() { // Print out the global variables fprintf(Outfile, ".L2:\n"); for (int i = 0; i < Globs; i++) { if (Symtable[i].stype == S_VARIABLE) fprintf(Outfile, "\t.word %s\n", Symtable[i].name); } // Print out the integer literals fprintf(Outfile, ".L3:\n"); for (int i = 0; i < Intslot; i++) { fprintf(Outfile, "\t.word %d\n", Intlist[i]); } } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, \%%function\n" "%s:\n" "\tpush\t{fp, lr}\n" "\tadd\tfp, sp, #4\n" "\tsub\tsp, sp, #8\n" "\tstr\tr0, [fp, #-8]\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Symtable[id].endlabel); fputs("\tsub\tsp, fp, #4\n" "\tpop\t{fp, pc}\n" "\t.align\t2\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); // If the literal value is small, do it with one instruction if (value <= 1000) fprintf(Outfile, "\tmov\t%s, #%d\n", reglist[r], value); else { set_int_offset(value); fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); } return (r); } // Determine the offset of a variable from the .L2 // label. Yes, this is inefficient code. static void set_var_offset(int id) { int offset = 0; // Walk the symbol table up to id. 
// Find S_VARIABLEs and add on 4 until // we get to our variable for (int i = 0; i < id; i++) { if (Symtable[i].stype == S_VARIABLE) offset += 4; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L2+%d\n", offset); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tldrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s, %s\n", reglist[r1], reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\tmul\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { // To do a divide: r0 holds the dividend, r1 holds the divisor. // The quotient is in r0. 
fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r1]); fprintf(Outfile, "\tmov\tr1, %s\n", reglist[r2]); fprintf(Outfile, "\tbl\t__aeabi_idiv\n"); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r1]); free_register(r2); return (r1); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]); fprintf(Outfile, "\tbl\t%s\n", Symtable[id].name); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r]); return (r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tlsl\t%s, %s, #%d\n", reglist[r], reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tstr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgstorglob:", Symtable[id].type); } return (r); } // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { if (ptrtype(type)) return (4); switch (type) { case P_CHAR: return (1); case P_INT: case P_LONG: return (4); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Symtable[id].type); fprintf(Outfile, "\t.data\n" "\t.globl\t%s\n", Symtable[id].name); switch (typesize) { case 1: fprintf(Outfile, "%s:\t.byte\t0\n", Symtable[id].name); break; case 4: fprintf(Outfile, "%s:\t.long\t0\n", Symtable[id].name); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "moveq", "movne", "movlt", "movgt", "movle", "movge" }; // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "movne", "moveq", "movge", "movle", "movgt", "movlt" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #1\n", cmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #0\n", invcmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\tuxtb\t%s, %s\n", reglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tb\tL%d\n", l); } // List of inverted branch instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *brlist[] = { "bne", "beq", "bge", "ble", "bgt", "blt" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", brlist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[reg]); cgjump(Symtable[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. Return a new register int cgaddress(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); fprintf(Outfile, "\tmov\t%s, r3\n", reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tldrb\t%s, [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [%s]\n", reglist[r1], reglist[r2]); break; case P_INT: case P_LONG: fprintf(Outfile, "\tstr\t%s, [%s]\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 57_Mop_up_pt3/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { no_seg, 
text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\tsection .text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\tsection .data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch (type) { case P_CHAR: return (offset); case P_INT: case P_LONG: break; default: if (!ptrtype(type)) fatald("Bad type in cg_align:", type); } // Here we have an int or a long. Align it on a 4-byte offset // I put the generic code here so it can be reused elsewhere. alignment = 4; offset = (offset + direction * (alignment - 1)) & ~(alignment - 1); return (offset); } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. int newlocaloffset(int size) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (size > 4) ? size : 4; return (-localOffset); } // List of available registers and their names. 
// We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "r10", "r11", "r12", "r13", "r9", "r8", "rcx", "rdx", "rsi", "rdi" }; static char *breglist[] = { "r10b", "r11b", "r12b", "r13b", "r9b", "r8b", "cl", "dl", "sil", "dil" }; static char *dreglist[] = { "r10d", "r11d", "r12d", "r13d", "r9d", "r8d", "ecx", "edx", "esi", "edi" }; // Push and pop a register on/off the stack static void pushreg(int r) { fprintf(Outfile, "\tpush\t%s\n", reglist[r]); } static void popreg(int r) { fprintf(Outfile, "\tpop\t%s\n", reglist[r]); } // Set all registers as available // But if reg is positive, don't free that one. void freeall_registers(int keepreg) { int i; fprintf(Outfile, "; freeing all registers\n"); for (i = 0; i < NUMFREEREGS; i++) if (i != keepreg) freereg[i] = 1; } // When we need to spill a register, we choose // the following register and then cycle through // the remaining registers. The spillreg increments // continually, so we need to take a modulo NUMFREEREGS // on it. static int spillreg=0; // Allocate a free register. Return the number of // the register. Die if no available registers. int alloc_register(void) { int reg; for (reg = 0; reg < NUMFREEREGS; reg++) { if (freereg[reg]) { freereg[reg] = 0; fprintf(Outfile, "; allocated register %s\n", reglist[reg]); return (reg); } } // We have no registers, so we must spill one reg = (spillreg % NUMFREEREGS); spillreg++; fprintf(Outfile, "; spilling reg %s\n", reglist[reg]); pushreg(reg); return (reg); } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) {
  // Freeing a register that is already marked free is a fatal error
  if (freereg[reg] != 0) {
    // NOTE(review): this diagnostic line starts with '#', while every
    // other comment this file emits starts with ';' - confirm intent
    fprintf(Outfile, "# error trying to free register %s\n", reglist[reg]);
    fatald("Error trying to free register", reg);
  }
  // If this was a spilled register, get it back.
  // While any spill is outstanding, the most recently spilled register
  // is popped instead of marking the argument register as free; note
  // that reg is overwritten with the spilled register's number here.
  if (spillreg > 0) {
    spillreg--;
    reg= (spillreg % NUMFREEREGS);
    fprintf(Outfile, "; unspilling reg %s\n", reglist[reg]);
    popreg(reg);
  } else {
    fprintf(Outfile, "; freeing reg %s\n", reglist[reg]);
    freereg[reg] = 1;
  }
}

// Spill all registers on the stack, in ascending register order
void spill_all_regs(void) {
  int i;

  for (i = 0; i < NUMFREEREGS; i++)
    pushreg(i);
}

// Unspill all registers from the stack, in the reverse
// order to the pushes done by spill_all_regs()
static void unspill_all_regs(void) {
  int i;

  for (i = NUMFREEREGS-1; i >= 0; i--)
    popreg(i);
}

// Print out the assembly preamble: free every register, select the
// text section and emit the shared "switch" helper routine that the
// code generated by cgswitch() jumps to.
void cgpreamble() {
  freeall_registers(NOREG);
  cgtextseg();
  fprintf(Outfile,
	  "; internal switch(expr) routine\n"
	  "; rsi = switch table, rax = expr\n"
	  "; from SubC: http://www.t3x.org/subc/\n"
	  "\n"
	  "switch:\n"
	  " push rsi\n"
	  " mov rsi, rdx\n"
	  " mov rbx, rax\n"
	  " cld\n"
	  " lodsq\n"
	  " mov rcx, rax\n"
	  "next:\n"
	  " lodsq\n"
	  " mov rdx, rax\n"
	  " lodsq\n"
	  " cmp rbx, rdx\n"
	  " jnz no\n"
	  " pop rsi\n"
	  " jmp rax\n"
	  "no:\n"
	  " loop next\n"
	  " lodsq\n"
	  " pop rsi\n"
	  " jmp rax\n"
	  "\n");
}

// Nothing to do
void cgpostamble() {
}

// Print out a function preamble: emit the function label and frame
// setup, assign a stack position to every parameter and local
// variable, and reserve the (16-byte aligned) stack space.
void cgfuncpreamble(struct symtable *sym) {
  char *name = sym->name;
  struct symtable *parm, *locvar;
  int cnt;
  int paramOffset = 16;		// Any pushed params start at this stack offset
  int paramReg = FIRSTPARAMREG;	// Index to the first param register in above reg lists

  // Output in the text segment, reset local offset
  cgtextseg();
  localOffset = 0;

  // Output the function start, save the rsp and rbp
  if (sym->class == C_GLOBAL)
    fprintf(Outfile, "\tglobal\t%s\n", name);
  fprintf(Outfile,
	  "%s:\n" "\tpush\trbp\n"
	  "\tmov\trbp, rsp\n", name);

  // Copy any in-register parameters to the stack, up to six of them
  // The remaining parameters are already on the stack
  for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) {
    if (cnt > 6) {
      // Seventh and later parameters: positive offsets from rbp
      parm->st_posn = paramOffset;
      paramOffset += 8;
    } else {
      // paramReg counts down through the register tables, one
      // parameter register per argument
      parm->st_posn = newlocaloffset(parm->size);
      cgstorlocal(paramReg--, parm);
    }
  }

  // For the remainder, if they are a parameter then they are
  // already on the stack. If only a local, make a stack position.
  for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) {
    locvar->st_posn = newlocaloffset(locvar->size);
  }

  // Align the stack pointer to be a multiple of 16
  // less than its previous value
  stackOffset = (localOffset + 15) & ~15;
  fprintf(Outfile, "\tadd\trsp, %d\n", -stackOffset);
}

// Print out a function postamble: the end label, release of the
// stack space reserved by cgfuncpreamble(), and the return.
// All registers are marked free afterwards.
void cgfuncpostamble(struct symtable *sym) {
  cglabel(sym->st_endlabel);
  fprintf(Outfile, "\tadd\trsp, %d\n", stackOffset);
  fputs("\tpop rbp\n" "\tret\n", Outfile);
  freeall_registers(NOREG);
}

// Load an integer literal value into a register.
// Return the number of the register.
// For x86-64, we don't need to worry about the type.
int cgloadint(int value, int type) {
  // Get a new register
  int r = alloc_register();

  fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value);
  return (r);
}

// Load a value from a variable into a register.
// Return the number of the register. If the
// operation is pre- or post-increment/decrement,
// also perform this action.
int cgloadglob(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword [%s]\n", sym->name); } else // Print out the code to initialise it switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], sym->name); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte [%s]\n", sym->name); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tdword [%s]\n", sym->name); fprintf(Outfile, "\tmovsx\t%s, word [%s]\n", dreglist[r], sym->name); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword [%s]\n", sym->name); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword [%s]\n", sym->name); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Load a value from a local variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. 
int cgloadlocal(struct symtable *sym, int op) { // Get a new register int r = alloc_register(); // Print out the code to initialise it if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->st_posn); fprintf(Outfile, "\tmov\t%s, [rbp+%d]\n", reglist[r], sym->st_posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tqword\t[rbp+%d]\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tqword\t[rbp+%d]\n", sym->st_posn); } else switch (sym->type) { case P_CHAR: if (op == A_PREINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->st_posn); fprintf(Outfile, "\tmovzx\t%s, byte [rbp+%d]\n", reglist[r], sym->st_posn); if (op == A_POSTINC) fprintf(Outfile, "\tinc\tbyte\t[rbp+%d]\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tbyte\t[rbp+%d]\n", sym->st_posn); break; case P_INT: if (op == A_PREINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", sym->st_posn); if (op == A_PREDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", sym->st_posn); fprintf(Outfile, "\tmovsx\t%s, dword [rbp+%d]\n", reglist[r], sym->st_posn); fprintf(Outfile, "\tmovsxd\t%s, %s\n", reglist[r], dreglist[r]); if (op == A_POSTINC) if (op == A_POSTINC) fprintf(Outfile, "\tinc\tdword\t[rbp+%d]\n", sym->st_posn); if (op == A_POSTDEC) fprintf(Outfile, "\tdec\tdword\t[rbp+%d]\n", sym->st_posn); break; default: fatald("Bad type in cgloadlocal:", sym->type); } return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, L%d\n", reglist[r], label); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r1], reglist[r2]); 
free_register(r2); return (r1); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tand\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } int cgor(int r1, int r2) { fprintf(Outfile, "\tor\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txor\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshl\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshr\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tneg\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnot\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r], breglist[r]); return (r); } // Load a 
boolean value (only 0 or 1) // into the given register void cgloadboolean(int r, int val) { fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], val); } // Convert an integer value to a boolean value. Jump if // it's an IF, WHILE, LOGAND or LOGOR operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); switch(op) { case A_IF: case A_WHILE: case A_LOGAND: fprintf(Outfile, "\tje\tL%d\n", label); break; case A_LOGOR: fprintf(Outfile, "\tjne\tL%d\n", label); break; default: fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, byte %s\n", reglist[r], breglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { int outr; // Call the function fprintf(Outfile, "\tcall\t%s\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\tadd\trsp, %d\n", 8 * (numargs - 6)); // Unspill all the registers unspill_all_regs(); // Get a new register and copy the return value into it outr = alloc_register(); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpush\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmov\t%s, %s\n", reglist[FIRSTPARAMREG - argposn + 1], reglist[r]); } free_register(r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsal\t%s, %d\n", reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, dreglist[r]); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\tqword\t[rbp+%d], %s\n", sym->st_posn, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\tbyte\t[rbp+%d], %s\n", sym->st_posn, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\tdword\t[rbp+%d], %s\n", sym->st_posn, dreglist[r]); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size, type; int initvalue; int i; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the variable (or its elements if an array) // and the type of the variable if (node->stype == S_ARRAY) { size = typesize(value_at(node->type), node->ctype); type = value_at(node->type); } else { size = node->size; type = node->type; } // Generate the global identity and the label cgdataseg(); if (node->class == C_GLOBAL) fprintf(Outfile, 
"\tglobal\t%s\n", node->name); fprintf(Outfile, "%s:\n", node->name); // Output space for one or more elements for (i = 0; i < node->nelems; i++) { // Get any initial value initvalue = 0; if (node->initlist != NULL) initvalue = node->initlist[i]; // Generate the space for this type // original version switch(size) { case 1: fprintf(Outfile, "\tdb\t%d\n", initvalue); break; case 4: fprintf(Outfile, "\tdd\t%d\n", initvalue); break; case 8: // Generate the pointer to a string literal. Treat a zero value // as actually zero, not the label L0 if (node->initlist != NULL && type == pointer_to(P_CHAR) && initvalue != 0) fprintf(Outfile, "\tdq\tL%d\n", initvalue); else fprintf(Outfile, "\tdq\t%d\n", initvalue); break; default: for (i = 0; i < size; i++) fprintf(Outfile, "\tdb\t0\n"); /* compact version using times instead of loop fprintf(Outfile, "\ttimes\t%d\tdb\t0\n", size); */ } } } // Generate a global string and its start label // Don't output the label if append is true. void cgglobstr(int l, char *strvalue, int append) { char *cptr; if (!append) cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\tdb\t%d\n", *cptr); } } void cgglobstrend(void) { fprintf(Outfile, "\tdb\t0\n"); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(NOREG); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Only return a value if we have a value to return if (reg != NOREG) { // Deal with pointers here as we can't put them in // the switch statement if (ptrtype(sym->type)) fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); else { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzx\teax, %s\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmov\teax, %s\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } } 
} cgjump(sym->st_endlabel); } // Generate code to load the address of an // identifier into a variable. Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL || sym->class == C_STATIC) fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r], sym->name); else fprintf(Outfile, "\tlea\t%s, [rbp+%d]\n", reglist[r], sym->st_posn); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], reglist[r]); break; case 4: fprintf(Outfile, "\tmovsx\t%s, dword [%s]\n", reglist[r], reglist[r]); break; case 8: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { //Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmov\t[%s], byte %s\n", reglist[r2], breglist[r1]); break; case 4: fprintf(Outfile, "\tmov\t[%s], dword %s\n", reglist[r2], dreglist[r1]); break; case 8: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } // Generate a switch jump table and the code to // load the registers and call the switch() code void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; // Get a label for the switch table label = genlabel(); cglabel(label); // Heuristic. If we have no cases, create one case // which points to the default case if (casecount == 0) { caseval[0] = 0; caselabel[0] = defaultlabel; casecount = 1; } // Generate the switch jump table. 
fprintf(Outfile, "\tdq\t%d\n", casecount); for (i = 0; i < casecount; i++) fprintf(Outfile, "\tdq\t%d, L%d\n", caseval[i], caselabel[i]); fprintf(Outfile, "\tdq\tL%d\n", defaultlabel); // Load the specific registers cglabel(toplabel); fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); fprintf(Outfile, "\tmov\trdx, L%d\n", label); fprintf(Outfile, "\tjmp\tswitch\n"); } // Move value between registers void cgmove(int r1, int r2) { fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r2], reglist[r1]); } ================================================ FILE: 57_Mop_up_pt3/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Linestart; // True if at start of a line extern_ int Putback; // Character put back by scanner extern_ struct symtable *Functionid; // Symbol ptr of the current function extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ char *Infilename; // Name of file we are parsing extern_ char *Outfilename; // Name of file we opened as Outfile extern_ struct token Token; // Last token scanned extern_ struct token Peektoken; // A look-ahead token extern_ char Text[TEXTLEN + 1]; // Last identifier scanned extern_ int Looplevel; // Depth of nested loops extern_ int Switchlevel; // Depth of nested switches extern char *Tstring[]; // List of token strings // Symbol table lists extern_ struct symtable *Globhead, *Globtail; // Global variables and functions extern_ struct symtable *Loclhead, *Locltail; // Local variables extern_ struct symtable *Parmhead, *Parmtail; // Local parameters extern_ struct symtable *Membhead, *Membtail; // Temp list of struct/union members extern_ struct symtable *Structhead, *Structtail; // List of struct types extern_ struct symtable *Unionhead, *Uniontail; // List of union types extern_ struct symtable *Enumhead, *Enumtail; // List of enum types and values 
extern_ struct symtable *Typehead, *Typetail; // List of typedefs // Command-line flags extern_ int O_dumpAST; // If true, dump the AST trees extern_ int O_dumpsym; // If true, dump the symbol table extern_ int O_keepasm; // If true, keep any assembly files extern_ int O_assemble; // If true, assemble the assembly files extern_ int O_dolink; // If true, link the object files extern_ int O_verbose; // If true, print info on compilation stages ================================================ FILE: 57_Mop_up_pt3/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 static struct symtable *composite_declaration(int type); static int typedef_declaration(struct symtable **ctype); static int type_of_typedef(char *name, struct symtable **ctype); static void enum_declaration(void); // Parse the current token and return a primitive type enum value, // a pointer to any composite type and possibly modify // the class of the type. int parse_type(struct symtable **ctype, int *class) { int type, exstatic = 1; // See if the class has been changed to extern or static while (exstatic) { switch (Token.token) { case T_EXTERN: if (*class == C_STATIC) fatal("Illegal to have extern and static at the same time"); *class = C_EXTERN; scan(&Token); break; case T_STATIC: if (*class == C_LOCAL) fatal("Compiler doesn't support static local declarations"); if (*class == C_EXTERN) fatal("Illegal to have extern and static at the same time"); *class = C_STATIC; scan(&Token); break; default: exstatic = 0; } } // Now work on the actual type keyword switch (Token.token) { case T_VOID: type = P_VOID; scan(&Token); break; case T_CHAR: type = P_CHAR; scan(&Token); break; case T_INT: type = P_INT; scan(&Token); break; case T_LONG: type = P_LONG; scan(&Token); break; // For the following, if we have a ';' after the // parsing then there is no type, so return -1. 
// Example: struct x {int y; int z}; case T_STRUCT: type = P_STRUCT; *ctype = composite_declaration(P_STRUCT); if (Token.token == T_SEMI) type = -1; break; case T_UNION: type = P_UNION; *ctype = composite_declaration(P_UNION); if (Token.token == T_SEMI) type = -1; break; case T_ENUM: type = P_INT; // Enums are really ints enum_declaration(); if (Token.token == T_SEMI) type = -1; break; case T_TYPEDEF: type = typedef_declaration(ctype); if (Token.token == T_SEMI) type = -1; break; case T_IDENT: type = type_of_typedef(Text, ctype); break; default: fatals("Illegal type, token", Token.tokstr); } return (type); } // Given a type parsed by parse_type(), scan in any following // '*' tokens and return the new type int parse_stars(int type) { while (1) { if (Token.token != T_STAR) break; type = pointer_to(type); scan(&Token); } return (type); } // Parse a type which appears inside a cast int parse_cast(struct symtable **ctype) { int type, class = 0; // Get the type inside the parentheses type = parse_stars(parse_type(ctype, &class)); // Do some error checking. I'm sure more can be done if (type == P_STRUCT || type == P_UNION || type == P_VOID) fatal("Cannot cast to a struct, union or void type"); return (type); } // Given a type, parse an expression of literals and ensure // that the type of this expression matches the given type. // Parse any type cast that precedes the expression. // If an integer literal, return this value. // If a string literal, return the label number of the string. 
int parse_literal(int type) { struct ASTnode *tree; // Parse the expression and optimise the resulting AST tree tree = optimise(binexpr(0)); // If there's a cast, get the child and // mark it as having the type from the cast if (tree->op == A_CAST) { tree->left->type = tree->type; tree = tree->left; } // The tree must now have an integer or string literal if (tree->op != A_INTLIT && tree->op != A_STRLIT) fatal("Cannot initialise globals with a general expression"); // If the type is char * and if (type == pointer_to(P_CHAR)) { // We have a string literal, return the label number if (tree->op == A_STRLIT) return (tree->a_intvalue); // We have a zero int literal, so that's a NULL if (tree->op == A_INTLIT && tree->a_intvalue == 0) return (0); } // We only get here with an integer literal. The input type // is an integer type and is wide enough to hold the literal value if (inttype(type) && typesize(type, NULL) >= typesize(tree->type, NULL)) return (tree->a_intvalue); fatal("Type mismatch: literal vs. variable"); return (0); // Keep -Wall happy } // Given a pointer to a symbol that may already exist // return true if this symbol doesn't exist. 
We use // this function to convert externs into globals int is_new_symbol(struct symtable *sym, int class, int type, struct symtable *ctype) { // There is no existing symbol, thus is new if (sym==NULL) return(1); // global versus extern: if they match that it's not new // and we can convert the class to global if ((sym->class== C_GLOBAL && class== C_EXTERN) || (sym->class== C_EXTERN && class== C_GLOBAL)) { // If the types don't match, there's a problem if (type != sym->type) fatals("Type mismatch between global/extern", sym->name); // Struct/unions, also compare the ctype if (type >= P_STRUCT && ctype != sym->ctype) fatals("Type mismatch between global/extern", sym->name); // If we get to here, the types match, so mark the symbol // as global sym->class= C_GLOBAL; // Return that symbol is not new return(0); } // It must be a duplicate symbol if we get here fatals("Duplicate global variable declaration", sym->name); return(-1); // Keep -Wall happy } // Given the type, name and class of a scalar variable, // parse any initialisation value and allocate storage for it. // Return the variable's symbol table entry. 
static struct symtable *scalar_declaration(char *varname, int type, struct symtable *ctype, int class, struct ASTnode **tree) { struct symtable *sym = NULL; struct ASTnode *varnode, *exprnode; *tree = NULL; // Add this as a known scalar switch (class) { case C_STATIC: case C_EXTERN: case C_GLOBAL: // See if this variable is new or already exists sym= findglob(varname); if (is_new_symbol(sym, class, type, ctype)) sym = addglob(varname, type, ctype, S_VARIABLE, class, 1, 0); break; case C_LOCAL: sym = addlocl(varname, type, ctype, S_VARIABLE, 1); break; case C_PARAM: sym = addparm(varname, type, ctype, S_VARIABLE); break; case C_MEMBER: sym = addmemb(varname, type, ctype, S_VARIABLE, 1); break; } // The variable is being initialised if (Token.token == T_ASSIGN) { // Only possible for a global or local if (class != C_GLOBAL && class != C_LOCAL && class != C_STATIC) fatals("Variable can not be initialised", varname); scan(&Token); // Globals must be assigned a literal value if (class == C_GLOBAL || class == C_STATIC) { // Create one initial value for the variable and // parse this value sym->initlist = (int *) malloc(sizeof(int)); sym->initlist[0] = parse_literal(type); } if (class == C_LOCAL) { // Make an A_IDENT AST node with the variable varnode = mkastleaf(A_IDENT, sym->type, sym->ctype, sym, 0); // Get the expression for the assignment, make into a rvalue exprnode = binexpr(0); exprnode->rvalue = 1; // Ensure the expression's type matches the variable exprnode = modify_type(exprnode, varnode->type, varnode->ctype, 0); if (exprnode == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree *tree = mkastnode(A_ASSIGN, exprnode->type, exprnode->ctype, exprnode, NULL, varnode, NULL, 0); } } // Generate any global space if (class == C_GLOBAL || class == C_STATIC) genglobsym(sym); return (sym); } // Given the type, name and class of an variable, parse // the size of the array, if any. 
Then parse any initialisation // value and allocate storage for it. // Return the variable's symbol table entry. static struct symtable *array_declaration(char *varname, int type, struct symtable *ctype, int class) { struct symtable *sym; // New symbol table entry int nelems = -1; // Assume the number of elements won't be given int maxelems; // The maximum number of elements in the init list int *initlist; // The list of initial elements int i = 0, j; // Skip past the '[' scan(&Token); // See we have an array size if (Token.token != T_RBRACKET) { nelems = parse_literal(P_INT); if (nelems <= 0) fatald("Array size is illegal", nelems); } // Ensure we have a following ']' match(T_RBRACKET, "]"); // Add this as a known array. We treat the // array as a pointer to its elements' type switch (class) { case C_STATIC: case C_EXTERN: case C_GLOBAL: // See if this variable is new or already exists sym= findglob(varname); if (is_new_symbol(sym, class, pointer_to(type), ctype)) sym = addglob(varname, pointer_to(type), ctype, S_ARRAY, class, 0, 0); break; case C_LOCAL: sym = addlocl(varname, pointer_to(type), ctype, S_ARRAY, 0); break; default: fatal("Declaration of array parameters is not implemented"); } // Array initialisation if (Token.token == T_ASSIGN) { if (class != C_GLOBAL && class != C_STATIC) fatals("Variable can not be initialised", varname); scan(&Token); // Get the following left curly bracket match(T_LBRACE, "{"); #define TABLE_INCREMENT 10 // If the array already has nelems, allocate that many elements // in the list. Otherwise, start with TABLE_INCREMENT. 
if (nelems != -1) maxelems = nelems; else maxelems = TABLE_INCREMENT; initlist = (int *) malloc(maxelems * sizeof(int)); // Loop getting a new literal value from the list while (1) { // Check we can add the next value, then parse and add it if (nelems != -1 && i == maxelems) fatal("Too many values in initialisation list"); initlist[i++] = parse_literal(type); // Increase the list size if the original size was // not set and we have hit the end of the current list if (nelems == -1 && i == maxelems) { maxelems += TABLE_INCREMENT; initlist = (int *) realloc(initlist, maxelems * sizeof(int)); } // Leave when we hit the right curly bracket if (Token.token == T_RBRACE) { scan(&Token); break; } // Next token must be a comma, then comma(); } // Zero any unused elements in the initlist. // Attach the list to the symbol table entry for (j = i; j < sym->nelems; j++) initlist[j] = 0; if (i > nelems) nelems = i; sym->initlist = initlist; } // Set the size of the array and the number of elements // Only externs can have no elements. if (class != C_EXTERN && nelems<=0) fatals("Array must have non-zero elements", sym->name); sym->nelems = nelems; sym->size = sym->nelems * typesize(type, ctype); // Generate any global space if (class == C_GLOBAL || class == C_STATIC) genglobsym(sym); return (sym); } // Given a pointer to the new function being declared and // a possibly NULL pointer to the function's previous declaration, // parse a list of parameters and cross-check them against the // previous declaration. 
Return the count of parameters static int param_declaration_list(struct symtable *oldfuncsym, struct symtable *newfuncsym) { int type, paramcnt = 0; struct symtable *ctype; struct symtable *protoptr = NULL; struct ASTnode *unused; // Get the pointer to the first prototype parameter if (oldfuncsym != NULL) protoptr = oldfuncsym->member; // Loop getting any parameters while (Token.token != T_RPAREN) { // If the first token is 'void' if (Token.token == T_VOID) { // Peek at the next token. If a ')', the function // has no parameters, so leave the loop. scan(&Peektoken); if (Peektoken.token == T_RPAREN) { // Move the Peektoken into the Token paramcnt = 0; scan(&Token); break; } } // Get the type of the next parameter type = declaration_list(&ctype, C_PARAM, T_COMMA, T_RPAREN, &unused); if (type == -1) fatal("Bad type in parameter list"); // Ensure the type of this parameter matches the prototype if (protoptr != NULL) { if (type != protoptr->type) fatald("Type doesn't match prototype for parameter", paramcnt + 1); protoptr = protoptr->next; } paramcnt++; // Stop when we hit the right parenthesis if (Token.token == T_RPAREN) break; // We need a comma as separator comma(); } if (oldfuncsym != NULL && paramcnt != oldfuncsym->nelems) fatals("Parameter count mismatch for function", oldfuncsym->name); // Return the count of parameters return (paramcnt); } // // function_declaration: type identifier '(' parameter_list ')' ; // | type identifier '(' parameter_list ')' compound_statement ; // // Parse the declaration of function. static struct symtable *function_declaration(char *funcname, int type, struct symtable *ctype, int class) { struct ASTnode *tree, *finalstmt; struct symtable *oldfuncsym, *newfuncsym = NULL; int endlabel, paramcnt; // Text has the identifier's name. If this exists and is a // function, get the id. Otherwise, set oldfuncsym to NULL. 
if ((oldfuncsym = findsymbol(funcname)) != NULL) if (oldfuncsym->stype != S_FUNCTION) oldfuncsym = NULL; // If this is a new function declaration, get a // label-id for the end label, and add the function // to the symbol table, if (oldfuncsym == NULL) { endlabel = genlabel(); // Assumtion: functions only return scalar types, so NULL below newfuncsym = addglob(funcname, type, NULL, S_FUNCTION, class, 0, endlabel); } // Scan in the '(', any parameters and the ')'. // Pass in any existing function prototype pointer lparen(); paramcnt = param_declaration_list(oldfuncsym, newfuncsym); rparen(); // If this is a new function declaration, update the // function symbol entry with the number of parameters. // Also copy the parameter list into the function's node. if (newfuncsym) { newfuncsym->nelems = paramcnt; newfuncsym->member = Parmhead; oldfuncsym = newfuncsym; } // Clear out the parameter list Parmhead = Parmtail = NULL; // Declaration ends in a semicolon, only a prototype. if (Token.token == T_SEMI) return (oldfuncsym); // This is not just a prototype. // Set the Functionid global to the function's symbol pointer Functionid = oldfuncsym; // Get the AST tree for the compound statement and mark // that we have parsed no loops or switches yet Looplevel = 0; Switchlevel = 0; lbrace(); tree = compound_statement(0); rbrace(); // If the function type isn't P_VOID .. if (type != P_VOID) { // Error if no statements in the function if (tree == NULL) fatal("No statements in function with non-void type"); // Check that the last AST operation in the // compound statement was a return statement finalstmt = (tree->op == A_GLUE) ? 
tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); } // Build the A_FUNCTION node which has the function's symbol pointer // and the compound statement sub-tree tree = mkastunary(A_FUNCTION, type, ctype, tree, oldfuncsym, endlabel); // Do optimisations on the AST tree tree = optimise(tree); // Dump the AST tree if requested if (O_dumpAST) { dumpAST(tree, NOLABEL, 0); fprintf(stdout, "\n\n"); } // Generate the assembly code for it genAST(tree, NOLABEL, NOLABEL, NOLABEL, 0); // Now free the symbols associated // with this function freeloclsyms(); return (oldfuncsym); } // Parse composite type declarations: structs or unions. // Either find an existing struct/union declaration, or build // a struct/union symbol table entry and return its pointer. static struct symtable *composite_declaration(int type) { struct symtable *ctype = NULL; struct symtable *m; struct ASTnode *unused; int offset; int t; // Skip the struct/union keyword scan(&Token); // See if there is a following struct/union name if (Token.token == T_IDENT) { // Find any matching composite type if (type == P_STRUCT) ctype = findstruct(Text); else ctype = findunion(Text); scan(&Token); } // If the next token isn't an LBRACE , this is // the usage of an existing struct/union type. // Return the pointer to the type. if (Token.token != T_LBRACE) { if (ctype == NULL) fatals("unknown struct/union type", Text); return (ctype); } // Ensure this struct/union type hasn't been // previously defined if (ctype) fatals("previously defined struct/union", Text); // Build the composite type and skip the left brace if (type == P_STRUCT) ctype = addstruct(Text); else ctype = addunion(Text); scan(&Token); // Scan in the list of members while (1) { // Get the next member. 
m is used as a dummy t = declaration_list(&m, C_MEMBER, T_SEMI, T_RBRACE, &unused); if (t == -1) fatal("Bad type in member list"); if (Token.token == T_SEMI) scan(&Token); if (Token.token == T_RBRACE) break; } // Attach to the struct type's node rbrace(); if (Membhead == NULL) fatals("No members in struct", ctype->name); ctype->member = Membhead; Membhead = Membtail = NULL; // Set the offset of the initial member // and find the first free byte after it m = ctype->member; m->st_posn = 0; offset = typesize(m->type, m->ctype); // Set the position of each successive member in the composite type // Unions are easy. For structs, align the member and find the next free byte for (m = m->next; m != NULL; m = m->next) { // Set the offset for this member if (type == P_STRUCT) m->st_posn = genalign(m->type, offset, 1); else m->st_posn = 0; // Get the offset of the next free byte after this member offset += typesize(m->type, m->ctype); } // Set the overall size of the composite type ctype->size = offset; return (ctype); } // Parse an enum declaration static void enum_declaration(void) { struct symtable *etype = NULL; char *name = NULL; int intval = 0; // Skip the enum keyword. scan(&Token); // If there's a following enum type name, get a // pointer to any existing enum type node. if (Token.token == T_IDENT) { etype = findenumtype(Text); name = strdup(Text); // As it gets tromped soon scan(&Token); } // If the next token isn't a LBRACE, check // that we have an enum type name, then return if (Token.token != T_LBRACE) { if (etype == NULL) fatals("undeclared enum type:", name); return; } // We do have an LBRACE. Skip it scan(&Token); // If we have an enum type name, ensure that it // hasn't been declared before. 
if (etype != NULL) fatals("enum type redeclared:", etype->name); else // Build an enum type node for this identifier etype = addenum(name, C_ENUMTYPE, 0); // Loop to get all the enum values while (1) { // Ensure we have an identifier // Copy it in case there's an int literal coming up ident(); name = strdup(Text); // Ensure this enum value hasn't been declared before etype = findenumval(name); if (etype != NULL) fatals("enum value redeclared:", Text); // If the next token is an '=', skip it and // get the following int literal if (Token.token == T_ASSIGN) { scan(&Token); if (Token.token != T_INTLIT) fatal("Expected int literal after '='"); intval = Token.intvalue; scan(&Token); } // Build an enum value node for this identifier. // Increment the value for the next enum identifier. etype = addenum(name, C_ENUMVAL, intval++); // Bail out on a right curly bracket, else get a comma if (Token.token == T_RBRACE) break; comma(); } scan(&Token); // Skip over the right curly bracket } // Parse a typedef declaration and return the type // and ctype that it represents static int typedef_declaration(struct symtable **ctype) { int type, class = 0; // Skip the typedef keyword. scan(&Token); // Get the actual type following the keyword type = parse_type(ctype, &class); if (class != 0) fatal("Can't have extern in a typedef declaration"); // See if the typedef identifier already exists if (findtypedef(Text) != NULL) fatals("redefinition of typedef", Text); // Get any following '*' tokens type = parse_stars(type); // It doesn't exist so add it to the typedef list addtypedef(Text, type, *ctype); scan(&Token); return (type); } // Given a typedef name, return the type it represents static int type_of_typedef(char *name, struct symtable **ctype) { struct symtable *t; // Look up the typedef in the list t = findtypedef(name); if (t == NULL) fatals("unknown type", name); scan(&Token); *ctype = t->ctype; return (t->type); } // Parse the declaration of a variable or function. 
// The type and any following '*'s have been scanned, and we // have the identifier in the Token variable. // The class argument is the variable's class. // Return a pointer to the symbol's entry in the symbol table static struct symtable *symbol_declaration(int type, struct symtable *ctype, int class, struct ASTnode **tree) { struct symtable *sym = NULL; char *varname = strdup(Text); // Ensure that we have an identifier. // We copied it above so we can scan more tokens in, e.g. // an assignment expression for a local variable. ident(); // Deal with function declarations if (Token.token == T_LPAREN) { return (function_declaration(varname, type, ctype, class)); } // See if this array or scalar variable has already been declared switch (class) { case C_EXTERN: case C_STATIC: case C_GLOBAL: case C_LOCAL: case C_PARAM: if (findlocl(varname) != NULL) fatals("Duplicate local variable declaration", varname); case C_MEMBER: if (findmember(varname) != NULL) fatals("Duplicate struct/union member declaration", varname); } // Add the array or scalar variable to the symbol table if (Token.token == T_LBRACKET) { sym = array_declaration(varname, type, ctype, class); *tree= NULL; // Local arrays are not initialised } else sym = scalar_declaration(varname, type, ctype, class, tree); return (sym); } // Parse a list of symbols where there is an initial type. // Return the type of the symbols. et1 and et2 are end tokens. int declaration_list(struct symtable **ctype, int class, int et1, int et2, struct ASTnode **gluetree) { int inittype, type; struct symtable *sym; struct ASTnode *tree; *gluetree = NULL; // Get the initial type. 
If -1, it was // a composite type definition, return this if ((inittype = parse_type(ctype, &class)) == -1) return (inittype); // Now parse the list of symbols while (1) { // See if this symbol is a pointer type = parse_stars(inittype); // Parse this symbol sym = symbol_declaration(type, *ctype, class, &tree); // We parsed a function, there is no list so leave if (sym->stype == S_FUNCTION) { if (class != C_GLOBAL && class != C_STATIC) fatal("Function definition not at global level"); return (type); } // Glue any AST tree from a local declaration // to build a sequence of assignments to perform if (*gluetree == NULL) *gluetree = tree; else *gluetree = mkastnode(A_GLUE, P_NONE, NULL, *gluetree, NULL, tree, NULL, 0); // We are at the end of the list, leave if (Token.token == et1 || Token.token == et2) return (type); // Otherwise, we need a comma as separator comma(); } return(0); // Keep -Wall happy } // Parse one or more global declarations, either // variables, functions or structs void global_declarations(void) { struct symtable *ctype= NULL; struct ASTnode *unused; while (Token.token != T_EOF) { declaration_list(&ctype, C_GLOBAL, T_SEMI, T_EOF, &unused); // Skip any semicolons and right curly brackets if (Token.token == T_SEMI) scan(&Token); } } ================================================ FILE: 57_Mop_up_pt3/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c void reject_token(struct token *t); int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, int type, struct symtable *ctype, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue); struct ASTnode *mkastleaf(int op, int type, struct symtable *ctype, struct symtable *sym, int intvalue); struct ASTnode *mkastunary(int op, int type, struct symtable *ctype, struct ASTnode *left, struct symtable *sym, int intvalue); void dumpAST(struct ASTnode *n, 
int label, int level); // gen.c int genlabel(void); int genAST(struct ASTnode *n, int iflabel, int looptoplabel, int loopendlabel, int parentASTop); void genpreamble(); void genpostamble(); void genfreeregs(int keepreg); void genglobsym(struct symtable *node); int genglobstr(char *strvalue, int append); void genglobstrend(void); int genprimsize(int type); int genalign(int type, int offset, int direction); void genreturn(int reg, int id); // cg.c int cgprimsize(int type); int cgalign(int type, int offset, int direction); void cgtextseg(); void cgdataseg(); int alloc_register(void); void freeall_registers(int keepreg); void spill_all_regs(void); void cgpreamble(); void cgpostamble(); void cgfuncpreamble(struct symtable *sym); void cgfuncpostamble(struct symtable *sym); int cgloadint(int value, int type); int cgloadglob(struct symtable *sym, int op); int cgloadlocal(struct symtable *sym, int op); int cgloadglobstr(int label); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdiv(int r1, int r2); int cgshlconst(int r, int val); int cgcall(struct symtable *sym, int numargs); void cgcopyarg(int r, int argposn); int cgstorglob(int r, struct symtable *sym); int cgstorlocal(int r, struct symtable *sym); void cgglobsym(struct symtable *node); void cgglobstr(int l, char *strvalue, int append); void cgglobstrend(void); int cgcompare_and_set(int ASTop, int r1, int r2); int cgcompare_and_jump(int ASTop, int r1, int r2, int label); void cglabel(int l); void cgjump(int l); int cgwiden(int r, int oldtype, int newtype); void cgreturn(int reg, struct symtable *sym); int cgaddress(struct symtable *sym); int cgderef(int r, int type); int cgstorderef(int r1, int r2, int type); int cgnegate(int r); int cginvert(int r); int cglognot(int r); void cgloadboolean(int r, int val); int cgboolean(int r, int op, int label); int cgand(int r1, int r2); int cgor(int r1, int r2); int cgxor(int r1, int r2); int cgshl(int r1, int r2); int cgshr(int r1, int r2); void 
cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel); void cgmove(int r1, int r2); // expr.c struct ASTnode *expression_list(int endtoken); struct ASTnode *binexpr(int ptp); // stmt.c struct ASTnode *compound_statement(int inswitch); // misc.c void match(int t, char *what); void semi(void); void lbrace(void); void rbrace(void); void lparen(void); void rparen(void); void ident(void); void comma(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node); struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn); struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn); struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int nelems); struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype); struct symtable *addstruct(char *name); struct symtable *addunion(char *name); struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int nelems); struct symtable *addenum(char *name, int class, int value); struct symtable *addtypedef(char *name, int type, struct symtable *ctype); struct symtable *findglob(char *s); struct symtable *findlocl(char *s); struct symtable *findsymbol(char *s); struct symtable *findmember(char *s); struct symtable *findstruct(char *s); struct symtable *findunion(char *s); struct symtable *findenumtype(char *s); struct symtable *findenumval(char *s); struct symtable *findtypedef(char *s); void clear_symtable(void); void freeloclsyms(void); void freestaticsyms(void); void dumptable(struct symtable *head, char *name, int indent); void dumpsymtables(void); // decl.c int parse_type(struct symtable **ctype, int *class); int parse_stars(int type); int parse_cast(struct 
symtable **ctype); int declaration_list(struct symtable **ctype, int class, int et1, int et2, struct ASTnode **gluetree); void global_declarations(void); // types.c int inttype(int type); int ptrtype(int type); int pointer_to(int type); int value_at(int type); int typesize(int type, struct symtable *ctype); struct ASTnode *modify_type(struct ASTnode *tree, int rtype, struct symtable *rctype, int op); // opt.c struct ASTnode *optimise(struct ASTnode *n); ================================================ FILE: 57_Mop_up_pt3/defs.h ================================================ #include #include #include #include #include "incdir.h" // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 enum { TEXTLEN = 512 // Length of identifiers in input }; // Commands and default filenames #define AOUT "a.out" #ifdef __NASM__ #define ASCMD "nasm -f elf64 -w-ptr -pnasmext.inc -o " #define LDCMD "cc -no-pie -fno-plt -Wall -o " #else #define ASCMD "as -o " #define LDCMD "cc -o " #endif #define CPPCMD "cpp -nostdinc -isystem " // Token types enum { T_EOF, // Binary operators T_ASSIGN, T_ASPLUS, T_ASMINUS, T_ASSTAR, T_ASSLASH, T_QUESTION, T_LOGOR, T_LOGAND, T_OR, T_XOR, T_AMPER, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, T_LSHIFT, T_RSHIFT, T_PLUS, T_MINUS, T_STAR, T_SLASH, // Other operators T_INC, T_DEC, T_INVERT, T_LOGNOT, // Type keywords T_VOID, T_CHAR, T_INT, T_LONG, // Other keywords T_IF, T_ELSE, T_WHILE, T_FOR, T_RETURN, T_STRUCT, T_UNION, T_ENUM, T_TYPEDEF, T_EXTERN, T_BREAK, T_CONTINUE, T_SWITCH, T_CASE, T_DEFAULT, T_SIZEOF, T_STATIC, // Structural tokens T_INTLIT, T_STRLIT, T_SEMI, T_IDENT, T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, T_LBRACKET, T_RBRACKET, T_COMMA, T_DOT, T_ARROW, T_COLON }; // Token structure struct token { int token; // Token type, from the enum list above char *tokstr; // String version of the token int intvalue; // For T_INTLIT, the integer value }; // AST node types. 
The first few line up // with the related tokens enum { A_ASSIGN = 1, A_ASPLUS, A_ASMINUS, A_ASSTAR, A_ASSLASH, // 1 A_TERNARY, A_LOGOR, A_LOGAND, A_OR, A_XOR, A_AND, // 6 A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, A_LSHIFT, A_RSHIFT, // 12 A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, // 20 A_INTLIT, A_STRLIT, A_IDENT, A_GLUE, // 24 A_IF, A_WHILE, A_FUNCTION, A_WIDEN, A_RETURN, // 28 A_FUNCCALL, A_DEREF, A_ADDR, A_SCALE, // 33 A_PREINC, A_PREDEC, A_POSTINC, A_POSTDEC, // 37 A_NEGATE, A_INVERT, A_LOGNOT, A_TOBOOL, A_BREAK, // 41 A_CONTINUE, A_SWITCH, A_CASE, A_DEFAULT, A_CAST // 46 }; // Primitive types. The bottom 4 bits is an integer // value that represents the level of indirection, // e.g. 0= no pointer, 1= pointer, 2= pointer pointer etc. enum { P_NONE, P_VOID = 16, P_CHAR = 32, P_INT = 48, P_LONG = 64, P_STRUCT=80, P_UNION=96 }; // Structural types enum { S_VARIABLE, S_FUNCTION, S_ARRAY }; // Storage classes enum { C_GLOBAL = 1, // Globally visible symbol C_LOCAL, // Locally visible symbol C_PARAM, // Locally visible function parameter C_EXTERN, // External globally visible symbol C_STATIC, // Static symbol, visible in one file C_STRUCT, // A struct C_UNION, // A union C_MEMBER, // Member of a struct or union C_ENUMTYPE, // A named enumeration type C_ENUMVAL, // A named enumeration value C_TYPEDEF // A named typedef }; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol struct symtable *ctype; // If struct/union, ptr to that type int stype; // Structural type for the symbol int class; // Storage class for the symbol int size; // Total size in bytes of this symbol int nelems; // Functions: # params. 
Arrays: # elements #define st_endlabel st_posn // For functions, the end label int st_posn; // For locals, the negative offset // from the stack base pointer int *initlist; // List of initial values struct symtable *next; // Next symbol in one list struct symtable *member; // First member of a function, struct, }; // union or enum // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates struct symtable *ctype; // If struct/union, ptr to that type int rvalue; // True if the node is an rvalue struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; struct symtable *sym; // For many AST nodes, the pointer to // the symbol in the symbol table #define a_intvalue a_size // For A_INTLIT, the integer value int a_size; // For A_SCALE, the size to scale by }; enum { NOREG = -1, // Use NOREG when the AST generation // functions have no register to return NOLABEL = 0 // Use NOLABEL when we have no label to // pass to genAST() }; ================================================ FILE: 57_Mop_up_pt3/expr.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of expressions // Copyright (c) 2019 Warren Toomey, GPL3 // expression_list: // | expression // | expression ',' expression_list // ; // Parse a list of zero or more comma-separated expressions and // return an AST composed of A_GLUE nodes with the left-hand child // being the sub-tree of previous expressions (or NULL) and the right-hand // child being the next expression. Each A_GLUE node will have size field // set to the number of expressions in the tree at this point. 
If no // expressions are parsed, NULL is returned struct ASTnode *expression_list(int endtoken) { struct ASTnode *tree = NULL; struct ASTnode *child = NULL; int exprcount = 0; // Loop until the end token while (Token.token != endtoken) { // Parse the next expression and increment the expression count child = binexpr(0); exprcount++; // Build an A_GLUE AST node with the previous tree as the left child // and the new expression as the right child. Store the expression count. tree = mkastnode(A_GLUE, P_NONE, NULL, tree, NULL, child, NULL, exprcount); // Stop when we reach the end token if (Token.token == endtoken) break; // Must have a ',' at this point match(T_COMMA, ","); } // Return the tree of expressions return (tree); } // Parse a function call and return its AST static struct ASTnode *funccall(void) { struct ASTnode *tree; struct symtable *funcptr; // Check that the identifier has been defined as a function, // then make a leaf node for it. if ((funcptr = findsymbol(Text)) == NULL || funcptr->stype != S_FUNCTION) { fatals("Undeclared function", Text); } // Get the '(' lparen(); // Parse the argument expression list tree = expression_list(T_RPAREN); // XXX Check type of each argument against the function's prototype // Build the function call AST node. Store the // function's return type as this node's type. 
// Also record the function's symbol-id tree = mkastunary(A_FUNCCALL, funcptr->type, funcptr->ctype, tree, funcptr, 0); // Get the ')' rparen(); return (tree); } // Parse the index into an array and return an AST tree for it static struct ASTnode *array_access(struct ASTnode *left) { struct ASTnode *right; // Check that the sub-tree is a pointer if (!ptrtype(left->type)) fatal("Not an array or pointer"); // Get the '[' scan(&Token); // Parse the following expression right = binexpr(0); // Get the ']' match(T_RBRACKET, "]"); // Ensure that this is of int type if (!inttype(right->type)) fatal("Array index is not of integer type"); // Make the left tree an rvalue left->rvalue = 1; // Scale the index by the size of the element's type right = modify_type(right, left->type, left->ctype, A_ADD); // Return an AST tree where the array's base has the offset added to it, // and dereference the element. Still an lvalue at this point. left = mkastnode(A_ADD, left->type, left->ctype, left, NULL, right, NULL, 0); left = mkastunary(A_DEREF, value_at(left->type), left->ctype, left, NULL, 0); return (left); } // Parse the member reference of a struct or union // and return an AST tree for it. If withpointer is true, // the access is through a pointer to the member. static struct ASTnode *member_access(struct ASTnode *left, int withpointer) { struct ASTnode *right; struct symtable *typeptr; struct symtable *m; // Check that the left AST tree is a pointer to struct or union if (withpointer && left->type != pointer_to(P_STRUCT) && left->type != pointer_to(P_UNION)) fatal("Expression is not a pointer to a struct/union"); // Or, check that the left AST tree is a struct or union. // If so, change it from an A_IDENT to an A_ADDR so that // we get the base address, not the value at this address. 
if (!withpointer) { if (left->type == P_STRUCT || left->type == P_UNION) left->op = A_ADDR; else fatal("Expression is not a struct/union"); } // Get the details of the composite type typeptr = left->ctype; // Skip the '.' or '->' token and get the member's name scan(&Token); ident(); // Find the matching member's name in the type // Die if we can't find it for (m = typeptr->member; m != NULL; m = m->next) if (!strcmp(m->name, Text)) break; if (m == NULL) fatals("No member found in struct/union: ", Text); // Make the left tree an rvalue left->rvalue = 1; // Build an A_INTLIT node with the offset right = mkastleaf(A_INTLIT, P_INT, NULL, NULL, m->st_posn); // Add the member's offset to the base of the struct/union // and dereference it. Still an lvalue at this point left = mkastnode(A_ADD, pointer_to(m->type), m->ctype, left, NULL, right, NULL, 0); left = mkastunary(A_DEREF, m->type, m->ctype, left, NULL, 0); return (left); } // Parse a parenthesised expression and // return an AST node representing it. static struct ASTnode *paren_expression(int ptp) { struct ASTnode *n; int type = 0; struct symtable *ctype = NULL; // Beginning of a parenthesised expression, skip the '('. scan(&Token); // If the token after is a type identifier, this is a cast expression switch (Token.token) { case T_IDENT: // We have to see if the identifier matches a typedef. // If not, treat it as an expression. if (findtypedef(Text) == NULL) { n = binexpr(0); // ptp is zero as expression inside ( ) break; } case T_VOID: case T_CHAR: case T_INT: case T_LONG: case T_STRUCT: case T_UNION: case T_ENUM: // Get the type inside the parentheses type = parse_cast(&ctype); // Skip the closing ')' and then parse the following expression rparen(); default: n = binexpr(ptp); // Scan in the expression. We pass in ptp // as the cast doesn't change the // expression's precedence } // We now have at least an expression in n, and possibly a non-zero type // in type if there was a cast. 
Skip the closing ')' if there was no cast. if (type == 0) rparen(); else // Otherwise, make a unary AST node for the cast n = mkastunary(A_CAST, type, ctype, n, NULL, 0); return (n); } // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(int ptp) { struct ASTnode *n; struct symtable *enumptr; struct symtable *varptr; int id; int type = 0; int size, class; struct symtable *ctype; switch (Token.token) { case T_STATIC: case T_EXTERN: fatal("Compiler doesn't support static or extern local declarations"); case T_SIZEOF: // Skip the T_SIZEOF and ensure we have a left parenthesis scan(&Token); if (Token.token != T_LPAREN) fatal("Left parenthesis expected after sizeof"); scan(&Token); // Get the type inside the parentheses type = parse_stars(parse_type(&ctype, &class)); // Get the type's size size = typesize(type, ctype); rparen(); // Make a leaf node int literal with the size return (mkastleaf(A_INTLIT, P_INT, NULL, NULL, size)); case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. // Make it a P_CHAR if it's within the P_CHAR range if (Token.intvalue >= 0 && Token.intvalue < 256) n = mkastleaf(A_INTLIT, P_CHAR, NULL, NULL, Token.intvalue); else n = mkastleaf(A_INTLIT, P_INT, NULL, NULL, Token.intvalue); break; case T_STRLIT: // For a STRLIT token, generate the assembly for it. id = genglobstr(Text, 0); // For successive STRLIT tokens, append their contents // to this one while (1) { scan(&Peektoken); if (Peektoken.token != T_STRLIT) break; genglobstr(Text, 1); scan(&Token); // To skip it properly } // Now make a leaf AST node for it. id is the string's label. genglobstrend(); n = mkastleaf(A_STRLIT, pointer_to(P_CHAR), NULL, NULL, id); break; case T_IDENT: // If the identifier matches an enum value, // return an A_INTLIT node if ((enumptr = findenumval(Text)) != NULL) { n = mkastleaf(A_INTLIT, P_INT, NULL, NULL, enumptr->st_posn); break; } // See if this identifier exists as a symbol. 
For arrays, set rvalue to 1. if ((varptr = findsymbol(Text)) == NULL) fatals("Unknown variable or function", Text); switch (varptr->stype) { case S_VARIABLE: n = mkastleaf(A_IDENT, varptr->type, varptr->ctype, varptr, 0); break; case S_ARRAY: n = mkastleaf(A_ADDR, varptr->type, varptr->ctype, varptr, 0); n->rvalue = 1; break; case S_FUNCTION: // Function call, see if the next token is a left parenthesis scan(&Token); if (Token.token != T_LPAREN) fatals("Function name used without parentheses", Text); return (funccall()); default: fatals("Identifier not a scalar or array variable", Text); } break; case T_LPAREN: return (paren_expression(ptp)); default: fatals("Expecting a primary expression, got token", Token.tokstr); } // Scan in the next token and return the leaf node scan(&Token); return (n); } // Parse a postfix expression and return // an AST node representing it. The // identifier is already in Text. static struct ASTnode *postfix(int ptp) { struct ASTnode *n; // Get the primary expression n = primary(ptp); // Loop until there are no more postfix operators while (1) { switch (Token.token) { case T_LBRACKET: // An array reference n = array_access(n); break; case T_DOT: // Access into a struct or union n = member_access(n, 0); break; case T_ARROW: // Pointer access into a struct or union n = member_access(n, 1); break; case T_INC: // Post-increment: skip over the token if (n->rvalue == 1) fatal("Cannot ++ on rvalue"); scan(&Token); // Can't do it twice if (n->op == A_POSTINC || n->op == A_POSTDEC) fatal("Cannot ++ and/or -- more than once"); // and change the AST operation n->op = A_POSTINC; break; case T_DEC: // Post-decrement: skip over the token if (n->rvalue == 1) fatal("Cannot -- on rvalue"); scan(&Token); // Can't do it twice if (n->op == A_POSTINC || n->op == A_POSTDEC) fatal("Cannot ++ and/or -- more than once"); // and change the AST operation n->op = A_POSTDEC; break; default: return (n); } } return (NULL); // Keep -Wall happy } // Convert a binary 
operator token into a binary AST operation. // We rely on a 1:1 mapping from token to AST operation static int binastop(int tokentype) { if (tokentype > T_EOF && tokentype <= T_SLASH) return (tokentype); fatals("Syntax error, token", Tstring[tokentype]); return (0); // Keep -Wall happy } // Return true if a token is right-associative, // false otherwise. static int rightassoc(int tokentype) { if (tokentype >= T_ASSIGN && tokentype <= T_ASSLASH) return (1); return (0); } // Operator precedence for each token. Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 10, // T_EOF, T_ASSIGN, T_ASPLUS, 10, 10, 10, // T_ASMINUS, T_ASSTAR, T_ASSLASH, 15, // T_QUESTION, 20, 30, // T_LOGOR, T_LOGAND 40, 50, 60, // T_OR, T_XOR, T_AMPER 70, 70, // T_EQ, T_NE 80, 80, 80, 80, // T_LT, T_GT, T_LE, T_GE 90, 90, // T_LSHIFT, T_RSHIFT 100, 100, // T_PLUS, T_MINUS 110, 110 // T_STAR, T_SLASH }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec; if (tokentype > T_SLASH) fatals("Token with no precedence in op_precedence:", Tstring[tokentype]); prec = OpPrec[tokentype]; if (prec == 0) fatals("Syntax error, token", Tstring[tokentype]); return (prec); } // prefix_expression: postfix_expression // | '*' prefix_expression // | '&' prefix_expression // | '-' prefix_expression // | '++' prefix_expression // | '--' prefix_expression // ; // Parse a prefix expression and return // a sub-tree representing it. 
static struct ASTnode *prefix(int ptp) { struct ASTnode *tree; switch (Token.token) { case T_AMPER: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(ptp); // Ensure that it's an identifier if (tree->op != A_IDENT) fatal("& operator must be followed by an identifier"); // Prevent '&' being performed on an array if (tree->sym->stype == S_ARRAY) fatal("& operator cannot be performed on an array"); // Now change the operator to A_ADDR and the type to // a pointer to the original type tree->op = A_ADDR; tree->type = pointer_to(tree->type); break; case T_STAR: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(ptp); // Ensure the tree's type is a pointer if (!ptrtype(tree->type)) fatal("* operator must be followed by an expression of pointer type"); // Prepend an A_DEREF operation to the tree tree = mkastunary(A_DEREF, value_at(tree->type), tree->ctype, tree, NULL, 0); break; case T_MINUS: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(ptp); // Prepend a A_NEGATE operation to the tree and // make the child an rvalue. Because chars are unsigned, // also widen this if needed to int so that it's signed tree->rvalue = 1; if (tree->type == P_CHAR) tree->type = P_INT; tree = mkastunary(A_NEGATE, tree->type, tree->ctype, tree, NULL, 0); break; case T_INVERT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(ptp); // Prepend a A_INVERT operation to the tree and // make the child an rvalue. tree->rvalue = 1; tree = mkastunary(A_INVERT, tree->type, tree->ctype, tree, NULL, 0); break; case T_LOGNOT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(ptp); // Prepend a A_LOGNOT operation to the tree and // make the child an rvalue. 
tree->rvalue = 1; tree = mkastunary(A_LOGNOT, tree->type, tree->ctype, tree, NULL, 0); break; case T_INC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(ptp); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("++ operator must be followed by an identifier"); // Prepend an A_PREINC operation to the tree tree = mkastunary(A_PREINC, tree->type, tree->ctype, tree, NULL, 0); break; case T_DEC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(ptp); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("-- operator must be followed by an identifier"); // Prepend an A_PREDEC operation to the tree tree = mkastunary(A_PREDEC, tree->type, tree->ctype, tree, NULL, 0); break; default: tree = postfix(ptp); } return (tree); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; struct ASTnode *ltemp, *rtemp; int ASTop; int tokentype; // Get the tree on the left. // Fetch the next token at the same time. 
left = prefix(ptp); // If we hit one of several terminating tokens, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON || tokentype == T_RBRACE) { left->rvalue = 1; return (left); } // While the precedence of this token is more than that of the // previous token precedence, or it's right associative and // equal to the previous token's precedence while ((op_precedence(tokentype) > ptp) || (rightassoc(tokentype) && op_precedence(tokentype) == ptp)) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Determine the operation to be performed on the sub-trees ASTop = binastop(tokentype); switch (ASTop) { case A_TERNARY: // Ensure we have a ':' token, scan in the expression after it match(T_COLON, ":"); ltemp = binexpr(0); // Build and return the AST for this statement. Use the middle // expression's type as the return type. XXX We should also // consider the third expression's type. return (mkastnode (A_TERNARY, right->type, right->ctype, left, right, ltemp, NULL, 0)); case A_ASSIGN: // Assignment // Make the right tree into an rvalue right->rvalue = 1; // Ensure the right's type matches the left right = modify_type(right, left->type, left->ctype, 0); if (right == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree. However, switch // left and right around, so that the right expression's // code will be generated before the left expression ltemp = left; left = right; right = ltemp; break; default: // We are not doing a ternary or assignment, so both trees should // be rvalues. Convert both trees into rvalue if they are lvalue trees left->rvalue = 1; right->rvalue = 1; // Ensure the two types are compatible by trying // to modify each tree to match the other's type. 
ltemp = modify_type(left, right->type, right->ctype, ASTop); rtemp = modify_type(right, left->type, left->ctype, ASTop); if (ltemp == NULL && rtemp == NULL) fatal("Incompatible types in binary expression"); if (ltemp != NULL) left = ltemp; if (rtemp != NULL) right = rtemp; } // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. left = mkastnode(binastop(tokentype), left->type, left->ctype, left, NULL, right, NULL, 0); // Some operators produce an int result regardless of their operands switch (binastop(tokentype)) { case A_LOGOR: case A_LOGAND: case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: left->type = P_INT; } // Update the details of the current token. // If we hit a terminating token, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON || tokentype == T_RBRACE) { left->rvalue = 1; return (left); } } // Return the tree we have when the precedence // is the same or lower left->rvalue = 1; return (left); } ================================================ FILE: 57_Mop_up_pt3/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number static int labelid = 1; int genlabel(void) { return (labelid++); } // Generate the code for an IF statement // and an optional ELSE clause. static int genIF(struct ASTnode *n, int looptoplabel, int loopendlabel) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. // When there is no ELSE clause, Lfalse _is_ // the ending label! Lfalse = genlabel(); if (n->right) Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. 
genAST(n->left, Lfalse, NOLABEL, NOLABEL, n->op); genfreeregs(NOREG); // Generate the true compound statement genAST(n->mid, NOLABEL, looptoplabel, loopendlabel, n->op); genfreeregs(NOREG); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); genfreeregs(NOREG); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = genlabel(); Lend = genlabel(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. genAST(n->left, Lend, Lstart, Lend, n->op); genfreeregs(NOREG); // Generate the compound statement for the body genAST(n->right, NOLABEL, Lstart, Lend, n->op); genfreeregs(NOREG); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Generate the code for a SWITCH statement static int genSWITCH(struct ASTnode *n) { int *caseval, *caselabel; int Ljumptop, Lend; int i, reg, defaultlabel = 0, casecount = 0; struct ASTnode *c; // Create arrays for the case values and associated labels. // Ensure that we have at least one position in each array. caseval = (int *) malloc((n->a_intvalue + 1) * sizeof(int)); caselabel = (int *) malloc((n->a_intvalue + 1) * sizeof(int)); // Generate labels for the top of the jump table, and the // end of the switch statement. Set a default label for // the end of the switch, in case we don't have a default. 
Ljumptop = genlabel(); Lend = genlabel(); defaultlabel = Lend; // Output the code to calculate the switch condition reg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, 0); cgjump(Ljumptop); genfreeregs(reg); // Walk the right-child linked list to // generate the code for each case for (i = 0, c = n->right; c != NULL; i++, c = c->right) { // Get a label for this case. Store it // and the case value in the arrays. // Record if it is the default case. caselabel[i] = genlabel(); caseval[i] = c->a_intvalue; cglabel(caselabel[i]); if (c->op == A_DEFAULT) defaultlabel = caselabel[i]; else casecount++; // Generate the case code. Pass in the end label for the breaks. // If case has no body, we will fall into the following body. if (c->left) genAST(c->left, NOLABEL, NOLABEL, Lend, 0); genfreeregs(NOREG); } // Ensure the last case jumps past the switch table cgjump(Lend); // Now output the switch table and the end label. cgswitch(reg, casecount, Ljumptop, caselabel, caseval, defaultlabel); cglabel(Lend); return (NOREG); } // Generate the code for an // A_LOGAND or A_LOGOR operation static int gen_logandor(struct ASTnode *n) { // Generate two labels int Lfalse = genlabel(); int Lend = genlabel(); int reg; // Generate the code for the left expression // followed by the jump to the false label reg= genAST(n->left, NOLABEL, NOLABEL, NOLABEL, 0); cgboolean(reg, n->op, Lfalse); genfreeregs(NOREG); // Generate the code for the right expression // followed by the jump to the false label reg= genAST(n->right, NOLABEL, NOLABEL, NOLABEL, 0); cgboolean(reg, n->op, Lfalse); genfreeregs(reg); // We didn't jump so set the right boolean value if (n->op== A_LOGAND) { cgloadboolean(reg, 1); cgjump(Lend); cglabel(Lfalse); cgloadboolean(reg, 0); } else { cgloadboolean(reg, 0); cgjump(Lend); cglabel(Lfalse); cgloadboolean(reg, 1); } cglabel(Lend); return(reg); } // Generate the code to copy the arguments of a // function call to its parameters, then call the // function itself. 
Return the register that holds // the function's return value. static int gen_funccall(struct ASTnode *n) { struct ASTnode *gluetree = n->left; int reg; int numargs = 0; // Save the registers before we copy the arguments spill_all_regs(); // If there is a list of arguments, walk this list // from the last argument (right-hand child) to the // first while (gluetree) { // Calculate the expression's value reg = genAST(gluetree->right, NOLABEL, NOLABEL, NOLABEL, gluetree->op); // Copy this into the n'th function parameter: size is 1, 2, 3, ... cgcopyarg(reg, gluetree->a_size); // Keep the first (highest) number of arguments if (numargs == 0) numargs = gluetree->a_size; gluetree = gluetree->left; } // Call the function, clean up the stack (based on numargs), // and return its result return (cgcall(n->sym, numargs)); } // Generate code for a ternary expression static int gen_ternary(struct ASTnode *n) { int Lfalse, Lend; int reg, expreg; // Generate two labels: one for the // false expression, and one for the // end of the overall expression Lfalse = genlabel(); Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. genAST(n->left, Lfalse, NOLABEL, NOLABEL, n->op); genfreeregs(NOREG); // Get a register to hold the result of the two expressions reg = alloc_register(); // Generate the true expression and the false label. // Move the expression result into the known register. expreg = genAST(n->mid, NOLABEL, NOLABEL, NOLABEL, n->op); cgmove(expreg, reg); // Don't free the register holding the result, though! genfreeregs(reg); cgjump(Lend); cglabel(Lfalse); // Generate the false expression and the end label. // Move the expression result into the known register. expreg = genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); cgmove(expreg, reg); // Don't free the register holding the result, though! 
genfreeregs(reg); cglabel(Lend); return (reg); } // Given an AST, an optional label, and the AST op // of the parent, generate assembly code recursively. // Return the register id with the tree's final value. int genAST(struct ASTnode *n, int iflabel, int looptoplabel, int loopendlabel, int parentASTop) { int leftreg= NOREG, rightreg= NOREG; // Empty tree, do nothing if (n==NULL) return(NOREG); // We have some specific AST node handling at the top // so that we don't evaluate the child sub-trees immediately switch (n->op) { case A_IF: return (genIF(n, looptoplabel, loopendlabel)); case A_WHILE: return (genWHILE(n)); case A_SWITCH: return (genSWITCH(n)); case A_FUNCCALL: return (gen_funccall(n)); case A_TERNARY: return (gen_ternary(n)); case A_LOGOR: return (gen_logandor(n)); case A_LOGAND: return (gen_logandor(n)); case A_GLUE: // Do each child statement, and free the // registers after each child if (n->left != NULL) genAST(n->left, iflabel, looptoplabel, loopendlabel, n->op); genfreeregs(NOREG); if (n->right != NULL) genAST(n->right, iflabel, looptoplabel, loopendlabel, n->op); genfreeregs(NOREG); return (NOREG); case A_FUNCTION: // Generate the function's preamble before the code // in the child sub-tree cgfuncpreamble(n->sym); genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op); cgfuncpostamble(n->sym); return (NOREG); } // General AST node handling below // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op); if (n->right) rightreg = genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdiv(leftreg, rightreg)); case A_AND: return (cgand(leftreg, rightreg)); case A_OR: return (cgor(leftreg, rightreg)); case A_XOR: return (cgxor(leftreg, rightreg)); case A_LSHIFT: return (cgshl(leftreg, rightreg)); case A_RSHIFT: 
return (cgshr(leftreg, rightreg)); case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: // If the parent AST node is an A_IF, A_WHILE or A_TERNARY, // generate a compare followed by a jump. Otherwise, compare // registers and set one to 1 or 0 based on the comparison. if (parentASTop == A_IF || parentASTop == A_WHILE || parentASTop == A_TERNARY) return (cgcompare_and_jump(n->op, leftreg, rightreg, iflabel)); else return (cgcompare_and_set(n->op, leftreg, rightreg)); case A_INTLIT: return (cgloadint(n->a_intvalue, n->type)); case A_STRLIT: return (cgloadglobstr(n->a_intvalue)); case A_IDENT: // Load our value if we are an rvalue // or we are being dereferenced if (n->rvalue || parentASTop == A_DEREF) { if (n->sym->class == C_GLOBAL || n->sym->class == C_STATIC || n->sym->class == C_EXTERN) { return (cgloadglob(n->sym, n->op)); } else { return (cgloadlocal(n->sym, n->op)); } } else return (NOREG); case A_ASPLUS: case A_ASMINUS: case A_ASSTAR: case A_ASSLASH: case A_ASSIGN: // For the '+=' and friends operators, generate suitable code // and get the register with the result. Then take the left child, // make it the right child so that we can fall into the assignment code. switch (n->op) { case A_ASPLUS: leftreg = cgadd(leftreg, rightreg); n->right = n->left; break; case A_ASMINUS: leftreg = cgsub(leftreg, rightreg); n->right = n->left; break; case A_ASSTAR: leftreg = cgmul(leftreg, rightreg); n->right = n->left; break; case A_ASSLASH: leftreg = cgdiv(leftreg, rightreg); n->right = n->left; break; } // Now into the assignment code // Are we assigning to an identifier or through a pointer? 
switch (n->right->op) { case A_IDENT: if (n->right->sym->class == C_GLOBAL || n->right->sym->class == C_STATIC) return (cgstorglob(leftreg, n->right->sym)); else return (cgstorlocal(leftreg, n->right->sym)); case A_DEREF: return (cgstorderef(leftreg, rightreg, n->right->type)); default: fatald("Can't A_ASSIGN in genAST(), op", n->op); } case A_WIDEN: // Widen the child's type to the parent's type return (cgwiden(leftreg, n->left->type, n->type)); case A_RETURN: cgreturn(leftreg, Functionid); return (NOREG); case A_ADDR: return (cgaddress(n->sym)); case A_DEREF: // If we are an rvalue, dereference to get the value we point at, // otherwise leave it for A_ASSIGN to store through the pointer if (n->rvalue) return (cgderef(leftreg, n->left->type)); else return (leftreg); case A_SCALE: // Small optimisation: use shift if the // scale value is a known power of two switch (n->a_size) { case 2: return (cgshlconst(leftreg, 1)); case 4: return (cgshlconst(leftreg, 2)); case 8: return (cgshlconst(leftreg, 3)); default: // Load a register with the size and // multiply the leftreg by this size rightreg = cgloadint(n->a_size, P_INT); return (cgmul(leftreg, rightreg)); } case A_POSTINC: case A_POSTDEC: // Load and decrement the variable's value into a register // and post increment/decrement it if (n->sym->class == C_GLOBAL || n->sym->class == C_STATIC) return (cgloadglob(n->sym, n->op)); else return (cgloadlocal(n->sym, n->op)); case A_PREINC: case A_PREDEC: // Load and decrement the variable's value into a register // and pre increment/decrement it if (n->left->sym->class == C_GLOBAL || n->left->sym->class == C_STATIC) return (cgloadglob(n->left->sym, n->op)); else return (cgloadlocal(n->left->sym, n->op)); case A_NEGATE: return (cgnegate(leftreg)); case A_INVERT: return (cginvert(leftreg)); case A_LOGNOT: return (cglognot(leftreg)); case A_TOBOOL: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. 
Otherwise, set the register // to 0 or 1 based on it's zeroeness or non-zeroeness return (cgboolean(leftreg, parentASTop, iflabel)); case A_BREAK: cgjump(loopendlabel); return (NOREG); case A_CONTINUE: cgjump(looptoplabel); return (NOREG); case A_CAST: return (leftreg); // Not much to do default: fatald("Unknown AST operator", n->op); } return (NOREG); // Keep -Wall happy } void genpreamble() { cgpreamble(); } void genpostamble() { cgpostamble(); } void genfreeregs(int keepreg) { freeall_registers(keepreg); } void genglobsym(struct symtable *node) { cgglobsym(node); } // Generate a global string. // If append is true, append to // previous genglobstr() call. int genglobstr(char *strvalue, int append) { int l = genlabel(); cgglobstr(l, strvalue, append); return (l); } void genglobstrend(void) { cgglobstrend(); } int genprimsize(int type) { return (cgprimsize(type)); } int genalign(int type, int offset, int direction) { return (cgalign(type, offset, direction)); } ================================================ FILE: 57_Mop_up_pt3/include/ctype.h ================================================ #ifndef _CTYPE_H_ # define _CTYPE_H_ int isalnum(int c); int isalpha(int c); int iscntrl(int c); int isdigit(int c); int isgraph(int c); int islower(int c); int isprint(int c); int ispunct(int c); int isspace(int c); int isupper(int c); int isxdigit(int c); int isascii(int c); int isblank(int c); int toupper(int c); int tolower(int c); #endif // _CTYPE_H_ ================================================ FILE: 57_Mop_up_pt3/include/errno.h ================================================ #ifndef _ERRNO_H_ # define _ERRNO_H_ int * __errno_location(void); #define errno (* __errno_location()) #endif // _ERRNO_H_ ================================================ FILE: 57_Mop_up_pt3/include/fcntl.h ================================================ #ifndef _FCNTL_H_ # define _FCNTL_H_ #define O_RDONLY 00 #define O_WRONLY 01 #define O_RDWR 02 int open(char *pathname, int flags); #endif 
// _FCNTL_H_ ================================================ FILE: 57_Mop_up_pt3/include/stddef.h ================================================ #ifndef _STDDEF_H_ # define _STDDEF_H_ #ifndef NULL # define NULL (void *)0 #endif typedef long size_t; #endif //_STDDEF_H_ ================================================ FILE: 57_Mop_up_pt3/include/stdio.h ================================================ #ifndef _STDIO_H_ # define _STDIO_H_ #include #ifndef NULL # define NULL (void *)0 #endif #ifndef EOF # define EOF (-1) #endif // This FILE definition will do for now typedef char * FILE; FILE *fopen(char *pathname, char *mode); size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream); size_t fwrite(void *ptr, size_t size, size_t nmemb, FILE *stream); int fclose(FILE *stream); int printf(char *format); int fprintf(FILE *stream, char *format); int sprintf(char *str, char *format); int snprintf(char *str, size_t size, char *format); int fgetc(FILE *stream); int fputc(int c, FILE *stream); int fputs(char *s, FILE *stream); int putc(int c, FILE *stream); int putchar(int c); int puts(char *s); FILE *popen(char *command, char *type); int pclose(FILE *stream); extern FILE *stdin; extern FILE *stdout; extern FILE *stderr; #endif // _STDIO_H_ ================================================ FILE: 57_Mop_up_pt3/include/stdlib.h ================================================ #ifndef _STDLIB_H_ # define _STDLIB_H_ void exit(int status); void _Exit(int status); void *malloc(int size); void free(void *ptr); void *calloc(int nmemb, int size); void *realloc(void *ptr, int size); int system(char *command); #endif // _STDLIB_H_ ================================================ FILE: 57_Mop_up_pt3/include/string.h ================================================ #ifndef _STRING_H_ # define _STRING_H_ char *strdup(char *s); char *strchr(char *s, int c); char *strrchr(char *s, int c); int strcmp(char *s1, char *s2); int strncmp(char *s1, char *s2, size_t n); char *strerror(int 
errnum); #endif // _STRING_H_ ================================================ FILE: 57_Mop_up_pt3/include/unistd.h ================================================ #ifndef _UNISTD_H_ # define _UNISTD_H_ void _exit(int status); int unlink(char *pathname); #endif // _UNISTD_H_ ================================================ FILE: 57_Mop_up_pt3/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Given a string with a '.' and at least a 1-character suffix // after the '.', change the suffix to be the given character. // Return the new string or NULL if the original string could // not be modified char *alter_suffix(char *str, char suffix) { char *posn; char *newstr; // Clone the string if ((newstr = strdup(str)) == NULL) return (NULL); // Find the '.' if ((posn = strrchr(newstr, '.')) == NULL) return (NULL); // Ensure there is a suffix posn++; if (*posn == '\0') return (NULL); // Change the suffix and NUL-terminate the string *posn = suffix; posn++; *posn = '\0'; return (newstr); } // Given an input filename, compile that file // down to assembly code. 
Return the new file's name static char *do_compile(char *filename) { char cmd[TEXTLEN]; // Change the input file's suffix to .s Outfilename = alter_suffix(filename, 's'); if (Outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .c on the end\n", filename); exit(1); } // Generate the pre-processor command snprintf(cmd, TEXTLEN, "%s %s %s", CPPCMD, INCDIR, filename); // Open up the pre-processor pipe if ((Infile = popen(cmd, "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", filename, strerror(errno)); exit(1); } Infilename = filename; // Create the output file if ((Outfile = fopen(Outfilename, "w")) == NULL) { fprintf(stderr, "Unable to create %s: %s\n", Outfilename, strerror(errno)); exit(1); } Line = 1; // Reset the scanner Linestart = 1; Putback = '\n'; clear_symtable(); // Clear the symbol table if (O_verbose) printf("compiling %s\n", filename); scan(&Token); // Get the first token from the input Peektoken.token = 0; // and set there is no lookahead token genpreamble(); // Output the preamble global_declarations(); // Parse the global declarations genpostamble(); // Output the postamble fclose(Outfile); // Close the output file // Dump the symbol table if requested if (O_dumpsym) { printf("Symbols for %s\n", filename); dumpsymtables(); fprintf(stdout, "\n\n"); } freestaticsyms(); // Free any static symbols in the file return (Outfilename); } // Given an input filename, assemble that file // down to object code. 
Return the object filename char *do_assemble(char *filename) { char cmd[TEXTLEN]; int err; char *outfilename = alter_suffix(filename, 'o'); if (outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .s on the end\n", filename); exit(1); } // Build the assembly command and run it snprintf(cmd, TEXTLEN, "%s %s %s", ASCMD, outfilename, filename); if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Assembly of %s failed\n", filename); exit(1); } return (outfilename); } // Given a list of object files and an output filename, // link all of the object filenames together. void do_link(char *outfilename, char **objlist) { int cnt, size = TEXTLEN; char cmd[TEXTLEN], *cptr; int err; // Start with the linker command and the output file cptr = cmd; cnt = snprintf(cptr, size, "%s %s ", LDCMD, outfilename); cptr += cnt; size -= cnt; // Now append each object file while (*objlist != NULL) { cnt = snprintf(cptr, size, "%s ", *objlist); cptr += cnt; size -= cnt; objlist++; } if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Linking failed\n"); exit(1); } } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s [-vcSTM] [-o outfile] file [file ...]\n", prog); fprintf(stderr, " -v give verbose output of the compilation stages\n"); fprintf(stderr, " -c generate object files but don't link them\n"); fprintf(stderr, " -S generate assembly files but don't link them\n"); fprintf(stderr, " -T dump the AST trees for each input file\n"); fprintf(stderr, " -M dump the symbol table for each input file\n"); fprintf(stderr, " -o outfile, produce the outfile executable file\n"); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. 
enum { MAXOBJ = 100 }; int main(int argc, char *argv[]) { char *outfilename = AOUT; char *asmfile, *objfile; char *objlist[MAXOBJ]; int i, objcnt = 0; // Initialise our variables O_dumpAST = 0; O_dumpsym = 0; O_keepasm = 0; O_assemble = 0; O_verbose = 0; O_dolink = 1; // Scan for command-line options for (i = 1; i < argc; i++) { // No leading '-', stop scanning for options if (*argv[i] != '-') break; // For each option in this argument for (int j = 1; (*argv[i] == '-') && argv[i][j]; j++) { switch (argv[i][j]) { case 'o': outfilename = argv[++i]; // Save & skip to next argument break; case 'T': O_dumpAST = 1; break; case 'M': O_dumpsym = 1; break; case 'c': O_assemble = 1; O_keepasm = 0; O_dolink = 0; break; case 'S': O_keepasm = 1; O_assemble = 0; O_dolink = 0; break; case 'v': O_verbose = 1; break; default: usage(argv[0]); } } } // Ensure we have at lease one input file argument if (i >= argc) usage(argv[0]); // Work on each input file in turn while (i < argc) { asmfile = do_compile(argv[i]); // Compile the source file if (O_dolink || O_assemble) { objfile = do_assemble(asmfile); // Assemble it to object forma if (objcnt == (MAXOBJ - 2)) { fprintf(stderr, "Too many object files for the compiler to handle\n"); exit(1); } objlist[objcnt++] = objfile; // Add the object file's name objlist[objcnt] = NULL; // to the list of object files } if (!O_keepasm) // Remove the assembly file if unlink(asmfile); // we don't need to keep it i++; } // Now link all the object files together if (O_dolink) { do_link(outfilename, objlist); // If we don't need to keep the object // files, then remove them if (!O_assemble) { for (i = 0; objlist[i] != NULL; i++) unlink(objlist[i]); } } return (0); } ================================================ FILE: 57_Mop_up_pt3/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" #include #include // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current 
token is t, // and fetch the next token. Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Match a comma and fetch the next token void comma(void) { match(T_COMMA, "comma"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d of %s\n", s, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d of %s\n", s1, s2, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d of %s\n", s, d, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d of %s\n", s, c, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } ================================================ FILE: 57_Mop_up_pt3/opt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST Tree Optimisation Code // Copyright (c) 2019 Warren Toomey, GPL3 // Fold an AST tree with a binary operator // and two A_INTLIT children. Return either // the original tree or a new leaf node. 
static struct ASTnode *fold2(struct ASTnode *n) { int val, leftval, rightval; // Get the values from each child leftval = n->left->a_intvalue; rightval = n->right->a_intvalue; // Perform some of the binary operations. // For any AST op we can't do, return // the original tree. switch (n->op) { case A_ADD: val = leftval + rightval; break; case A_SUBTRACT: val = leftval - rightval; break; case A_MULTIPLY: val = leftval * rightval; break; case A_DIVIDE: // Don't try to divide by zero. if (rightval == 0) return (n); val = leftval / rightval; break; default: return (n); } // Return a leaf node with the new value return (mkastleaf(A_INTLIT, n->type, NULL, NULL, val)); } // Fold an AST tree with a unary operator // and one INTLIT children. Return either // the original tree or a new leaf node. static struct ASTnode *fold1(struct ASTnode *n) { int val; // Get the child value. Do the // operation if recognised. // Return the new leaf node. val = n->left->a_intvalue; switch (n->op) { case A_WIDEN: break; case A_INVERT: val = ~val; break; case A_LOGNOT: val = !val; break; default: return (n); } // Return a leaf node with the new value return (mkastleaf(A_INTLIT, n->type, NULL, NULL, val)); } // Attempt to do constant folding on // the AST tree with the root node n static struct ASTnode *fold(struct ASTnode *n) { if (n == NULL) return (NULL); // Fold on the left child, then // do the same on the right child n->left = fold(n->left); n->right = fold(n->right); // If both children are A_INTLITs, do a fold2() if (n->left && n->left->op == A_INTLIT) { if (n->right && n->right->op == A_INTLIT) n = fold2(n); else // If only the left is A_INTLIT, do a fold1() n = fold1(n); } // Return the possibly modified tree return (n); } // Optimise an AST tree by // constant folding in all sub-trees struct ASTnode *optimise(struct ASTnode *n) { n = fold(n); return (n); } ================================================ FILE: 57_Mop_up_pt3/scan.c ================================================ 
#include "defs.h"
#include "data.h"
#include "decl.h"

// Lexical scanning
// Copyright (c) 2019 Warren Toomey, GPL3

// Return the position of character c
// in string s, or -1 if c not found
static int chrpos(char *s, int c) {
  int i;
  for (i = 0; s[i] != '\0'; i++)
    if (s[i] == (char) c)
      return (i);
  return (-1);
}

// Get the next character from the input file.
// A put-back character, if any, is returned first. Pre-processor
// lines (a '#' at the start of a line) are consumed here: they update
// Line and possibly Infilename and are never returned to the caller.
// Returns EOF at the end of the input.
static int next(void) {
  int c, l;

  if (Putback) {                // Use the character put
    c = Putback;                // back if there is one
    Putback = 0;
    return (c);
  }

  c = fgetc(Infile);            // Read from input file

  while (Linestart && c == '#') {       // We've hit a pre-processor statement
    Linestart = 0;              // No longer at the start of the line
    scan(&Token);               // Get the line number into l
    if (Token.token != T_INTLIT)
      fatals("Expecting pre-processor line number, got:", Text);
    l = Token.intvalue;

    scan(&Token);               // Get the filename in Text
    if (Token.token != T_STRLIT)
      fatals("Expecting pre-processor file name, got:", Text);

    if (Text[0] != '<') {       // If this is a real filename
      if (strcmp(Text, Infilename))     // and not the one we have now
        Infilename = strdup(Text);      // save it. Then update the line num
      Line = l;
    }

    // Skip to the end of the line. Stop at EOF as well: if the
    // input ends before a newline, fgetc() returns EOF from then on
    // and waiting only for '\n' would never terminate.
    c = fgetc(Infile);
    while (c != '\n' && c != EOF)
      c = fgetc(Infile);
    if (c != EOF)
      c = fgetc(Infile);        // and get the next character
    Linestart = 1;              // Now back at the start of the line
  }

  Linestart = 0;                // No longer at the start of the line
  if ('\n' == c) {
    Line++;                     // Increment line count
    Linestart = 1;              // Now back at the start of the line
  }
  return (c);
}

// Put back an unwanted character.
// Only one character can be held; next() returns it first.
static void putback(int c) {
  Putback = c;
}

// Skip past input that we don't need to deal with,
// i.e. whitespace, newlines. Return the first
// character we do need to deal with.
static int skip(void) {
  int ch;

  // Keep reading while we see a space, tab, newline,
  // carriage return or form feed
  for (ch = next();
       ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' || ch == '\f';
       ch = next())
    ;
  return (ch);
}

// Read the hex digits that follow a '\x' escape and return their
// value. At least one digit is required and the value must fit
// in 0..255, otherwise it is a fatal error.
static int hexchar(void) {
  int ch, value = 0, ndigits = 0;

  // Accumulate hex digits, most significant first
  for (ch = next(); isxdigit(ch); ch = next()) {
    value = value * 16 + chrpos("0123456789abcdef", tolower(ch));
    ndigits++;
  }

  // The character that ended the run is not ours
  putback(ch);

  if (ndigits == 0)
    fatal("missing digits after '\\x'");
  if (value > 255)
    fatal("value out of range after '\\x'");
  return (value);
}

// Return the next character from a character
// or string literal, translating any backslash
// escape into the character it stands for
static int scanch(void) {
  int ndigits, ch, octval;

  // Anything that isn't a backslash is returned as-is
  ch = next();
  if (ch != '\\')
    return (ch);

  // Interpret the character after the backslash
  ch = next();
  switch (ch) {
    case 'a':
      return ('\a');
    case 'b':
      return ('\b');
    case 'f':
      return ('\f');
    case 'n':
      return ('\n');
    case 'r':
      return ('\r');
    case 't':
      return ('\t');
    case 'v':
      return ('\v');
    case '\\':
      return ('\\');
    case '"':
      return ('"');
    case '\'':
      return ('\'');

      // Octal escape: collect at most three octal digits.
      // The character that stops the run is put back.
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
      ndigits = 0;
      octval = 0;
      while (isdigit(ch) && ch < '8') {
        ndigits++;
        if (ndigits > 3)
          break;
        octval = octval * 8 + (ch - '0');
        ch = next();
      }
      putback(ch);
      return (octval);
    case 'x':
      return (hexchar());
    default:
      fatalc("unknown escape sequence", ch);
  }
  return (ch);                  // Only reached if fatalc() returns
}

// Scan and return an integer literal
// value from the input file.
static int scanint(int c) { int k, val = 0, radix = 10; // Assume the radix is 10, but if it starts with 0 if (c == '0') { // and the next character is 'x', it's radix 16 if ((c = next()) == 'x') { radix = 16; c = next(); } else // Otherwise, it's radix 8 radix = 8; } // Convert each character into an int value while ((k = chrpos("0123456789abcdef", tolower(c))) >= 0) { if (k >= radix) fatalc("invalid digit in integer literal", c); val = val * radix + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan in a string literal from the input file, // and store it in buf[]. Return the length of // the string. static int scanstr(char *buf) { int i, c; // Loop while we have enough buffer space for (i = 0; i < TEXTLEN - 1; i++) { // Get the next char and append to buf // Return when we hit the ending double quote if ((c = scanch()) == '"') { buf[i] = 0; return (i); } buf[i] = (char)c; } // Ran out of buf[] space fatal("String literal too long"); return (0); } // Scan an identifier from the input file and // store it in buf[]. Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { fatal("Identifier too long"); } else if (i < lim - 1) { buf[i++] = (char)c; } c = next(); } // We hit a non-valid character, put it back. // NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. 
static int keyword(char *s) { switch (*s) { case 'b': if (!strcmp(s, "break")) return (T_BREAK); break; case 'c': if (!strcmp(s, "case")) return (T_CASE); if (!strcmp(s, "char")) return (T_CHAR); if (!strcmp(s, "continue")) return (T_CONTINUE); break; case 'd': if (!strcmp(s, "default")) return (T_DEFAULT); break; case 'e': if (!strcmp(s, "else")) return (T_ELSE); if (!strcmp(s, "enum")) return (T_ENUM); if (!strcmp(s, "extern")) return (T_EXTERN); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'l': if (!strcmp(s, "long")) return (T_LONG); break; case 'r': if (!strcmp(s, "return")) return (T_RETURN); break; case 's': if (!strcmp(s, "sizeof")) return (T_SIZEOF); if (!strcmp(s, "static")) return (T_STATIC); if (!strcmp(s, "struct")) return (T_STRUCT); if (!strcmp(s, "switch")) return (T_SWITCH); break; case 't': if (!strcmp(s, "typedef")) return (T_TYPEDEF); break; case 'u': if (!strcmp(s, "union")) return (T_UNION); break; case 'v': if (!strcmp(s, "void")) return (T_VOID); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; } return (0); } // List of token strings, for debugging purposes char *Tstring[] = { "EOF", "=", "+=", "-=", "*=", "/=", "?", "||", "&&", "|", "^", "&", "==", "!=", ",", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", "default", "sizeof", "static", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":" }; // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. 
int scan(struct token *t) { int c, tokentype; // If we have a lookahead token, return this token if (Peektoken.token != 0) { t->token = Peektoken.token; t->tokstr = Peektoken.tokstr; t->intvalue = Peektoken.intvalue; Peektoken.token = 0; return (1); } // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': if ((c = next()) == '+') { t->token = T_INC; } else if (c == '=') { t->token = T_ASPLUS; } else { putback(c); t->token = T_PLUS; } break; case '-': if ((c = next()) == '-') { t->token = T_DEC; } else if (c == '>') { t->token = T_ARROW; } else if (c == '=') { t->token = T_ASMINUS; } else if (isdigit(c)) { // Negative int literal t->intvalue = -scanint(c); t->token = T_INTLIT; } else { putback(c); t->token = T_MINUS; } break; case '*': if ((c = next()) == '=') { t->token = T_ASSTAR; } else { putback(c); t->token = T_STAR; } break; case '/': if ((c = next()) == '=') { t->token = T_ASSLASH; } else { putback(c); t->token = T_SLASH; } break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case '[': t->token = T_LBRACKET; break; case ']': t->token = T_RBRACKET; break; case '~': t->token = T_INVERT; break; case '^': t->token = T_XOR; break; case ',': t->token = T_COMMA; break; case '.': t->token = T_DOT; break; case ':': t->token = T_COLON; break; case '?': t->token = T_QUESTION; break; case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { putback(c); t->token = T_LOGNOT; } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else if (c == '<') { t->token = T_LSHIFT; } else { putback(c); t->token = T_LT; } break; case '>': if ((c = next()) == '=') { t->token = T_GE; } else if (c == '>') { t->token = T_RSHIFT; } else { 
putback(c); t->token = T_GT; } break; case '&': if ((c = next()) == '&') { t->token = T_LOGAND; } else { putback(c); t->token = T_AMPER; } break; case '|': if ((c = next()) == '|') { t->token = T_LOGOR; } else { putback(c); t->token = T_OR; } break; case '\'': // If it's a quote, scan in the // literal character value and // the trailing quote t->intvalue = scanch(); t->token = T_INTLIT; if (next() != '\'') fatal("Expected '\\'' at end of char literal"); break; case '"': // Scan in a literal string scanstr(Text); t->token = T_STRLIT; break; default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if ((tokentype = keyword(Text)) != 0) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token t->tokstr = Tstring[t->token]; return (1); } ================================================ FILE: 57_Mop_up_pt3/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // Prototypes static struct ASTnode *single_statement(void); // compound_statement: // empty, i.e. 
no statement // | statement // | statement statements // ; // // statement: declaration // | expression_statement // | function_call // | if_statement // | while_statement // | for_statement // | return_statement // ; // if_statement: if_head // | if_head 'else' statement // ; // // if_head: 'if' '(' true_false_expression ')' statement ; // // Parse an IF statement including any // optional ELSE clause and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST->ctype, condAST, NULL, 0); rparen(); // Get the AST for the statement trueAST = single_statement(); // If we have an 'else', skip it // and get the AST for the statement if (Token.token == T_ELSE) { scan(&Token); falseAST = single_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, P_NONE, NULL, condAST, trueAST, falseAST, NULL, 0)); } // while_statement: 'while' '(' true_false_expression ')' statement ; // // Parse a WHILE statement and return its AST static struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST->ctype, condAST, NULL, 0); rparen(); // Get the AST for the statement. 
// Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Build and return the AST for this statement return (mkastnode(A_WHILE, P_NONE, NULL, condAST, NULL, bodyAST, NULL, 0)); } // for_statement: 'for' '(' expression_list ';' // true_false_expression ';' // expression_list ')' statement ; // // Parse a FOR statement and return its AST static struct ASTnode *for_statement(void) { struct ASTnode *condAST, *bodyAST; struct ASTnode *preopAST, *postopAST; struct ASTnode *tree; // Ensure we have 'for' '(' match(T_FOR, "for"); lparen(); // Get the pre_op expression and the ';' preopAST = expression_list(T_SEMI); semi(); // Get the condition and the ';'. // Force a non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST->ctype, condAST, NULL, 0); semi(); // Get the post_op expression and the ')' postopAST = expression_list(T_RPAREN); rparen(); // Get the statement which is the body // Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Glue the statement and the postop tree tree = mkastnode(A_GLUE, P_NONE, NULL, bodyAST, NULL, postopAST, NULL, 0); // Make a WHILE loop with the condition and this new body tree = mkastnode(A_WHILE, P_NONE, NULL, condAST, NULL, tree, NULL, 0); // And glue the preop tree to the A_WHILE tree return (mkastnode(A_GLUE, P_NONE, NULL, preopAST, NULL, tree, NULL, 0)); } // return_statement: 'return' '(' expression ')' ; // // Parse a return statement and return its AST static struct ASTnode *return_statement(void) { struct ASTnode *tree= NULL; // Ensure we have 'return' match(T_RETURN, "return"); // See if we have a return value if (Token.token == T_LPAREN) { // Can't return a value if function returns P_VOID if (Functionid->type == P_VOID) fatal("Can't return from a void function"); // Skip the left parenthesis lparen(); // Parse 
the following expression tree = binexpr(0); // Ensure this is compatible with the function's type tree = modify_type(tree, Functionid->type, Functionid->ctype, 0); if (tree == NULL) fatal("Incompatible type to return"); // Get the ')' rparen(); } // Add on the A_RETURN node tree = mkastunary(A_RETURN, P_NONE, NULL, tree, NULL, 0); // Get the ';' semi(); return (tree); } // break_statement: 'break' ; // // Parse a break statement and return its AST static struct ASTnode *break_statement(void) { if (Looplevel == 0 && Switchlevel == 0) fatal("no loop or switch to break out from"); scan(&Token); semi(); return (mkastleaf(A_BREAK, P_NONE, NULL, NULL, 0)); } // continue_statement: 'continue' ; // // Parse a continue statement and return its AST static struct ASTnode *continue_statement(void) { if (Looplevel == 0) fatal("no loop to continue to"); scan(&Token); semi(); return (mkastleaf(A_CONTINUE, P_NONE, NULL, NULL, 0)); } // Parse a switch statement and return its AST static struct ASTnode *switch_statement(void) { struct ASTnode *left, *body, *n, *c; struct ASTnode *casetree = NULL, *casetail; int inloop = 1, casecount = 0; int seendefault = 0; int ASTop, casevalue; // Skip the 'switch' and '(' scan(&Token); lparen(); // Get the switch expression, the ')' and the '{' left = binexpr(0); rparen(); lbrace(); // Ensure that this is of int type if (!inttype(left->type)) fatal("Switch expression is not of integer type"); // Build an A_SWITCH subtree with the expression as // the child n = mkastunary(A_SWITCH, P_NONE, NULL, left, NULL, 0); // Now parse the cases Switchlevel++; while (inloop) { switch (Token.token) { // Leave the loop when we hit a '}' case T_RBRACE: if (casecount == 0) fatal("No cases in switch"); inloop = 0; break; case T_CASE: case T_DEFAULT: // Ensure this isn't after a previous 'default' if (seendefault) fatal("case or default after existing default"); // Set the AST operation. 
Scan the case value if required if (Token.token == T_DEFAULT) { ASTop = A_DEFAULT; seendefault = 1; scan(&Token); } else { ASTop = A_CASE; scan(&Token); left = binexpr(0); // Ensure the case value is an integer literal if (left->op != A_INTLIT) fatal("Expecting integer literal for case value"); casevalue = left->a_intvalue; // Walk the list of existing case values to ensure // that there isn't a duplicate case value for (c = casetree; c != NULL; c = c->right) if (casevalue == c->a_intvalue) fatal("Duplicate case value"); } // Scan the ':' and increment the casecount match(T_COLON, ":"); casecount++; // If the next token is a T_CASE, the existing case will fall // into the next case. Otherwise, parse the case body. if (Token.token == T_CASE) body = NULL; else body = compound_statement(1); // Build a sub-tree with any compound statement as the left child // and link it in to the growing A_CASE tree if (casetree == NULL) { casetree = casetail = mkastunary(ASTop, P_NONE, NULL, body, NULL, casevalue); } else { casetail->right = mkastunary(ASTop, P_NONE, NULL, body, NULL, casevalue); casetail = casetail->right; } break; default: fatals("Unexpected token in switch", Token.tokstr); } } Switchlevel--; // We have a sub-tree with the cases and any default. Put the // case count into the A_SWITCH node and attach the case tree. n->a_intvalue = casecount; n->right = casetree; rbrace(); return (n); } // Parse a single statement and return its AST. static struct ASTnode *single_statement(void) { struct ASTnode *stmt; struct symtable *ctype; switch (Token.token) { case T_SEMI: // An empty statement semi(); break; case T_LBRACE: // We have a '{', so this is a compound statement lbrace(); stmt = compound_statement(0); rbrace(); return (stmt); case T_IDENT: // We have to see if the identifier matches a typedef. // If not, treat it as an expression. // Otherwise, fall down to the parse_type() call. 
if (findtypedef(Text) == NULL) { stmt = binexpr(0); semi(); return (stmt); } case T_CHAR: case T_INT: case T_LONG: case T_STRUCT: case T_UNION: case T_ENUM: case T_TYPEDEF: // The beginning of a variable declaration list. declaration_list(&ctype, C_LOCAL, T_SEMI, T_EOF, &stmt); semi(); return (stmt); // Any assignments from the declarations case T_IF: return (if_statement()); case T_WHILE: return (while_statement()); case T_FOR: return (for_statement()); case T_RETURN: return (return_statement()); case T_BREAK: return (break_statement()); case T_CONTINUE: return (continue_statement()); case T_SWITCH: return (switch_statement()); default: // For now, see if this is an expression. // This catches assignment statements. stmt = binexpr(0); semi(); return (stmt); } return (NULL); // Keep -Wall happy } // Parse a compound statement // and return its AST. If inswitch is true, // we look for a '}', 'case' or 'default' token // to end the parsing. Otherwise, look for // just a '}' to end the parsing. struct ASTnode *compound_statement(int inswitch) { struct ASTnode *left = NULL; struct ASTnode *tree; while (1) { // Leave if we've hit the end token. 
We do this first to allow // an empty compound statement if (Token.token == T_RBRACE) return (left); if (inswitch && (Token.token == T_CASE || Token.token == T_DEFAULT)) return (left); // Parse a single statement tree = single_statement(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, P_NONE, NULL, left, NULL, tree, NULL, 0); } } return (NULL); // Keep -Wall happy } ================================================ FILE: 57_Mop_up_pt3/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 // Append a node to the singly-linked list pointed to by head or tail void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node) { // Check for valid pointers if (head == NULL || tail == NULL || node == NULL) fatal("Either head, tail or node is NULL in appendsym"); // Append to the list if (*tail) { (*tail)->next = node; *tail = node; } else *head = *tail = node; node->next = NULL; } // Create a symbol node to be added to a symbol table list. // Set up the node's: // + type: char, int etc. // + ctype: composite type pointer for struct/union // + structural type: var, function, array etc. 
// + size: number of elements, or endlabel: end label for a function // + posn: Position information for local symbols // Return a pointer to the new node struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn) { // Get a new node struct symtable *node = (struct symtable *) malloc(sizeof(struct symtable)); if (node == NULL) fatal("Unable to malloc a symbol table node in newsym"); // Fill in the values if (name == NULL) node->name = NULL; else node->name = strdup(name); node->type = type; node->ctype = ctype; node->stype = stype; node->class = class; node->nelems = nelems; // For pointers and integer types, set the size // of the symbol. structs and union declarations // manually set this up themselves. if (ptrtype(type) || inttype(type)) node->size = nelems * typesize(type, ctype); node->st_posn = posn; node->next = NULL; node->member = NULL; node->initlist = NULL; return (node); } // Add a symbol to the global symbol list struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn) { struct symtable *sym = newsym(name, type, ctype, stype, class, nelems, posn); // For structs and unions, copy the size from the type node if (type == P_STRUCT || type == P_UNION) sym->size = ctype->size; appendsym(&Globhead, &Globtail, sym); return (sym); } // Add a symbol to the local symbol list struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int nelems) { struct symtable *sym = newsym(name, type, ctype, stype, C_LOCAL, nelems, 0); // For structs and unions, copy the size from the type node if (type == P_STRUCT || type == P_UNION) sym->size = ctype->size; appendsym(&Loclhead, &Locltail, sym); return (sym); } // Add a symbol to the parameter list struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype) { struct symtable *sym = newsym(name, type, ctype, stype, C_PARAM, 1, 0); appendsym(&Parmhead, &Parmtail, sym); 
return (sym); } // Add a symbol to the temporary member list struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int nelems) { struct symtable *sym = newsym(name, type, ctype, stype, C_MEMBER, nelems, 0); // For structs and unions, copy the size from the type node if (type == P_STRUCT || type == P_UNION) sym->size = ctype->size; appendsym(&Membhead, &Membtail, sym); return (sym); } // Add a struct to the struct list struct symtable *addstruct(char *name) { struct symtable *sym = newsym(name, P_STRUCT, NULL, 0, C_STRUCT, 0, 0); appendsym(&Structhead, &Structtail, sym); return (sym); } // Add a struct to the union list struct symtable *addunion(char *name) { struct symtable *sym = newsym(name, P_UNION, NULL, 0, C_UNION, 0, 0); appendsym(&Unionhead, &Uniontail, sym); return (sym); } // Add an enum type or value to the enum list. // Class is C_ENUMTYPE or C_ENUMVAL. // Use posn to store the int value. struct symtable *addenum(char *name, int class, int value) { struct symtable *sym = newsym(name, P_INT, NULL, 0, class, 0, value); appendsym(&Enumhead, &Enumtail, sym); return (sym); } // Add a typedef to the typedef list struct symtable *addtypedef(char *name, int type, struct symtable *ctype) { struct symtable *sym = newsym(name, type, ctype, 0, C_TYPEDEF, 0, 0); appendsym(&Typehead, &Typetail, sym); return (sym); } // Search for a symbol in a specific list. // Return a pointer to the found node or NULL if not found. // If class is not zero, also match on the given class static struct symtable *findsyminlist(char *s, struct symtable *list, int class) { for (; list != NULL; list = list->next) if ((list->name != NULL) && !strcmp(s, list->name)) if (class == 0 || class == list->class) return (list); return (NULL); } // Determine if the symbol s is in the global symbol table. // Return a pointer to the found node or NULL if not found. 
struct symtable *findglob(char *s) { return (findsyminlist(s, Globhead, 0)); } // Determine if the symbol s is in the local symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findlocl(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } return (findsyminlist(s, Loclhead, 0)); } // Determine if the symbol s is in the symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findsymbol(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } // Otherwise, try the local and global symbol lists node = findsyminlist(s, Loclhead, 0); if (node) return (node); return (findsyminlist(s, Globhead, 0)); } // Find a member in the member list // Return a pointer to the found node or NULL if not found. struct symtable *findmember(char *s) { return (findsyminlist(s, Membhead, 0)); } // Find a struct in the struct list // Return a pointer to the found node or NULL if not found. struct symtable *findstruct(char *s) { return (findsyminlist(s, Structhead, 0)); } // Find a struct in the union list // Return a pointer to the found node or NULL if not found. struct symtable *findunion(char *s) { return (findsyminlist(s, Unionhead, 0)); } // Find an enum type in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumtype(char *s) { return (findsyminlist(s, Enumhead, C_ENUMTYPE)); } // Find an enum value in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumval(char *s) { return (findsyminlist(s, Enumhead, C_ENUMVAL)); } // Find a type in the tyedef list // Return a pointer to the found node or NULL if not found. 
struct symtable *findtypedef(char *s) { return (findsyminlist(s, Typehead, 0)); } // Reset the contents of the symbol table void clear_symtable(void) { Globhead = Globtail = NULL; Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Membhead = Membtail = NULL; Structhead = Structtail = NULL; Unionhead = Uniontail = NULL; Enumhead = Enumtail = NULL; Typehead = Typetail = NULL; } // Clear all the entries in the local symbol table void freeloclsyms(void) { Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Functionid = NULL; } // Remove all static symbols from the global symbol table void freestaticsyms(void) { // g points at current node, prev at the previous one struct symtable *g, *prev = NULL; // Walk the global table looking for static entries for (g = Globhead; g != NULL; g = g->next) { if (g->class == C_STATIC) { // If there's a previous node, rearrange the prev pointer // to skip over the current node. If not, g is the head, // so do the same to Globhead if (prev != NULL) prev->next = g->next; else Globhead->next = g->next; // If g is the tail, point Globtail at the previous node // (if there is one), or Globhead if (g == Globtail) { if (prev != NULL) Globtail = prev; else Globtail = Globhead; } } } // Point prev at g before we move up to the next node prev = g; } // Dump a single symbol static void dumpsym(struct symtable *sym, int indent) { int i; for (i = 0; i < indent; i++) printf(" "); switch (sym->type & (~0xf)) { case P_VOID: printf("void "); break; case P_CHAR: printf("char "); break; case P_INT: printf("int "); break; case P_LONG: printf("long "); break; case P_STRUCT: if (sym->ctype != NULL) printf("struct %s ", sym->ctype->name); else printf("struct %s ", sym->name); break; case P_UNION: if (sym->ctype != NULL) printf("union %s ", sym->ctype->name); else printf("union %s ", sym->name); break; default: printf("unknown type "); } for (i = 0; i < (sym->type & 0xf); i++) printf("*"); printf("%s", sym->name); switch (sym->stype) { case 
S_VARIABLE: break; case S_FUNCTION: printf("()"); break; case S_ARRAY: printf("[]"); break; default: printf(" unknown stype"); } switch (sym->class) { case C_GLOBAL: printf(": global"); break; case C_LOCAL: printf(": local"); break; case C_PARAM: printf(": param"); break; case C_EXTERN: printf(": extern"); break; case C_STATIC: printf(": static"); break; case C_STRUCT: printf(": struct"); break; case C_UNION: printf(": union"); break; case C_MEMBER: printf(": member"); break; case C_ENUMTYPE: printf(": enumtype"); break; case C_ENUMVAL: printf(": enumval"); break; case C_TYPEDEF: printf(": typedef"); break; default: printf(": unknown class"); } switch (sym->stype) { case S_VARIABLE: if (sym->class == C_ENUMVAL) printf(", value %d\n", sym->st_posn); else printf(", size %d\n", sym->size); break; case S_FUNCTION: printf(", %d params\n", sym->nelems); break; case S_ARRAY: printf(", %d elems, size %d\n", sym->nelems, sym->size); break; } switch (sym->type & (~0xf)) { case P_STRUCT: case P_UNION: dumptable(sym->member, NULL, 4); } switch (sym->stype) { case S_FUNCTION: dumptable(sym->member, NULL, 4); } } // Dump one symbol table void dumptable(struct symtable *head, char *name, int indent) { struct symtable *sym; if (head != NULL && name != NULL) printf("%s\n--------\n", name); for (sym = head; sym != NULL; sym = sym->next) dumpsym(sym, indent); } void dumpsymtables(void) { dumptable(Globhead, "Global", 0); printf("\n"); dumptable(Enumhead, "Enums", 0); printf("\n"); dumptable(Typehead, "Typedefs", 0); } ================================================ FILE: 57_Mop_up_pt3/tests/err.input031.c ================================================ Expecting a primary expression, got token:+ on line 5 of input031.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input032.c ================================================ Unknown variable or function:pizza on line 4 of input032.c ================================================ FILE: 
57_Mop_up_pt3/tests/err.input033.c ================================================ Incompatible type to return on line 4 of input033.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input034.c ================================================ For now, declaration of non-global arrays is not implemented on line 4 of input034.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input035.c ================================================ Duplicate local variable declaration:a on line 4 of input035.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input036.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input036.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input037.c ================================================ Expected:comma on line 3 of input037.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input038.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input038.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input039.c ================================================ No statements in function with non-void type on line 4 of input039.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input040.c ================================================ No return for function with non-void type on line 4 of input040.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input041.c ================================================ Can't return from a void function on line 3 of input041.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input042.c ================================================ Unknown variable or function:fred on line 3 of input042.c 
================================================ FILE: 57_Mop_up_pt3/tests/err.input043.c ================================================ Unknown variable or function:b on line 3 of input043.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input044.c ================================================ Unknown variable or function:z on line 3 of input044.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input045.c ================================================ & operator must be followed by an identifier on line 3 of input045.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input046.c ================================================ * operator must be followed by an expression of pointer type on line 3 of input046.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input047.c ================================================ ++ operator must be followed by an identifier on line 3 of input047.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input048.c ================================================ -- operator must be followed by an identifier on line 3 of input048.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input049.c ================================================ Incompatible expression in assignment on line 6 of input049.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input050.c ================================================ Incompatible types in binary expression on line 6 of input050.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input051.c ================================================ Expected '\'' at end of char literal on line 4 of input051.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input052.c ================================================ Unrecognised 
character:$ on line 5 of input052.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input056.c ================================================ unknown struct/union type:var1 on line 2 of input056.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input057.c ================================================ previously defined struct/union:fred on line 2 of input057.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input059.c ================================================ Unknown variable or function:y on line 3 of input059.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input060.c ================================================ Expression is not a struct/union on line 3 of input060.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input061.c ================================================ Expression is not a pointer to a struct/union on line 3 of input061.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input064.c ================================================ undeclared enum type::fred on line 1 of input064.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input065.c ================================================ enum type redeclared::fred on line 2 of input065.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input066.c ================================================ enum value redeclared::z on line 2 of input066.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input068.c ================================================ redefinition of typedef:FOO on line 2 of input068.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input069.c ================================================ unknown type:FLOO on line 2 of input069.c 
================================================ FILE: 57_Mop_up_pt3/tests/err.input072.c ================================================ no loop or switch to break out from on line 1 of input072.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input073.c ================================================ no loop to continue to on line 1 of input073.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input075.c ================================================ Unexpected token in switch:if on line 4 of input075.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input076.c ================================================ No cases in switch on line 3 of input076.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input077.c ================================================ case or default after existing default on line 6 of input077.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input078.c ================================================ case or default after existing default on line 6 of input078.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input079.c ================================================ Duplicate case value on line 6 of input079.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input085.c ================================================ Bad type in parameter list on line 1 of input085.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input086.c ================================================ Function definition not at global level on line 3 of input086.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input087.c ================================================ Bad type in member list on line 4 of input087.c ================================================ FILE: 
57_Mop_up_pt3/tests/err.input092.c ================================================ Type mismatch: literal vs. variable on line 1 of input092.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input093.c ================================================ Unknown variable or function:fred on line 1 of input093.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input094.c ================================================ Type mismatch: literal vs. variable on line 1 of input094.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input095.c ================================================ Variable can not be initialised:x on line 1 of input095.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input096.c ================================================ Array size is illegal:0 on line 1 of input096.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input097.c ================================================ For now, declaration of non-global arrays is not implemented on line 2 of input097.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input098.c ================================================ Too many values in initialisation list on line 1 of input098.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input102.c ================================================ Cannot cast to a struct, union or void type on line 3 of input102.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input103.c ================================================ Cannot cast to a struct, union or void type on line 3 of input103.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input104.c ================================================ Cannot cast to a struct, union or void type on line 2 of input104.c 
================================================ FILE: 57_Mop_up_pt3/tests/err.input105.c ================================================ Incompatible expression in assignment on line 4 of input105.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input118.c ================================================ Compiler doesn't support static or extern local declarations on line 2 of input118.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input124.c ================================================ Cannot ++ on rvalue on line 6 of input124.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input126.c ================================================ Unknown variable or function:ptr on line 7 of input126.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input129.c ================================================ Cannot ++ and/or -- more than once on line 6 of input129.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input141.c ================================================ Declaration of array parameters is not implemented on line 4 of input141.c ================================================ FILE: 57_Mop_up_pt3/tests/err.input142.c ================================================ Array must have non-zero elements:fred on line 1 of input142.c ================================================ FILE: 57_Mop_up_pt3/tests/input001.c ================================================ int printf(char *fmt); void main() { printf("%d\n", 12 * 3); printf("%d\n", 18 - 2 * 4); printf("%d\n", 1 + 2 + 9 - 5/2 + 3*5); } ================================================ FILE: 57_Mop_up_pt3/tests/input002.c ================================================ int printf(char *fmt); void main() { int fred; int jim; fred= 5; jim= 12; printf("%d\n", fred + jim); } ================================================ FILE: 
57_Mop_up_pt3/tests/input003.c ================================================ int printf(char *fmt); void main() { int x; x= 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); } ================================================ FILE: 57_Mop_up_pt3/tests/input004.c ================================================ int printf(char *fmt); void main() { int x; x= 7 < 9; printf("%d\n", x); x= 7 <= 9; printf("%d\n", x); x= 7 != 9; printf("%d\n", x); x= 7 == 7; printf("%d\n", x); x= 7 >= 7; printf("%d\n", x); x= 7 <= 7; printf("%d\n", x); x= 9 > 7; printf("%d\n", x); x= 9 >= 7; printf("%d\n", x); x= 9 != 7; printf("%d\n", x); } ================================================ FILE: 57_Mop_up_pt3/tests/input005.c ================================================ int printf(char *fmt); void main() { int i; int j; i=6; j=12; if (i < j) { printf("%d\n", i); } else { printf("%d\n", j); } } ================================================ FILE: 57_Mop_up_pt3/tests/input006.c ================================================ int printf(char *fmt); void main() { int i; i=1; while (i <= 10) { printf("%d\n", i); i= i + 1; } } ================================================ FILE: 57_Mop_up_pt3/tests/input007.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 57_Mop_up_pt3/tests/input008.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 57_Mop_up_pt3/tests/input009.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { printf("%d\n", 2 * b - a); 
} } ================================================ FILE: 57_Mop_up_pt3/tests/input010.c ================================================ int printf(char *fmt); void main() { int i; char j; j= 20; printf("%d\n", j); i= 10; printf("%d\n", i); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 2; j= j + 1) { printf("%d\n", j); } } ================================================ FILE: 57_Mop_up_pt3/tests/input011.c ================================================ int printf(char *fmt); int main() { int i; char j; long k; i= 10; printf("%d\n", i); j= 20; printf("%d\n", j); k= 30; printf("%d\n", k); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 4; j= j + 1) { printf("%d\n", j); } for (k= 1; k <= 5; k= k + 1) { printf("%d\n", k); } return(i); printf("%d\n", 12345); return(3); } ================================================ FILE: 57_Mop_up_pt3/tests/input012.c ================================================ int printf(char *fmt); int fred() { return(5); } void main() { int x; x= fred(2); printf("%d\n", x); } ================================================ FILE: 57_Mop_up_pt3/tests/input013.c ================================================ int printf(char *fmt); int fred() { return(56); } void main() { int dummy; int result; dummy= printf("%d\n", 23); result= fred(10); dummy= printf("%d\n", result); } ================================================ FILE: 57_Mop_up_pt3/tests/input014.c ================================================ int printf(char *fmt); int fred() { return(20); } int main() { int result; printf("%d\n", 10); result= fred(15); printf("%d\n", result); printf("%d\n", fred(15)+10); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input015.c ================================================ int printf(char *fmt); int main() { char a; char *b; char c; int d; int *e; int f; a= 18; printf("%d\n", a); b= &a; c= *b; printf("%d\n", c); d= 12; printf("%d\n", d); e= &d; f= *e; 
printf("%d\n", f); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input016.c ================================================ int printf(char *fmt); int c; int d; int *e; int f; int main() { c= 12; d=18; printf("%d\n", c); e= &c + 1; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input017.c ================================================ int printf(char *fmt); int main() { char a; char *b; int d; int *e; b= &a; *b= 19; printf("%d\n", a); e= &d; *e= 12; printf("%d\n", d); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input018.c ================================================ int printf(char *fmt); int main() { int a; int b; a= b= 34; printf("%d\n", a); printf("%d\n", b); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input018a.c ================================================ int printf(char *fmt); int a; int *b; char c; char *d; int main() { b= &a; *b= 15; printf("%d\n", a); d= &c; *d= 16; printf("%d\n", c); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input019.c ================================================ int printf(char *fmt); int a; int b; int c; int d; int e; int main() { a= 2; b= 4; c= 3; d= 2; e= (a+b) * (c+d); printf("%d\n", e); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input020.c ================================================ int printf(char *fmt); int a; int b[25]; int main() { b[3]= 12; a= b[3]; printf("%d\n", a); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input021.c ================================================ int printf(char *fmt); char c; char *str; int main() { c= '\n'; printf("%d\n", c); for (str= "Hello world\n"; *str != 0; str= str + 1) { printf("%c", *str); } return(0); } 
================================================ FILE: 57_Mop_up_pt3/tests/input022.c ================================================ int printf(char *fmt); char a; char b; char c; int d; int e; int f; long g; long h; long i; int main() { b= 5; c= 7; a= b + c++; printf("%d\n", a); e= 5; f= 7; d= e + f++; printf("%d\n", d); h= 5; i= 7; g= h + i++; printf("%d\n", g); a= b-- + c; printf("%d\n", a); d= e-- + f; printf("%d\n", d); g= h-- + i; printf("%d\n", g); a= ++b + c; printf("%d\n", a); d= ++e + f; printf("%d\n", d); g= ++h + i; printf("%d\n", g); a= b * --c; printf("%d\n", a); d= e * --f; printf("%d\n", d); g= h * --i; printf("%d\n", g); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input023.c ================================================ int printf(char *fmt); char *str; int x; int main() { x= -23; printf("%d\n", x); printf("%d\n", -10 * -10); x= 1; x= ~x; printf("%d\n", x); x= 2 > 5; printf("%d\n", x); x= !x; printf("%d\n", x); x= !x; printf("%d\n", x); x= 13; if (x) { printf("%d\n", 13); } x= 0; if (!x) { printf("%d\n", 14); } for (str= "Hello world\n"; *str; str++) { printf("%c", *str); } return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input024.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { a= 42; b= 19; printf("%d\n", a & b); printf("%d\n", a | b); printf("%d\n", a ^ b); printf("%d\n", 1 << 3); printf("%d\n", 63 >> 3); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input025.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { char z; int y; int x; x= 10; y= 20; z= 30; printf("%d\n", x); printf("%d\n", y); printf("%d\n", z); a= 5; b= 15; c= 25; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input026.c 
================================================ int printf(char *fmt); int main(int a, char b, long c, int d, int e, int f, int g, int h) { int i; int j; int k; a= 13; printf("%d\n", a); b= 23; printf("%d\n", b); c= 34; printf("%d\n", c); d= 44; printf("%d\n", d); e= 54; printf("%d\n", e); f= 64; printf("%d\n", f); g= 74; printf("%d\n", g); h= 84; printf("%d\n", h); i= 94; printf("%d\n", i); j= 95; printf("%d\n", j); k= 96; printf("%d\n", k); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input027.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int param5(int a, int b, int c, int d, int e) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param2(int a, int b) { int c; int d; int e; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param0() { int a; int b; int c; int d; int e; a= 1; b= 2; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int main() { param8(1,2,3,4,5,6,7,8); param5(1,2,3,4,5); param2(1,2); param0(); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input028.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); 
return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input029.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h); int fred(int a, int b, int c); int main(); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input030.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input030.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 57_Mop_up_pt3/tests/input031.c ================================================ int printf(char *fmt); int main() { int x; x= 2 + + 3 - * / ; } ================================================ FILE: 57_Mop_up_pt3/tests/input032.c ================================================ int printf(char *fmt); int main() { pizza cow llama sausage; } ================================================ FILE: 57_Mop_up_pt3/tests/input033.c ================================================ int printf(char *fmt); int main() { char *z; return(z); } ================================================ FILE: 57_Mop_up_pt3/tests/input035.c ================================================ int printf(char *fmt); int fred(int a, int b) { int a; return(a); } 
================================================ FILE: 57_Mop_up_pt3/tests/input036.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c); ================================================ FILE: 57_Mop_up_pt3/tests/input037.c ================================================ int printf(char *fmt); int fred(int a, char b +, int z); ================================================ FILE: 57_Mop_up_pt3/tests/input038.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c, int g); ================================================ FILE: 57_Mop_up_pt3/tests/input039.c ================================================ int printf(char *fmt); int main() { int a; } ================================================ FILE: 57_Mop_up_pt3/tests/input040.c ================================================ int printf(char *fmt); int main() { int a; a= 5; } ================================================ FILE: 57_Mop_up_pt3/tests/input041.c ================================================ int printf(char *fmt); void fred() { return(5); } ================================================ FILE: 57_Mop_up_pt3/tests/input042.c ================================================ int printf(char *fmt); int main() { fred(5); } ================================================ FILE: 57_Mop_up_pt3/tests/input043.c ================================================ int printf(char *fmt); int main() { int a; a= b[4]; } ================================================ FILE: 57_Mop_up_pt3/tests/input044.c ================================================ int printf(char *fmt); int main() { int a; a= z; } ================================================ FILE: 57_Mop_up_pt3/tests/input045.c ================================================ int printf(char *fmt); int main() { int a; a= &5; } ================================================ FILE: 
57_Mop_up_pt3/tests/input046.c ================================================ int printf(char *fmt); int main() { int a; a= *5; } ================================================ FILE: 57_Mop_up_pt3/tests/input047.c ================================================ int printf(char *fmt); int main() { int a; a= ++5; } ================================================ FILE: 57_Mop_up_pt3/tests/input048.c ================================================ int printf(char *fmt); int main() { int a; a= --5; } ================================================ FILE: 57_Mop_up_pt3/tests/input049.c ================================================ int printf(char *fmt); int main() { int x; char y; y= x; } ================================================ FILE: 57_Mop_up_pt3/tests/input050.c ================================================ int printf(char *fmt); int main() { char *a; char *b; a= a + b; } ================================================ FILE: 57_Mop_up_pt3/tests/input051.c ================================================ int printf(char *fmt); int main() { char a; a= 'fred'; } ================================================ FILE: 57_Mop_up_pt3/tests/input052.c ================================================ int printf(char *fmt); int main() { int a; a= $5.00; } ================================================ FILE: 57_Mop_up_pt3/tests/input053.c ================================================ int printf(char *fmt); int main() { printf("Hello world, %d\n", 23); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input054.c ================================================ int printf(char *fmt); int main() { int i; for (i=0; i < 20; i++) { printf("Hello world, %d\n", i); } return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input055.c ================================================ int printf(char *fmt); int main(int argc, char **argv) { int i; char *argument; printf("Hello world\n"); for 
(i=0; i < argc; i++) { argument= *argv; argv= argv + 1; printf("Argument %d is %s\n", i, argument); } return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input056.c ================================================ struct foo { int x; }; struct mary var1; ================================================ FILE: 57_Mop_up_pt3/tests/input057.c ================================================ struct fred { int x; } ; struct fred { char y; } ; ================================================ FILE: 57_Mop_up_pt3/tests/input058.c ================================================ int printf(char *fmt); struct fred { int x; char y; long z; }; struct fred var2; struct fred *varptr; int main() { long result; var2.x= 12; printf("%d\n", var2.x); var2.y= 'c'; printf("%d\n", var2.y); var2.z= 4005; printf("%d\n", var2.z); result= var2.x + var2.y + var2.z; printf("%d\n", result); varptr= &var2; result= varptr->x + varptr->y + varptr->z; printf("%d\n", result); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input059.c ================================================ int main() { int x; x= y.foo; } ================================================ FILE: 57_Mop_up_pt3/tests/input060.c ================================================ int main() { int x; x= x.foo; } ================================================ FILE: 57_Mop_up_pt3/tests/input061.c ================================================ int main() { int x; x= x->foo; } ================================================ FILE: 57_Mop_up_pt3/tests/input062.c ================================================ int printf(char *fmt); union fred { char w; int x; int y; long z; }; union fred var1; union fred *varptr; int main() { var1.x= 65; printf("%d\n", var1.x); var1.x= 66; printf("%d\n", var1.x); printf("%d\n", var1.y); printf("The next two depend on the endian of the platform\n"); printf("%d\n", var1.w); printf("%d\n", var1.z); varptr= &var1; varptr->x= 67; 
printf("%d\n", varptr->x); printf("%d\n", varptr->y); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input063.c ================================================ int printf(char *fmt); enum fred { apple=1, banana, carrot, pear=10, peach, mango, papaya }; enum jane { aple=1, bnana, crrot, par=10, pech, mago, paaya }; enum fred var1; enum jane var2; enum fred var3; int main() { var1= carrot + pear + mango; printf("%d\n", var1); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input064.c ================================================ enum fred var3; ================================================ FILE: 57_Mop_up_pt3/tests/input065.c ================================================ enum fred { x, y, z }; enum fred { a, b }; ================================================ FILE: 57_Mop_up_pt3/tests/input066.c ================================================ enum fred { x, y, z }; enum mary { a, b, z }; ================================================ FILE: 57_Mop_up_pt3/tests/input067.c ================================================ int printf(char *fmt); typedef int FOO; FOO var1; struct bar { int x; int y} ; typedef struct bar BAR; BAR var2; int main() { var1= 5; printf("%d\n", var1); var2.x= 7; var2.y= 10; printf("%d\n", var2.x + var2.y); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input068.c ================================================ typedef int FOO; typedef char FOO; ================================================ FILE: 57_Mop_up_pt3/tests/input069.c ================================================ typedef int FOO; FLOO y; ================================================ FILE: 57_Mop_up_pt3/tests/input070.c ================================================ #include typedef int FOO; int main() { FOO x; x= 56; printf("%d\n", x); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input071.c 
================================================ #include int main() { int x; x = 0; while (x < 100) { if (x == 5) { x = x + 2; continue; } printf("%d\n", x); if (x == 14) { break; } x = x + 1; } printf("Done\n"); return (0); } ================================================ FILE: 57_Mop_up_pt3/tests/input072.c ================================================ int main() { break; } ================================================ FILE: 57_Mop_up_pt3/tests/input073.c ================================================ int main() { continue; } ================================================ FILE: 57_Mop_up_pt3/tests/input074.c ================================================ #include int main() { int x; int y; y= 0; for (x=0; x < 5; x++) { switch(x) { case 1: { y= 5; break; } case 2: { y= 7; break; } case 3: { y= 9; } default: { y= 100; } } printf("%d\n", y); } return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input075.c ================================================ int main() { int x; switch(x) { if (x<5); } } ================================================ FILE: 57_Mop_up_pt3/tests/input076.c ================================================ int main() { int x; switch(x) { } } ================================================ FILE: 57_Mop_up_pt3/tests/input077.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } case 4: { x= 2; } } } ================================================ FILE: 57_Mop_up_pt3/tests/input078.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } default: { x= 2; } } } ================================================ FILE: 57_Mop_up_pt3/tests/input079.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } case 2: { x= 2; } case 1: { x= 2; } default: { x= 2; } } } ================================================ FILE: 
57_Mop_up_pt3/tests/input080.c ================================================ #include int x; int y; int main() { for (x=0, y=1; x < 6; x++, y=y+2) { printf("%d %d\n", x, y); } return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input081.c ================================================ #include int x; int y; int main() { x= 0; y=1; for (;x<5;) { printf("%d %d\n", x, y); x=x+1; y=y+2; } return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input082.c ================================================ #include int main() { int x; x= 10; // Dangling else test if (x > 5) if (x > 15) printf("x > 15\n"); else printf("15 >= x > 5\n"); else printf("5 >= x\n"); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input083.c ================================================ #include int main() { int x; // Dangling else test. // We should not print anything for x<= 5 for (x=0; x < 12; x++) if (x > 5) if (x > 10) printf("10 < %2d\n", x); else printf(" 5 < %2d <= 10\n", x); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input084.c ================================================ #include int main() { int x, y; x=2; y=3; printf("%d %d\n", x, y); char a, *b; a= 'f'; b= &a; printf("%c %c\n", a, *b); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input085.c ================================================ int main(struct foo { int x; }; , int a) { return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input086.c ================================================ int main() { int fred() { return(5); } int x; x=2; return(x); } ================================================ FILE: 57_Mop_up_pt3/tests/input087.c ================================================ struct foo { int x; int y; struct blah { int g; }; }; ================================================ FILE: 
57_Mop_up_pt3/tests/input088.c ================================================ #include struct foo { int x; int y; } fred, mary; struct foo james; int main() { fred.x= 5; mary.y= 6; printf("%d %d\n", fred.x, mary.y); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input089.c ================================================ #include int x= 23; char y= 'H'; char *z= "Hello world"; int main() { printf("%d %c %s\n", x, y, z); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input090.c ================================================ #include int a = 23, b = 100; char y = 'H', *z = "Hello world"; int main() { printf("%d %d %c %s\n", a, b, y, z); return (0); } ================================================ FILE: 57_Mop_up_pt3/tests/input091.c ================================================ #include int fred[] = { 1, 2, 3, 4, 5 }; int jim[10] = { 1, 2, 3, 4, 5 }; int main() { int i; for (i=0; i < 5; i++) printf("%d\n", fred[i]); for (i=0; i < 10; i++) printf("%d\n", jim[i]); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input092.c ================================================ char x= 3000; ================================================ FILE: 57_Mop_up_pt3/tests/input093.c ================================================ char x= fred; ================================================ FILE: 57_Mop_up_pt3/tests/input094.c ================================================ char *s= 54; ================================================ FILE: 57_Mop_up_pt3/tests/input095.c ================================================ int fred(int x=2) { return(x); } ================================================ FILE: 57_Mop_up_pt3/tests/input096.c ================================================ int fred[0]; ================================================ FILE: 57_Mop_up_pt3/tests/input098.c ================================================ int fred[3]= { 1, 2, 3, 
4, 5 }; ================================================ FILE: 57_Mop_up_pt3/tests/input099.c ================================================ #include // List of token strings, for debugging purposes. // As yet, we can't store a NULL into the list char *Tstring[] = { "EOF", "=", "||", "&&", "|", "^", "&", "==", "!=", ",", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", "default", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":", "" }; int main() { int i; char *str; i=0; while (1) { str= Tstring[i]; if (*str == 0) break; printf("%s\n", str); i++; } return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input100.c ================================================ #include int main() { int x= 3, y=14; int z= 2 * x + y; char *str= "Hello world"; printf("%s %d %d\n", str, x+y, z); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input101.c ================================================ #include int main() { int x= 65535; char y; char *str; y= (char )x; printf("0x%x\n", y); str= (char *)0; printf("0x%lx\n", (long)str); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input102.c ================================================ int main() { struct foo { int p; }; int y= (struct foo) x; } ================================================ FILE: 57_Mop_up_pt3/tests/input103.c ================================================ int main() { union foo { int p; }; int y= (union foo) x; } ================================================ FILE: 57_Mop_up_pt3/tests/input104.c ================================================ int main() { int y= (void) x; } ================================================ FILE: 57_Mop_up_pt3/tests/input105.c 
================================================ int main() { int x; char *y; y= (char) x; } ================================================ FILE: 57_Mop_up_pt3/tests/input106.c ================================================ #include char *y= (char *)0; int main() { printf("0x%lx\n", (long)y); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input107.c ================================================ #include char *y[] = { "fish", "cow", NULL }; char *z= NULL; int main() { int i; char *ptr; for (i=0; i < 3; i++) { ptr= y[i]; if (ptr != (char *)0) printf("%s\n", y[i]); else printf("NULL\n"); } return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input108.c ================================================ int main() { char *str= (void *)0; return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input109.c ================================================ #include int main() { int x= 17 - 1; printf("%d\n", x); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input110.c ================================================ #include int x; int y; int main() { x= 3; y= 15; y += x; printf("%d\n", y); x= 3; y= 15; y -= x; printf("%d\n", y); x= 3; y= 15; y *= x; printf("%d\n", y); x= 3; y= 15; y /= x; printf("%d\n", y); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input111.c ================================================ #include int main() { int x= 2000 + 3 + 4 * 5 + 6; printf("%d\n", x); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input112.c ================================================ #include char* y = NULL; int x= 10 + 6; int fred [ 2 + 3 ]; int main() { fred[2]= x; printf("%d\n", fred[2]); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input113.c ================================================ 
#include void fred(void); void fred(void) { printf("fred says hello\n"); } int main(void) { fred(); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input114.c ================================================ #include int main() { int x= 0x4a; printf("%c\n", x); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input115.c ================================================ #include struct foo { int x; char y; long z; }; typedef struct foo blah; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol struct symtable *ctype; // If struct/union, ptr to that type int stype; // Structural type for the symbol int class; // Storage class for the symbol int size; // Total size in bytes of this symbol int nelems; // Functions: # params. Arrays: # elements #define st_endlabel st_posn // For functions, the end label int st_posn; // For locals, the negative offset // from the stack base pointer int *initlist; // List of initial values struct symtable *next; // Next symbol in one list struct symtable *member; // First member of a function, struct, }; // union or enum // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates int rvalue; // True if the node is an rvalue struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; struct symtable *sym; // For many AST nodes, the pointer to // the symbol in the symbol table #define a_intvalue a_size // For A_INTLIT, the integer value int a_size; // For A_SCALE, the size to scale by }; int main() { printf("%ld\n", sizeof(char)); printf("%ld\n", sizeof(int)); printf("%ld\n", sizeof(long)); printf("%ld\n", sizeof(char *)); printf("%ld\n", sizeof(blah)); printf("%ld\n", sizeof(struct symtable)); printf("%ld\n", sizeof(struct ASTnode)); return(0); } 
================================================ FILE: 57_Mop_up_pt3/tests/input116.c ================================================ #include static int counter=0; static int fred(void) { return(counter++); } int main(void) { int i; for (i=0; i < 5; i++) printf("%d\n", fred()); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input117.c ================================================ #include static char *fred(void) { return("Hello"); } int main(void) { printf("%s\n", fred()); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input118.c ================================================ int main(void) { static int x; } ================================================ FILE: 57_Mop_up_pt3/tests/input119.c ================================================ #include int x; int y= 3; int main() { x= y != 3 ? 6 : 8; printf("%d\n", x); x= (y == 3) ? 6 : 8; printf("%d\n", x); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input120.c ================================================ #include int x; int y= 3; int main() { for (y= 0; y < 10; y++) { x= y > 4 ? y + 2 : y + 9; printf("%d\n", x); } return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input121.c ================================================ #include int x; int y= 3; int main() { for (y= 0; y < 10; y++) { x= (y < 4) ? y + 2 : (y > 7) ? 
1000 : y + 9; printf("%d\n", x); } return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input122.c ================================================ #include int x, y, z1, z2; int main() { for (x= 0; x <= 1; x++) { for (y= 0; y <= 1; y++) { z1= x || y; z2= x && y; printf("x %d, y %d, x || y %d, x && y %d\n", x, y, z1, z2); } } //z= x || y; return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input123.c ================================================ #include int main() { int x; for (x=0; x < 20; x++) switch(x) { case 2: case 3: case 5: case 7: case 11: printf("%2d infant prime\n", x); break; case 13: case 17: case 19: printf("%2d teen prime\n", x); break; case 0: case 1: case 4: case 6: case 8: case 9: case 10: case 12: printf("%2d infant composite\n", x); break; default: printf("%2d teen composite\n", x); break; } return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input124.c ================================================ #include int ary[5]; int main() { ary++; return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input125.c ================================================ #include int ary[5]; int *ptr; int x; int main() { ary[3]= 2008; ptr= ary; // Load ary's address into ptr x= ary[3]; printf("%d\n", x); x= ptr[3]; printf("%d\n", x); // Treat ptr as an array return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input126.c ================================================ #include int ary[5]; int main() { ary[3]= 2008; ptr= &ary; return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input127.c ================================================ #include int ary[5]; void fred(int *ptr) { // Receive a pointer printf("%d\n", ptr[3]); } int main() { ary[3]= 2008; printf("%d\n", ary[3]); fred(ary); // Pass ary as a pointer return(0); } 
================================================ FILE: 57_Mop_up_pt3/tests/input128.c ================================================ #include struct foo { int val; struct foo *next; }; struct foo head, mid, tail; int main() { struct foo *ptr; tail.val= 20; tail.next= NULL; mid.val= 15; mid.next= &tail; head.val= 10; head.next= &mid; ptr= &head; printf("%d %d\n", head.val, ptr->val); printf("%d %d\n", mid.val, ptr->next->val); printf("%d %d\n", tail.val, ptr->next->next->val); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input129.c ================================================ #include int x= 6; int main() { printf("%d\n", x++ ++); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input130.c ================================================ #include char *x= "foo"; int main() { printf("Hello " "world" "\n"); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input131.c ================================================ #include void donothing() { } int main() { int x=0; printf("Doing nothing... 
"); donothing(); printf("nothing done\n"); while (++x < 100) ; printf("x is now %d\n", x); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input132.c ================================================ extern int fred; int fred; int mary; extern int mary; int main() { return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input133.c ================================================ #include extern int fred[]; int fred[23]; char mary[100]; extern char mary[]; void main() { printf("OK\n"); } ================================================ FILE: 57_Mop_up_pt3/tests/input134.c ================================================ #include char y = 'a'; char *x; int main() { x= &y; if (x && y == 'a') printf("1st match\n"); x= NULL; if (x && y == 'a') printf("2nd match\n"); x= &y; y='b'; if (x && y == 'a') printf("3rd match\n"); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input135.c ================================================ #include void fred() { int x= 5; printf("testing x\n"); if (x > 4) return; printf("x below 5\n"); } int main() { fred(); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input136.c ================================================ #include int add(int x, int y) { return(x+y); } int main() { int result; result= 3 * add(2,3) - 5 * add(4,6); printf("%d\n", result); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input137.c ================================================ #include int a=1, b=2, c=3, d=4, e=5, f=6, g=7, h=8; int main() { int x; x= ((((((a + b) + c) + d) + e) + f) + g) + h; x= a + (b + (c + (d + (e + (f + (g + h)))))); printf("x is %d\n", x); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input138.c ================================================ #include int x, y, z; int a=1; int *aptr; int main() { // See 
if generic AND works for (x=0; x <= 1; x++) for (y=0; y <= 1; y++) { z= x && y; printf("%d %d | %d\n", x, y, z); } // See if generic AND works for (x=0; x <= 1; x++) for (y=0; y <= 1; y++) { z= x || y; printf("%d %d | %d\n", x, y, z); } // Now some lazy evaluation aptr= NULL; if (aptr && *aptr == 1) printf("aptr points at 1\n"); else printf("aptr is NULL or doesn't point at 1\n"); aptr= &a; if (aptr && *aptr == 1) printf("aptr points at 1\n"); else printf("aptr is NULL or doesn't point at 1\n"); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input139.c ================================================ #include int same(int x) { return(x); } int main() { int a= 3; if (same(a) && same(a) >= same(a)) printf("same apparently\n"); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input140.c ================================================ #include int main() { int i; int ary[5]; char z; // Write below the array z= 'H'; // Fill the array for (i=0; i < 5; i++) ary[i]= i * i; // Write above the array i=14; // Print out the array for (i=0; i < 5; i++) printf("%d\n", ary[i]); // See if either side is OK printf("%d %c\n", i, z); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input141.c ================================================ static int fred[5]; int jim; int foo(int mary[6]) { return(5); } ================================================ FILE: 57_Mop_up_pt3/tests/input142.c ================================================ static int fred[]; int jim; ================================================ FILE: 57_Mop_up_pt3/tests/input143.c ================================================ #include char foo; char *a, *b, *c; int main() { a= b= c= NULL; if (a==NULL || b==NULL || c==NULL) printf("One of the three is NULL\n"); a= &foo; if (a==NULL || b==NULL || c==NULL) printf("One of the three is NULL\n"); b= &foo; if (a==NULL || b==NULL || c==NULL) printf("One 
of the three is NULL\n"); c= &foo; if (a==NULL || b==NULL || c==NULL) printf("One of the three is NULL\n"); else printf("All three are non-NULL\n"); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input144.c ================================================ #include #include #include char *filename= "fred"; int main() { fprintf(stdout, "Unable to open %s: %s\n", filename, strerror(errno)); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/input145.c ================================================ #include char *str= "qwertyuiop"; int list[]= {3, 5, 7, 9, 11, 13, 15}; int *lptr; int main() { printf("%c\n", *str); str= str + 1; printf("%c\n", *str); str += 1; printf("%c\n", *str); str += 1; printf("%c\n", *str); str -= 1; printf("%c\n", *str); lptr= list; printf("%d\n", *lptr); lptr= lptr + 1; printf("%d\n", *lptr); lptr += 1; printf("%d\n", *lptr); lptr += 1; printf("%d\n", *lptr); lptr -= 1; printf("%d\n", *lptr); return(0); } ================================================ FILE: 57_Mop_up_pt3/tests/mktests ================================================ #!/bin/sh # Make the output files for each test # Build our compiler if needed if [ ! -f ../cwj ] then (cd ..; make install) fi for i in input*c do if [ ! -f "out.$i" -a ! -f "err.$i" ] then ../cwj -o out $i 2> "err.$i" # If the err file is empty if [ ! 
-s "err.$i" ] then rm -f "err.$i" cc -o out $i ./out > "out.$i" fi fi rm -f out out.s done ================================================ FILE: 57_Mop_up_pt3/tests/nasmext.inc ================================================ extern printint extern printchar extern open extern close extern read extern write extern printf extern fprintf extern stdout extern strerror extern __errno_location ================================================ FILE: 57_Mop_up_pt3/tests/out.input001.c ================================================ 36 10 25 ================================================ FILE: 57_Mop_up_pt3/tests/out.input002.c ================================================ 17 ================================================ FILE: 57_Mop_up_pt3/tests/out.input003.c ================================================ 1 2 3 4 5 ================================================ FILE: 57_Mop_up_pt3/tests/out.input004.c ================================================ 1 1 1 1 1 1 1 1 1 ================================================ FILE: 57_Mop_up_pt3/tests/out.input005.c ================================================ 6 ================================================ FILE: 57_Mop_up_pt3/tests/out.input006.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 57_Mop_up_pt3/tests/out.input007.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 57_Mop_up_pt3/tests/out.input008.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 57_Mop_up_pt3/tests/out.input009.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 57_Mop_up_pt3/tests/out.input010.c ================================================ 20 10 1 2 3 4 5 253 254 255 0 1 ================================================ FILE: 
57_Mop_up_pt3/tests/out.input011.c ================================================ 10 20 30 1 2 3 4 5 253 254 255 0 1 2 3 1 2 3 4 5 ================================================ FILE: 57_Mop_up_pt3/tests/out.input012.c ================================================ 5 ================================================ FILE: 57_Mop_up_pt3/tests/out.input013.c ================================================ 23 56 ================================================ FILE: 57_Mop_up_pt3/tests/out.input014.c ================================================ 10 20 30 ================================================ FILE: 57_Mop_up_pt3/tests/out.input015.c ================================================ 18 18 12 12 ================================================ FILE: 57_Mop_up_pt3/tests/out.input016.c ================================================ 12 18 ================================================ FILE: 57_Mop_up_pt3/tests/out.input017.c ================================================ 19 12 ================================================ FILE: 57_Mop_up_pt3/tests/out.input018.c ================================================ 34 34 ================================================ FILE: 57_Mop_up_pt3/tests/out.input018a.c ================================================ 15 16 ================================================ FILE: 57_Mop_up_pt3/tests/out.input019.c ================================================ 30 ================================================ FILE: 57_Mop_up_pt3/tests/out.input020.c ================================================ 12 ================================================ FILE: 57_Mop_up_pt3/tests/out.input021.c ================================================ 10 Hello world ================================================ FILE: 57_Mop_up_pt3/tests/out.input022.c ================================================ 12 12 12 13 13 13 13 13 13 35 35 35 ================================================ FILE: 
57_Mop_up_pt3/tests/out.input023.c ================================================ -23 100 -2 0 1 0 13 14 Hello world ================================================ FILE: 57_Mop_up_pt3/tests/out.input024.c ================================================ 2 59 57 8 7 ================================================ FILE: 57_Mop_up_pt3/tests/out.input025.c ================================================ 10 20 30 5 15 25 ================================================ FILE: 57_Mop_up_pt3/tests/out.input026.c ================================================ 13 23 34 44 54 64 74 84 94 95 96 ================================================ FILE: 57_Mop_up_pt3/tests/out.input027.c ================================================ 1 2 3 4 5 6 7 8 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 ================================================ FILE: 57_Mop_up_pt3/tests/out.input028.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 57_Mop_up_pt3/tests/out.input029.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 57_Mop_up_pt3/tests/out.input030.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input030.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 57_Mop_up_pt3/tests/out.input053.c ================================================ Hello world, 23 ================================================ FILE: 57_Mop_up_pt3/tests/out.input054.c ================================================ Hello world, 0 Hello world, 1 Hello world, 2 Hello world, 3 Hello world, 4 Hello world, 5 Hello world, 
6 Hello world, 7 Hello world, 8 Hello world, 9 Hello world, 10 Hello world, 11 Hello world, 12 Hello world, 13 Hello world, 14 Hello world, 15 Hello world, 16 Hello world, 17 Hello world, 18 Hello world, 19 ================================================ FILE: 57_Mop_up_pt3/tests/out.input055.c ================================================ Hello world Argument 0 is ./out ================================================ FILE: 57_Mop_up_pt3/tests/out.input058.c ================================================ 12 99 4005 4116 4116 ================================================ FILE: 57_Mop_up_pt3/tests/out.input062.c ================================================ 65 66 66 The next two depend on the endian of the platform 66 66 67 67 ================================================ FILE: 57_Mop_up_pt3/tests/out.input063.c ================================================ 25 ================================================ FILE: 57_Mop_up_pt3/tests/out.input067.c ================================================ 5 17 ================================================ FILE: 57_Mop_up_pt3/tests/out.input070.c ================================================ 56 ================================================ FILE: 57_Mop_up_pt3/tests/out.input071.c ================================================ 0 1 2 3 4 7 8 9 10 11 12 13 14 Done ================================================ FILE: 57_Mop_up_pt3/tests/out.input074.c ================================================ 100 5 7 100 100 ================================================ FILE: 57_Mop_up_pt3/tests/out.input080.c ================================================ 0 1 1 3 2 5 3 7 4 9 5 11 ================================================ FILE: 57_Mop_up_pt3/tests/out.input081.c ================================================ 0 1 1 3 2 5 3 7 4 9 ================================================ FILE: 57_Mop_up_pt3/tests/out.input082.c ================================================ 15 >= x > 5 
================================================ FILE: 57_Mop_up_pt3/tests/out.input083.c ================================================ 5 < 6 <= 10 5 < 7 <= 10 5 < 8 <= 10 5 < 9 <= 10 5 < 10 <= 10 10 < 11 ================================================ FILE: 57_Mop_up_pt3/tests/out.input084.c ================================================ 2 3 f f ================================================ FILE: 57_Mop_up_pt3/tests/out.input088.c ================================================ 5 6 ================================================ FILE: 57_Mop_up_pt3/tests/out.input089.c ================================================ 23 H Hello world ================================================ FILE: 57_Mop_up_pt3/tests/out.input090.c ================================================ 23 100 H Hello world ================================================ FILE: 57_Mop_up_pt3/tests/out.input091.c ================================================ 1 2 3 4 5 1 2 3 4 5 0 0 0 0 0 ================================================ FILE: 57_Mop_up_pt3/tests/out.input099.c ================================================ EOF = || && | ^ & == != , > <= >= << >> + - * / ++ -- ~ ! void char int long if else while for return struct union enum typedef extern break continue switch case default intlit strlit ; identifier { } ( ) [ ] , . 
-> : ================================================ FILE: 57_Mop_up_pt3/tests/out.input100.c ================================================ Hello world 17 20 ================================================ FILE: 57_Mop_up_pt3/tests/out.input101.c ================================================ 0xff 0x0 ================================================ FILE: 57_Mop_up_pt3/tests/out.input106.c ================================================ 0x0 ================================================ FILE: 57_Mop_up_pt3/tests/out.input107.c ================================================ fish cow NULL ================================================ FILE: 57_Mop_up_pt3/tests/out.input108.c ================================================ ================================================ FILE: 57_Mop_up_pt3/tests/out.input109.c ================================================ 16 ================================================ FILE: 57_Mop_up_pt3/tests/out.input110.c ================================================ 18 12 45 5 ================================================ FILE: 57_Mop_up_pt3/tests/out.input111.c ================================================ 2029 ================================================ FILE: 57_Mop_up_pt3/tests/out.input112.c ================================================ 16 ================================================ FILE: 57_Mop_up_pt3/tests/out.input113.c ================================================ fred says hello ================================================ FILE: 57_Mop_up_pt3/tests/out.input114.c ================================================ J ================================================ FILE: 57_Mop_up_pt3/tests/out.input115.c ================================================ 1 4 8 8 13 64 48 ================================================ FILE: 57_Mop_up_pt3/tests/out.input116.c ================================================ 0 1 2 3 4 ================================================ FILE: 
57_Mop_up_pt3/tests/out.input117.c ================================================ Hello ================================================ FILE: 57_Mop_up_pt3/tests/out.input119.c ================================================ 8 6 ================================================ FILE: 57_Mop_up_pt3/tests/out.input120.c ================================================ 9 10 11 12 13 7 8 9 10 11 ================================================ FILE: 57_Mop_up_pt3/tests/out.input121.c ================================================ 2 3 4 5 13 14 15 16 1000 1000 ================================================ FILE: 57_Mop_up_pt3/tests/out.input122.c ================================================ x 0, y 0, x || y 0, x && y 0 x 0, y 1, x || y 1, x && y 0 x 1, y 0, x || y 1, x && y 0 x 1, y 1, x || y 1, x && y 1 ================================================ FILE: 57_Mop_up_pt3/tests/out.input123.c ================================================ 0 infant composite 1 infant composite 2 infant prime 3 infant prime 4 infant composite 5 infant prime 6 infant composite 7 infant prime 8 infant composite 9 infant composite 10 infant composite 11 infant prime 12 infant composite 13 teen prime 14 teen composite 15 teen composite 16 teen composite 17 teen prime 18 teen composite 19 teen prime ================================================ FILE: 57_Mop_up_pt3/tests/out.input125.c ================================================ 2008 2008 ================================================ FILE: 57_Mop_up_pt3/tests/out.input127.c ================================================ 2008 2008 ================================================ FILE: 57_Mop_up_pt3/tests/out.input128.c ================================================ 10 10 15 15 20 20 ================================================ FILE: 57_Mop_up_pt3/tests/out.input130.c ================================================ Hello world ================================================ FILE: 
57_Mop_up_pt3/tests/out.input131.c ================================================ Doing nothing... nothing done x is now 100 ================================================ FILE: 57_Mop_up_pt3/tests/out.input132.c ================================================ ================================================ FILE: 57_Mop_up_pt3/tests/out.input133.c ================================================ OK ================================================ FILE: 57_Mop_up_pt3/tests/out.input134.c ================================================ 1st match ================================================ FILE: 57_Mop_up_pt3/tests/out.input135.c ================================================ testing x ================================================ FILE: 57_Mop_up_pt3/tests/out.input136.c ================================================ -35 ================================================ FILE: 57_Mop_up_pt3/tests/out.input137.c ================================================ x is 36 ================================================ FILE: 57_Mop_up_pt3/tests/out.input138.c ================================================ 0 0 | 0 0 1 | 0 1 0 | 0 1 1 | 1 0 0 | 0 0 1 | 1 1 0 | 1 1 1 | 1 aptr is NULL or doesn't point at 1 aptr points at 1 ================================================ FILE: 57_Mop_up_pt3/tests/out.input139.c ================================================ same apparently ================================================ FILE: 57_Mop_up_pt3/tests/out.input140.c ================================================ 0 1 4 9 16 5 H ================================================ FILE: 57_Mop_up_pt3/tests/out.input143.c ================================================ One of the three is NULL One of the three is NULL One of the three is NULL All three are non-NULL ================================================ FILE: 57_Mop_up_pt3/tests/out.input144.c ================================================ Unable to open fred: Success 
#!/bin/sh
# Run each test and compare
# against known good output.
#
# For every input*c file in the current directory:
#   - if "out.<file>" exists, compile the file with ../cwj, run the
#     resulting binary and compare its stdout against "out.<file>";
#   - otherwise, if "err.<file>" exists, compile the file and compare
#     the compiler's stderr against "err.<file>".
# A test only passes when cmp reports the two files as identical.

# Build our compiler if needed
if [ ! -f ../cwj ]
then (cd ..; make install)
fi

# Compare the known-good file $1 against the trial file $2 and
# finish the line that was started with the test's name.
# Any non-zero cmp status counts as a failure: status 1 means the
# files differ, status >1 means cmp itself had trouble (for example
# a file could not be read), which must not be reported as success.
check_output() {
  if cmp -s "$1" "$2"
  then echo ": OK"
  else echo ": failed"
       diff -c "$1" "$2"
       echo
  fi
}

# Try to use each input source file
for i in input*c
do
  # We can't do anything if there's no file to test against
  if [ ! -f "out.$i" ] && [ ! -f "err.$i" ]
  then echo "Can't run test on $i, no output file!"

  # Output file: compile the source, run it and
  # capture the output, and compare it against
  # the known-good output
  elif [ -f "out.$i" ]
  then
    # Print the test name without a newline. printf is used
    # because "echo -n" is not portable across /bin/sh variants.
    printf '%s' "$i"
    ../cwj -o out "$i"
    ./out > "trial.$i"
    check_output "out.$i" "trial.$i"

  # Error file: compile the source and
  # capture the error messages. Compare
  # against the known-bad output. Same
  # mechanism as before
  else
    printf '%s' "$i"
    ../cwj "$i" 2> "trial.$i"
    check_output "err.$i" "trial.$i"
  fi

  # Remove everything this test produced
  rm -f out out.s "trial.$i"
done
# Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../compn -o out $i ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../compn $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.o out.s "trial.$i" done ================================================ FILE: 57_Mop_up_pt3/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct symtable *ctype, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->type = type; n->ctype = ctype; n->left = left; n->mid = mid; n->right = right; n->sym = sym; n->a_intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, struct symtable *ctype, struct symtable *sym, int intvalue) { return (mkastnode(op, type, ctype, NULL, NULL, NULL, sym, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, int type, struct symtable *ctype, struct ASTnode 
*left, struct symtable *sym, int intvalue) { return (mkastnode(op, type, ctype, left, NULL, NULL, sym, intvalue)); } // Generate and return a new label number // just for AST dumping purposes static int dumpid = 1; static int gendumplabel(void) { return (dumpid++); } // Given an AST tree, print it out and follow the // traversal of the tree that genAST() follows void dumpAST(struct ASTnode *n, int label, int level) { int Lfalse, Lstart, Lend; int i; switch (n->op) { case A_IF: Lfalse = gendumplabel(); for (i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_IF"); if (n->right) { Lend = gendumplabel(); fprintf(stdout, ", end L%d", Lend); } fprintf(stdout, "\n"); dumpAST(n->left, Lfalse, level + 2); dumpAST(n->mid, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); return; case A_WHILE: Lstart = gendumplabel(); for (i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_WHILE, start L%d\n", Lstart); Lend = gendumplabel(); dumpAST(n->left, Lend, level + 2); dumpAST(n->right, NOLABEL, level + 2); return; } // Reset level to -2 for A_GLUE if (n->op == A_GLUE) level = -2; // General AST node handling if (n->left) dumpAST(n->left, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); for (i = 0; i < level; i++) fprintf(stdout, " "); switch (n->op) { case A_GLUE: fprintf(stdout, "\n\n"); return; case A_FUNCTION: fprintf(stdout, "A_FUNCTION %s\n", n->sym->name); return; case A_ADD: fprintf(stdout, "A_ADD\n"); return; case A_SUBTRACT: fprintf(stdout, "A_SUBTRACT\n"); return; case A_MULTIPLY: fprintf(stdout, "A_MULTIPLY\n"); return; case A_DIVIDE: fprintf(stdout, "A_DIVIDE\n"); return; case A_EQ: fprintf(stdout, "A_EQ\n"); return; case A_NE: fprintf(stdout, "A_NE\n"); return; case A_LT: fprintf(stdout, "A_LE\n"); return; case A_GT: fprintf(stdout, "A_GT\n"); return; case A_LE: fprintf(stdout, "A_LE\n"); return; case A_GE: fprintf(stdout, "A_GE\n"); return; case A_INTLIT: fprintf(stdout, "A_INTLIT %d\n", 
n->a_intvalue); return; case A_STRLIT: fprintf(stdout, "A_STRLIT rval label L%d\n", n->a_intvalue); return; case A_IDENT: if (n->rvalue) fprintf(stdout, "A_IDENT rval %s\n", n->sym->name); else fprintf(stdout, "A_IDENT %s\n", n->sym->name); return; case A_ASSIGN: fprintf(stdout, "A_ASSIGN\n"); return; case A_WIDEN: fprintf(stdout, "A_WIDEN\n"); return; case A_RETURN: fprintf(stdout, "A_RETURN\n"); return; case A_FUNCCALL: fprintf(stdout, "A_FUNCCALL %s\n", n->sym->name); return; case A_ADDR: fprintf(stdout, "A_ADDR %s\n", n->sym->name); return; case A_DEREF: if (n->rvalue) fprintf(stdout, "A_DEREF rval\n"); else fprintf(stdout, "A_DEREF\n"); return; case A_SCALE: fprintf(stdout, "A_SCALE %d\n", n->a_size); return; case A_PREINC: fprintf(stdout, "A_PREINC %s\n", n->sym->name); return; case A_PREDEC: fprintf(stdout, "A_PREDEC %s\n", n->sym->name); return; case A_POSTINC: fprintf(stdout, "A_POSTINC\n"); return; case A_POSTDEC: fprintf(stdout, "A_POSTDEC\n"); return; case A_NEGATE: fprintf(stdout, "A_NEGATE\n"); return; case A_BREAK: fprintf(stdout, "A_BREAK\n"); return; case A_CONTINUE: fprintf(stdout, "A_CONTINUE\n"); return; case A_CASE: fprintf(stdout, "A_CASE %d\n", n->a_intvalue); return; case A_DEFAULT: fprintf(stdout, "A_DEFAULT\n"); return; case A_SWITCH: fprintf(stdout, "A_SWITCH\n"); return; case A_CAST: fprintf(stdout, "A_CAST %d\n", n->type); return; case A_ASPLUS: fprintf(stdout, "A_ASPLUS\n"); return; case A_ASMINUS: fprintf(stdout, "A_ASMINUS\n"); return; case A_ASSTAR: fprintf(stdout, "A_ASSTAR\n"); return; case A_ASSLASH: fprintf(stdout, "A_ASSLASH\n"); return; case A_TOBOOL: fprintf(stdout, "A_TOBOOL\n"); return; case A_LOGOR: fprintf(stdout, "A_LOGOR\n"); return; case A_LOGAND: fprintf(stdout, "A_LOGAND\n"); return; default: fatald("Unknown dumpAST operator", n->op); } } ================================================ FILE: 57_Mop_up_pt3/types.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" 
// Types and type handling // Copyright (c) 2019 Warren Toomey, GPL3 // Return true if a type is an int type // of any size, false otherwise int inttype(int type) { return (((type & 0xf) == 0) && (type >= P_CHAR && type <= P_LONG)); } // Return true if a type is of pointer type int ptrtype(int type) { return ((type & 0xf) != 0); } // Given a primitive type, return // the type which is a pointer to it int pointer_to(int type) { if ((type & 0xf) == 0xf) fatald("Unrecognised in pointer_to: type", type); return (type + 1); } // Given a primitive pointer type, return // the type which it points to int value_at(int type) { if ((type & 0xf) == 0x0) fatald("Unrecognised in value_at: type", type); return (type - 1); } // Given a type and a composite type pointer, return // the size of this type in bytes int typesize(int type, struct symtable *ctype) { if (type == P_STRUCT || type == P_UNION) return (ctype->size); return (genprimsize(type)); } // Given an AST tree and a type which we want it to become, // possibly modify the tree by widening or scaling so that // it is compatible with this type. Return the original tree // if no changes occurred, a modified tree, or NULL if the // tree is not compatible with the given type. // If this will be part of a binary operation, the AST op is not zero. 
struct ASTnode *modify_type(struct ASTnode *tree, int rtype, struct symtable *rctype, int op) { int ltype; int lsize, rsize; ltype = tree->type; // For A_LOGOR and A_LOGAND, both types have to be int or pointer types if (op==A_LOGOR || op==A_LOGAND) { if (!inttype(ltype) && !ptrtype(ltype)) return(NULL); if (!inttype(ltype) && !ptrtype(rtype)) return(NULL); return (tree); } // XXX No idea on these yet if (ltype == P_STRUCT || ltype == P_UNION) fatal("Don't know how to do this yet"); if (rtype == P_STRUCT || rtype == P_UNION) fatal("Don't know how to do this yet"); // Compare scalar int types if (inttype(ltype) && inttype(rtype)) { // Both types same, nothing to do if (ltype == rtype) return (tree); // Get the sizes for each type lsize = typesize(ltype, NULL); rsize = typesize(rtype, NULL); // Tree's size is too big if (lsize > rsize) return (NULL); // Widen to the right if (rsize > lsize) return (mkastunary(A_WIDEN, rtype, NULL, tree, NULL, 0)); } // For pointers if (ptrtype(ltype) && ptrtype(rtype)) { // We can compare them if (op >= A_EQ && op <= A_GE) return (tree); // A comparison of the same type for a non-binary operation is OK, // or when the left tree is of `void *` type. 
if (op == 0 && (ltype == rtype || ltype == pointer_to(P_VOID))) return (tree); } // We can scale only on add and subtract operations if (op == A_ADD || op == A_SUBTRACT || op == A_ASPLUS || op == A_ASMINUS) { // Left is int type, right is pointer type and the size // of the original type is >1: scale the left if (inttype(ltype) && ptrtype(rtype)) { rsize = genprimsize(value_at(rtype)); if (rsize > 1) return (mkastunary(A_SCALE, rtype, rctype, tree, NULL, rsize)); else return (tree); // Size 1, no need to scale } } // If we get here, the types are not compatible return (NULL); } ================================================ FILE: 58_Ptr_Increments/Makefile ================================================ # Define the location of the include directory # and the location to install the compiler binary INCDIR=/tmp/include BINDIR=/tmp HSRCS= data.h decl.h defs.h incdir.h SRCS= cg.c decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c ARMSRCS= cg_arm.c decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c cwj: $(SRCS) $(HSRCS) cc -o cwj -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCS) compn: $(SRCN) $(HSRCS) cc -D__NASM__ -o compn -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCN) cwjarm: $(ARMSRCS) $(HSRCS) cc -o cwjarm -g -Wall $(ARMSRCS) cp cwjarm cwj incdir.h: echo "#define INCDIR \"$(INCDIR)\"" > incdir.h install: cwj mkdir -p $(INCDIR) rsync -a include/. $(INCDIR) cp cwj $(BINDIR) chmod +x $(BINDIR)/cwj installn: compn mkdir -p $(INCDIR) rsync -a include/. 
$(INCDIR) cp compn $(BINDIR) chmod +x $(BINDIR)/compn clean: rm -f cwj cwjarm compn *.o *.s out a.out incdir.h test: install tests/runtests (cd tests; chmod +x runtests; ./runtests) armtest: cwjarm tests/runtests (cd tests; chmod +x runtests; ./runtests) testn: installn tests/runtestsn (cd tests; chmod +x runtestsn; ./runtestsn) ================================================ FILE: 58_Ptr_Increments/Readme.md ================================================ # Part 58: Fixing Pointer Increments/Decrements In the last part of our compiler writing journey, I mentioned that there was a problem with pointer increments and decrements. Let's see what the problem is and how I fixed it. We saw with the AST operations A_ADD, A_SUBTRACT, A_ASPLUS and A_ASMINUS where one operand is a pointer and the other is an integer type, we need to scale the integer value by the size of the type that the pointer points at. In `modify_type()` in `types.c`: ```c // We can scale only on add and subtract operations if (op == A_ADD || op == A_SUBTRACT || op == A_ASPLUS || op == A_ASMINUS) { // Left is int type, right is pointer type and the size // of the original type is >1: scale the left if (inttype(ltype) && ptrtype(rtype)) { rsize = genprimsize(value_at(rtype)); if (rsize > 1) return (mkastunary(A_SCALE, rtype, rctype, tree, NULL, rsize)); else return (tree); // Size 1, no need to scale } } ``` But this scaling doesn't occur when we use `++` or `--`, either as preincrement/decrement or postincrement/decrement operators. Here, we simply strap an A_PREINC, A_PREDEC, A_POSTINC or A_POSTDEC AST node to the AST tree that we are operating on, and then leave it to the code generator to deal with the situation. Up to now, this got resolved when we call either `cgloadglob()` or `cgloadlocal()` in `cg.c` to load the value of a global or local variable. For example: ```c int cgloadglob(struct symtable *sym, int op) { ... 
if (cgprimsize(sym->type) == 8) { if (op == A_PREINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name); ... fprintf(Outfile, "\tmovq\t%s(%%rip), %s\n", sym->name, reglist[r]); if (op == A_POSTINC) fprintf(Outfile, "\tincq\t%s(%%rip)\n", sym->name); } ... } ``` Note, however, that the `incq` increments by one. That's fine if the variable we are incrementing is of integer type, but it fails to deal with variables that are of pointer type. As well, the functions `cgloadglob()` and `cgloadlocal()` are very similar. They differ in what instructions we use to access the variable: is it at a fixed location, or a location relative to the stack frame. ## Fixing the Problem For a while I thought I could get the parser to build an AST tree similar to the one that `modify_type()` does, but I gave up on that. Thank goodness. I decided that, as `++` and `--` are already being done in `cgloadglob()`, that I should attack the problem here. Halfway through, I realised that I could merge `cgloadglob()` and `cgloadlocal()` into a single function. Let's look at the solution in stages. ```c // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadvar(struct symtable *sym, int op) { int r, postreg, offset=1; // Get a new register r = alloc_register(); // If the symbol is a pointer, use the size // of the type that it points to as any // increment or decrement. If not, it's one. if (ptrtype(sym->type)) offset= typesize(value_at(sym->type), sym->ctype); ``` We start by assuming that we will be doing +1 as an increment. However, once we realise that we could be incrementing a pointer, we change this to the size of the type that it points to. ```c // Negate the offset for decrements if (op==A_PREDEC || op==A_POSTDEC) offset= -offset; ``` Now the `offset` is negative if we are going to do a decrement.
```c // If we have a pre-operation if (op==A_PREINC || op==A_PREDEC) { // Load the symbol's address if (sym->class == C_LOCAL || sym->class == C_PARAM) fprintf(Outfile, "\tleaq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); else fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", sym->name, reglist[r]); ``` This is where our algorithm differs from the old code. The old code used the `incq` instruction, but that limits the variable change to exactly one. Now that we have the variable's address in our register... ```c // and change the value at that address switch (sym->size) { case 1: fprintf(Outfile, "\taddb\t$%d,(%s)\n", offset, reglist[r]); break; case 4: fprintf(Outfile, "\taddl\t$%d,(%s)\n", offset, reglist[r]); break; case 8: fprintf(Outfile, "\taddq\t$%d,(%s)\n", offset, reglist[r]); break; } } ``` we can add the offset on to the variable, using the register as a pointer to the variable. We have to use different instructions based on the size of the variable. We've done any pre-increment or pre-decrement operation. Now we can load the variable's value into a register: ```c // Now load the output register with the value if (sym->class == C_LOCAL || sym->class == C_PARAM) { switch (sym->size) { case 1: fprintf(Outfile, "\tmovzbq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); break; case 4: fprintf(Outfile, "\tmovslq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); break; case 8: fprintf(Outfile, "\tmovq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); } } else { switch (sym->size) { case 1: fprintf(Outfile, "\tmovzbq\t%s(%%rip), %s\n", sym->name, reglist[r]); break; case 4: fprintf(Outfile, "\tmovslq\t%s(%%rip), %s\n", sym->name, reglist[r]); break; case 8: fprintf(Outfile, "\tmovq\t%s(%%rip), %s\n", sym->name, reglist[r]); } } ``` Depending on whether the symbol is local or global, we load from a location relative to the frame pointer or from a named location. We choose an instruction that zero-extends or sign-extends the value to 64 bits based on the symbol's size. The value is safely in register `r`.
But now we need to do any post-increment or post-decrement. We can re-use the pre-op code, but we'll need a new register: ```c // If we have a post-operation, get a new register if (op==A_POSTINC || op==A_POSTDEC) { postreg = alloc_register(); // Same code as before, but using postreg // and free the register free_register(postreg); } // Return the register with the value return(r); } ``` So the code for `cgloadvar()` is about as complex as the old code, but it now deals with pointer increments. The `tests/input145.c` test program verifies that this new code works: ```c int list[]= {3, 5, 7, 9, 11, 13, 15}; int *lptr; int main() { lptr= list; printf("%d\n", *lptr); lptr= lptr + 1; printf("%d\n", *lptr); lptr += 1; printf("%d\n", *lptr); lptr += 1; printf("%d\n", *lptr); lptr -= 1; printf("%d\n", *lptr); lptr++ ; printf("%d\n", *lptr); lptr-- ; printf("%d\n", *lptr); ++lptr ; printf("%d\n", *lptr); --lptr ; printf("%d\n", *lptr); } ``` ## How Did I Miss Modulo? With this fixed, I went back to feeding the compiler source code to itself and found, to my amazement, that the modulo operators `%` and `%=` were missing. I have no idea why I hadn't put them in before. ### New Tokens and AST Operators Adding new operators to the compiler now is tricky because we have to synchronise changes in several places. Let's see where. In `defs.h` we need to add the tokens: ```c // Token types enum { T_EOF, // Binary operators T_ASSIGN, T_ASPLUS, T_ASMINUS, T_ASSTAR, T_ASSLASH, T_ASMOD, T_QUESTION, T_LOGOR, T_LOGAND, T_OR, T_XOR, T_AMPER, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, T_LSHIFT, T_RSHIFT, T_PLUS, T_MINUS, T_STAR, T_SLASH, T_MOD, ... }; ``` with T_ASMOD and T_MOD the new tokens. Now we need to create AST ops to match: ```c // AST node types. 
The first few line up // with the related tokens enum { A_ASSIGN = 1, A_ASPLUS, A_ASMINUS, A_ASSTAR, // 1 A_ASSLASH, A_ASMOD, A_TERNARY, A_LOGOR, // 5 A_LOGAND, A_OR, A_XOR, A_AND, A_EQ, A_NE, A_LT, // 9 A_GT, A_LE, A_GE, A_LSHIFT, A_RSHIFT, // 16 A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_MOD, // 21 ... }; ``` Now we need to add the scanner changes to scan these tokens. I won't show the code, but I will show the change to the table of token strings in `scan.c`: ```c // List of token strings, for debugging purposes char *Tstring[] = { "EOF", "=", "+=", "-=", "*=", "/=", "%=", "?", "||", "&&", "|", "^", "&", "==", "!=", ",", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "%", ... }; ``` ### Operator Precedence Now we need to set the operators' precedence in `expr.c`. T_SLASH used to be the highest operator but it's been replaced with T_MOD: ```c // Convert a binary operator token into a binary AST operation. // We rely on a 1:1 mapping from token to AST operation static int binastop(int tokentype) { if (tokentype > T_EOF && tokentype <= T_MOD) return (tokentype); fatals("Syntax error, token", Tstring[tokentype]); return (0); // Keep -Wall happy } // Operator precedence for each token. Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 10, // T_EOF, T_ASSIGN, T_ASPLUS, 10, 10, // T_ASMINUS, T_ASSTAR, 10, 10, // T_ASSLASH, T_ASMOD, 15, // T_QUESTION, 20, 30, // T_LOGOR, T_LOGAND 40, 50, 60, // T_OR, T_XOR, T_AMPER 70, 70, // T_EQ, T_NE 80, 80, 80, 80, // T_LT, T_GT, T_LE, T_GE 90, 90, // T_LSHIFT, T_RSHIFT 100, 100, // T_PLUS, T_MINUS 110, 110, 110 // T_STAR, T_SLASH, T_MOD }; // Check that we have a binary operator and // return its precedence. 
static int op_precedence(int tokentype) { int prec; if (tokentype > T_MOD) fatals("Token with no precedence in op_precedence:", Tstring[tokentype]); prec = OpPrec[tokentype]; if (prec == 0) fatals("Syntax error, token", Tstring[tokentype]); return (prec); } ``` ### Code Generation We already have a `cgdiv()` function to generate the x86-64 instructions to do division. Looking at the manual for the `idiv` instruction: > idivq S: signed divide `%rdx:%rax` by S. The quotient is stored in `%rax`. The remainder is stored in `%rdx`. So we can modify `cgdiv()` to take the AST operation being performed, and it can do both division and remainder (modulo). The new function in `cg.c` is: ```c // Divide or modulo the first register by the second and // return the number of the register with the result int cgdivmod(int r1, int r2, int op) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); if (op== A_DIVIDE) fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); else fprintf(Outfile, "\tmovq\t%%rdx,%s\n", reglist[r1]); free_register(r2); return (r1); } ``` The `tests/input147.c` confirms that the above changes work: ```c #include <stdio.h> int a; int main() { printf("%d\n", 24 % 9); printf("%d\n", 31 % 11); a= 24; a %= 9; printf("%d\n",a); a= 31; a %= 11; printf("%d\n",a); return(0); } ``` ## Why Doesn't It Link We are now at the point where our compiler can parse each and every one of its own source code files. But when I try to link them, I get a warning about missing `L0` labels. After a bit of investigation, it turns out that I wasn't properly propagating the end label for loops and switches in `genIF()` in `gen.c`. The fix is on line 49: ```c // Generate the code for an IF statement // and an optional ELSE clause. static int genIF(struct ASTnode *n, int looptoplabel, int loopendlabel) { ...
// Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOLABEL, NOLABEL, loopendlabel, n->op); genfreeregs(NOREG); cglabel(Lend); } ... } ``` Now that `loopendlabel` is being propagated, I can do this (in a shell script I call `memake`): ``` #!/bin/sh make install rm *.s *.o for i in cg.c decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c do echo "./cwj -c $i"; ./cwj -c $i ; ./cwj -S $i done cc -o cwj0 cg.o decl.o expr.o gen.o main.o misc.o \ opt.o scan.o stmt.o sym.o tree.o types.o ``` We end up with a binary, `cwj0`, which is the result of the compiler compiling itself. ``` $ size cwj0 text data bss dec hex filename 106540 3008 48 109596 1ac1c cwj0 $ file cwj0 cwj0: ELF 64-bit LSB shared object, x86-64, version 1 (SYSV), dynamically linked, interpreter /lib64/l, for GNU/Linux 3.2.0, not stripped ``` ## Conclusion and What's Next For the pointer increment problem, I definitely had to scratch my head quite a lot and look at several possible alternate solutions. I did get halfway through trying to build a new AST tree with an A_SCALE in it. Then I tossed it all away and went for the change in `cgloadvar()`. That's much nicer. The modulo operators were simple to add (in theory), but annoyingly difficult to get everything synchronised (in practice). There is probably some scope to refactor here to make the synchronisation much easier. Then, while trying to link all the object files that our compiler had made from its own source code, I found that we were not propagating loop/switch end labels properly. We've now reached the point where our compiler can parse every one of its source code files, generate assembly code for them, and we can link them. We have reached the final stage of our journey, one that is probably going to be the most painful, the **WDIW** stage: why doesn't it work? Here, we don't have a debugger, we are going to have to look at lots of assembly output. 
We'll have to single-step assembly and look at register values. In the next part of our compiler writing journey, I will start on the **WDIW** stage. We are going to need some strategies to make our work effective. [Next step](../59_WDIW_pt1/Readme.md)

================================================
FILE: 58_Ptr_Increments/cg.c
================================================
#include "defs.h"
#include "data.h"
#include "decl.h"

// Code generator for x86-64
// Copyright (c) 2019 Warren Toomey, GPL3

// Flag to say which section we are currently outputting to
enum { no_seg, text_seg, data_seg } currSeg = no_seg;

// Switch output to the text section. The .text directive
// is only emitted when we are not already in that section.
void cgtextseg() {
  if (currSeg != text_seg) {
    fputs("\t.text\n", Outfile);
    currSeg = text_seg;
  }
}

// Switch output to the data section. The .data directive
// is only emitted when we are not already in that section.
void cgdataseg() {
  if (currSeg != data_seg) {
    fputs("\t.data\n", Outfile);
    currSeg = data_seg;
  }
}

// Given a scalar type value, return the
// size of the type in bytes. Any pointer type is 8 bytes.
// An unrecognised type is reported through fatald().
int cgprimsize(int type) {
  if (ptrtype(type))
    return (8);
  switch (type) {
    case P_CHAR:
      return (1);
    case P_INT:
      return (4);
    case P_LONG:
      return (8);
    default:
      fatald("Bad type in cgprimsize:", type);
  }
  return (0);			// Keep -Wall happy
}

// Given a scalar type, an existing memory offset
// (which hasn't been allocated to anything yet)
// and a direction (1 is up, -1 is down), calculate
// and return a suitably aligned memory offset
// for this scalar type. This could be the original
// offset, or it could be above/below the original
int cgalign(int type, int offset, int direction) {
  int alignment;

  // We don't need to do this on x86-64, but let's
  // align chars on any offset and align ints/pointers
  // on a 4-byte alignment
  switch (type) {
    case P_CHAR:
      return (offset);
    case P_INT:
    case P_LONG:
      break;
    default:
      // Only pointer types are allowed to fall through here
      if (!ptrtype(type))
	fatald("Bad type in cg_align:", type);
  }

  // Here we have an int, a long or a pointer. Align it on a 4-byte offset
  // I put the generic code here so it can be reused elsewhere.
  alignment = 4;
  offset = (offset + direction * (alignment - 1)) & ~(alignment - 1);
  return (offset);
}

// Position of next local variable relative to stack base pointer.
// We store the offset as positive to make aligning the stack pointer easier
static int localOffset;
static int stackOffset;

// Create the position of a new local variable.
// The returned position is negative, i.e. below the base pointer.
static int newlocaloffset(int size) {
  // Decrement the offset by a minimum of 4 bytes
  // and allocate on the stack
  localOffset += (size > 4) ? size : 4;
  return (-localOffset);
}

// List of available registers and their names.
// We need a list of byte and doubleword registers, too
// The list also includes the registers used to
// hold function parameters
#define NUMFREEREGS 4
#define FIRSTPARAMREG 9		// Position of first parameter register
static int freereg[NUMFREEREGS];
static char *reglist[] =
  { "%r10", "%r11", "%r12", "%r13", "%r9", "%r8", "%rcx", "%rdx", "%rsi",
  "%rdi"
};

static char *breglist[] =
  { "%r10b", "%r11b", "%r12b", "%r13b", "%r9b", "%r8b", "%cl", "%dl", "%sil",
  "%dil"
};

static char *dreglist[] =
  { "%r10d", "%r11d", "%r12d", "%r13d", "%r9d", "%r8d", "%ecx", "%edx",
  "%esi", "%edi"
};

// Push and pop a register on/off the stack
static void pushreg(int r) {
  fprintf(Outfile, "\tpushq\t%s\n", reglist[r]);
}

static void popreg(int r) {
  fprintf(Outfile, "\tpopq\t%s\n", reglist[r]);
}

// Set all registers as available.
// The register whose number equals keepreg is left untouched;
// callers in this file pass NOREG to free every register.
void freeall_registers(int keepreg) {
  int i;
  fprintf(Outfile, "# freeing all registers\n");
  for (i = 0; i < NUMFREEREGS; i++)
    if (i != keepreg)
      freereg[i] = 1;
}

// When we need to spill a register, we choose
// the following register and then cycle through
// the remaining registers. The spillreg increments
// continually, so we need to take a modulo NUMFREEREGS
// on it.
static int spillreg=0;

// Allocate a free register. Return the number of
// the register. If no register is free, spill one
// onto the stack and reuse it.
int alloc_register(void) {
  int reg;

  // Hand out the first register that is marked as free
  for (reg = 0; reg < NUMFREEREGS; reg++) {
    if (freereg[reg]) {
      freereg[reg] = 0;
      fprintf(Outfile, "# allocated register %s\n", reglist[reg]);
      return (reg);
    }
  }

  // We have no registers, so we must spill one.
  // Its current value is pushed on the stack and
  // the register is handed out again.
  reg= (spillreg % NUMFREEREGS);
  spillreg++;
  fprintf(Outfile, "# spilling reg %s\n", reglist[reg]);
  pushreg(reg);
  return (reg);
}

// Return a register to the list of available registers.
// Check to see if it's not already there.
static void free_register(int reg) {
  if (freereg[reg] != 0) {
    fprintf(Outfile, "# error trying to free register %s\n", reglist[reg]);
    fatald("Error trying to free register", reg);
  }

  // If this was a spilled register, get it back.
  // NOTE: while any spill is outstanding, the register that gets
  // popped is the most recently spilled one, which replaces the
  // reg argument; the register stays marked as in use.
  if (spillreg > 0) {
    spillreg--;
    reg= (spillreg % NUMFREEREGS);
    fprintf(Outfile, "# unspilling reg %s\n", reglist[reg]);
    popreg(reg);
  } else {
    fprintf(Outfile, "# freeing reg %s\n", reglist[reg]);
    freereg[reg] = 1;
  }
}

// Spill all registers on the stack
void spill_all_regs(void) {
  int i;

  for (i = 0; i < NUMFREEREGS; i++)
    pushreg(i);
}

// Unspill all registers from the stack,
// in the reverse order to spill_all_regs()
static void unspill_all_regs(void) {
  int i;

  for (i = NUMFREEREGS - 1; i >= 0; i--)
    popreg(i);
}

// Print out the assembly preamble: mark all registers free
// and emit the __switch helper routine into the text section.
// Note that the routine copies the switch table address from
// %rdx into %rsi before walking the table with lodsq.
void cgpreamble() {
  freeall_registers(NOREG);
  cgtextseg();
  fprintf(Outfile,
	  "# internal switch(expr) routine\n"
	  "# %%rsi = switch table, %%rax = expr\n"
	  "# from SubC: http://www.t3x.org/subc/\n"
	  "\n"
	  "__switch:\n"
	  " pushq %%rsi\n"
	  " movq %%rdx, %%rsi\n"
	  " movq %%rax, %%rbx\n"
	  " cld\n"
	  " lodsq\n"
	  " movq %%rax, %%rcx\n"
	  "__next:\n"
	  " lodsq\n"
	  " movq %%rax, %%rdx\n"
	  " lodsq\n"
	  " cmpq %%rdx, %%rbx\n"
	  " jnz __no\n"
	  " popq %%rsi\n"
	  " jmp *%%rax\n"
	  "__no:\n"
	  " loop __next\n"
	  " lodsq\n"
	  " popq %%rsi\n"
	  " jmp *%%rax\n"
	  "\n");
}

// Nothing to do
void cgpostamble() {
}

// Print out a function preamble: the function label, the frame
// set-up, the copying of register parameters onto the stack and
// the allocation of stack space for the locals.
void cgfuncpreamble(struct symtable *sym) {
  char *name = sym->name;
  struct symtable *parm, *locvar;
  int cnt;
  int paramOffset = 16;		// Any pushed params start at this stack offset
  int paramReg = FIRSTPARAMREG;	// Index to the first param register in above reg lists

  // Output in the text segment, reset local offset
  cgtextseg();
  localOffset = 0;

  // Output the function start, save the %rbp and set it from %rsp
  if (sym->class == C_GLOBAL)
    fprintf(Outfile, "\t.globl\t%s\n" "\t.type\t%s, @function\n", name, name);
  fprintf(Outfile, "%s:\n" "\tpushq\t%%rbp\n" "\tmovq\t%%rsp, %%rbp\n", name);

  // Copy any in-register parameters to the stack, up to six of them
  // The remaining parameters are already on the stack
  for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) {
    if (cnt > 6) {
      parm->st_posn = paramOffset;
      paramOffset += 8;
    } else {
      parm->st_posn = newlocaloffset(parm->size);
      // Parameter registers are used from the end of reglist[] backwards
      cgstorlocal(paramReg--, parm);
    }
  }

  // For the remainder, if they are a parameter then they are
  // already on the stack. If only a local, make a stack position.
  for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) {
    locvar->st_posn = newlocaloffset(locvar->size);
  }

  // Align the stack pointer to be a multiple of 16
  // less than its previous value
  stackOffset = (localOffset + 15) & ~15;
  fprintf(Outfile, "\taddq\t$%d,%%rsp\n", -stackOffset);
}

// Print out a function postamble: the end label, the release
// of the local stack space and the return. All registers are
// marked free afterwards.
void cgfuncpostamble(struct symtable *sym) {
  cglabel(sym->st_endlabel);
  fprintf(Outfile, "\taddq\t$%d,%%rsp\n", stackOffset);
  fputs("\tpopq %rbp\n" "\tret\n", Outfile);
  freeall_registers(NOREG);
}

// Load an integer literal value into a register.
// Return the number of the register.
// For x86-64, we don't need to worry about the type.
int cgloadint(int value, int type) {
  // Get a new register
  int r = alloc_register();

  fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]);
  return (r);
}

// Load a value from a variable into a register.
// Return the number of the register. If the
// operation is pre- or post-increment/decrement,
// also perform this action.
int cgloadvar(struct symtable *sym, int op) {
  int r, postreg, offset=1;

  // Get a new register
  r = alloc_register();

  // If the symbol is a pointer, use the size
  // of the type that it points to as any
  // increment or decrement. If not, it's one.
  if (ptrtype(sym->type))
    offset= typesize(value_at(sym->type), sym->ctype);

  // Negate the offset for decrements
  if (op==A_PREDEC || op==A_POSTDEC)
    offset= -offset;

  // If we have a pre-operation
  if (op==A_PREINC || op==A_PREDEC) {
    // Load the symbol's address
    if (sym->class == C_LOCAL || sym->class == C_PARAM)
      fprintf(Outfile, "\tleaq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]);
    else
      fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", sym->name, reglist[r]);

    // and change the value at that address
    switch (sym->size) {
      case 1: fprintf(Outfile, "\taddb\t$%d,(%s)\n", offset, reglist[r]); break;
      case 4: fprintf(Outfile, "\taddl\t$%d,(%s)\n", offset, reglist[r]); break;
      case 8: fprintf(Outfile, "\taddq\t$%d,(%s)\n", offset, reglist[r]); break;
    }
  }

  // Now load the output register with the value.
  // Locals and parameters are addressed off %rbp,
  // everything else by name relative to %rip.
  if (sym->class == C_LOCAL || sym->class == C_PARAM) {
    switch (sym->size) {
      case 1: fprintf(Outfile, "\tmovzbq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); break;
      case 4: fprintf(Outfile, "\tmovslq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); break;
      case 8: fprintf(Outfile, "\tmovq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]);
    }
  } else {
    switch (sym->size) {
      case 1: fprintf(Outfile, "\tmovzbq\t%s(%%rip), %s\n", sym->name, reglist[r]); break;
      case 4: fprintf(Outfile, "\tmovslq\t%s(%%rip), %s\n", sym->name, reglist[r]); break;
      case 8: fprintf(Outfile, "\tmovq\t%s(%%rip), %s\n", sym->name, reglist[r]);
    }
  }

  // If we have a post-operation, get a new register
  // so that the value already loaded into r is kept
  if (op==A_POSTINC || op==A_POSTDEC) {
    postreg = alloc_register();

    // Load the symbol's address
    if (sym->class == C_LOCAL || sym->class == C_PARAM)
      fprintf(Outfile, "\tleaq\t%d(%%rbp), %s\n", sym->st_posn, reglist[postreg]);
    else
      fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", sym->name, reglist[postreg]);

    // and change the value at that address
    switch (sym->size) {
      case 1: fprintf(Outfile, "\taddb\t$%d,(%s)\n", offset, reglist[postreg]); break;
      case 4: fprintf(Outfile, "\taddl\t$%d,(%s)\n", offset, reglist[postreg]); break;
      case 8: fprintf(Outfile, "\taddq\t$%d,(%s)\n", offset, reglist[postreg]); break;
    }

    // and free the register
    free_register(postreg);
  }

  // Return the register with the value
  return(r);
}

// Given the label number of a global string,
// load its address into a new register
int cgloadglobstr(int label) {
  // Get a new register
  int r = alloc_register();
  fprintf(Outfile, "\tleaq\tL%d(%%rip), %s\n", label, reglist[r]);
  return (r);
}

// Add two registers together and return
// the number of the register with the result.
// The result is left in r1 and r2 is freed.
int cgadd(int r1, int r2) {
  fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r2], reglist[r1]);
  free_register(r2);
  return (r1);
}

// Subtract the second register from the first and
// return the number of the register with the result
int cgsub(int r1, int r2) {
  fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]);
  free_register(r2);
  return (r1);
}

// Multiply two registers together and return
// the number of the register with the result
int cgmul(int r1, int r2) {
  fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r2], reglist[r1]);
  free_register(r2);
  return (r1);
}

// Divide or modulo the first register by the second and
// return the number of the register with the result.
// For A_DIVIDE the result is taken from %rax; for any
// other op it is taken from %rdx.
int cgdivmod(int r1, int r2, int op) {
  fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]);
  fprintf(Outfile, "\tcqo\n");
  fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]);
  if (op== A_DIVIDE)
    fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]);
  else
    fprintf(Outfile, "\tmovq\t%%rdx,%s\n", reglist[r1]);
  free_register(r2);
  return (r1);
}

// Bitwise AND two registers; the result is in r1 and r2 is freed
int cgand(int r1, int r2) {
  fprintf(Outfile, "\tandq\t%s, %s\n", reglist[r2], reglist[r1]);
  free_register(r2);
  return (r1);
}

// Bitwise OR two registers; the result is in r1 and r2 is freed
int cgor(int r1, int r2) {
  fprintf(Outfile, "\torq\t%s, %s\n", reglist[r2], reglist[r1]);
  free_register(r2);
  return (r1);
}

// Bitwise XOR two registers; the result is in r1 and r2 is freed
int cgxor(int r1, int r2) {
  fprintf(Outfile, "\txorq\t%s, %s\n", reglist[r2], reglist[r1]);
  free_register(r2);
  return (r1);
}

// Shift r1 left by the amount in r2. The shift count
// is moved into %cl first. r2 is freed.
int cgshl(int r1, int r2) {
  fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]);
  fprintf(Outfile, "\tshlq\t%%cl, %s\n", reglist[r1]);
  free_register(r2);
  return (r1);
}

// Shift r1 right by the amount in r2. The shift count
// is moved into %cl first. r2 is freed.
int cgshr(int r1, int r2) {
  fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]);
  fprintf(Outfile, "\tshrq\t%%cl, %s\n", reglist[r1]);
  free_register(r2);
  return (r1);
}

// Negate a register's value
int cgnegate(int r) {
  fprintf(Outfile, "\tnegq\t%s\n", reglist[r]);
  return (r);
}

// Invert a register's value
int cginvert(int r) {
  fprintf(Outfile, "\tnotq\t%s\n", reglist[r]);
  return (r);
}

// Logically negate a register's value
int cglognot(int r) {
  fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]);
  fprintf(Outfile, "\tsete\t%s\n", breglist[r]);
  fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]);
  return (r);
}

// Load a boolean value (only 0 or 1)
// into the given register
void cgloadboolean(int r, int val) {
  fprintf(Outfile, "\tmovq\t$%d, %s\n", val, reglist[r]);
}

// Convert an integer value to a boolean value.
Jump if // it's an IF, WHILE, LOGAND or LOGOR operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); switch(op) { case A_IF: case A_WHILE: case A_LOGAND: fprintf(Outfile, "\tje\tL%d\n", label); break; case A_LOGOR: fprintf(Outfile, "\tjne\tL%d\n", label); break; default: fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { int outr; // Call the function fprintf(Outfile, "\tcall\t%s@PLT\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\taddq\t$%d, %%rsp\n", 8 * (numargs - 6)); // Unspill all the registers unspill_all_regs(); // Get a new register and copy the return value into it outr = alloc_register(); fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. 
We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpushq\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmovq\t%s, %s\n", reglist[r], reglist[FIRSTPARAMREG - argposn + 1]); } free_register(r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsalq\t$%d, %s\n", val, reglist[r]); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %s(%%rip)\n", reglist[r], sym->name); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %s(%%rip)\n", breglist[r], sym->name); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %s(%%rip)\n", dreglist[r], sym->name); break; default: fatald("Bad type in cgstorglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %d(%%rbp)\n", reglist[r], sym->st_posn); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %d(%%rbp)\n", breglist[r], sym->st_posn); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %d(%%rbp)\n", dreglist[r], sym->st_posn); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size, type; int initvalue; int i; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the variable (or its elements if an array) // and the type of the variable if (node->stype == S_ARRAY) { size = typesize(value_at(node->type), node->ctype); type = value_at(node->type); } else { size = node->size; type = node->type; } // Generate the global identity and the label cgdataseg(); if (node->class == C_GLOBAL) 
fprintf(Outfile, "\t.globl\t%s\n", node->name); fprintf(Outfile, "%s:\n", node->name); // Output space for one or more elements for (i = 0; i < node->nelems; i++) { // Get any initial value initvalue = 0; if (node->initlist != NULL) initvalue = node->initlist[i]; // Generate the space for this type switch (size) { case 1: fprintf(Outfile, "\t.byte\t%d\n", initvalue); break; case 4: fprintf(Outfile, "\t.long\t%d\n", initvalue); break; case 8: // Generate the pointer to a string literal. Treat a zero value // as actually zero, not the label L0 if (node->initlist != NULL && type == pointer_to(P_CHAR) && initvalue != 0) fprintf(Outfile, "\t.quad\tL%d\n", initvalue); else fprintf(Outfile, "\t.quad\t%d\n", initvalue); break; default: for (i = 0; i < size; i++) fprintf(Outfile, "\t.byte\t0\n"); } } } // Generate a global string and its start label // Don't output the label if append is true. void cgglobstr(int l, char *strvalue, int append) { char *cptr; if (!append) cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\t.byte\t%d\n", *cptr); } } void cgglobstrend(void) { fprintf(Outfile, "\t.byte\t0\n"); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(NOREG); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Only return a value if we have a value to return if (reg != NOREG) { // Deal with pointers here as we can't put them in // the switch statement if (ptrtype(sym->type)) fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); else { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzbl\t%s, %%eax\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %%eax\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", 
sym->type); } } } cgjump(sym->st_endlabel); } // Generate code to load the address of an // identifier into a variable. Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL || sym->class == C_STATIC) fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", sym->name, reglist[r]); else fprintf(Outfile, "\tleaq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzbq\t(%s), %s\n", reglist[r], reglist[r]); break; case 4: fprintf(Outfile, "\tmovslq\t(%s), %s\n", reglist[r], reglist[r]); break; case 8: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { // Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmovb\t%s, (%s)\n", breglist[r1], reglist[r2]); break; case 4: fprintf(Outfile, "\tmovl\t%s, (%s)\n", dreglist[r1], reglist[r2]); break; case 8: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } // Generate a switch jump table and the code to // load the registers and call the switch() code void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; // Get a label for the switch table label = genlabel(); cglabel(label); // Heuristic. If we have no cases, create one case // which points to the default case if (casecount == 0) { caseval[0] = 0; caselabel[0] = defaultlabel; casecount = 1; } // Generate the switch jump table. 
fprintf(Outfile, "\t.quad\t%d\n", casecount); for (i = 0; i < casecount; i++) fprintf(Outfile, "\t.quad\t%d, L%d\n", caseval[i], caselabel[i]); fprintf(Outfile, "\t.quad\tL%d\n", defaultlabel); // Load the specific registers cglabel(toplabel); fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); fprintf(Outfile, "\tleaq\tL%d(%%rip), %%rdx\n", label); fprintf(Outfile, "\tjmp\t__switch\n"); } // Move value between registers void cgmove(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s, %s\n", reglist[r1], reglist[r2]); } ================================================ FILE: 58_Ptr_Increments/cg_arm.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for ARMv6 on Raspberry Pi // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. static int freereg[4]; static char *reglist[4] = { "r4", "r5", "r6", "r7" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // We have to store large integer literal values in memory. // Keep a list of them which will be output in the postamble #define MAXINTS 1024 int Intlist[MAXINTS]; static int Intslot = 0; // Determine the offset of a large integer // literal from the .L3 label. If the integer // isn't in the list, add it. 
static void set_int_offset(int val) { int offset = -1; // See if it is already there for (int i = 0; i < Intslot; i++) { if (Intlist[i] == val) { offset = 4 * i; break; } } // Not in the list, so add it if (offset == -1) { offset = 4 * Intslot; if (Intslot == MAXINTS) fatal("Out of int slots in set_int_offset()"); Intlist[Intslot++] = val; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L3+%d\n", offset); } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Print out the assembly postamble void cgpostamble() { // Print out the global variables fprintf(Outfile, ".L2:\n"); for (int i = 0; i < Globs; i++) { if (Symtable[i].stype == S_VARIABLE) fprintf(Outfile, "\t.word %s\n", Symtable[i].name); } // Print out the integer literals fprintf(Outfile, ".L3:\n"); for (int i = 0; i < Intslot; i++) { fprintf(Outfile, "\t.word %d\n", Intlist[i]); } } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, \%%function\n" "%s:\n" "\tpush\t{fp, lr}\n" "\tadd\tfp, sp, #4\n" "\tsub\tsp, sp, #8\n" "\tstr\tr0, [fp, #-8]\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Symtable[id].endlabel); fputs("\tsub\tsp, fp, #4\n" "\tpop\t{fp, pc}\n" "\t.align\t2\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); // If the literal value is small, do it with one instruction if (value <= 1000) fprintf(Outfile, "\tmov\t%s, #%d\n", reglist[r], value); else { set_int_offset(value); fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); } return (r); } // Determine the offset of a variable from the .L2 // label. Yes, this is inefficient code. static void set_var_offset(int id) { int offset = 0; // Walk the symbol table up to id. 
// Find S_VARIABLEs and add on 4 until // we get to our variable for (int i = 0; i < id; i++) { if (Symtable[i].stype == S_VARIABLE) offset += 4; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L2+%d\n", offset); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tldrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s, %s\n", reglist[r1], reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\tmul\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { // To do a divide: r0 holds the dividend, r1 holds the divisor. // The quotient is in r0. 
fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r1]); fprintf(Outfile, "\tmov\tr1, %s\n", reglist[r2]); fprintf(Outfile, "\tbl\t__aeabi_idiv\n"); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r1]); free_register(r2); return (r1); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]); fprintf(Outfile, "\tbl\t%s\n", Symtable[id].name); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r]); return (r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tlsl\t%s, %s, #%d\n", reglist[r], reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tstr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgstorglob:", Symtable[id].type); } return (r); } // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { if (ptrtype(type)) return (4); switch (type) { case P_CHAR: return (1); case P_INT: case P_LONG: return (4); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Symtable[id].type); fprintf(Outfile, "\t.data\n" "\t.globl\t%s\n", Symtable[id].name); switch (typesize) { case 1: fprintf(Outfile, "%s:\t.byte\t0\n", Symtable[id].name); break; case 4: fprintf(Outfile, "%s:\t.long\t0\n", Symtable[id].name); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "moveq", "movne", "movlt", "movgt", "movle", "movge" }; // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "movne", "moveq", "movge", "movle", "movgt", "movlt" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #1\n", cmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #0\n", invcmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\tuxtb\t%s, %s\n", reglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tb\tL%d\n", l); } // List of inverted branch instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *brlist[] = { "bne", "beq", "bge", "ble", "bgt", "blt" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", brlist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[reg]); cgjump(Symtable[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. Return a new register int cgaddress(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); fprintf(Outfile, "\tmov\t%s, r3\n", reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tldrb\t%s, [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [%s]\n", reglist[r1], reglist[r2]); break; case P_INT: case P_LONG: fprintf(Outfile, "\tstr\t%s, [%s]\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 58_Ptr_Increments/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { 
no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\tsection .text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\tsection .data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch (type) { case P_CHAR: return (offset); case P_INT: case P_LONG: break; default: if (!ptrtype(type)) fatald("Bad type in cg_align:", type); } // Here we have an int or a long. Align it on a 4-byte offset // I put the generic code here so it can be reused elsewhere. alignment = 4; offset = (offset + direction * (alignment - 1)) & ~(alignment - 1); return (offset); } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. int newlocaloffset(int size) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (size > 4) ? size : 4; return (-localOffset); } // List of available registers and their names. 
// We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "r10", "r11", "r12", "r13", "r9", "r8", "rcx", "rdx", "rsi", "rdi" }; static char *breglist[] = { "r10b", "r11b", "r12b", "r13b", "r9b", "r8b", "cl", "dl", "sil", "dil" }; static char *dreglist[] = { "r10d", "r11d", "r12d", "r13d", "r9d", "r8d", "ecx", "edx", "esi", "edi" }; // Push and pop a register on/off the stack static void pushreg(int r) { fprintf(Outfile, "\tpush\t%s\n", reglist[r]); } static void popreg(int r) { fprintf(Outfile, "\tpop\t%s\n", reglist[r]); } // Set all registers as available // But if reg is positive, don't free that one. void freeall_registers(int keepreg) { int i; fprintf(Outfile, "; freeing all registers\n"); for (i = 0; i < NUMFREEREGS; i++) if (i != keepreg) freereg[i] = 1; } // When we need to spill a register, we choose // the following register and then cycle through // the remaining registers. The spillreg increments // continually, so we need to take a modulo NUMFREEREGS // on it. static int spillreg=0; // Allocate a free register. Return the number of // the register. Die if no available registers. int alloc_register(void) { int reg; for (reg = 0; reg < NUMFREEREGS; reg++) { if (freereg[reg]) { freereg[reg] = 0; fprintf(Outfile, "; allocated register %s\n", reglist[reg]); return (reg); } } // We have no registers, so we must spill one reg = (spillreg % NUMFREEREGS); spillreg++; fprintf(Outfile, "; spilling reg %s\n", reglist[reg]); pushreg(reg); return (reg); } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) {
  // Freeing a register that is already marked free is an error
  if (freereg[reg] != 0) {
    fprintf(Outfile, "# error trying to free register %s\n", reglist[reg]);
    fatald("Error trying to free register", reg);
  }
  // If this was a spilled register, get it back
  // NOTE(review): while spillreg > 0 the register that is popped is
  // derived from spillreg rather than from the reg argument, and
  // freereg[] is left untouched -- confirm this pairing is intended.
  if (spillreg > 0) {
    spillreg--;
    reg= (spillreg % NUMFREEREGS);
    fprintf(Outfile, "; unspilling reg %s\n", reglist[reg]);
    popreg(reg);
  } else {
    fprintf(Outfile, "; freeing reg %s\n", reglist[reg]);
    freereg[reg] = 1;
  }
}

// Spill all registers on the stack, in ascending register order
void spill_all_regs(void) {
  int i;

  for (i = 0; i < NUMFREEREGS; i++)
    pushreg(i);
}

// Unspill all registers from the stack, in descending
// register order (the reverse of spill_all_regs())
static void unspill_all_regs(void) {
  int i;

  for (i = NUMFREEREGS - 1; i >= 0; i--)
    popreg(i);
}

// Print out the assembly preamble: free all registers, switch
// to the text section and emit the __switch helper routine
void cgpreamble() {
  freeall_registers(NOREG);
  cgtextseg();
  fprintf(Outfile,
          "; internal switch(expr) routine\n"
          "; rsi = switch table, rax = expr\n"
          "; from SubC: http://www.t3x.org/subc/\n"
          "\n"
          "__switch:\n"
          " push rsi\n"
          " mov rsi, rdx\n"
          " mov rbx, rax\n"
          " cld\n"
          " lodsq\n"
          " mov rcx, rax\n"
          "__next:\n"
          " lodsq\n"
          " mov rdx, rax\n"
          " lodsq\n"
          " cmp rbx, rdx\n"
          " jnz __no\n"
          " pop rsi\n"
          " jmp rax\n"
          "__no:\n"
          " loop __next\n"
          " lodsq\n"
          " pop rsi\n"
          " jmp rax\n"
          "\n");
}

// Nothing to do
void cgpostamble() {
}

// Print out a function preamble: emit the function label and
// frame setup, give every parameter and local a stack position,
// then lower rsp by the 16-byte-rounded size of the locals
void cgfuncpreamble(struct symtable *sym) {
  char *name = sym->name;
  struct symtable *parm, *locvar;
  int cnt;
  int paramOffset = 16;         // Any pushed params start at this stack offset
  int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists

  // Output in the text segment, reset local offset
  cgtextseg();
  localOffset = 0;

  // Output the function start, save the rsp and rbp
  if (sym->class == C_GLOBAL)
    fprintf(Outfile, "\tglobal\t%s\n", name);
  fprintf(Outfile,
          "%s:\n" "\tpush\trbp\n"
          "\tmov\trbp, rsp\n", name);

  // Copy any in-register parameters to the stack, up to six of them
  // The remaining parameters are already on the stack
  for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) {
    if (cnt > 6) {
      // Seventh and later: already on the stack, 8 bytes apart
      parm->st_posn = paramOffset;
      paramOffset += 8;
    } else {
      // First six: make a local slot and store the register into it.
      // paramReg walks backwards through the register lists.
      parm->st_posn = newlocaloffset(parm->size);
      cgstorlocal(paramReg--, parm);
    }
  }

  // For the remainder, if they are a parameter then they are
  // already on the stack. If only a local, make a stack position.
  for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) {
    locvar->st_posn = newlocaloffset(locvar->size);
  }

  // Align the stack pointer to be a multiple of 16
  // less than its previous value
  stackOffset = (localOffset + 15) & ~15;
  fprintf(Outfile, "\tadd\trsp, %d\n", -stackOffset);
}

// Print out a function postamble: emit the end label, undo the
// stack adjustment made by the preamble, restore rbp and return
void cgfuncpostamble(struct symtable *sym) {
  cglabel(sym->st_endlabel);
  fprintf(Outfile, "\tadd\trsp, %d\n", stackOffset);
  fputs("\tpop rbp\n" "\tret\n", Outfile);
  freeall_registers(NOREG);
}

// Load an integer literal value into a register.
// Return the number of the register.
// For x86-64, we don't need to worry about the type.
int cgloadint(int value, int type) {
  // Get a new register
  int r = alloc_register();

  fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value);
  return (r);
}

// Load a value from a variable into a register.
// Return the number of the register. If the
// operation is pre- or post-increment/decrement,
// also perform this action.
int cgloadvar(struct symtable *sym, int op) {
  int r, postreg, offset = 1;

  // Get a new register
  r = alloc_register();

  // If the symbol is a pointer, use the size
  // of the type that it points to as any
  // increment or decrement. If not, it's one.
  if (ptrtype(sym->type))
    offset= typesize(value_at(sym->type), sym->ctype);

  // Negate the offset for decrements
  if (op==A_PREDEC || op==A_POSTDEC)
    offset= -offset;

  // If we have a pre-operation
  if (op==A_PREINC || op==A_PREDEC) {
    // Load the symbol's address
    if (sym->class == C_LOCAL || sym->class == C_PARAM)
      fprintf(Outfile, "\tlea\t%s, [rbp+%d]\n", reglist[r], sym->st_posn);
    else
      fprintf(Outfile, "\tlea\t%s, [%s]\n", reglist[r], sym->name);

    // and change the value at that address
    // (sizes other than 1, 4 and 8 emit nothing)
    switch (sym->size) {
      case 1: fprintf(Outfile, "\tadd\tbyte [%s], %d\n", reglist[r], offset); break;
      case 4: fprintf(Outfile, "\tadd\tdword [%s], %d\n", reglist[r], offset); break;
      case 8: fprintf(Outfile, "\tadd\tqword [%s], %d\n", reglist[r], offset); break;
    }
  }

  // Now load the output register with the value
  // (this overwrites any address placed in r by a pre-operation)
  if (sym->class == C_LOCAL || sym->class == C_PARAM) {
    switch (sym->size) {
      case 1: fprintf(Outfile, "\tmovzx\t%s, byte [rbp+%d]\n", reglist[r], sym->st_posn); break;
      case 4: fprintf(Outfile, "\tmovsxd\t%s, dword [rbp+%d]\n", reglist[r], sym->st_posn); break;
      case 8: fprintf(Outfile, "\tmov\t%s, [rbp+%d]\n", reglist[r], sym->st_posn);
    }
  } else {
    switch (sym->size) {
      case 1: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], sym->name); break;
      case 4: fprintf(Outfile, "\tmovsxd\t%s, dword [%s]\n", reglist[r], sym->name); break;
      case 8: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], sym->name);
    }
  }

  // If we have a post-operation, get a new register
  if (op==A_POSTINC || op==A_POSTDEC) {
    postreg = alloc_register();

    // Load the symbol's address
    if (sym->class == C_LOCAL || sym->class == C_PARAM)
      fprintf(Outfile, "\tlea\t%s, [rbp+%d]\n", reglist[postreg], sym->st_posn);
    else
      fprintf(Outfile, "\tlea\t%s, [%s]\n", reglist[postreg], sym->name);

    // and change the value at that address
    switch (sym->size) {
      case 1: fprintf(Outfile, "\tadd\tbyte [%s], %d\n", reglist[postreg], offset); break;
      case 4: fprintf(Outfile, "\tadd\tdword [%s], %d\n", reglist[postreg], offset); break;
      case 8: fprintf(Outfile, "\tadd\tqword [%s], %d\n", reglist[postreg], offset); break;
    }

    // and free the register
    free_register(postreg);
  }

  // Return the register with the value
  return(r);
}

// Given the label number of a global string,
// load its address into a new register
int cgloadglobstr(int label) {
  // Get a new register
  int r = alloc_register();
  fprintf(Outfile, "\tmov\t%s, L%d\n", reglist[r], label);
  return (r);
}

// Add two registers together and return
// the number of the register with the result.
// The sum is left in r1; r2 is freed.
int cgadd(int r1, int r2) {
  fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r1], reglist[r2]);
  free_register(r2);
  return (r1);
}

// Subtract the second register from the first and
// return the number of the register with the result.
// The difference is left in r1; r2 is freed.
int cgsub(int r1, int r2) {
  fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]);
  free_register(r2);
  return (r1);
}

// Multiply two registers together and return
// the number of the register with the result.
// The product is left in r1; r2 is freed.
int cgmul(int r1, int r2) {
  fprintf(Outfile, "\timul\t%s, %s\n", reglist[r1], reglist[r2]);
  free_register(r2);
  return (r1);
}

// Divide or modulo the first register by the second and
// return the number of the register with the result.
// For A_DIVIDE the result is copied from rax, for any other
// op it is copied from rdx. r2 is freed.
int cgdivmod(int r1, int r2, int op) {
  fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]);
  fprintf(Outfile, "\tcqo\n");
  fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]);
  if (op == A_DIVIDE)
    fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]);
  else
    fprintf(Outfile, "\tmov\t%s, rdx\n", reglist[r1]);
  free_register(r2);
  return (r1);
}

// Bitwise AND r2 into r1. Frees r2 and returns r1.
int cgand(int r1, int r2) {
  fprintf(Outfile, "\tand\t%s, %s\n", reglist[r1], reglist[r2]);
  free_register(r2);
  return (r1);
}

// Bitwise OR r2 into r1. Frees r2 and returns r1.
int cgor(int r1, int r2) {
  fprintf(Outfile, "\tor\t%s, %s\n", reglist[r1], reglist[r2]);
  free_register(r2);
  return (r1);
}

// Bitwise XOR r2 into r1. Frees r2 and returns r1.
int cgxor(int r1, int r2) {
  fprintf(Outfile, "\txor\t%s, %s\n", reglist[r1], reglist[r2]);
  free_register(r2);
  return (r1);
}

// Shift r1 left by the count held in r2. The count is first
// copied into cl. Frees r2 and returns r1.
int cgshl(int r1, int r2) {
  fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]);
  fprintf(Outfile, "\tshl\t%s, cl\n", reglist[r1]);
  free_register(r2);
  return (r1);
}

// Shift r1 right (shr) by the count held in r2. The count is
// first copied into cl. Frees r2 and returns r1.
int cgshr(int r1, int r2) {
  fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]);
  fprintf(Outfile, "\tshr\t%s, cl\n", reglist[r1]);
  free_register(r2);
  return (r1);
}

// Negate a register's value
int cgnegate(int r) {
  fprintf(Outfile, "\tneg\t%s\n", reglist[r]);
  return (r);
}

// Invert a register's value
int cginvert(int r) {
  fprintf(Outfile, "\tnot\t%s\n", reglist[r]);
  return (r);
}

// Logically negate a register's value:
// the register ends up holding 1 if it was zero, else 0
int cglognot(int r) {
  fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]);
  fprintf(Outfile, "\tsete\t%s\n", breglist[r]);
  fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r], breglist[r]);
  return (r);
}

// Load a boolean value (only 0 or 1)
// into the given register
void cgloadboolean(int r, int val) {
  fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], val);
}

// Convert an integer value to a boolean value. Jump if
// it's an IF, WHILE, LOGAND or LOGOR operation
int cgboolean(int r, int op, int label) {
  fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]);
  switch(op) {
    case A_IF:
    case A_WHILE:
    case A_LOGAND:
      // Jump to the label when the value is zero
      fprintf(Outfile, "\tje\tL%d\n", label);
      break;
    case A_LOGOR:
      // Jump to the label when the value is non-zero
      fprintf(Outfile, "\tjne\tL%d\n", label);
      break;
    default:
      // No jump: normalise the register to 0 or 1
      fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]);
      fprintf(Outfile, "\tmovzx\t%s, byte %s\n", reglist[r], breglist[r]);
  }
  return (r);
}

// Call a function with the given symbol id
// Pop off any arguments pushed on the stack
// Return the register with the result
int cgcall(struct symtable *sym, int numargs) {
  int outr;

  // Call the function
  fprintf(Outfile, "\tcall\t%s\n", sym->name);

  // Remove any arguments pushed on the stack
  if (numargs > 6)
    fprintf(Outfile, "\tadd\trsp, %d\n", 8 * (numargs - 6));

  // Unspill all the registers
  // NOTE(review): presumably paired with a spill_all_regs()
  // made by the caller before the arguments -- verify against caller
  unspill_all_regs();

  // Get a new register and copy the return value into it
  outr = alloc_register();
  fprintf(Outfile, "\tmov\t%s, rax\n", reglist[outr]);
  return (outr);
}

// Given a register with an argument value,
// copy this argument into
the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpush\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmov\t%s, %s\n", reglist[FIRSTPARAMREG - argposn + 1], reglist[r]); } free_register(r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsal\t%s, %d\n", reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, dreglist[r]); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\tqword\t[rbp+%d], %s\n", sym->st_posn, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\tbyte\t[rbp+%d], %s\n", sym->st_posn, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\tdword\t[rbp+%d], %s\n", sym->st_posn, dreglist[r]); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size, type; int initvalue; int i; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the variable (or its elements if an array) // and the type of the variable if (node->stype == 
S_ARRAY) { size = typesize(value_at(node->type), node->ctype); type = value_at(node->type); } else { size = node->size; type = node->type; } // Generate the global identity and the label cgdataseg(); if (node->class == C_GLOBAL) fprintf(Outfile, "\tglobal\t%s\n", node->name); fprintf(Outfile, "%s:\n", node->name); // Output space for one or more elements for (i = 0; i < node->nelems; i++) { // Get any initial value initvalue = 0; if (node->initlist != NULL) initvalue = node->initlist[i]; // Generate the space for this type // original version switch(size) { case 1: fprintf(Outfile, "\tdb\t%d\n", initvalue); break; case 4: fprintf(Outfile, "\tdd\t%d\n", initvalue); break; case 8: // Generate the pointer to a string literal. Treat a zero value // as actually zero, not the label L0 if (node->initlist != NULL && type == pointer_to(P_CHAR) && initvalue != 0) fprintf(Outfile, "\tdq\tL%d\n", initvalue); else fprintf(Outfile, "\tdq\t%d\n", initvalue); break; default: for (i = 0; i < size; i++) fprintf(Outfile, "\tdb\t0\n"); /* compact version using times instead of loop fprintf(Outfile, "\ttimes\t%d\tdb\t0\n", size); */ } } } // Generate a global string and its start label // Don't output the label if append is true. void cgglobstr(int l, char *strvalue, int append) { char *cptr; if (!append) cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\tdb\t%d\n", *cptr); } } void cgglobstrend(void) { fprintf(Outfile, "\tdb\t0\n"); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(NOREG); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Only return a value if we have a value to return if (reg != NOREG) { // Deal with pointers here as we can't put them in // the switch statement if (ptrtype(sym->type)) fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); else { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzx\teax, %s\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmov\teax, %s\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } } 
} cgjump(sym->st_endlabel); } // Generate code to load the address of an // identifier into a variable. Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL || sym->class == C_STATIC) fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r], sym->name); else fprintf(Outfile, "\tlea\t%s, [rbp+%d]\n", reglist[r], sym->st_posn); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], reglist[r]); break; case 4: fprintf(Outfile, "\tmovsx\t%s, dword [%s]\n", reglist[r], reglist[r]); break; case 8: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { //Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmov\t[%s], byte %s\n", reglist[r2], breglist[r1]); break; case 4: fprintf(Outfile, "\tmov\t[%s], dword %s\n", reglist[r2], dreglist[r1]); break; case 8: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } // Generate a switch jump table and the code to // load the registers and call the switch() code void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; // Get a label for the switch table label = genlabel(); cglabel(label); // Heuristic. If we have no cases, create one case // which points to the default case if (casecount == 0) { caseval[0] = 0; caselabel[0] = defaultlabel; casecount = 1; } // Generate the switch jump table. 
fprintf(Outfile, "\tdq\t%d\n", casecount); for (i = 0; i < casecount; i++) fprintf(Outfile, "\tdq\t%d, L%d\n", caseval[i], caselabel[i]); fprintf(Outfile, "\tdq\tL%d\n", defaultlabel); // Load the specific registers cglabel(toplabel); fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); fprintf(Outfile, "\tmov\trdx, L%d\n", label); fprintf(Outfile, "\tjmp\t__switch\n"); } // Move value between registers void cgmove(int r1, int r2) { fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r2], reglist[r1]); } ================================================ FILE: 58_Ptr_Increments/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Linestart; // True if at start of a line extern_ int Putback; // Character put back by scanner extern_ struct symtable *Functionid; // Symbol ptr of the current function extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ char *Infilename; // Name of file we are parsing extern_ char *Outfilename; // Name of file we opened as Outfile extern_ struct token Token; // Last token scanned extern_ struct token Peektoken; // A look-ahead token extern_ char Text[TEXTLEN + 1]; // Last identifier scanned extern_ int Looplevel; // Depth of nested loops extern_ int Switchlevel; // Depth of nested switches extern char *Tstring[]; // List of token strings // Symbol table lists extern_ struct symtable *Globhead, *Globtail; // Global variables and functions extern_ struct symtable *Loclhead, *Locltail; // Local variables extern_ struct symtable *Parmhead, *Parmtail; // Local parameters extern_ struct symtable *Membhead, *Membtail; // Temp list of struct/union members extern_ struct symtable *Structhead, *Structtail; // List of struct types extern_ struct symtable *Unionhead, *Uniontail; // List of union types extern_ struct symtable *Enumhead, *Enumtail; // List of enum types and 
values extern_ struct symtable *Typehead, *Typetail; // List of typedefs // Command-line flags extern_ int O_dumpAST; // If true, dump the AST trees extern_ int O_dumpsym; // If true, dump the symbol table extern_ int O_keepasm; // If true, keep any assembly files extern_ int O_assemble; // If true, assemble the assembly files extern_ int O_dolink; // If true, link the object files extern_ int O_verbose; // If true, print info on compilation stages ================================================ FILE: 58_Ptr_Increments/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 static struct symtable *composite_declaration(int type); static int typedef_declaration(struct symtable **ctype); static int type_of_typedef(char *name, struct symtable **ctype); static void enum_declaration(void); // Parse the current token and return a primitive type enum value, // a pointer to any composite type and possibly modify // the class of the type. int parse_type(struct symtable **ctype, int *class) { int type, exstatic = 1; // See if the class has been changed to extern or static while (exstatic) { switch (Token.token) { case T_EXTERN: if (*class == C_STATIC) fatal("Illegal to have extern and static at the same time"); *class = C_EXTERN; scan(&Token); break; case T_STATIC: if (*class == C_LOCAL) fatal("Compiler doesn't support static local declarations"); if (*class == C_EXTERN) fatal("Illegal to have extern and static at the same time"); *class = C_STATIC; scan(&Token); break; default: exstatic = 0; } } // Now work on the actual type keyword switch (Token.token) { case T_VOID: type = P_VOID; scan(&Token); break; case T_CHAR: type = P_CHAR; scan(&Token); break; case T_INT: type = P_INT; scan(&Token); break; case T_LONG: type = P_LONG; scan(&Token); break; // For the following, if we have a ';' after the // parsing then there is no type, so return -1. 
// Example: struct x {int y; int z}; case T_STRUCT: type = P_STRUCT; *ctype = composite_declaration(P_STRUCT); if (Token.token == T_SEMI) type = -1; break; case T_UNION: type = P_UNION; *ctype = composite_declaration(P_UNION); if (Token.token == T_SEMI) type = -1; break; case T_ENUM: type = P_INT; // Enums are really ints enum_declaration(); if (Token.token == T_SEMI) type = -1; break; case T_TYPEDEF: type = typedef_declaration(ctype); if (Token.token == T_SEMI) type = -1; break; case T_IDENT: type = type_of_typedef(Text, ctype); break; default: fatals("Illegal type, token", Token.tokstr); } return (type); } // Given a type parsed by parse_type(), scan in any following // '*' tokens and return the new type int parse_stars(int type) { while (1) { if (Token.token != T_STAR) break; type = pointer_to(type); scan(&Token); } return (type); } // Parse a type which appears inside a cast int parse_cast(struct symtable **ctype) { int type, class = 0; // Get the type inside the parentheses type = parse_stars(parse_type(ctype, &class)); // Do some error checking. I'm sure more can be done if (type == P_STRUCT || type == P_UNION || type == P_VOID) fatal("Cannot cast to a struct, union or void type"); return (type); } // Given a type, parse an expression of literals and ensure // that the type of this expression matches the given type. // Parse any type cast that precedes the expression. // If an integer literal, return this value. // If a string literal, return the label number of the string. 
int parse_literal(int type) { struct ASTnode *tree; // Parse the expression and optimise the resulting AST tree tree = optimise(binexpr(0)); // If there's a cast, get the child and // mark it as having the type from the cast if (tree->op == A_CAST) { tree->left->type = tree->type; tree = tree->left; } // The tree must now have an integer or string literal if (tree->op != A_INTLIT && tree->op != A_STRLIT) fatal("Cannot initialise globals with a general expression"); // If the type is char * and if (type == pointer_to(P_CHAR)) { // We have a string literal, return the label number if (tree->op == A_STRLIT) return (tree->a_intvalue); // We have a zero int literal, so that's a NULL if (tree->op == A_INTLIT && tree->a_intvalue == 0) return (0); } // We only get here with an integer literal. The input type // is an integer type and is wide enough to hold the literal value if (inttype(type) && typesize(type, NULL) >= typesize(tree->type, NULL)) return (tree->a_intvalue); fatal("Type mismatch: literal vs. variable"); return (0); // Keep -Wall happy } // Given a pointer to a symbol that may already exist // return true if this symbol doesn't exist. 
We use // this function to convert externs into globals int is_new_symbol(struct symtable *sym, int class, int type, struct symtable *ctype) { // There is no existing symbol, thus is new if (sym==NULL) return(1); // global versus extern: if they match that it's not new // and we can convert the class to global if ((sym->class== C_GLOBAL && class== C_EXTERN) || (sym->class== C_EXTERN && class== C_GLOBAL)) { // If the types don't match, there's a problem if (type != sym->type) fatals("Type mismatch between global/extern", sym->name); // Struct/unions, also compare the ctype if (type >= P_STRUCT && ctype != sym->ctype) fatals("Type mismatch between global/extern", sym->name); // If we get to here, the types match, so mark the symbol // as global sym->class= C_GLOBAL; // Return that symbol is not new return(0); } // It must be a duplicate symbol if we get here fatals("Duplicate global variable declaration", sym->name); return(-1); // Keep -Wall happy } // Given the type, name and class of a scalar variable, // parse any initialisation value and allocate storage for it. // Return the variable's symbol table entry. 
static struct symtable *scalar_declaration(char *varname, int type, struct symtable *ctype, int class, struct ASTnode **tree) { struct symtable *sym = NULL; struct ASTnode *varnode, *exprnode; *tree = NULL; // Add this as a known scalar switch (class) { case C_STATIC: case C_EXTERN: case C_GLOBAL: // See if this variable is new or already exists sym= findglob(varname); if (is_new_symbol(sym, class, type, ctype)) sym = addglob(varname, type, ctype, S_VARIABLE, class, 1, 0); break; case C_LOCAL: sym = addlocl(varname, type, ctype, S_VARIABLE, 1); break; case C_PARAM: sym = addparm(varname, type, ctype, S_VARIABLE); break; case C_MEMBER: sym = addmemb(varname, type, ctype, S_VARIABLE, 1); break; } // The variable is being initialised if (Token.token == T_ASSIGN) { // Only possible for a global or local if (class != C_GLOBAL && class != C_LOCAL && class != C_STATIC) fatals("Variable can not be initialised", varname); scan(&Token); // Globals must be assigned a literal value if (class == C_GLOBAL || class == C_STATIC) { // Create one initial value for the variable and // parse this value sym->initlist = (int *) malloc(sizeof(int)); sym->initlist[0] = parse_literal(type); } if (class == C_LOCAL) { // Make an A_IDENT AST node with the variable varnode = mkastleaf(A_IDENT, sym->type, sym->ctype, sym, 0); // Get the expression for the assignment, make into a rvalue exprnode = binexpr(0); exprnode->rvalue = 1; // Ensure the expression's type matches the variable exprnode = modify_type(exprnode, varnode->type, varnode->ctype, 0); if (exprnode == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree *tree = mkastnode(A_ASSIGN, exprnode->type, exprnode->ctype, exprnode, NULL, varnode, NULL, 0); } } // Generate any global space if (class == C_GLOBAL || class == C_STATIC) genglobsym(sym); return (sym); } // Given the type, name and class of an variable, parse // the size of the array, if any. 
Then parse any initialisation // value and allocate storage for it. // Return the variable's symbol table entry. static struct symtable *array_declaration(char *varname, int type, struct symtable *ctype, int class) { struct symtable *sym; // New symbol table entry int nelems = -1; // Assume the number of elements won't be given int maxelems; // The maximum number of elements in the init list int *initlist; // The list of initial elements int i = 0, j; // Skip past the '[' scan(&Token); // See we have an array size if (Token.token != T_RBRACKET) { nelems = parse_literal(P_INT); if (nelems <= 0) fatald("Array size is illegal", nelems); } // Ensure we have a following ']' match(T_RBRACKET, "]"); // Add this as a known array. We treat the // array as a pointer to its elements' type switch (class) { case C_STATIC: case C_EXTERN: case C_GLOBAL: // See if this variable is new or already exists sym= findglob(varname); if (is_new_symbol(sym, class, pointer_to(type), ctype)) sym = addglob(varname, pointer_to(type), ctype, S_ARRAY, class, 0, 0); break; case C_LOCAL: sym = addlocl(varname, pointer_to(type), ctype, S_ARRAY, 0); break; default: fatal("Declaration of array parameters is not implemented"); } // Array initialisation if (Token.token == T_ASSIGN) { if (class != C_GLOBAL && class != C_STATIC) fatals("Variable can not be initialised", varname); scan(&Token); // Get the following left curly bracket match(T_LBRACE, "{"); #define TABLE_INCREMENT 10 // If the array already has nelems, allocate that many elements // in the list. Otherwise, start with TABLE_INCREMENT. 
if (nelems != -1) maxelems = nelems; else maxelems = TABLE_INCREMENT; initlist = (int *) malloc(maxelems * sizeof(int)); // Loop getting a new literal value from the list while (1) { // Check we can add the next value, then parse and add it if (nelems != -1 && i == maxelems) fatal("Too many values in initialisation list"); initlist[i++] = parse_literal(type); // Increase the list size if the original size was // not set and we have hit the end of the current list if (nelems == -1 && i == maxelems) { maxelems += TABLE_INCREMENT; initlist = (int *) realloc(initlist, maxelems * sizeof(int)); } // Leave when we hit the right curly bracket if (Token.token == T_RBRACE) { scan(&Token); break; } // Next token must be a comma, then comma(); } // Zero any unused elements in the initlist. // Attach the list to the symbol table entry for (j = i; j < sym->nelems; j++) initlist[j] = 0; if (i > nelems) nelems = i; sym->initlist = initlist; } // Set the size of the array and the number of elements // Only externs can have no elements. if (class != C_EXTERN && nelems<=0) fatals("Array must have non-zero elements", sym->name); sym->nelems = nelems; sym->size = sym->nelems * typesize(type, ctype); // Generate any global space if (class == C_GLOBAL || class == C_STATIC) genglobsym(sym); return (sym); } // Given a pointer to the new function being declared and // a possibly NULL pointer to the function's previous declaration, // parse a list of parameters and cross-check them against the // previous declaration. 
Return the count of parameters static int param_declaration_list(struct symtable *oldfuncsym, struct symtable *newfuncsym) { int type, paramcnt = 0; struct symtable *ctype; struct symtable *protoptr = NULL; struct ASTnode *unused; // Get the pointer to the first prototype parameter if (oldfuncsym != NULL) protoptr = oldfuncsym->member; // Loop getting any parameters while (Token.token != T_RPAREN) { // If the first token is 'void' if (Token.token == T_VOID) { // Peek at the next token. If a ')', the function // has no parameters, so leave the loop. scan(&Peektoken); if (Peektoken.token == T_RPAREN) { // Move the Peektoken into the Token paramcnt = 0; scan(&Token); break; } } // Get the type of the next parameter type = declaration_list(&ctype, C_PARAM, T_COMMA, T_RPAREN, &unused); if (type == -1) fatal("Bad type in parameter list"); // Ensure the type of this parameter matches the prototype if (protoptr != NULL) { if (type != protoptr->type) fatald("Type doesn't match prototype for parameter", paramcnt + 1); protoptr = protoptr->next; } paramcnt++; // Stop when we hit the right parenthesis if (Token.token == T_RPAREN) break; // We need a comma as separator comma(); } if (oldfuncsym != NULL && paramcnt != oldfuncsym->nelems) fatals("Parameter count mismatch for function", oldfuncsym->name); // Return the count of parameters return (paramcnt); } // // function_declaration: type identifier '(' parameter_list ')' ; // | type identifier '(' parameter_list ')' compound_statement ; // // Parse the declaration of function. static struct symtable *function_declaration(char *funcname, int type, struct symtable *ctype, int class) { struct ASTnode *tree, *finalstmt; struct symtable *oldfuncsym, *newfuncsym = NULL; int endlabel, paramcnt; // Text has the identifier's name. If this exists and is a // function, get the id. Otherwise, set oldfuncsym to NULL. 
if ((oldfuncsym = findsymbol(funcname)) != NULL) if (oldfuncsym->stype != S_FUNCTION) oldfuncsym = NULL; // If this is a new function declaration, get a // label-id for the end label, and add the function // to the symbol table, if (oldfuncsym == NULL) { endlabel = genlabel(); // Assumtion: functions only return scalar types, so NULL below newfuncsym = addglob(funcname, type, NULL, S_FUNCTION, class, 0, endlabel); } // Scan in the '(', any parameters and the ')'. // Pass in any existing function prototype pointer lparen(); paramcnt = param_declaration_list(oldfuncsym, newfuncsym); rparen(); // If this is a new function declaration, update the // function symbol entry with the number of parameters. // Also copy the parameter list into the function's node. if (newfuncsym) { newfuncsym->nelems = paramcnt; newfuncsym->member = Parmhead; oldfuncsym = newfuncsym; } // Clear out the parameter list Parmhead = Parmtail = NULL; // Declaration ends in a semicolon, only a prototype. if (Token.token == T_SEMI) return (oldfuncsym); // This is not just a prototype. // Set the Functionid global to the function's symbol pointer Functionid = oldfuncsym; // Get the AST tree for the compound statement and mark // that we have parsed no loops or switches yet Looplevel = 0; Switchlevel = 0; lbrace(); tree = compound_statement(0); rbrace(); // If the function type isn't P_VOID .. if (type != P_VOID) { // Error if no statements in the function if (tree == NULL) fatal("No statements in function with non-void type"); // Check that the last AST operation in the // compound statement was a return statement finalstmt = (tree->op == A_GLUE) ? 
tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); } // Build the A_FUNCTION node which has the function's symbol pointer // and the compound statement sub-tree tree = mkastunary(A_FUNCTION, type, ctype, tree, oldfuncsym, endlabel); // Do optimisations on the AST tree tree = optimise(tree); // Dump the AST tree if requested if (O_dumpAST) { dumpAST(tree, NOLABEL, 0); fprintf(stdout, "\n\n"); } // Generate the assembly code for it genAST(tree, NOLABEL, NOLABEL, NOLABEL, 0); // Now free the symbols associated // with this function freeloclsyms(); return (oldfuncsym); } // Parse composite type declarations: structs or unions. // Either find an existing struct/union declaration, or build // a struct/union symbol table entry and return its pointer. static struct symtable *composite_declaration(int type) { struct symtable *ctype = NULL; struct symtable *m; struct ASTnode *unused; int offset; int t; // Skip the struct/union keyword scan(&Token); // See if there is a following struct/union name if (Token.token == T_IDENT) { // Find any matching composite type if (type == P_STRUCT) ctype = findstruct(Text); else ctype = findunion(Text); scan(&Token); } // If the next token isn't an LBRACE , this is // the usage of an existing struct/union type. // Return the pointer to the type. if (Token.token != T_LBRACE) { if (ctype == NULL) fatals("unknown struct/union type", Text); return (ctype); } // Ensure this struct/union type hasn't been // previously defined if (ctype) fatals("previously defined struct/union", Text); // Build the composite type and skip the left brace if (type == P_STRUCT) ctype = addstruct(Text); else ctype = addunion(Text); scan(&Token); // Scan in the list of members while (1) { // Get the next member. 
m is used as a dummy t = declaration_list(&m, C_MEMBER, T_SEMI, T_RBRACE, &unused); if (t == -1) fatal("Bad type in member list"); if (Token.token == T_SEMI) scan(&Token); if (Token.token == T_RBRACE) break; } // Attach to the struct type's node rbrace(); if (Membhead == NULL) fatals("No members in struct", ctype->name); ctype->member = Membhead; Membhead = Membtail = NULL; // Set the offset of the initial member // and find the first free byte after it m = ctype->member; m->st_posn = 0; offset = typesize(m->type, m->ctype); // Set the position of each successive member in the composite type // Unions are easy. For structs, align the member and find the next free byte for (m = m->next; m != NULL; m = m->next) { // Set the offset for this member if (type == P_STRUCT) m->st_posn = genalign(m->type, offset, 1); else m->st_posn = 0; // Get the offset of the next free byte after this member offset += typesize(m->type, m->ctype); } // Set the overall size of the composite type ctype->size = offset; return (ctype); } // Parse an enum declaration static void enum_declaration(void) { struct symtable *etype = NULL; char *name = NULL; int intval = 0; // Skip the enum keyword. scan(&Token); // If there's a following enum type name, get a // pointer to any existing enum type node. if (Token.token == T_IDENT) { etype = findenumtype(Text); name = strdup(Text); // As it gets tromped soon scan(&Token); } // If the next token isn't a LBRACE, check // that we have an enum type name, then return if (Token.token != T_LBRACE) { if (etype == NULL) fatals("undeclared enum type:", name); return; } // We do have an LBRACE. Skip it scan(&Token); // If we have an enum type name, ensure that it // hasn't been declared before. 
if (etype != NULL) fatals("enum type redeclared:", etype->name); else // Build an enum type node for this identifier etype = addenum(name, C_ENUMTYPE, 0); // Loop to get all the enum values while (1) { // Ensure we have an identifier // Copy it in case there's an int literal coming up ident(); name = strdup(Text); // Ensure this enum value hasn't been declared before etype = findenumval(name); if (etype != NULL) fatals("enum value redeclared:", Text); // If the next token is an '=', skip it and // get the following int literal if (Token.token == T_ASSIGN) { scan(&Token); if (Token.token != T_INTLIT) fatal("Expected int literal after '='"); intval = Token.intvalue; scan(&Token); } // Build an enum value node for this identifier. // Increment the value for the next enum identifier. etype = addenum(name, C_ENUMVAL, intval++); // Bail out on a right curly bracket, else get a comma if (Token.token == T_RBRACE) break; comma(); } scan(&Token); // Skip over the right curly bracket } // Parse a typedef declaration and return the type // and ctype that it represents static int typedef_declaration(struct symtable **ctype) { int type, class = 0; // Skip the typedef keyword. scan(&Token); // Get the actual type following the keyword type = parse_type(ctype, &class); if (class != 0) fatal("Can't have extern in a typedef declaration"); // See if the typedef identifier already exists if (findtypedef(Text) != NULL) fatals("redefinition of typedef", Text); // Get any following '*' tokens type = parse_stars(type); // It doesn't exist so add it to the typedef list addtypedef(Text, type, *ctype); scan(&Token); return (type); } // Given a typedef name, return the type it represents static int type_of_typedef(char *name, struct symtable **ctype) { struct symtable *t; // Look up the typedef in the list t = findtypedef(name); if (t == NULL) fatals("unknown type", name); scan(&Token); *ctype = t->ctype; return (t->type); } // Parse the declaration of a variable or function. 
// The type and any following '*'s have been scanned, and we // have the identifier in the Token variable. // The class argument is the variable's class. // Return a pointer to the symbol's entry in the symbol table static struct symtable *symbol_declaration(int type, struct symtable *ctype, int class, struct ASTnode **tree) { struct symtable *sym = NULL; char *varname = strdup(Text); // Ensure that we have an identifier. // We copied it above so we can scan more tokens in, e.g. // an assignment expression for a local variable. ident(); // Deal with function declarations if (Token.token == T_LPAREN) { return (function_declaration(varname, type, ctype, class)); } // See if this array or scalar variable has already been declared switch (class) { case C_EXTERN: case C_STATIC: case C_GLOBAL: case C_LOCAL: case C_PARAM: if (findlocl(varname) != NULL) fatals("Duplicate local variable declaration", varname); case C_MEMBER: if (findmember(varname) != NULL) fatals("Duplicate struct/union member declaration", varname); } // Add the array or scalar variable to the symbol table if (Token.token == T_LBRACKET) { sym = array_declaration(varname, type, ctype, class); *tree= NULL; // Local arrays are not initialised } else sym = scalar_declaration(varname, type, ctype, class, tree); return (sym); } // Parse a list of symbols where there is an initial type. // Return the type of the symbols. et1 and et2 are end tokens. int declaration_list(struct symtable **ctype, int class, int et1, int et2, struct ASTnode **gluetree) { int inittype, type; struct symtable *sym; struct ASTnode *tree; *gluetree = NULL; // Get the initial type. 
If -1, it was // a composite type definition, return this if ((inittype = parse_type(ctype, &class)) == -1) return (inittype); // Now parse the list of symbols while (1) { // See if this symbol is a pointer type = parse_stars(inittype); // Parse this symbol sym = symbol_declaration(type, *ctype, class, &tree); // We parsed a function, there is no list so leave if (sym->stype == S_FUNCTION) { if (class != C_GLOBAL && class != C_STATIC) fatal("Function definition not at global level"); return (type); } // Glue any AST tree from a local declaration // to build a sequence of assignments to perform if (*gluetree == NULL) *gluetree = tree; else *gluetree = mkastnode(A_GLUE, P_NONE, NULL, *gluetree, NULL, tree, NULL, 0); // We are at the end of the list, leave if (Token.token == et1 || Token.token == et2) return (type); // Otherwise, we need a comma as separator comma(); } return(0); // Keep -Wall happy } // Parse one or more global declarations, either // variables, functions or structs void global_declarations(void) { struct symtable *ctype= NULL; struct ASTnode *unused; while (Token.token != T_EOF) { declaration_list(&ctype, C_GLOBAL, T_SEMI, T_EOF, &unused); // Skip any semicolons and right curly brackets if (Token.token == T_SEMI) scan(&Token); } } ================================================ FILE: 58_Ptr_Increments/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c void reject_token(struct token *t); int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, int type, struct symtable *ctype, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue); struct ASTnode *mkastleaf(int op, int type, struct symtable *ctype, struct symtable *sym, int intvalue); struct ASTnode *mkastunary(int op, int type, struct symtable *ctype, struct ASTnode *left, struct symtable *sym, int intvalue); void dumpAST(struct ASTnode 
*n, int label, int level); // gen.c int genlabel(void); int genAST(struct ASTnode *n, int iflabel, int looptoplabel, int loopendlabel, int parentASTop); void genpreamble(); void genpostamble(); void genfreeregs(int keepreg); void genglobsym(struct symtable *node); int genglobstr(char *strvalue, int append); void genglobstrend(void); int genprimsize(int type); int genalign(int type, int offset, int direction); void genreturn(int reg, int id); // cg.c int cgprimsize(int type); int cgalign(int type, int offset, int direction); void cgtextseg(); void cgdataseg(); int alloc_register(void); void freeall_registers(int keepreg); void spill_all_regs(void); void cgpreamble(); void cgpostamble(); void cgfuncpreamble(struct symtable *sym); void cgfuncpostamble(struct symtable *sym); int cgloadint(int value, int type); int cgloadvar(struct symtable *sym, int op); int cgloadglobstr(int label); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdivmod(int r1, int r2, int op); int cgshlconst(int r, int val); int cgcall(struct symtable *sym, int numargs); void cgcopyarg(int r, int argposn); int cgstorglob(int r, struct symtable *sym); int cgstorlocal(int r, struct symtable *sym); void cgglobsym(struct symtable *node); void cgglobstr(int l, char *strvalue, int append); void cgglobstrend(void); int cgcompare_and_set(int ASTop, int r1, int r2); int cgcompare_and_jump(int ASTop, int r1, int r2, int label); void cglabel(int l); void cgjump(int l); int cgwiden(int r, int oldtype, int newtype); void cgreturn(int reg, struct symtable *sym); int cgaddress(struct symtable *sym); int cgderef(int r, int type); int cgstorderef(int r1, int r2, int type); int cgnegate(int r); int cginvert(int r); int cglognot(int r); void cgloadboolean(int r, int val); int cgboolean(int r, int op, int label); int cgand(int r1, int r2); int cgor(int r1, int r2); int cgxor(int r1, int r2); int cgshl(int r1, int r2); int cgshr(int r1, int r2); void cgswitch(int reg, int casecount, 
int toplabel, int *caselabel, int *caseval, int defaultlabel); void cgmove(int r1, int r2); // expr.c struct ASTnode *expression_list(int endtoken); struct ASTnode *binexpr(int ptp); // stmt.c struct ASTnode *compound_statement(int inswitch); // misc.c void match(int t, char *what); void semi(void); void lbrace(void); void rbrace(void); void lparen(void); void rparen(void); void ident(void); void comma(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node); struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn); struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn); struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int nelems); struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype); struct symtable *addstruct(char *name); struct symtable *addunion(char *name); struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int nelems); struct symtable *addenum(char *name, int class, int value); struct symtable *addtypedef(char *name, int type, struct symtable *ctype); struct symtable *findglob(char *s); struct symtable *findlocl(char *s); struct symtable *findsymbol(char *s); struct symtable *findmember(char *s); struct symtable *findstruct(char *s); struct symtable *findunion(char *s); struct symtable *findenumtype(char *s); struct symtable *findenumval(char *s); struct symtable *findtypedef(char *s); void clear_symtable(void); void freeloclsyms(void); void freestaticsyms(void); void dumptable(struct symtable *head, char *name, int indent); void dumpsymtables(void); // decl.c int parse_type(struct symtable **ctype, int *class); int parse_stars(int type); int parse_cast(struct symtable **ctype); int 
declaration_list(struct symtable **ctype, int class, int et1, int et2, struct ASTnode **gluetree); void global_declarations(void); // types.c int inttype(int type); int ptrtype(int type); int pointer_to(int type); int value_at(int type); int typesize(int type, struct symtable *ctype); struct ASTnode *modify_type(struct ASTnode *tree, int rtype, struct symtable *rctype, int op); // opt.c struct ASTnode *optimise(struct ASTnode *n); ================================================ FILE: 58_Ptr_Increments/defs.h ================================================ #include #include #include #include #include "incdir.h" // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 enum { TEXTLEN = 512 // Length of identifiers in input }; // Commands and default filenames #define AOUT "a.out" #ifdef __NASM__ #define ASCMD "nasm -f elf64 -w-ptr -pnasmext.inc -o " #define LDCMD "cc -no-pie -fno-plt -Wall -o " #else #define ASCMD "as -o " #define LDCMD "cc -o " #endif #define CPPCMD "cpp -nostdinc -isystem " // Token types enum { T_EOF, // Binary operators T_ASSIGN, T_ASPLUS, T_ASMINUS, T_ASSTAR, T_ASSLASH, T_ASMOD, T_QUESTION, T_LOGOR, T_LOGAND, T_OR, T_XOR, T_AMPER, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, T_LSHIFT, T_RSHIFT, T_PLUS, T_MINUS, T_STAR, T_SLASH, T_MOD, // Other operators T_INC, T_DEC, T_INVERT, T_LOGNOT, // Type keywords T_VOID, T_CHAR, T_INT, T_LONG, // Other keywords T_IF, T_ELSE, T_WHILE, T_FOR, T_RETURN, T_STRUCT, T_UNION, T_ENUM, T_TYPEDEF, T_EXTERN, T_BREAK, T_CONTINUE, T_SWITCH, T_CASE, T_DEFAULT, T_SIZEOF, T_STATIC, // Structural tokens T_INTLIT, T_STRLIT, T_SEMI, T_IDENT, T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, T_LBRACKET, T_RBRACKET, T_COMMA, T_DOT, T_ARROW, T_COLON }; // Token structure struct token { int token; // Token type, from the enum list above char *tokstr; // String version of the token int intvalue; // For T_INTLIT, the integer value }; // AST node types. 
// The first few line up
// with the related tokens
// The number at the end of each row is the value of that row's first name.
enum {
  A_ASSIGN = 1, A_ASPLUS, A_ASMINUS, A_ASSTAR,			//  1
  A_ASSLASH, A_ASMOD, A_TERNARY, A_LOGOR,			//  5
  A_LOGAND, A_OR, A_XOR, A_AND, A_EQ, A_NE, A_LT,		//  9
  A_GT, A_LE, A_GE, A_LSHIFT, A_RSHIFT,				// 16
  A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_MOD,		// 21
  A_INTLIT, A_STRLIT, A_IDENT, A_GLUE,				// 26
  A_IF, A_WHILE, A_FUNCTION, A_WIDEN, A_RETURN,			// 30
  A_FUNCCALL, A_DEREF, A_ADDR, A_SCALE,				// 35
  A_PREINC, A_PREDEC, A_POSTINC, A_POSTDEC,			// 39
  A_NEGATE, A_INVERT, A_LOGNOT, A_TOBOOL, A_BREAK,		// 43
  A_CONTINUE, A_SWITCH, A_CASE, A_DEFAULT, A_CAST		// 48
};

// Primitive types. The bottom 4 bits is an integer
// value that represents the level of indirection,
// e.g. 0= no pointer, 1= pointer, 2= pointer pointer etc.
// Each base type is a multiple of 16, which leaves those 4 bits clear.
enum {
  P_NONE, P_VOID = 16, P_CHAR = 32, P_INT = 48, P_LONG = 64,
  P_STRUCT=80, P_UNION=96
};

// Structural types
enum {
  S_VARIABLE, S_FUNCTION, S_ARRAY
};

// Storage classes
enum {
  C_GLOBAL = 1,			// Globally visible symbol
  C_LOCAL,			// Locally visible symbol
  C_PARAM,			// Locally visible function parameter
  C_EXTERN,			// External globally visible symbol
  C_STATIC,			// Static symbol, visible in one file
  C_STRUCT,			// A struct
  C_UNION,			// A union
  C_MEMBER,			// Member of a struct or union
  C_ENUMTYPE,			// A named enumeration type
  C_ENUMVAL,			// A named enumeration value
  C_TYPEDEF			// A named typedef
};

// Symbol table structure
struct symtable {
  char *name;			// Name of a symbol
  int type;			// Primitive type for the symbol
  struct symtable *ctype;	// If struct/union, ptr to that type
  int stype;			// Structural type for the symbol
  int class;			// Storage class for the symbol
  int size;			// Total size in bytes of this symbol
  int nelems;			// Functions: # params. Arrays: # elements
#define st_endlabel st_posn	// For functions, the end label
  int st_posn;			// For locals, the negative offset
				// from the stack base pointer
  int *initlist;		// List of initial values
  struct symtable *next;	// Next symbol in one list
  struct symtable *member;	// First member of a function, struct,
};				// union or enum

// Abstract Syntax Tree structure
struct ASTnode {
  int op;			// "Operation" to be performed on this tree
  int type;			// Type of any expression this tree generates
  struct symtable *ctype;	// If struct/union, ptr to that type
  int rvalue;			// True if the node is an rvalue
  struct ASTnode *left;		// Left, middle and right child trees
  struct ASTnode *mid;
  struct ASTnode *right;
  struct symtable *sym;		// For many AST nodes, the pointer to
				// the symbol in the symbol table
#define a_intvalue a_size	// For A_INTLIT, the integer value
  int a_size;			// For A_SCALE, the size to scale by
};

enum {
  NOREG = -1,			// Use NOREG when the AST generation
				// functions have no register to return
  NOLABEL = 0			// Use NOLABEL when we have no label to
				// pass to genAST()
};

================================================
FILE: 58_Ptr_Increments/expr.c
================================================

#include "defs.h"
#include "data.h"
#include "decl.h"

// Parsing of expressions
// Copyright (c) 2019 Warren Toomey, GPL3

// expression_list: (empty)
//        | expression
//        | expression ',' expression_list
//        ;

// Parse a list of zero or more comma-separated expressions and
// return an AST composed of A_GLUE nodes with the left-hand child
// being the sub-tree of previous expressions (or NULL) and the right-hand
// child being the next expression. Each A_GLUE node will have size field
// set to the number of expressions in the tree at this point.
// If no
// expressions are parsed, NULL is returned
// endtoken is the token that terminates the list; it is not consumed here.
struct ASTnode *expression_list(int endtoken) {
  struct ASTnode *tree = NULL;
  struct ASTnode *child = NULL;
  int exprcount = 0;

  // Loop until the end token
  while (Token.token != endtoken) {

    // Parse the next expression and increment the expression count
    child = binexpr(0);
    exprcount++;

    // Build an A_GLUE AST node with the previous tree as the left child
    // and the new expression as the right child. Store the expression count.
    tree =
      mkastnode(A_GLUE, P_NONE, NULL, tree, NULL, child, NULL, exprcount);

    // Stop when we reach the end token
    if (Token.token == endtoken)
      break;

    // Must have a ',' at this point
    match(T_COMMA, ",");
  }

  // Return the tree of expressions
  return (tree);
}

// Parse a function call and return its AST
// The function's name is taken from Text. The returned A_FUNCCALL node
// has the argument list as its child.
static struct ASTnode *funccall(void) {
  struct ASTnode *tree;
  struct symtable *funcptr;

  // Check that the identifier has been defined as a function,
  // then make a leaf node for it.
  if ((funcptr = findsymbol(Text)) == NULL || funcptr->stype != S_FUNCTION) {
    fatals("Undeclared function", Text);
  }

  // Get the '('
  lparen();

  // Parse the argument expression list
  tree = expression_list(T_RPAREN);

  // XXX Check type of each argument against the function's prototype

  // Build the function call AST node. Store the
  // function's return type as this node's type.
  // Also record the function's symbol-id
  tree =
    mkastunary(A_FUNCCALL, funcptr->type, funcptr->ctype, tree, funcptr, 0);

  // Get the ')'
  rparen();
  return (tree);
}

// Parse the index into an array and return an AST tree for it
// left is the tree for the array or pointer being indexed.
// On entry the current token is the '['.
static struct ASTnode *array_access(struct ASTnode *left) {
  struct ASTnode *right;

  // Check that the sub-tree is a pointer
  if (!ptrtype(left->type))
    fatal("Not an array or pointer");

  // Get the '['
  scan(&Token);

  // Parse the following expression
  right = binexpr(0);

  // Get the ']'
  match(T_RBRACKET, "]");

  // Ensure that this is of int type
  if (!inttype(right->type))
    fatal("Array index is not of integer type");

  // Make the left tree an rvalue
  left->rvalue = 1;

  // Scale the index by the size of the element's type
  right = modify_type(right, left->type, left->ctype, A_ADD);

  // Return an AST tree where the array's base has the offset added to it,
  // and dereference the element. Still an lvalue at this point.
  left =
    mkastnode(A_ADD, left->type, left->ctype, left, NULL, right, NULL, 0);
  left =
    mkastunary(A_DEREF, value_at(left->type), left->ctype, left, NULL, 0);
  return (left);
}

// Parse the member reference of a struct or union
// and return an AST tree for it. If withpointer is true,
// the access is through a pointer to the member.
// On entry the current token is the '.' or '->'.
static struct ASTnode *member_access(struct ASTnode *left, int withpointer) {
  struct ASTnode *right;
  struct symtable *typeptr;
  struct symtable *m;

  // Check that the left AST tree is a pointer to struct or union
  if (withpointer && left->type != pointer_to(P_STRUCT)
      && left->type != pointer_to(P_UNION))
    fatal("Expression is not a pointer to a struct/union");

  // Or, check that the left AST tree is a struct or union.
  // If so, change it from an A_IDENT to an A_ADDR so that
  // we get the base address, not the value at this address.
  if (!withpointer) {
    if (left->type == P_STRUCT || left->type == P_UNION)
      left->op = A_ADDR;
    else
      fatal("Expression is not a struct/union");
  }

  // Get the details of the composite type
  typeptr = left->ctype;

  // Skip the '.' or '->' token and get the member's name
  scan(&Token);
  ident();

  // Find the matching member's name in the type
  // Die if we can't find it
  for (m = typeptr->member; m != NULL; m = m->next)
    if (!strcmp(m->name, Text))
      break;
  if (m == NULL)
    fatals("No member found in struct/union: ", Text);

  // Make the left tree an rvalue
  left->rvalue = 1;

  // Build an A_INTLIT node with the offset
  right = mkastleaf(A_INTLIT, P_INT, NULL, NULL, m->st_posn);

  // Add the member's offset to the base of the struct/union
  // and dereference it. Still an lvalue at this point
  left =
    mkastnode(A_ADD, pointer_to(m->type), m->ctype, left, NULL, right, NULL,
	      0);
  left = mkastunary(A_DEREF, m->type, m->ctype, left, NULL, 0);
  return (left);
}

// Parse a parenthesised expression and
// return an AST node representing it.
static struct ASTnode *paren_expression(int ptp) {
  struct ASTnode *n;
  int type = 0;
  struct symtable *ctype = NULL;

  // Beginning of a parenthesised expression, skip the '('.
  scan(&Token);

  // If the token after is a type identifier, this is a cast expression
  switch (Token.token) {
  case T_IDENT:
    // We have to see if the identifier matches a typedef.
    // If not, treat it as an expression.
    if (findtypedef(Text) == NULL) {
      n = binexpr(0);	// ptp is zero as expression inside ( )
      break;
    }
  case T_VOID:
  case T_CHAR:
  case T_INT:
  case T_LONG:
  case T_STRUCT:
  case T_UNION:
  case T_ENUM:
    // Get the type inside the parentheses
    type = parse_cast(&ctype);

    // Skip the closing ')' and then parse the following expression
    rparen();

  default:
    n = binexpr(ptp);		// Scan in the expression. We pass in ptp
				// as the cast doesn't change the
				// expression's precedence
  }

  // We now have at least an expression in n, and possibly a non-zero type
  // in type if there was a cast.
Skip the closing ')' if there was no cast. if (type == 0) rparen(); else // Otherwise, make a unary AST node for the cast n = mkastunary(A_CAST, type, ctype, n, NULL, 0); return (n); } // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(int ptp) { struct ASTnode *n; struct symtable *enumptr; struct symtable *varptr; int id; int type = 0; int size, class; struct symtable *ctype; switch (Token.token) { case T_STATIC: case T_EXTERN: fatal("Compiler doesn't support static or extern local declarations"); case T_SIZEOF: // Skip the T_SIZEOF and ensure we have a left parenthesis scan(&Token); if (Token.token != T_LPAREN) fatal("Left parenthesis expected after sizeof"); scan(&Token); // Get the type inside the parentheses type = parse_stars(parse_type(&ctype, &class)); // Get the type's size size = typesize(type, ctype); rparen(); // Make a leaf node int literal with the size return (mkastleaf(A_INTLIT, P_INT, NULL, NULL, size)); case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. // Make it a P_CHAR if it's within the P_CHAR range if (Token.intvalue >= 0 && Token.intvalue < 256) n = mkastleaf(A_INTLIT, P_CHAR, NULL, NULL, Token.intvalue); else n = mkastleaf(A_INTLIT, P_INT, NULL, NULL, Token.intvalue); break; case T_STRLIT: // For a STRLIT token, generate the assembly for it. id = genglobstr(Text, 0); // For successive STRLIT tokens, append their contents // to this one while (1) { scan(&Peektoken); if (Peektoken.token != T_STRLIT) break; genglobstr(Text, 1); scan(&Token); // To skip it properly } // Now make a leaf AST node for it. id is the string's label. genglobstrend(); n = mkastleaf(A_STRLIT, pointer_to(P_CHAR), NULL, NULL, id); break; case T_IDENT: // If the identifier matches an enum value, // return an A_INTLIT node if ((enumptr = findenumval(Text)) != NULL) { n = mkastleaf(A_INTLIT, P_INT, NULL, NULL, enumptr->st_posn); break; } // See if this identifier exists as a symbol. 
For arrays, set rvalue to 1. if ((varptr = findsymbol(Text)) == NULL) fatals("Unknown variable or function", Text); switch (varptr->stype) { case S_VARIABLE: n = mkastleaf(A_IDENT, varptr->type, varptr->ctype, varptr, 0); break; case S_ARRAY: n = mkastleaf(A_ADDR, varptr->type, varptr->ctype, varptr, 0); n->rvalue = 1; break; case S_FUNCTION: // Function call, see if the next token is a left parenthesis scan(&Token); if (Token.token != T_LPAREN) fatals("Function name used without parentheses", Text); return (funccall()); default: fatals("Identifier not a scalar or array variable", Text); } break; case T_LPAREN: return (paren_expression(ptp)); default: fatals("Expecting a primary expression, got token", Token.tokstr); } // Scan in the next token and return the leaf node scan(&Token); return (n); } // Parse a postfix expression and return // an AST node representing it. The // identifier is already in Text. static struct ASTnode *postfix(int ptp) { struct ASTnode *n; // Get the primary expression n = primary(ptp); // Loop until there are no more postfix operators while (1) { switch (Token.token) { case T_LBRACKET: // An array reference n = array_access(n); break; case T_DOT: // Access into a struct or union n = member_access(n, 0); break; case T_ARROW: // Pointer access into a struct or union n = member_access(n, 1); break; case T_INC: // Post-increment: skip over the token if (n->rvalue == 1) fatal("Cannot ++ on rvalue"); scan(&Token); // Can't do it twice if (n->op == A_POSTINC || n->op == A_POSTDEC) fatal("Cannot ++ and/or -- more than once"); // and change the AST operation n->op = A_POSTINC; break; case T_DEC: // Post-decrement: skip over the token if (n->rvalue == 1) fatal("Cannot -- on rvalue"); scan(&Token); // Can't do it twice if (n->op == A_POSTINC || n->op == A_POSTDEC) fatal("Cannot ++ and/or -- more than once"); // and change the AST operation n->op = A_POSTDEC; break; default: return (n); } } return (NULL); // Keep -Wall happy } // Convert a binary 
operator token into a binary AST operation. // We rely on a 1:1 mapping from token to AST operation static int binastop(int tokentype) { if (tokentype > T_EOF && tokentype <= T_MOD) return (tokentype); fatals("Syntax error, token", Tstring[tokentype]); return (0); // Keep -Wall happy } // Return true if a token is right-associative, // false otherwise. static int rightassoc(int tokentype) { if (tokentype >= T_ASSIGN && tokentype <= T_ASSLASH) return (1); return (0); } // Operator precedence for each token. Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 10, // T_EOF, T_ASSIGN, T_ASPLUS, 10, 10, // T_ASMINUS, T_ASSTAR, 10, 10, // T_ASSLASH, T_ASMOD, 15, // T_QUESTION, 20, 30, // T_LOGOR, T_LOGAND 40, 50, 60, // T_OR, T_XOR, T_AMPER 70, 70, // T_EQ, T_NE 80, 80, 80, 80, // T_LT, T_GT, T_LE, T_GE 90, 90, // T_LSHIFT, T_RSHIFT 100, 100, // T_PLUS, T_MINUS 110, 110, 110 // T_STAR, T_SLASH, T_MOD }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec; if (tokentype > T_MOD) fatals("Token with no precedence in op_precedence:", Tstring[tokentype]); prec = OpPrec[tokentype]; if (prec == 0) fatals("Syntax error, token", Tstring[tokentype]); return (prec); } // prefix_expression: postfix_expression // | '*' prefix_expression // | '&' prefix_expression // | '-' prefix_expression // | '++' prefix_expression // | '--' prefix_expression // ; // Parse a prefix expression and return // a sub-tree representing it. 
static struct ASTnode *prefix(int ptp) { struct ASTnode *tree; switch (Token.token) { case T_AMPER: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(ptp); // Ensure that it's an identifier if (tree->op != A_IDENT) fatal("& operator must be followed by an identifier"); // Prevent '&' being performed on an array if (tree->sym->stype == S_ARRAY) fatal("& operator cannot be performed on an array"); // Now change the operator to A_ADDR and the type to // a pointer to the original type tree->op = A_ADDR; tree->type = pointer_to(tree->type); break; case T_STAR: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(ptp); // Ensure the tree's type is a pointer if (!ptrtype(tree->type)) fatal("* operator must be followed by an expression of pointer type"); // Prepend an A_DEREF operation to the tree tree = mkastunary(A_DEREF, value_at(tree->type), tree->ctype, tree, NULL, 0); break; case T_MINUS: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(ptp); // Prepend a A_NEGATE operation to the tree and // make the child an rvalue. Because chars are unsigned, // also widen this if needed to int so that it's signed tree->rvalue = 1; if (tree->type == P_CHAR) tree->type = P_INT; tree = mkastunary(A_NEGATE, tree->type, tree->ctype, tree, NULL, 0); break; case T_INVERT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(ptp); // Prepend a A_INVERT operation to the tree and // make the child an rvalue. tree->rvalue = 1; tree = mkastunary(A_INVERT, tree->type, tree->ctype, tree, NULL, 0); break; case T_LOGNOT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(ptp); // Prepend a A_LOGNOT operation to the tree and // make the child an rvalue. 
tree->rvalue = 1; tree = mkastunary(A_LOGNOT, tree->type, tree->ctype, tree, NULL, 0); break; case T_INC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(ptp); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("++ operator must be followed by an identifier"); // Prepend an A_PREINC operation to the tree tree = mkastunary(A_PREINC, tree->type, tree->ctype, tree, NULL, 0); break; case T_DEC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(ptp); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("-- operator must be followed by an identifier"); // Prepend an A_PREDEC operation to the tree tree = mkastunary(A_PREDEC, tree->type, tree->ctype, tree, NULL, 0); break; default: tree = postfix(ptp); } return (tree); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; struct ASTnode *ltemp, *rtemp; int ASTop; int tokentype; // Get the tree on the left. // Fetch the next token at the same time. 
left = prefix(ptp); // If we hit one of several terminating tokens, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON || tokentype == T_RBRACE) { left->rvalue = 1; return (left); } // While the precedence of this token is more than that of the // previous token precedence, or it's right associative and // equal to the previous token's precedence while ((op_precedence(tokentype) > ptp) || (rightassoc(tokentype) && op_precedence(tokentype) == ptp)) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Determine the operation to be performed on the sub-trees ASTop = binastop(tokentype); switch (ASTop) { case A_TERNARY: // Ensure we have a ':' token, scan in the expression after it match(T_COLON, ":"); ltemp = binexpr(0); // Build and return the AST for this statement. Use the middle // expression's type as the return type. XXX We should also // consider the third expression's type. return (mkastnode (A_TERNARY, right->type, right->ctype, left, right, ltemp, NULL, 0)); case A_ASSIGN: // Assignment // Make the right tree into an rvalue right->rvalue = 1; // Ensure the right's type matches the left right = modify_type(right, left->type, left->ctype, 0); if (right == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree. However, switch // left and right around, so that the right expression's // code will be generated before the left expression ltemp = left; left = right; right = ltemp; break; default: // We are not doing a ternary or assignment, so both trees should // be rvalues. Convert both trees into rvalue if they are lvalue trees left->rvalue = 1; right->rvalue = 1; // Ensure the two types are compatible by trying // to modify each tree to match the other's type. 
ltemp = modify_type(left, right->type, right->ctype, ASTop); rtemp = modify_type(right, left->type, left->ctype, ASTop); if (ltemp == NULL && rtemp == NULL) fatal("Incompatible types in binary expression"); if (ltemp != NULL) left = ltemp; if (rtemp != NULL) right = rtemp; } // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. left = mkastnode(binastop(tokentype), left->type, left->ctype, left, NULL, right, NULL, 0); // Some operators produce an int result regardless of their operands switch (binastop(tokentype)) { case A_LOGOR: case A_LOGAND: case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: left->type = P_INT; } // Update the details of the current token. // If we hit a terminating token, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON || tokentype == T_RBRACE) { left->rvalue = 1; return (left); } } // Return the tree we have when the precedence // is the same or lower left->rvalue = 1; return (left); } ================================================ FILE: 58_Ptr_Increments/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number static int labelid = 1; int genlabel(void) { return (labelid++); } // Generate the code for an IF statement // and an optional ELSE clause. static int genIF(struct ASTnode *n, int looptoplabel, int loopendlabel) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. // When there is no ELSE clause, Lfalse _is_ // the ending label! Lfalse = genlabel(); if (n->right) Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. 
genAST(n->left, Lfalse, NOLABEL, NOLABEL, n->op); genfreeregs(NOREG); // Generate the true compound statement genAST(n->mid, NOLABEL, looptoplabel, loopendlabel, n->op); genfreeregs(NOREG); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOLABEL, NOLABEL, loopendlabel, n->op); genfreeregs(NOREG); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = genlabel(); Lend = genlabel(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. genAST(n->left, Lend, Lstart, Lend, n->op); genfreeregs(NOREG); // Generate the compound statement for the body genAST(n->right, NOLABEL, Lstart, Lend, n->op); genfreeregs(NOREG); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Generate the code for a SWITCH statement static int genSWITCH(struct ASTnode *n) { int *caseval, *caselabel; int Ljumptop, Lend; int i, reg, defaultlabel = 0, casecount = 0; struct ASTnode *c; // Create arrays for the case values and associated labels. // Ensure that we have at least one position in each array. caseval = (int *) malloc((n->a_intvalue + 1) * sizeof(int)); caselabel = (int *) malloc((n->a_intvalue + 1) * sizeof(int)); // Generate labels for the top of the jump table, and the // end of the switch statement. Set a default label for // the end of the switch, in case we don't have a default. 
Ljumptop = genlabel(); Lend = genlabel(); defaultlabel = Lend; // Output the code to calculate the switch condition reg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, 0); cgjump(Ljumptop); genfreeregs(reg); // Walk the right-child linked list to // generate the code for each case for (i = 0, c = n->right; c != NULL; i++, c = c->right) { // Get a label for this case. Store it // and the case value in the arrays. // Record if it is the default case. caselabel[i] = genlabel(); caseval[i] = c->a_intvalue; cglabel(caselabel[i]); if (c->op == A_DEFAULT) defaultlabel = caselabel[i]; else casecount++; // Generate the case code. Pass in the end label for the breaks. // If case has no body, we will fall into the following body. if (c->left) genAST(c->left, NOLABEL, NOLABEL, Lend, 0); genfreeregs(NOREG); } // Ensure the last case jumps past the switch table cgjump(Lend); // Now output the switch table and the end label. cgswitch(reg, casecount, Ljumptop, caselabel, caseval, defaultlabel); cglabel(Lend); return (NOREG); } // Generate the code for an // A_LOGAND or A_LOGOR operation static int gen_logandor(struct ASTnode *n) { // Generate two labels int Lfalse = genlabel(); int Lend = genlabel(); int reg; // Generate the code for the left expression // followed by the jump to the false label reg= genAST(n->left, NOLABEL, NOLABEL, NOLABEL, 0); cgboolean(reg, n->op, Lfalse); genfreeregs(NOREG); // Generate the code for the right expression // followed by the jump to the false label reg= genAST(n->right, NOLABEL, NOLABEL, NOLABEL, 0); cgboolean(reg, n->op, Lfalse); genfreeregs(reg); // We didn't jump so set the right boolean value if (n->op== A_LOGAND) { cgloadboolean(reg, 1); cgjump(Lend); cglabel(Lfalse); cgloadboolean(reg, 0); } else { cgloadboolean(reg, 0); cgjump(Lend); cglabel(Lfalse); cgloadboolean(reg, 1); } cglabel(Lend); return(reg); } // Generate the code to copy the arguments of a // function call to its parameters, then call the // function itself. 
Return the register that holds // the function's return value. static int gen_funccall(struct ASTnode *n) { struct ASTnode *gluetree = n->left; int reg; int numargs = 0; // Save the registers before we copy the arguments spill_all_regs(); // If there is a list of arguments, walk this list // from the last argument (right-hand child) to the // first while (gluetree) { // Calculate the expression's value reg = genAST(gluetree->right, NOLABEL, NOLABEL, NOLABEL, gluetree->op); // Copy this into the n'th function parameter: size is 1, 2, 3, ... cgcopyarg(reg, gluetree->a_size); // Keep the first (highest) number of arguments if (numargs == 0) numargs = gluetree->a_size; gluetree = gluetree->left; } // Call the function, clean up the stack (based on numargs), // and return its result return (cgcall(n->sym, numargs)); } // Generate code for a ternary expression static int gen_ternary(struct ASTnode *n) { int Lfalse, Lend; int reg, expreg; // Generate two labels: one for the // false expression, and one for the // end of the overall expression Lfalse = genlabel(); Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. genAST(n->left, Lfalse, NOLABEL, NOLABEL, n->op); genfreeregs(NOREG); // Get a register to hold the result of the two expressions reg = alloc_register(); // Generate the true expression and the false label. // Move the expression result into the known register. expreg = genAST(n->mid, NOLABEL, NOLABEL, NOLABEL, n->op); cgmove(expreg, reg); // Don't free the register holding the result, though! genfreeregs(reg); cgjump(Lend); cglabel(Lfalse); // Generate the false expression and the end label. // Move the expression result into the known register. expreg = genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); cgmove(expreg, reg); // Don't free the register holding the result, though! 
genfreeregs(reg); cglabel(Lend); return (reg); } // Given an AST, an optional label, and the AST op // of the parent, generate assembly code recursively. // Return the register id with the tree's final value. int genAST(struct ASTnode *n, int iflabel, int looptoplabel, int loopendlabel, int parentASTop) { int leftreg= NOREG, rightreg= NOREG; // Empty tree, do nothing if (n==NULL) return(NOREG); // We have some specific AST node handling at the top // so that we don't evaluate the child sub-trees immediately switch (n->op) { case A_IF: return (genIF(n, looptoplabel, loopendlabel)); case A_WHILE: return (genWHILE(n)); case A_SWITCH: return (genSWITCH(n)); case A_FUNCCALL: return (gen_funccall(n)); case A_TERNARY: return (gen_ternary(n)); case A_LOGOR: return (gen_logandor(n)); case A_LOGAND: return (gen_logandor(n)); case A_GLUE: // Do each child statement, and free the // registers after each child if (n->left != NULL) genAST(n->left, iflabel, looptoplabel, loopendlabel, n->op); genfreeregs(NOREG); if (n->right != NULL) genAST(n->right, iflabel, looptoplabel, loopendlabel, n->op); genfreeregs(NOREG); return (NOREG); case A_FUNCTION: // Generate the function's preamble before the code // in the child sub-tree cgfuncpreamble(n->sym); genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op); cgfuncpostamble(n->sym); return (NOREG); } // General AST node handling below // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op); if (n->right) rightreg = genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdivmod(leftreg, rightreg, A_DIVIDE)); case A_MOD: return (cgdivmod(leftreg, rightreg, A_MOD)); case A_AND: return (cgand(leftreg, rightreg)); case A_OR: return (cgor(leftreg, rightreg)); case A_XOR: return (cgxor(leftreg, rightreg)); 
case A_LSHIFT: return (cgshl(leftreg, rightreg)); case A_RSHIFT: return (cgshr(leftreg, rightreg)); case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: // If the parent AST node is an A_IF, A_WHILE or A_TERNARY, // generate a compare followed by a jump. Otherwise, compare // registers and set one to 1 or 0 based on the comparison. if (parentASTop == A_IF || parentASTop == A_WHILE || parentASTop == A_TERNARY) return (cgcompare_and_jump(n->op, leftreg, rightreg, iflabel)); else return (cgcompare_and_set(n->op, leftreg, rightreg)); case A_INTLIT: return (cgloadint(n->a_intvalue, n->type)); case A_STRLIT: return (cgloadglobstr(n->a_intvalue)); case A_IDENT: // Load our value if we are an rvalue // or we are being dereferenced if (n->rvalue || parentASTop == A_DEREF) { return (cgloadvar(n->sym, n->op)); } else return (NOREG); case A_ASPLUS: case A_ASMINUS: case A_ASSTAR: case A_ASSLASH: case A_ASMOD: case A_ASSIGN: // For the '+=' and friends operators, generate suitable code // and get the register with the result. Then take the left child, // make it the right child so that we can fall into the assignment code. switch (n->op) { case A_ASPLUS: leftreg = cgadd(leftreg, rightreg); n->right = n->left; break; case A_ASMINUS: leftreg = cgsub(leftreg, rightreg); n->right = n->left; break; case A_ASSTAR: leftreg = cgmul(leftreg, rightreg); n->right = n->left; break; case A_ASSLASH: leftreg = cgdivmod(leftreg, rightreg, A_DIVIDE); n->right = n->left; break; case A_ASMOD: leftreg = cgdivmod(leftreg, rightreg, A_MOD); n->right = n->left; break; } // Now into the assignment code // Are we assigning to an identifier or through a pointer? 
switch (n->right->op) { case A_IDENT: if (n->right->sym->class == C_GLOBAL || n->right->sym->class == C_STATIC) return (cgstorglob(leftreg, n->right->sym)); else return (cgstorlocal(leftreg, n->right->sym)); case A_DEREF: return (cgstorderef(leftreg, rightreg, n->right->type)); default: fatald("Can't A_ASSIGN in genAST(), op", n->op); } case A_WIDEN: // Widen the child's type to the parent's type return (cgwiden(leftreg, n->left->type, n->type)); case A_RETURN: cgreturn(leftreg, Functionid); return (NOREG); case A_ADDR: return (cgaddress(n->sym)); case A_DEREF: // If we are an rvalue, dereference to get the value we point at, // otherwise leave it for A_ASSIGN to store through the pointer if (n->rvalue) return (cgderef(leftreg, n->left->type)); else return (leftreg); case A_SCALE: // Small optimisation: use shift if the // scale value is a known power of two switch (n->a_size) { case 2: return (cgshlconst(leftreg, 1)); case 4: return (cgshlconst(leftreg, 2)); case 8: return (cgshlconst(leftreg, 3)); default: // Load a register with the size and // multiply the leftreg by this size rightreg = cgloadint(n->a_size, P_INT); return (cgmul(leftreg, rightreg)); } case A_POSTINC: case A_POSTDEC: // Load and decrement the variable's value into a register // and post increment/decrement it return (cgloadvar(n->sym, n->op)); case A_PREINC: case A_PREDEC: // Load and decrement the variable's value into a register // and pre increment/decrement it return (cgloadvar(n->left->sym, n->op)); case A_NEGATE: return (cgnegate(leftreg)); case A_INVERT: return (cginvert(leftreg)); case A_LOGNOT: return (cglognot(leftreg)); case A_TOBOOL: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. 
Otherwise, set the register // to 0 or 1 based on it's zeroeness or non-zeroeness return (cgboolean(leftreg, parentASTop, iflabel)); case A_BREAK: cgjump(loopendlabel); return (NOREG); case A_CONTINUE: cgjump(looptoplabel); return (NOREG); case A_CAST: return (leftreg); // Not much to do default: fatald("Unknown AST operator", n->op); } return (NOREG); // Keep -Wall happy } void genpreamble() { cgpreamble(); } void genpostamble() { cgpostamble(); } void genfreeregs(int keepreg) { freeall_registers(keepreg); } void genglobsym(struct symtable *node) { cgglobsym(node); } // Generate a global string. // If append is true, append to // previous genglobstr() call. int genglobstr(char *strvalue, int append) { int l = genlabel(); cgglobstr(l, strvalue, append); return (l); } void genglobstrend(void) { cgglobstrend(); } int genprimsize(int type) { return (cgprimsize(type)); } int genalign(int type, int offset, int direction) { return (cgalign(type, offset, direction)); } ================================================ FILE: 58_Ptr_Increments/include/ctype.h ================================================ #ifndef _CTYPE_H_ # define _CTYPE_H_ int isalnum(int c); int isalpha(int c); int iscntrl(int c); int isdigit(int c); int isgraph(int c); int islower(int c); int isprint(int c); int ispunct(int c); int isspace(int c); int isupper(int c); int isxdigit(int c); int isascii(int c); int isblank(int c); int toupper(int c); int tolower(int c); #endif // _CTYPE_H_ ================================================ FILE: 58_Ptr_Increments/include/errno.h ================================================ #ifndef _ERRNO_H_ # define _ERRNO_H_ int * __errno_location(void); #define errno (* __errno_location()) #endif // _ERRNO_H_ ================================================ FILE: 58_Ptr_Increments/include/fcntl.h ================================================ #ifndef _FCNTL_H_ # define _FCNTL_H_ #define O_RDONLY 00 #define O_WRONLY 01 #define O_RDWR 02 int open(char *pathname, int 
flags); #endif // _FCNTL_H_ ================================================ FILE: 58_Ptr_Increments/include/stddef.h ================================================ #ifndef _STDDEF_H_ # define _STDDEF_H_ #ifndef NULL # define NULL (void *)0 #endif typedef long size_t; #endif //_STDDEF_H_ ================================================ FILE: 58_Ptr_Increments/include/stdio.h ================================================ #ifndef _STDIO_H_ # define _STDIO_H_ #include #ifndef NULL # define NULL (void *)0 #endif #ifndef EOF # define EOF (-1) #endif // This FILE definition will do for now typedef char * FILE; FILE *fopen(char *pathname, char *mode); size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream); size_t fwrite(void *ptr, size_t size, size_t nmemb, FILE *stream); int fclose(FILE *stream); int printf(char *format); int fprintf(FILE *stream, char *format); int sprintf(char *str, char *format); int snprintf(char *str, size_t size, char *format); int fgetc(FILE *stream); int fputc(int c, FILE *stream); int fputs(char *s, FILE *stream); int putc(int c, FILE *stream); int putchar(int c); int puts(char *s); FILE *popen(char *command, char *type); int pclose(FILE *stream); extern FILE *stdin; extern FILE *stdout; extern FILE *stderr; #endif // _STDIO_H_ ================================================ FILE: 58_Ptr_Increments/include/stdlib.h ================================================ #ifndef _STDLIB_H_ # define _STDLIB_H_ void exit(int status); void _Exit(int status); void *malloc(int size); void free(void *ptr); void *calloc(int nmemb, int size); void *realloc(void *ptr, int size); int system(char *command); #endif // _STDLIB_H_ ================================================ FILE: 58_Ptr_Increments/include/string.h ================================================ #ifndef _STRING_H_ # define _STRING_H_ char *strdup(char *s); char *strchr(char *s, int c); char *strrchr(char *s, int c); int strcmp(char *s1, char *s2); int strncmp(char *s1, char *s2, 
size_t n); char *strerror(int errnum); #endif // _STRING_H_ ================================================ FILE: 58_Ptr_Increments/include/unistd.h ================================================ #ifndef _UNISTD_H_ # define _UNISTD_H_ void _exit(int status); int unlink(char *pathname); #endif // _UNISTD_H_ ================================================ FILE: 58_Ptr_Increments/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Given a string with a '.' and at least a 1-character suffix // after the '.', change the suffix to be the given character. // Return the new string or NULL if the original string could // not be modified char *alter_suffix(char *str, char suffix) { char *posn; char *newstr; // Clone the string if ((newstr = strdup(str)) == NULL) return (NULL); // Find the '.' if ((posn = strrchr(newstr, '.')) == NULL) return (NULL); // Ensure there is a suffix posn++; if (*posn == '\0') return (NULL); // Change the suffix and NUL-terminate the string *posn = suffix; posn++; *posn = '\0'; return (newstr); } // Given an input filename, compile that file // down to assembly code. 
Return the new file's name static char *do_compile(char *filename) { char cmd[TEXTLEN]; // Change the input file's suffix to .s Outfilename = alter_suffix(filename, 's'); if (Outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .c on the end\n", filename); exit(1); } // Generate the pre-processor command snprintf(cmd, TEXTLEN, "%s %s %s", CPPCMD, INCDIR, filename); // Open up the pre-processor pipe if ((Infile = popen(cmd, "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", filename, strerror(errno)); exit(1); } Infilename = filename; // Create the output file if ((Outfile = fopen(Outfilename, "w")) == NULL) { fprintf(stderr, "Unable to create %s: %s\n", Outfilename, strerror(errno)); exit(1); } Line = 1; // Reset the scanner Linestart = 1; Putback = '\n'; clear_symtable(); // Clear the symbol table if (O_verbose) printf("compiling %s\n", filename); scan(&Token); // Get the first token from the input Peektoken.token = 0; // and set there is no lookahead token genpreamble(); // Output the preamble global_declarations(); // Parse the global declarations genpostamble(); // Output the postamble fclose(Outfile); // Close the output file // Dump the symbol table if requested if (O_dumpsym) { printf("Symbols for %s\n", filename); dumpsymtables(); fprintf(stdout, "\n\n"); } freestaticsyms(); // Free any static symbols in the file return (Outfilename); } // Given an input filename, assemble that file // down to object code. 
Return the object filename char *do_assemble(char *filename) { char cmd[TEXTLEN]; int err; char *outfilename = alter_suffix(filename, 'o'); if (outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .s on the end\n", filename); exit(1); } // Build the assembly command and run it snprintf(cmd, TEXTLEN, "%s %s %s", ASCMD, outfilename, filename); if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Assembly of %s failed\n", filename); exit(1); } return (outfilename); } // Given a list of object files and an output filename, // link all of the object filenames together. void do_link(char *outfilename, char **objlist) { int cnt, size = TEXTLEN; char cmd[TEXTLEN], *cptr; int err; // Start with the linker command and the output file cptr = cmd; cnt = snprintf(cptr, size, "%s %s ", LDCMD, outfilename); cptr += cnt; size -= cnt; // Now append each object file while (*objlist != NULL) { cnt = snprintf(cptr, size, "%s ", *objlist); cptr += cnt; size -= cnt; objlist++; } if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Linking failed\n"); exit(1); } } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s [-vcSTM] [-o outfile] file [file ...]\n", prog); fprintf(stderr, " -v give verbose output of the compilation stages\n"); fprintf(stderr, " -c generate object files but don't link them\n"); fprintf(stderr, " -S generate assembly files but don't link them\n"); fprintf(stderr, " -T dump the AST trees for each input file\n"); fprintf(stderr, " -M dump the symbol table for each input file\n"); fprintf(stderr, " -o outfile, produce the outfile executable file\n"); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. 
enum { MAXOBJ = 100 }; int main(int argc, char **argv) { char *outfilename = AOUT; char *asmfile, *objfile; char *objlist[MAXOBJ]; int i, j, objcnt = 0; // Initialise our variables O_dumpAST = 0; O_dumpsym = 0; O_keepasm = 0; O_assemble = 0; O_verbose = 0; O_dolink = 1; // Scan for command-line options for (i = 1; i < argc; i++) { // No leading '-', stop scanning for options if (*argv[i] != '-') break; // For each option in this argument for (j = 1; (*argv[i] == '-') && argv[i][j]; j++) { switch (argv[i][j]) { case 'o': outfilename = argv[++i]; // Save & skip to next argument break; case 'T': O_dumpAST = 1; break; case 'M': O_dumpsym = 1; break; case 'c': O_assemble = 1; O_keepasm = 0; O_dolink = 0; break; case 'S': O_keepasm = 1; O_assemble = 0; O_dolink = 0; break; case 'v': O_verbose = 1; break; default: usage(argv[0]); } } } // Ensure we have at lease one input file argument if (i >= argc) usage(argv[0]); // Work on each input file in turn while (i < argc) { asmfile = do_compile(argv[i]); // Compile the source file if (O_dolink || O_assemble) { objfile = do_assemble(asmfile); // Assemble it to object forma if (objcnt == (MAXOBJ - 2)) { fprintf(stderr, "Too many object files for the compiler to handle\n"); exit(1); } objlist[objcnt++] = objfile; // Add the object file's name objlist[objcnt] = NULL; // to the list of object files } if (!O_keepasm) // Remove the assembly file if unlink(asmfile); // we don't need to keep it i++; } // Now link all the object files together if (O_dolink) { do_link(outfilename, objlist); // If we don't need to keep the object // files, then remove them if (!O_assemble) { for (i = 0; objlist[i] != NULL; i++) unlink(objlist[i]); } } return (0); } ================================================ FILE: 58_Ptr_Increments/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" #include #include // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the 
current token is t, // and fetch the next token. Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Match a comma and fetch the next token void comma(void) { match(T_COMMA, "comma"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d of %s\n", s, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d of %s\n", s1, s2, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d of %s\n", s, d, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d of %s\n", s, c, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } ================================================ FILE: 58_Ptr_Increments/opt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST Tree Optimisation Code // Copyright (c) 2019 Warren Toomey, GPL3 // Fold an AST tree with a binary operator // and two A_INTLIT children. Return either // the original tree or a new leaf node. 
static struct ASTnode *fold2(struct ASTnode *n) { int val, leftval, rightval; // Get the values from each child leftval = n->left->a_intvalue; rightval = n->right->a_intvalue; // Perform some of the binary operations. // For any AST op we can't do, return // the original tree. switch (n->op) { case A_ADD: val = leftval + rightval; break; case A_SUBTRACT: val = leftval - rightval; break; case A_MULTIPLY: val = leftval * rightval; break; case A_DIVIDE: // Don't try to divide by zero. if (rightval == 0) return (n); val = leftval / rightval; break; default: return (n); } // Return a leaf node with the new value return (mkastleaf(A_INTLIT, n->type, NULL, NULL, val)); } // Fold an AST tree with a unary operator // and one INTLIT children. Return either // the original tree or a new leaf node. static struct ASTnode *fold1(struct ASTnode *n) { int val; // Get the child value. Do the // operation if recognised. // Return the new leaf node. val = n->left->a_intvalue; switch (n->op) { case A_WIDEN: break; case A_INVERT: val = ~val; break; case A_LOGNOT: val = !val; break; default: return (n); } // Return a leaf node with the new value return (mkastleaf(A_INTLIT, n->type, NULL, NULL, val)); } // Attempt to do constant folding on // the AST tree with the root node n static struct ASTnode *fold(struct ASTnode *n) { if (n == NULL) return (NULL); // Fold on the left child, then // do the same on the right child n->left = fold(n->left); n->right = fold(n->right); // If both children are A_INTLITs, do a fold2() if (n->left && n->left->op == A_INTLIT) { if (n->right && n->right->op == A_INTLIT) n = fold2(n); else // If only the left is A_INTLIT, do a fold1() n = fold1(n); } // Return the possibly modified tree return (n); } // Optimise an AST tree by // constant folding in all sub-trees struct ASTnode *optimise(struct ASTnode *n) { n = fold(n); return (n); } ================================================ FILE: 58_Ptr_Increments/scan.c 
================================================ #include "defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { int i; for (i = 0; s[i] != '\0'; i++) if (s[i] == (char) c) return (i); return (-1); } // Get the next character from the input file. static int next(void) { int c, l; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return (c); } c = fgetc(Infile); // Read from input file while (Linestart && c == '#') { // We've hit a pre-processor statement Linestart = 0; // No longer at the start of the line scan(&Token); // Get the line number into l if (Token.token != T_INTLIT) fatals("Expecting pre-processor line number, got:", Text); l = Token.intvalue; scan(&Token); // Get the filename in Text if (Token.token != T_STRLIT) fatals("Expecting pre-processor file name, got:", Text); if (Text[0] != '<') { // If this is a real filename if (strcmp(Text, Infilename)) // and not the one we have now Infilename = strdup(Text); // save it. Then update the line num Line = l; } while ((c = fgetc(Infile)) != '\n'); // Skip to the end of the line c = fgetc(Infile); // and get the next character Linestart = 1; // Now back at the start of the line } Linestart = 0; // No longer at the start of the line if ('\n' == c) { Line++; // Increment line count Linestart = 1; // Now back at the start of the line } return (c); } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. 
static int skip(void) {
  int ch;

  // Keep reading until something other than
  // space, tab, newline, CR or form feed turns up
  do {
    ch = next();
  } while (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' || ch == '\f');
  return (ch);
}

// Read in a hexadecimal constant from the input.
// Called once the "\x" prefix has been consumed; the
// value must have at least one digit and fit in 0..255.
static int hexchar(void) {
  int ch, value = 0, seen = 0;

  // Accumulate hex digits for as long as they keep coming
  for (ch = next(); isxdigit(ch); ch = next()) {
    value = value * 16 + chrpos("0123456789abcdef", tolower(ch));
    seen = 1;
  }

  // The character that ended the run belongs to someone else
  putback(ch);

  if (!seen)
    fatal("missing digits after '\\x'");
  if (value > 255)
    fatal("value out of range after '\\x'");
  return (value);
}

// Return the next character from a character
// or string literal, interpreting any escape
// sequence that begins with a backslash
static int scanch(void) {
  int c, pos, value, ndigits;

  // An ordinary character needs no interpretation
  c = next();
  if (c != '\\')
    return (c);

  // Single-letter escapes: the two strings below are parallel,
  // the n'th escape letter maps to the n'th escape value
  c = next();
  pos = chrpos("abfnrtv\\\"'", c);
  if (pos >= 0)
    return ("\a\b\f\n\r\t\v\\\"'"[pos]);

  // Octal constant: accept at most three octal digits,
  // then put back the character that follows them
  if (c >= '0' && c <= '7') {
    value = 0;
    ndigits = 0;
    while (c >= '0' && c <= '7' && ndigits < 3) {
      value = value * 8 + (c - '0');
      ndigits++;
      c = next();
    }
    putback(c);
    return (value);
  }

  // Hexadecimal constant
  if (c == 'x')
    return (hexchar());

  fatalc("unknown escape sequence", c);
  return (c);
}

// Scan and return an integer literal
// value from the input file.
static int scanint(int c) { int k, val = 0, radix = 10; // Assume the radix is 10, but if it starts with 0 if (c == '0') { // and the next character is 'x', it's radix 16 if ((c = next()) == 'x') { radix = 16; c = next(); } else // Otherwise, it's radix 8 radix = 8; } // Convert each character into an int value while ((k = chrpos("0123456789abcdef", tolower(c))) >= 0) { if (k >= radix) fatalc("invalid digit in integer literal", c); val = val * radix + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan in a string literal from the input file, // and store it in buf[]. Return the length of // the string. static int scanstr(char *buf) { int i, c; // Loop while we have enough buffer space for (i = 0; i < TEXTLEN - 1; i++) { // Get the next char and append to buf // Return when we hit the ending double quote if ((c = scanch()) == '"') { buf[i] = 0; return (i); } buf[i] = (char)c; } // Ran out of buf[] space fatal("String literal too long"); return (0); } // Scan an identifier from the input file and // store it in buf[]. Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { fatal("Identifier too long"); } else if (i < lim - 1) { buf[i++] = (char)c; } c = next(); } // We hit a non-valid character, put it back. // NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. 
static int keyword(char *s) { switch (*s) { case 'b': if (!strcmp(s, "break")) return (T_BREAK); break; case 'c': if (!strcmp(s, "case")) return (T_CASE); if (!strcmp(s, "char")) return (T_CHAR); if (!strcmp(s, "continue")) return (T_CONTINUE); break; case 'd': if (!strcmp(s, "default")) return (T_DEFAULT); break; case 'e': if (!strcmp(s, "else")) return (T_ELSE); if (!strcmp(s, "enum")) return (T_ENUM); if (!strcmp(s, "extern")) return (T_EXTERN); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'l': if (!strcmp(s, "long")) return (T_LONG); break; case 'r': if (!strcmp(s, "return")) return (T_RETURN); break; case 's': if (!strcmp(s, "sizeof")) return (T_SIZEOF); if (!strcmp(s, "static")) return (T_STATIC); if (!strcmp(s, "struct")) return (T_STRUCT); if (!strcmp(s, "switch")) return (T_SWITCH); break; case 't': if (!strcmp(s, "typedef")) return (T_TYPEDEF); break; case 'u': if (!strcmp(s, "union")) return (T_UNION); break; case 'v': if (!strcmp(s, "void")) return (T_VOID); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; } return (0); } // List of token strings, for debugging purposes char *Tstring[] = { "EOF", "=", "+=", "-=", "*=", "/=", "%=", "?", "||", "&&", "|", "^", "&", "==", "!=", ",", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "%", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", "default", "sizeof", "static", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":" }; // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. 
int scan(struct token *t) { int c, tokentype; // If we have a lookahead token, return this token if (Peektoken.token != 0) { t->token = Peektoken.token; t->tokstr = Peektoken.tokstr; t->intvalue = Peektoken.intvalue; Peektoken.token = 0; return (1); } // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': if ((c = next()) == '+') { t->token = T_INC; } else if (c == '=') { t->token = T_ASPLUS; } else { putback(c); t->token = T_PLUS; } break; case '-': if ((c = next()) == '-') { t->token = T_DEC; } else if (c == '>') { t->token = T_ARROW; } else if (c == '=') { t->token = T_ASMINUS; } else if (isdigit(c)) { // Negative int literal t->intvalue = -scanint(c); t->token = T_INTLIT; } else { putback(c); t->token = T_MINUS; } break; case '*': if ((c = next()) == '=') { t->token = T_ASSTAR; } else { putback(c); t->token = T_STAR; } break; case '/': if ((c = next()) == '=') { t->token = T_ASSLASH; } else { putback(c); t->token = T_SLASH; } break; case '%': if ((c = next()) == '=') { t->token = T_ASMOD; } else { putback(c); t->token = T_MOD; } break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case '[': t->token = T_LBRACKET; break; case ']': t->token = T_RBRACKET; break; case '~': t->token = T_INVERT; break; case '^': t->token = T_XOR; break; case ',': t->token = T_COMMA; break; case '.': t->token = T_DOT; break; case ':': t->token = T_COLON; break; case '?': t->token = T_QUESTION; break; case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { putback(c); t->token = T_LOGNOT; } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else if (c == '<') { t->token = T_LSHIFT; } else { putback(c); t->token = T_LT; } break; case 
'>': if ((c = next()) == '=') { t->token = T_GE; } else if (c == '>') { t->token = T_RSHIFT; } else { putback(c); t->token = T_GT; } break; case '&': if ((c = next()) == '&') { t->token = T_LOGAND; } else { putback(c); t->token = T_AMPER; } break; case '|': if ((c = next()) == '|') { t->token = T_LOGOR; } else { putback(c); t->token = T_OR; } break; case '\'': // If it's a quote, scan in the // literal character value and // the trailing quote t->intvalue = scanch(); t->token = T_INTLIT; if (next() != '\'') fatal("Expected '\\'' at end of char literal"); break; case '"': // Scan in a literal string scanstr(Text); t->token = T_STRLIT; break; default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if ((tokentype = keyword(Text)) != 0) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token t->tokstr = Tstring[t->token]; return (1); } ================================================ FILE: 58_Ptr_Increments/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // Prototypes static struct ASTnode *single_statement(void); // compound_statement: // empty, i.e. 
no statement // | statement // | statement statements // ; // // statement: declaration // | expression_statement // | function_call // | if_statement // | while_statement // | for_statement // | return_statement // ; // if_statement: if_head // | if_head 'else' statement // ; // // if_head: 'if' '(' true_false_expression ')' statement ; // // Parse an IF statement including any // optional ELSE clause and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST->ctype, condAST, NULL, 0); rparen(); // Get the AST for the statement trueAST = single_statement(); // If we have an 'else', skip it // and get the AST for the statement if (Token.token == T_ELSE) { scan(&Token); falseAST = single_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, P_NONE, NULL, condAST, trueAST, falseAST, NULL, 0)); } // while_statement: 'while' '(' true_false_expression ')' statement ; // // Parse a WHILE statement and return its AST static struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST->ctype, condAST, NULL, 0); rparen(); // Get the AST for the statement. 
// Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Build and return the AST for this statement return (mkastnode(A_WHILE, P_NONE, NULL, condAST, NULL, bodyAST, NULL, 0)); } // for_statement: 'for' '(' expression_list ';' // true_false_expression ';' // expression_list ')' statement ; // // Parse a FOR statement and return its AST static struct ASTnode *for_statement(void) { struct ASTnode *condAST, *bodyAST; struct ASTnode *preopAST, *postopAST; struct ASTnode *tree; // Ensure we have 'for' '(' match(T_FOR, "for"); lparen(); // Get the pre_op expression and the ';' preopAST = expression_list(T_SEMI); semi(); // Get the condition and the ';'. // Force a non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST->ctype, condAST, NULL, 0); semi(); // Get the post_op expression and the ')' postopAST = expression_list(T_RPAREN); rparen(); // Get the statement which is the body // Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Glue the statement and the postop tree tree = mkastnode(A_GLUE, P_NONE, NULL, bodyAST, NULL, postopAST, NULL, 0); // Make a WHILE loop with the condition and this new body tree = mkastnode(A_WHILE, P_NONE, NULL, condAST, NULL, tree, NULL, 0); // And glue the preop tree to the A_WHILE tree return (mkastnode(A_GLUE, P_NONE, NULL, preopAST, NULL, tree, NULL, 0)); } // return_statement: 'return' '(' expression ')' ; // // Parse a return statement and return its AST static struct ASTnode *return_statement(void) { struct ASTnode *tree= NULL; // Ensure we have 'return' match(T_RETURN, "return"); // See if we have a return value if (Token.token == T_LPAREN) { // Can't return a value if function returns P_VOID if (Functionid->type == P_VOID) fatal("Can't return from a void function"); // Skip the left parenthesis lparen(); // Parse 
the following expression tree = binexpr(0); // Ensure this is compatible with the function's type tree = modify_type(tree, Functionid->type, Functionid->ctype, 0); if (tree == NULL) fatal("Incompatible type to return"); // Get the ')' rparen(); } // Add on the A_RETURN node tree = mkastunary(A_RETURN, P_NONE, NULL, tree, NULL, 0); // Get the ';' semi(); return (tree); } // break_statement: 'break' ; // // Parse a break statement and return its AST static struct ASTnode *break_statement(void) { if (Looplevel == 0 && Switchlevel == 0) fatal("no loop or switch to break out from"); scan(&Token); semi(); return (mkastleaf(A_BREAK, P_NONE, NULL, NULL, 0)); } // continue_statement: 'continue' ; // // Parse a continue statement and return its AST static struct ASTnode *continue_statement(void) { if (Looplevel == 0) fatal("no loop to continue to"); scan(&Token); semi(); return (mkastleaf(A_CONTINUE, P_NONE, NULL, NULL, 0)); } // Parse a switch statement and return its AST static struct ASTnode *switch_statement(void) { struct ASTnode *left, *body, *n, *c; struct ASTnode *casetree = NULL, *casetail; int inloop = 1, casecount = 0; int seendefault = 0; int ASTop, casevalue; // Skip the 'switch' and '(' scan(&Token); lparen(); // Get the switch expression, the ')' and the '{' left = binexpr(0); rparen(); lbrace(); // Ensure that this is of int type if (!inttype(left->type)) fatal("Switch expression is not of integer type"); // Build an A_SWITCH subtree with the expression as // the child n = mkastunary(A_SWITCH, P_NONE, NULL, left, NULL, 0); // Now parse the cases Switchlevel++; while (inloop) { switch (Token.token) { // Leave the loop when we hit a '}' case T_RBRACE: if (casecount == 0) fatal("No cases in switch"); inloop = 0; break; case T_CASE: case T_DEFAULT: // Ensure this isn't after a previous 'default' if (seendefault) fatal("case or default after existing default"); // Set the AST operation. 
Scan the case value if required if (Token.token == T_DEFAULT) { ASTop = A_DEFAULT; seendefault = 1; scan(&Token); } else { ASTop = A_CASE; scan(&Token); left = binexpr(0); // Ensure the case value is an integer literal if (left->op != A_INTLIT) fatal("Expecting integer literal for case value"); casevalue = left->a_intvalue; // Walk the list of existing case values to ensure // that there isn't a duplicate case value for (c = casetree; c != NULL; c = c->right) if (casevalue == c->a_intvalue) fatal("Duplicate case value"); } // Scan the ':' and increment the casecount match(T_COLON, ":"); casecount++; // If the next token is a T_CASE, the existing case will fall // into the next case. Otherwise, parse the case body. if (Token.token == T_CASE) body = NULL; else body = compound_statement(1); // Build a sub-tree with any compound statement as the left child // and link it in to the growing A_CASE tree if (casetree == NULL) { casetree = casetail = mkastunary(ASTop, P_NONE, NULL, body, NULL, casevalue); } else { casetail->right = mkastunary(ASTop, P_NONE, NULL, body, NULL, casevalue); casetail = casetail->right; } break; default: fatals("Unexpected token in switch", Token.tokstr); } } Switchlevel--; // We have a sub-tree with the cases and any default. Put the // case count into the A_SWITCH node and attach the case tree. n->a_intvalue = casecount; n->right = casetree; rbrace(); return (n); } // Parse a single statement and return its AST. static struct ASTnode *single_statement(void) { struct ASTnode *stmt; struct symtable *ctype; switch (Token.token) { case T_SEMI: // An empty statement semi(); break; case T_LBRACE: // We have a '{', so this is a compound statement lbrace(); stmt = compound_statement(0); rbrace(); return (stmt); case T_IDENT: // We have to see if the identifier matches a typedef. // If not, treat it as an expression. // Otherwise, fall down to the parse_type() call. 
if (findtypedef(Text) == NULL) { stmt = binexpr(0); semi(); return (stmt); } case T_CHAR: case T_INT: case T_LONG: case T_STRUCT: case T_UNION: case T_ENUM: case T_TYPEDEF: // The beginning of a variable declaration list. declaration_list(&ctype, C_LOCAL, T_SEMI, T_EOF, &stmt); semi(); return (stmt); // Any assignments from the declarations case T_IF: return (if_statement()); case T_WHILE: return (while_statement()); case T_FOR: return (for_statement()); case T_RETURN: return (return_statement()); case T_BREAK: return (break_statement()); case T_CONTINUE: return (continue_statement()); case T_SWITCH: return (switch_statement()); default: // For now, see if this is an expression. // This catches assignment statements. stmt = binexpr(0); semi(); return (stmt); } return (NULL); // Keep -Wall happy } // Parse a compound statement // and return its AST. If inswitch is true, // we look for a '}', 'case' or 'default' token // to end the parsing. Otherwise, look for // just a '}' to end the parsing. struct ASTnode *compound_statement(int inswitch) { struct ASTnode *left = NULL; struct ASTnode *tree; while (1) { // Leave if we've hit the end token. 
We do this first to allow // an empty compound statement if (Token.token == T_RBRACE) return (left); if (inswitch && (Token.token == T_CASE || Token.token == T_DEFAULT)) return (left); // Parse a single statement tree = single_statement(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, P_NONE, NULL, left, NULL, tree, NULL, 0); } } return (NULL); // Keep -Wall happy } ================================================ FILE: 58_Ptr_Increments/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 // Append a node to the singly-linked list pointed to by head or tail void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node) { // Check for valid pointers if (head == NULL || tail == NULL || node == NULL) fatal("Either head, tail or node is NULL in appendsym"); // Append to the list if (*tail) { (*tail)->next = node; *tail = node; } else *head = *tail = node; node->next = NULL; } // Create a symbol node to be added to a symbol table list. // Set up the node's: // + type: char, int etc. // + ctype: composite type pointer for struct/union // + structural type: var, function, array etc. 
// + size: number of elements, or endlabel: end label for a function // + posn: Position information for local symbols // Return a pointer to the new node struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn) { // Get a new node struct symtable *node = (struct symtable *) malloc(sizeof(struct symtable)); if (node == NULL) fatal("Unable to malloc a symbol table node in newsym"); // Fill in the values if (name == NULL) node->name = NULL; else node->name = strdup(name); node->type = type; node->ctype = ctype; node->stype = stype; node->class = class; node->nelems = nelems; // For pointers and integer types, set the size // of the symbol. structs and union declarations // manually set this up themselves. if (ptrtype(type) || inttype(type)) node->size = nelems * typesize(type, ctype); node->st_posn = posn; node->next = NULL; node->member = NULL; node->initlist = NULL; return (node); } // Add a symbol to the global symbol list struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn) { struct symtable *sym = newsym(name, type, ctype, stype, class, nelems, posn); // For structs and unions, copy the size from the type node if (type == P_STRUCT || type == P_UNION) sym->size = ctype->size; appendsym(&Globhead, &Globtail, sym); return (sym); } // Add a symbol to the local symbol list struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int nelems) { struct symtable *sym = newsym(name, type, ctype, stype, C_LOCAL, nelems, 0); // For structs and unions, copy the size from the type node if (type == P_STRUCT || type == P_UNION) sym->size = ctype->size; appendsym(&Loclhead, &Locltail, sym); return (sym); } // Add a symbol to the parameter list struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype) { struct symtable *sym = newsym(name, type, ctype, stype, C_PARAM, 1, 0); appendsym(&Parmhead, &Parmtail, sym); 
return (sym); } // Add a symbol to the temporary member list struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int nelems) { struct symtable *sym = newsym(name, type, ctype, stype, C_MEMBER, nelems, 0); // For structs and unions, copy the size from the type node if (type == P_STRUCT || type == P_UNION) sym->size = ctype->size; appendsym(&Membhead, &Membtail, sym); return (sym); } // Add a struct to the struct list struct symtable *addstruct(char *name) { struct symtable *sym = newsym(name, P_STRUCT, NULL, 0, C_STRUCT, 0, 0); appendsym(&Structhead, &Structtail, sym); return (sym); } // Add a struct to the union list struct symtable *addunion(char *name) { struct symtable *sym = newsym(name, P_UNION, NULL, 0, C_UNION, 0, 0); appendsym(&Unionhead, &Uniontail, sym); return (sym); } // Add an enum type or value to the enum list. // Class is C_ENUMTYPE or C_ENUMVAL. // Use posn to store the int value. struct symtable *addenum(char *name, int class, int value) { struct symtable *sym = newsym(name, P_INT, NULL, 0, class, 0, value); appendsym(&Enumhead, &Enumtail, sym); return (sym); } // Add a typedef to the typedef list struct symtable *addtypedef(char *name, int type, struct symtable *ctype) { struct symtable *sym = newsym(name, type, ctype, 0, C_TYPEDEF, 0, 0); appendsym(&Typehead, &Typetail, sym); return (sym); } // Search for a symbol in a specific list. // Return a pointer to the found node or NULL if not found. // If class is not zero, also match on the given class static struct symtable *findsyminlist(char *s, struct symtable *list, int class) { for (; list != NULL; list = list->next) if ((list->name != NULL) && !strcmp(s, list->name)) if (class == 0 || class == list->class) return (list); return (NULL); } // Determine if the symbol s is in the global symbol table. // Return a pointer to the found node or NULL if not found. 
struct symtable *findglob(char *s) { return (findsyminlist(s, Globhead, 0)); } // Determine if the symbol s is in the local symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findlocl(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } return (findsyminlist(s, Loclhead, 0)); } // Determine if the symbol s is in the symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findsymbol(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } // Otherwise, try the local and global symbol lists node = findsyminlist(s, Loclhead, 0); if (node) return (node); return (findsyminlist(s, Globhead, 0)); } // Find a member in the member list // Return a pointer to the found node or NULL if not found. struct symtable *findmember(char *s) { return (findsyminlist(s, Membhead, 0)); } // Find a struct in the struct list // Return a pointer to the found node or NULL if not found. struct symtable *findstruct(char *s) { return (findsyminlist(s, Structhead, 0)); } // Find a struct in the union list // Return a pointer to the found node or NULL if not found. struct symtable *findunion(char *s) { return (findsyminlist(s, Unionhead, 0)); } // Find an enum type in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumtype(char *s) { return (findsyminlist(s, Enumhead, C_ENUMTYPE)); } // Find an enum value in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumval(char *s) { return (findsyminlist(s, Enumhead, C_ENUMVAL)); } // Find a type in the tyedef list // Return a pointer to the found node or NULL if not found. 
struct symtable *findtypedef(char *s) { return (findsyminlist(s, Typehead, 0)); } // Reset the contents of the symbol table void clear_symtable(void) { Globhead = Globtail = NULL; Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Membhead = Membtail = NULL; Structhead = Structtail = NULL; Unionhead = Uniontail = NULL; Enumhead = Enumtail = NULL; Typehead = Typetail = NULL; } // Clear all the entries in the local symbol table void freeloclsyms(void) { Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Functionid = NULL; } // Remove all static symbols from the global symbol table void freestaticsyms(void) { // g points at current node, prev at the previous one struct symtable *g, *prev = NULL; // Walk the global table looking for static entries for (g = Globhead; g != NULL; g = g->next) { if (g->class == C_STATIC) { // If there's a previous node, rearrange the prev pointer // to skip over the current node. If not, g is the head, // so do the same to Globhead if (prev != NULL) prev->next = g->next; else Globhead->next = g->next; // If g is the tail, point Globtail at the previous node // (if there is one), or Globhead if (g == Globtail) { if (prev != NULL) Globtail = prev; else Globtail = Globhead; } } } // Point prev at g before we move up to the next node prev = g; } // Dump a single symbol static void dumpsym(struct symtable *sym, int indent) { int i; for (i = 0; i < indent; i++) printf(" "); switch (sym->type & (~0xf)) { case P_VOID: printf("void "); break; case P_CHAR: printf("char "); break; case P_INT: printf("int "); break; case P_LONG: printf("long "); break; case P_STRUCT: if (sym->ctype != NULL) printf("struct %s ", sym->ctype->name); else printf("struct %s ", sym->name); break; case P_UNION: if (sym->ctype != NULL) printf("union %s ", sym->ctype->name); else printf("union %s ", sym->name); break; default: printf("unknown type "); } for (i = 0; i < (sym->type & 0xf); i++) printf("*"); printf("%s", sym->name); switch (sym->stype) { case 
S_VARIABLE: break; case S_FUNCTION: printf("()"); break; case S_ARRAY: printf("[]"); break; default: printf(" unknown stype"); } switch (sym->class) { case C_GLOBAL: printf(": global"); break; case C_LOCAL: printf(": local"); break; case C_PARAM: printf(": param"); break; case C_EXTERN: printf(": extern"); break; case C_STATIC: printf(": static"); break; case C_STRUCT: printf(": struct"); break; case C_UNION: printf(": union"); break; case C_MEMBER: printf(": member"); break; case C_ENUMTYPE: printf(": enumtype"); break; case C_ENUMVAL: printf(": enumval"); break; case C_TYPEDEF: printf(": typedef"); break; default: printf(": unknown class"); } switch (sym->stype) { case S_VARIABLE: if (sym->class == C_ENUMVAL) printf(", value %d\n", sym->st_posn); else printf(", size %d\n", sym->size); break; case S_FUNCTION: printf(", %d params\n", sym->nelems); break; case S_ARRAY: printf(", %d elems, size %d\n", sym->nelems, sym->size); break; } switch (sym->type & (~0xf)) { case P_STRUCT: case P_UNION: dumptable(sym->member, NULL, 4); } switch (sym->stype) { case S_FUNCTION: dumptable(sym->member, NULL, 4); } } // Dump one symbol table void dumptable(struct symtable *head, char *name, int indent) { struct symtable *sym; if (head != NULL && name != NULL) printf("%s\n--------\n", name); for (sym = head; sym != NULL; sym = sym->next) dumpsym(sym, indent); } void dumpsymtables(void) { dumptable(Globhead, "Global", 0); printf("\n"); dumptable(Enumhead, "Enums", 0); printf("\n"); dumptable(Typehead, "Typedefs", 0); } ================================================ FILE: 58_Ptr_Increments/tests/err.input031.c ================================================ Expecting a primary expression, got token:+ on line 5 of input031.c ================================================ FILE: 58_Ptr_Increments/tests/err.input032.c ================================================ Unknown variable or function:pizza on line 4 of input032.c ================================================ FILE: 
58_Ptr_Increments/tests/err.input033.c ================================================ Incompatible type to return on line 4 of input033.c ================================================ FILE: 58_Ptr_Increments/tests/err.input034.c ================================================ For now, declaration of non-global arrays is not implemented on line 4 of input034.c ================================================ FILE: 58_Ptr_Increments/tests/err.input035.c ================================================ Duplicate local variable declaration:a on line 4 of input035.c ================================================ FILE: 58_Ptr_Increments/tests/err.input036.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input036.c ================================================ FILE: 58_Ptr_Increments/tests/err.input037.c ================================================ Expected:comma on line 3 of input037.c ================================================ FILE: 58_Ptr_Increments/tests/err.input038.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input038.c ================================================ FILE: 58_Ptr_Increments/tests/err.input039.c ================================================ No statements in function with non-void type on line 4 of input039.c ================================================ FILE: 58_Ptr_Increments/tests/err.input040.c ================================================ No return for function with non-void type on line 4 of input040.c ================================================ FILE: 58_Ptr_Increments/tests/err.input041.c ================================================ Can't return from a void function on line 3 of input041.c ================================================ FILE: 58_Ptr_Increments/tests/err.input042.c ================================================ Unknown variable or function:fred on line 3 of 
input042.c ================================================ FILE: 58_Ptr_Increments/tests/err.input043.c ================================================ Unknown variable or function:b on line 3 of input043.c ================================================ FILE: 58_Ptr_Increments/tests/err.input044.c ================================================ Unknown variable or function:z on line 3 of input044.c ================================================ FILE: 58_Ptr_Increments/tests/err.input045.c ================================================ & operator must be followed by an identifier on line 3 of input045.c ================================================ FILE: 58_Ptr_Increments/tests/err.input046.c ================================================ * operator must be followed by an expression of pointer type on line 3 of input046.c ================================================ FILE: 58_Ptr_Increments/tests/err.input047.c ================================================ ++ operator must be followed by an identifier on line 3 of input047.c ================================================ FILE: 58_Ptr_Increments/tests/err.input048.c ================================================ -- operator must be followed by an identifier on line 3 of input048.c ================================================ FILE: 58_Ptr_Increments/tests/err.input049.c ================================================ Incompatible expression in assignment on line 6 of input049.c ================================================ FILE: 58_Ptr_Increments/tests/err.input050.c ================================================ Incompatible types in binary expression on line 6 of input050.c ================================================ FILE: 58_Ptr_Increments/tests/err.input051.c ================================================ Expected '\'' at end of char literal on line 4 of input051.c ================================================ FILE: 58_Ptr_Increments/tests/err.input052.c 
================================================ Unrecognised character:$ on line 5 of input052.c ================================================ FILE: 58_Ptr_Increments/tests/err.input056.c ================================================ unknown struct/union type:var1 on line 2 of input056.c ================================================ FILE: 58_Ptr_Increments/tests/err.input057.c ================================================ previously defined struct/union:fred on line 2 of input057.c ================================================ FILE: 58_Ptr_Increments/tests/err.input059.c ================================================ Unknown variable or function:y on line 3 of input059.c ================================================ FILE: 58_Ptr_Increments/tests/err.input060.c ================================================ Expression is not a struct/union on line 3 of input060.c ================================================ FILE: 58_Ptr_Increments/tests/err.input061.c ================================================ Expression is not a pointer to a struct/union on line 3 of input061.c ================================================ FILE: 58_Ptr_Increments/tests/err.input064.c ================================================ undeclared enum type::fred on line 1 of input064.c ================================================ FILE: 58_Ptr_Increments/tests/err.input065.c ================================================ enum type redeclared::fred on line 2 of input065.c ================================================ FILE: 58_Ptr_Increments/tests/err.input066.c ================================================ enum value redeclared::z on line 2 of input066.c ================================================ FILE: 58_Ptr_Increments/tests/err.input068.c ================================================ redefinition of typedef:FOO on line 2 of input068.c ================================================ FILE: 58_Ptr_Increments/tests/err.input069.c 
================================================ unknown type:FLOO on line 2 of input069.c ================================================ FILE: 58_Ptr_Increments/tests/err.input072.c ================================================ no loop or switch to break out from on line 1 of input072.c ================================================ FILE: 58_Ptr_Increments/tests/err.input073.c ================================================ no loop to continue to on line 1 of input073.c ================================================ FILE: 58_Ptr_Increments/tests/err.input075.c ================================================ Unexpected token in switch:if on line 4 of input075.c ================================================ FILE: 58_Ptr_Increments/tests/err.input076.c ================================================ No cases in switch on line 3 of input076.c ================================================ FILE: 58_Ptr_Increments/tests/err.input077.c ================================================ case or default after existing default on line 6 of input077.c ================================================ FILE: 58_Ptr_Increments/tests/err.input078.c ================================================ case or default after existing default on line 6 of input078.c ================================================ FILE: 58_Ptr_Increments/tests/err.input079.c ================================================ Duplicate case value on line 6 of input079.c ================================================ FILE: 58_Ptr_Increments/tests/err.input085.c ================================================ Bad type in parameter list on line 1 of input085.c ================================================ FILE: 58_Ptr_Increments/tests/err.input086.c ================================================ Function definition not at global level on line 3 of input086.c ================================================ FILE: 58_Ptr_Increments/tests/err.input087.c 
================================================ Bad type in member list on line 4 of input087.c ================================================ FILE: 58_Ptr_Increments/tests/err.input092.c ================================================ Type mismatch: literal vs. variable on line 1 of input092.c ================================================ FILE: 58_Ptr_Increments/tests/err.input093.c ================================================ Unknown variable or function:fred on line 1 of input093.c ================================================ FILE: 58_Ptr_Increments/tests/err.input094.c ================================================ Type mismatch: literal vs. variable on line 1 of input094.c ================================================ FILE: 58_Ptr_Increments/tests/err.input095.c ================================================ Variable can not be initialised:x on line 1 of input095.c ================================================ FILE: 58_Ptr_Increments/tests/err.input096.c ================================================ Array size is illegal:0 on line 1 of input096.c ================================================ FILE: 58_Ptr_Increments/tests/err.input097.c ================================================ For now, declaration of non-global arrays is not implemented on line 2 of input097.c ================================================ FILE: 58_Ptr_Increments/tests/err.input098.c ================================================ Too many values in initialisation list on line 1 of input098.c ================================================ FILE: 58_Ptr_Increments/tests/err.input102.c ================================================ Cannot cast to a struct, union or void type on line 3 of input102.c ================================================ FILE: 58_Ptr_Increments/tests/err.input103.c ================================================ Cannot cast to a struct, union or void type on line 3 of input103.c 
================================================ FILE: 58_Ptr_Increments/tests/err.input104.c ================================================ Cannot cast to a struct, union or void type on line 2 of input104.c ================================================ FILE: 58_Ptr_Increments/tests/err.input105.c ================================================ Incompatible expression in assignment on line 4 of input105.c ================================================ FILE: 58_Ptr_Increments/tests/err.input118.c ================================================ Compiler doesn't support static or extern local declarations on line 2 of input118.c ================================================ FILE: 58_Ptr_Increments/tests/err.input124.c ================================================ Cannot ++ on rvalue on line 6 of input124.c ================================================ FILE: 58_Ptr_Increments/tests/err.input126.c ================================================ Unknown variable or function:ptr on line 7 of input126.c ================================================ FILE: 58_Ptr_Increments/tests/err.input129.c ================================================ Cannot ++ and/or -- more than once on line 6 of input129.c ================================================ FILE: 58_Ptr_Increments/tests/err.input141.c ================================================ Declaration of array parameters is not implemented on line 4 of input141.c ================================================ FILE: 58_Ptr_Increments/tests/err.input142.c ================================================ Array must have non-zero elements:fred on line 1 of input142.c ================================================ FILE: 58_Ptr_Increments/tests/input001.c ================================================ int printf(char *fmt); void main() { printf("%d\n", 12 * 3); printf("%d\n", 18 - 2 * 4); printf("%d\n", 1 + 2 + 9 - 5/2 + 3*5); } ================================================ FILE: 
58_Ptr_Increments/tests/input002.c ================================================ int printf(char *fmt); void main() { int fred; int jim; fred= 5; jim= 12; printf("%d\n", fred + jim); } ================================================ FILE: 58_Ptr_Increments/tests/input003.c ================================================ int printf(char *fmt); void main() { int x; x= 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); } ================================================ FILE: 58_Ptr_Increments/tests/input004.c ================================================ int printf(char *fmt); void main() { int x; x= 7 < 9; printf("%d\n", x); x= 7 <= 9; printf("%d\n", x); x= 7 != 9; printf("%d\n", x); x= 7 == 7; printf("%d\n", x); x= 7 >= 7; printf("%d\n", x); x= 7 <= 7; printf("%d\n", x); x= 9 > 7; printf("%d\n", x); x= 9 >= 7; printf("%d\n", x); x= 9 != 7; printf("%d\n", x); } ================================================ FILE: 58_Ptr_Increments/tests/input005.c ================================================ int printf(char *fmt); void main() { int i; int j; i=6; j=12; if (i < j) { printf("%d\n", i); } else { printf("%d\n", j); } } ================================================ FILE: 58_Ptr_Increments/tests/input006.c ================================================ int printf(char *fmt); void main() { int i; i=1; while (i <= 10) { printf("%d\n", i); i= i + 1; } } ================================================ FILE: 58_Ptr_Increments/tests/input007.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 58_Ptr_Increments/tests/input008.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ 
FILE: 58_Ptr_Increments/tests/input009.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { printf("%d\n", 2 * b - a); } } ================================================ FILE: 58_Ptr_Increments/tests/input010.c ================================================ int printf(char *fmt); void main() { int i; char j; j= 20; printf("%d\n", j); i= 10; printf("%d\n", i); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 2; j= j + 1) { printf("%d\n", j); } } ================================================ FILE: 58_Ptr_Increments/tests/input011.c ================================================ int printf(char *fmt); int main() { int i; char j; long k; i= 10; printf("%d\n", i); j= 20; printf("%d\n", j); k= 30; printf("%d\n", k); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 4; j= j + 1) { printf("%d\n", j); } for (k= 1; k <= 5; k= k + 1) { printf("%d\n", k); } return(i); printf("%d\n", 12345); return(3); } ================================================ FILE: 58_Ptr_Increments/tests/input012.c ================================================ int printf(char *fmt); int fred() { return(5); } void main() { int x; x= fred(2); printf("%d\n", x); } ================================================ FILE: 58_Ptr_Increments/tests/input013.c ================================================ int printf(char *fmt); int fred() { return(56); } void main() { int dummy; int result; dummy= printf("%d\n", 23); result= fred(10); dummy= printf("%d\n", result); } ================================================ FILE: 58_Ptr_Increments/tests/input014.c ================================================ int printf(char *fmt); int fred() { return(20); } int main() { int result; printf("%d\n", 10); result= fred(15); printf("%d\n", result); printf("%d\n", fred(15)+10); return(0); } 
================================================ FILE: 58_Ptr_Increments/tests/input015.c ================================================ int printf(char *fmt); int main() { char a; char *b; char c; int d; int *e; int f; a= 18; printf("%d\n", a); b= &a; c= *b; printf("%d\n", c); d= 12; printf("%d\n", d); e= &d; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input016.c ================================================ int printf(char *fmt); int c; int d; int *e; int f; int main() { c= 12; d=18; printf("%d\n", c); e= &c + 1; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input017.c ================================================ int printf(char *fmt); int main() { char a; char *b; int d; int *e; b= &a; *b= 19; printf("%d\n", a); e= &d; *e= 12; printf("%d\n", d); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input018.c ================================================ int printf(char *fmt); int main() { int a; int b; a= b= 34; printf("%d\n", a); printf("%d\n", b); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input018a.c ================================================ int printf(char *fmt); int a; int *b; char c; char *d; int main() { b= &a; *b= 15; printf("%d\n", a); d= &c; *d= 16; printf("%d\n", c); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input019.c ================================================ int printf(char *fmt); int a; int b; int c; int d; int e; int main() { a= 2; b= 4; c= 3; d= 2; e= (a+b) * (c+d); printf("%d\n", e); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input020.c ================================================ int printf(char *fmt); int a; int b[25]; int main() { b[3]= 12; a= b[3]; printf("%d\n", a); return(0); } 
================================================ FILE: 58_Ptr_Increments/tests/input021.c ================================================ int printf(char *fmt); char c; char *str; int main() { c= '\n'; printf("%d\n", c); for (str= "Hello world\n"; *str != 0; str= str + 1) { printf("%c", *str); } return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input022.c ================================================ int printf(char *fmt); char a; char b; char c; int d; int e; int f; long g; long h; long i; int main() { b= 5; c= 7; a= b + c++; printf("%d\n", a); e= 5; f= 7; d= e + f++; printf("%d\n", d); h= 5; i= 7; g= h + i++; printf("%d\n", g); a= b-- + c; printf("%d\n", a); d= e-- + f; printf("%d\n", d); g= h-- + i; printf("%d\n", g); a= ++b + c; printf("%d\n", a); d= ++e + f; printf("%d\n", d); g= ++h + i; printf("%d\n", g); a= b * --c; printf("%d\n", a); d= e * --f; printf("%d\n", d); g= h * --i; printf("%d\n", g); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input023.c ================================================ int printf(char *fmt); char *str; int x; int main() { x= -23; printf("%d\n", x); printf("%d\n", -10 * -10); x= 1; x= ~x; printf("%d\n", x); x= 2 > 5; printf("%d\n", x); x= !x; printf("%d\n", x); x= !x; printf("%d\n", x); x= 13; if (x) { printf("%d\n", 13); } x= 0; if (!x) { printf("%d\n", 14); } for (str= "Hello world\n"; *str; str++) { printf("%c", *str); } return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input024.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { a= 42; b= 19; printf("%d\n", a & b); printf("%d\n", a | b); printf("%d\n", a ^ b); printf("%d\n", 1 << 3); printf("%d\n", 63 >> 3); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input025.c ================================================ int printf(char *fmt); int a; 
int b; int c; int main() { char z; int y; int x; x= 10; y= 20; z= 30; printf("%d\n", x); printf("%d\n", y); printf("%d\n", z); a= 5; b= 15; c= 25; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input026.c ================================================ int printf(char *fmt); int main(int a, char b, long c, int d, int e, int f, int g, int h) { int i; int j; int k; a= 13; printf("%d\n", a); b= 23; printf("%d\n", b); c= 34; printf("%d\n", c); d= 44; printf("%d\n", d); e= 54; printf("%d\n", e); f= 64; printf("%d\n", f); g= 74; printf("%d\n", g); h= 84; printf("%d\n", h); i= 94; printf("%d\n", i); j= 95; printf("%d\n", j); k= 96; printf("%d\n", k); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input027.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int param5(int a, int b, int c, int d, int e) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param2(int a, int b) { int c; int d; int e; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param0() { int a; int b; int c; int d; int e; a= 1; b= 2; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int main() { param8(1,2,3,4,5,6,7,8); param5(1,2,3,4,5); param2(1,2); param0(); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input028.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, 
int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input029.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h); int fred(int a, int b, int c); int main(); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input030.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input030.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 58_Ptr_Increments/tests/input031.c ================================================ int printf(char *fmt); int main() { int x; x= 2 + + 3 - * / ; } ================================================ FILE: 58_Ptr_Increments/tests/input032.c ================================================ int printf(char *fmt); int main() { pizza cow llama sausage; } ================================================ FILE: 58_Ptr_Increments/tests/input033.c 
================================================ int printf(char *fmt); int main() { char *z; return(z); } ================================================ FILE: 58_Ptr_Increments/tests/input035.c ================================================ int printf(char *fmt); int fred(int a, int b) { int a; return(a); } ================================================ FILE: 58_Ptr_Increments/tests/input036.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c); ================================================ FILE: 58_Ptr_Increments/tests/input037.c ================================================ int printf(char *fmt); int fred(int a, char b +, int z); ================================================ FILE: 58_Ptr_Increments/tests/input038.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c, int g); ================================================ FILE: 58_Ptr_Increments/tests/input039.c ================================================ int printf(char *fmt); int main() { int a; } ================================================ FILE: 58_Ptr_Increments/tests/input040.c ================================================ int printf(char *fmt); int main() { int a; a= 5; } ================================================ FILE: 58_Ptr_Increments/tests/input041.c ================================================ int printf(char *fmt); void fred() { return(5); } ================================================ FILE: 58_Ptr_Increments/tests/input042.c ================================================ int printf(char *fmt); int main() { fred(5); } ================================================ FILE: 58_Ptr_Increments/tests/input043.c ================================================ int printf(char *fmt); int main() { int a; a= b[4]; } ================================================ FILE: 
58_Ptr_Increments/tests/input044.c ================================================ int printf(char *fmt); int main() { int a; a= z; } ================================================ FILE: 58_Ptr_Increments/tests/input045.c ================================================ int printf(char *fmt); int main() { int a; a= &5; } ================================================ FILE: 58_Ptr_Increments/tests/input046.c ================================================ int printf(char *fmt); int main() { int a; a= *5; } ================================================ FILE: 58_Ptr_Increments/tests/input047.c ================================================ int printf(char *fmt); int main() { int a; a= ++5; } ================================================ FILE: 58_Ptr_Increments/tests/input048.c ================================================ int printf(char *fmt); int main() { int a; a= --5; } ================================================ FILE: 58_Ptr_Increments/tests/input049.c ================================================ int printf(char *fmt); int main() { int x; char y; y= x; } ================================================ FILE: 58_Ptr_Increments/tests/input050.c ================================================ int printf(char *fmt); int main() { char *a; char *b; a= a + b; } ================================================ FILE: 58_Ptr_Increments/tests/input051.c ================================================ int printf(char *fmt); int main() { char a; a= 'fred'; } ================================================ FILE: 58_Ptr_Increments/tests/input052.c ================================================ int printf(char *fmt); int main() { int a; a= $5.00; } ================================================ FILE: 58_Ptr_Increments/tests/input053.c ================================================ int printf(char *fmt); int main() { printf("Hello world, %d\n", 23); return(0); } ================================================ FILE: 
58_Ptr_Increments/tests/input054.c ================================================ int printf(char *fmt); int main() { int i; for (i=0; i < 20; i++) { printf("Hello world, %d\n", i); } return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input055.c ================================================ int printf(char *fmt); int main(int argc, char **argv) { int i; char *argument; printf("Hello world\n"); for (i=0; i < argc; i++) { argument= *argv; argv= argv + 1; printf("Argument %d is %s\n", i, argument); } return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input056.c ================================================ struct foo { int x; }; struct mary var1; ================================================ FILE: 58_Ptr_Increments/tests/input057.c ================================================ struct fred { int x; } ; struct fred { char y; } ; ================================================ FILE: 58_Ptr_Increments/tests/input058.c ================================================ int printf(char *fmt); struct fred { int x; char y; long z; }; struct fred var2; struct fred *varptr; int main() { long result; var2.x= 12; printf("%d\n", var2.x); var2.y= 'c'; printf("%d\n", var2.y); var2.z= 4005; printf("%d\n", var2.z); result= var2.x + var2.y + var2.z; printf("%d\n", result); varptr= &var2; result= varptr->x + varptr->y + varptr->z; printf("%d\n", result); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input059.c ================================================ int main() { int x; x= y.foo; } ================================================ FILE: 58_Ptr_Increments/tests/input060.c ================================================ int main() { int x; x= x.foo; } ================================================ FILE: 58_Ptr_Increments/tests/input061.c ================================================ int main() { int x; x= x->foo; } 
================================================ FILE: 58_Ptr_Increments/tests/input062.c ================================================ int printf(char *fmt); union fred { char w; int x; int y; long z; }; union fred var1; union fred *varptr; int main() { var1.x= 65; printf("%d\n", var1.x); var1.x= 66; printf("%d\n", var1.x); printf("%d\n", var1.y); printf("The next two depend on the endian of the platform\n"); printf("%d\n", var1.w); printf("%d\n", var1.z); varptr= &var1; varptr->x= 67; printf("%d\n", varptr->x); printf("%d\n", varptr->y); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input063.c ================================================ int printf(char *fmt); enum fred { apple=1, banana, carrot, pear=10, peach, mango, papaya }; enum jane { aple=1, bnana, crrot, par=10, pech, mago, paaya }; enum fred var1; enum jane var2; enum fred var3; int main() { var1= carrot + pear + mango; printf("%d\n", var1); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input064.c ================================================ enum fred var3; ================================================ FILE: 58_Ptr_Increments/tests/input065.c ================================================ enum fred { x, y, z }; enum fred { a, b }; ================================================ FILE: 58_Ptr_Increments/tests/input066.c ================================================ enum fred { x, y, z }; enum mary { a, b, z }; ================================================ FILE: 58_Ptr_Increments/tests/input067.c ================================================ int printf(char *fmt); typedef int FOO; FOO var1; struct bar { int x; int y} ; typedef struct bar BAR; BAR var2; int main() { var1= 5; printf("%d\n", var1); var2.x= 7; var2.y= 10; printf("%d\n", var2.x + var2.y); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input068.c 
================================================ typedef int FOO; typedef char FOO; ================================================ FILE: 58_Ptr_Increments/tests/input069.c ================================================ typedef int FOO; FLOO y; ================================================ FILE: 58_Ptr_Increments/tests/input070.c ================================================ #include typedef int FOO; int main() { FOO x; x= 56; printf("%d\n", x); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input071.c ================================================ #include int main() { int x; x = 0; while (x < 100) { if (x == 5) { x = x + 2; continue; } printf("%d\n", x); if (x == 14) { break; } x = x + 1; } printf("Done\n"); return (0); } ================================================ FILE: 58_Ptr_Increments/tests/input072.c ================================================ int main() { break; } ================================================ FILE: 58_Ptr_Increments/tests/input073.c ================================================ int main() { continue; } ================================================ FILE: 58_Ptr_Increments/tests/input074.c ================================================ #include int main() { int x; int y; y= 0; for (x=0; x < 5; x++) { switch(x) { case 1: { y= 5; break; } case 2: { y= 7; break; } case 3: { y= 9; } default: { y= 100; } } printf("%d\n", y); } return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input075.c ================================================ int main() { int x; switch(x) { if (x<5); } } ================================================ FILE: 58_Ptr_Increments/tests/input076.c ================================================ int main() { int x; switch(x) { } } ================================================ FILE: 58_Ptr_Increments/tests/input077.c ================================================ int main() { int x; switch(x) { case 1: { 
x= 2; } default: { x= 3; } case 4: { x= 2; } } } ================================================ FILE: 58_Ptr_Increments/tests/input078.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } default: { x= 2; } } } ================================================ FILE: 58_Ptr_Increments/tests/input079.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } case 2: { x= 2; } case 1: { x= 2; } default: { x= 2; } } } ================================================ FILE: 58_Ptr_Increments/tests/input080.c ================================================ #include int x; int y; int main() { for (x=0, y=1; x < 6; x++, y=y+2) { printf("%d %d\n", x, y); } return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input081.c ================================================ #include int x; int y; int main() { x= 0; y=1; for (;x<5;) { printf("%d %d\n", x, y); x=x+1; y=y+2; } return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input082.c ================================================ #include int main() { int x; x= 10; // Dangling else test if (x > 5) if (x > 15) printf("x > 15\n"); else printf("15 >= x > 5\n"); else printf("5 >= x\n"); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input083.c ================================================ #include int main() { int x; // Dangling else test. 
// We should not print anything for x<= 5 for (x=0; x < 12; x++) if (x > 5) if (x > 10) printf("10 < %2d\n", x); else printf(" 5 < %2d <= 10\n", x); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input084.c ================================================ #include int main() { int x, y; x=2; y=3; printf("%d %d\n", x, y); char a, *b; a= 'f'; b= &a; printf("%c %c\n", a, *b); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input085.c ================================================ int main(struct foo { int x; }; , int a) { return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input086.c ================================================ int main() { int fred() { return(5); } int x; x=2; return(x); } ================================================ FILE: 58_Ptr_Increments/tests/input087.c ================================================ struct foo { int x; int y; struct blah { int g; }; }; ================================================ FILE: 58_Ptr_Increments/tests/input088.c ================================================ #include struct foo { int x; int y; } fred, mary; struct foo james; int main() { fred.x= 5; mary.y= 6; printf("%d %d\n", fred.x, mary.y); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input089.c ================================================ #include int x= 23; char y= 'H'; char *z= "Hello world"; int main() { printf("%d %c %s\n", x, y, z); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input090.c ================================================ #include int a = 23, b = 100; char y = 'H', *z = "Hello world"; int main() { printf("%d %d %c %s\n", a, b, y, z); return (0); } ================================================ FILE: 58_Ptr_Increments/tests/input091.c ================================================ #include int fred[] = { 
1, 2, 3, 4, 5 }; int jim[10] = { 1, 2, 3, 4, 5 }; int main() { int i; for (i=0; i < 5; i++) printf("%d\n", fred[i]); for (i=0; i < 10; i++) printf("%d\n", jim[i]); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input092.c ================================================ char x= 3000; ================================================ FILE: 58_Ptr_Increments/tests/input093.c ================================================ char x= fred; ================================================ FILE: 58_Ptr_Increments/tests/input094.c ================================================ char *s= 54; ================================================ FILE: 58_Ptr_Increments/tests/input095.c ================================================ int fred(int x=2) { return(x); } ================================================ FILE: 58_Ptr_Increments/tests/input096.c ================================================ int fred[0]; ================================================ FILE: 58_Ptr_Increments/tests/input098.c ================================================ int fred[3]= { 1, 2, 3, 4, 5 }; ================================================ FILE: 58_Ptr_Increments/tests/input099.c ================================================ #include // List of token strings, for debugging purposes. 
// As yet, we can't store a NULL into the list char *Tstring[] = { "EOF", "=", "||", "&&", "|", "^", "&", "==", "!=", ",", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", "default", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":", "" }; int main() { int i; char *str; i=0; while (1) { str= Tstring[i]; if (*str == 0) break; printf("%s\n", str); i++; } return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input100.c ================================================ #include int main() { int x= 3, y=14; int z= 2 * x + y; char *str= "Hello world"; printf("%s %d %d\n", str, x+y, z); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input101.c ================================================ #include int main() { int x= 65535; char y; char *str; y= (char )x; printf("0x%x\n", y); str= (char *)0; printf("0x%lx\n", (long)str); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input102.c ================================================ int main() { struct foo { int p; }; int y= (struct foo) x; } ================================================ FILE: 58_Ptr_Increments/tests/input103.c ================================================ int main() { union foo { int p; }; int y= (union foo) x; } ================================================ FILE: 58_Ptr_Increments/tests/input104.c ================================================ int main() { int y= (void) x; } ================================================ FILE: 58_Ptr_Increments/tests/input105.c ================================================ int main() { int x; char *y; y= (char) x; } ================================================ FILE: 58_Ptr_Increments/tests/input106.c 
================================================ #include char *y= (char *)0; int main() { printf("0x%lx\n", (long)y); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input107.c ================================================ #include char *y[] = { "fish", "cow", NULL }; char *z= NULL; int main() { int i; char *ptr; for (i=0; i < 3; i++) { ptr= y[i]; if (ptr != (char *)0) printf("%s\n", y[i]); else printf("NULL\n"); } return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input108.c ================================================ int main() { char *str= (void *)0; return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input109.c ================================================ #include int main() { int x= 17 - 1; printf("%d\n", x); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input110.c ================================================ #include int x; int y; int main() { x= 3; y= 15; y += x; printf("%d\n", y); x= 3; y= 15; y -= x; printf("%d\n", y); x= 3; y= 15; y *= x; printf("%d\n", y); x= 3; y= 15; y /= x; printf("%d\n", y); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input111.c ================================================ #include int main() { int x= 2000 + 3 + 4 * 5 + 6; printf("%d\n", x); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input112.c ================================================ #include char* y = NULL; int x= 10 + 6; int fred [ 2 + 3 ]; int main() { fred[2]= x; printf("%d\n", fred[2]); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input113.c ================================================ #include void fred(void); void fred(void) { printf("fred says hello\n"); } int main(void) { fred(); return(0); } 
================================================ FILE: 58_Ptr_Increments/tests/input114.c ================================================ #include int main() { int x= 0x4a; printf("%c\n", x); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input115.c ================================================ #include struct foo { int x; char y; long z; }; typedef struct foo blah; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol struct symtable *ctype; // If struct/union, ptr to that type int stype; // Structural type for the symbol int class; // Storage class for the symbol int size; // Total size in bytes of this symbol int nelems; // Functions: # params. Arrays: # elements #define st_endlabel st_posn // For functions, the end label int st_posn; // For locals, the negative offset // from the stack base pointer int *initlist; // List of initial values struct symtable *next; // Next symbol in one list struct symtable *member; // First member of a function, struct, }; // union or enum // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates int rvalue; // True if the node is an rvalue struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; struct symtable *sym; // For many AST nodes, the pointer to // the symbol in the symbol table #define a_intvalue a_size // For A_INTLIT, the integer value int a_size; // For A_SCALE, the size to scale by }; int main() { printf("%ld\n", sizeof(char)); printf("%ld\n", sizeof(int)); printf("%ld\n", sizeof(long)); printf("%ld\n", sizeof(char *)); printf("%ld\n", sizeof(blah)); printf("%ld\n", sizeof(struct symtable)); printf("%ld\n", sizeof(struct ASTnode)); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input116.c 
================================================ #include static int counter=0; static int fred(void) { return(counter++); } int main(void) { int i; for (i=0; i < 5; i++) printf("%d\n", fred()); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input117.c ================================================ #include static char *fred(void) { return("Hello"); } int main(void) { printf("%s\n", fred()); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input118.c ================================================ int main(void) { static int x; } ================================================ FILE: 58_Ptr_Increments/tests/input119.c ================================================ #include int x; int y= 3; int main() { x= y != 3 ? 6 : 8; printf("%d\n", x); x= (y == 3) ? 6 : 8; printf("%d\n", x); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input120.c ================================================ #include int x; int y= 3; int main() { for (y= 0; y < 10; y++) { x= y > 4 ? y + 2 : y + 9; printf("%d\n", x); } return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input121.c ================================================ #include int x; int y= 3; int main() { for (y= 0; y < 10; y++) { x= (y < 4) ? y + 2 : (y > 7) ? 
1000 : y + 9; printf("%d\n", x); } return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input122.c ================================================ #include int x, y, z1, z2; int main() { for (x= 0; x <= 1; x++) { for (y= 0; y <= 1; y++) { z1= x || y; z2= x && y; printf("x %d, y %d, x || y %d, x && y %d\n", x, y, z1, z2); } } //z= x || y; return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input123.c ================================================ #include int main() { int x; for (x=0; x < 20; x++) switch(x) { case 2: case 3: case 5: case 7: case 11: printf("%2d infant prime\n", x); break; case 13: case 17: case 19: printf("%2d teen prime\n", x); break; case 0: case 1: case 4: case 6: case 8: case 9: case 10: case 12: printf("%2d infant composite\n", x); break; default: printf("%2d teen composite\n", x); break; } return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input124.c ================================================ #include int ary[5]; int main() { ary++; return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input125.c ================================================ #include int ary[5]; int *ptr; int x; int main() { ary[3]= 2008; ptr= ary; // Load ary's address into ptr x= ary[3]; printf("%d\n", x); x= ptr[3]; printf("%d\n", x); // Treat ptr as an array return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input126.c ================================================ #include int ary[5]; int main() { ary[3]= 2008; ptr= &ary; return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input127.c ================================================ #include int ary[5]; void fred(int *ptr) { // Receive a pointer printf("%d\n", ptr[3]); } int main() { ary[3]= 2008; printf("%d\n", ary[3]); fred(ary); // Pass ary as a pointer return(0); } 
================================================ FILE: 58_Ptr_Increments/tests/input128.c ================================================ #include struct foo { int val; struct foo *next; }; struct foo head, mid, tail; int main() { struct foo *ptr; tail.val= 20; tail.next= NULL; mid.val= 15; mid.next= &tail; head.val= 10; head.next= ∣ ptr= &head; printf("%d %d\n", head.val, ptr->val); printf("%d %d\n", mid.val, ptr->next->val); printf("%d %d\n", tail.val, ptr->next->next->val); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input129.c ================================================ #include int x= 6; int main() { printf("%d\n", x++ ++); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input130.c ================================================ #include char *x= "foo"; int main() { printf("Hello " "world" "\n"); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input131.c ================================================ #include void donothing() { } int main() { int x=0; printf("Doing nothing... 
"); donothing(); printf("nothing done\n"); while (++x < 100) ; printf("x is now %d\n", x); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input132.c ================================================ extern int fred; int fred; int mary; extern int mary; int main() { return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input133.c ================================================ #include extern int fred[]; int fred[23]; char mary[100]; extern char mary[]; void main() { printf("OK\n"); } ================================================ FILE: 58_Ptr_Increments/tests/input134.c ================================================ #include char y = 'a'; char *x; int main() { x= &y; if (x && y == 'a') printf("1st match\n"); x= NULL; if (x && y == 'a') printf("2nd match\n"); x= &y; y='b'; if (x && y == 'a') printf("3rd match\n"); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input135.c ================================================ #include void fred() { int x= 5; printf("testing x\n"); if (x > 4) return; printf("x below 5\n"); } int main() { fred(); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input136.c ================================================ #include int add(int x, int y) { return(x+y); } int main() { int result; result= 3 * add(2,3) - 5 * add(4,6); printf("%d\n", result); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input137.c ================================================ #include int a=1, b=2, c=3, d=4, e=5, f=6, g=7, h=8; int main() { int x; x= ((((((a + b) + c) + d) + e) + f) + g) + h; x= a + (b + (c + (d + (e + (f + (g + h)))))); printf("x is %d\n", x); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input138.c ================================================ #include int x, y, z; int a=1; int 
*aptr; int main() { // See if generic AND works for (x=0; x <= 1; x++) for (y=0; y <= 1; y++) { z= x && y; printf("%d %d | %d\n", x, y, z); } // See if generic AND works for (x=0; x <= 1; x++) for (y=0; y <= 1; y++) { z= x || y; printf("%d %d | %d\n", x, y, z); } // Now some lazy evaluation aptr= NULL; if (aptr && *aptr == 1) printf("aptr points at 1\n"); else printf("aptr is NULL or doesn't point at 1\n"); aptr= &a; if (aptr && *aptr == 1) printf("aptr points at 1\n"); else printf("aptr is NULL or doesn't point at 1\n"); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input139.c ================================================ #include int same(int x) { return(x); } int main() { int a= 3; if (same(a) && same(a) >= same(a)) printf("same apparently\n"); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input140.c ================================================ #include int main() { int i; int ary[5]; char z; // Write below the array z= 'H'; // Fill the array for (i=0; i < 5; i++) ary[i]= i * i; // Write above the array i=14; // Print out the array for (i=0; i < 5; i++) printf("%d\n", ary[i]); // See if either side is OK printf("%d %c\n", i, z); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input141.c ================================================ static int fred[5]; int jim; int foo(int mary[6]) { return(5); } ================================================ FILE: 58_Ptr_Increments/tests/input142.c ================================================ static int fred[]; int jim; ================================================ FILE: 58_Ptr_Increments/tests/input143.c ================================================ #include char foo; char *a, *b, *c; int main() { a= b= c= NULL; if (a==NULL || b==NULL || c==NULL) printf("One of the three is NULL\n"); a= &foo; if (a==NULL || b==NULL || c==NULL) printf("One of the three is NULL\n"); b= &foo; 
if (a==NULL || b==NULL || c==NULL) printf("One of the three is NULL\n"); c= &foo; if (a==NULL || b==NULL || c==NULL) printf("One of the three is NULL\n"); else printf("All three are non-NULL\n"); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input144.c ================================================ #include #include #include char *filename= "fred"; int main() { fprintf(stdout, "Unable to open %s: %s\n", filename, strerror(errno)); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input145.c ================================================ #include char *str= "qwertyuiop"; int list[]= {3, 5, 7, 9, 11, 13, 15}; int *lptr; int main() { printf("%c\n", *str); str= str + 1; printf("%c\n", *str); str += 1; printf("%c\n", *str); str += 1; printf("%c\n", *str); str -= 1; printf("%c\n", *str); lptr= list; printf("%d\n", *lptr); lptr= lptr + 1; printf("%d\n", *lptr); lptr += 1; printf("%d\n", *lptr); lptr += 1; printf("%d\n", *lptr); lptr -= 1; printf("%d\n", *lptr); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input146.c ================================================ #include char *str= "qwertyuiop"; int list[]= {3, 5, 7, 9, 11, 13, 15}; int *lptr; int main() { printf("%c\n", *str); str= str + 1; printf("%c\n", *str); str += 1; printf("%c\n", *str); str += 1; printf("%c\n", *str); str -= 1; printf("%c\n", *str); str++; printf("%c\n", *str); str--; printf("%c\n", *str); ++str; printf("%c\n", *str); --str; printf("%c\n\n", *str); lptr= list; printf("%d\n", *lptr); lptr= lptr + 1; printf("%d\n", *lptr); lptr += 1; printf("%d\n", *lptr); lptr += 1; printf("%d\n", *lptr); lptr -= 1; printf("%d\n", *lptr); lptr++ ; printf("%d\n", *lptr); lptr-- ; printf("%d\n", *lptr); ++lptr ; printf("%d\n", *lptr); --lptr ; printf("%d\n", *lptr); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/input147.c 
================================================ #include int a; int main() { printf("%d\n", 24 % 9); printf("%d\n", 31 % 11); a= 24; a %= 9; printf("%d\n",a); a= 31; a %= 11; printf("%d\n",a); return(0); } ================================================ FILE: 58_Ptr_Increments/tests/mktests ================================================ #!/bin/sh # Make the output files for each test # Build our compiler if needed if [ ! -f ../cwj ] then (cd ..; make install) fi for i in input*c do if [ ! -f "out.$i" -a ! -f "err.$i" ] then ../cwj -o out $i 2> "err.$i" # If the err file is empty if [ ! -s "err.$i" ] then rm -f "err.$i" cc -o out $i ./out > "out.$i" fi fi rm -f out out.s done ================================================ FILE: 58_Ptr_Increments/tests/nasmext.inc ================================================ extern printint extern printchar extern open extern close extern read extern write extern printf extern fprintf extern stdout extern strerror extern __errno_location ================================================ FILE: 58_Ptr_Increments/tests/out.input001.c ================================================ 36 10 25 ================================================ FILE: 58_Ptr_Increments/tests/out.input002.c ================================================ 17 ================================================ FILE: 58_Ptr_Increments/tests/out.input003.c ================================================ 1 2 3 4 5 ================================================ FILE: 58_Ptr_Increments/tests/out.input004.c ================================================ 1 1 1 1 1 1 1 1 1 ================================================ FILE: 58_Ptr_Increments/tests/out.input005.c ================================================ 6 ================================================ FILE: 58_Ptr_Increments/tests/out.input006.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 
58_Ptr_Increments/tests/out.input007.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 58_Ptr_Increments/tests/out.input008.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 58_Ptr_Increments/tests/out.input009.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 58_Ptr_Increments/tests/out.input010.c ================================================ 20 10 1 2 3 4 5 253 254 255 0 1 ================================================ FILE: 58_Ptr_Increments/tests/out.input011.c ================================================ 10 20 30 1 2 3 4 5 253 254 255 0 1 2 3 1 2 3 4 5 ================================================ FILE: 58_Ptr_Increments/tests/out.input012.c ================================================ 5 ================================================ FILE: 58_Ptr_Increments/tests/out.input013.c ================================================ 23 56 ================================================ FILE: 58_Ptr_Increments/tests/out.input014.c ================================================ 10 20 30 ================================================ FILE: 58_Ptr_Increments/tests/out.input015.c ================================================ 18 18 12 12 ================================================ FILE: 58_Ptr_Increments/tests/out.input016.c ================================================ 12 18 ================================================ FILE: 58_Ptr_Increments/tests/out.input017.c ================================================ 19 12 ================================================ FILE: 58_Ptr_Increments/tests/out.input018.c ================================================ 34 34 ================================================ FILE: 58_Ptr_Increments/tests/out.input018a.c ================================================ 15 
16 ================================================ FILE: 58_Ptr_Increments/tests/out.input019.c ================================================ 30 ================================================ FILE: 58_Ptr_Increments/tests/out.input020.c ================================================ 12 ================================================ FILE: 58_Ptr_Increments/tests/out.input021.c ================================================ 10 Hello world ================================================ FILE: 58_Ptr_Increments/tests/out.input022.c ================================================ 12 12 12 13 13 13 13 13 13 35 35 35 ================================================ FILE: 58_Ptr_Increments/tests/out.input023.c ================================================ -23 100 -2 0 1 0 13 14 Hello world ================================================ FILE: 58_Ptr_Increments/tests/out.input024.c ================================================ 2 59 57 8 7 ================================================ FILE: 58_Ptr_Increments/tests/out.input025.c ================================================ 10 20 30 5 15 25 ================================================ FILE: 58_Ptr_Increments/tests/out.input026.c ================================================ 13 23 34 44 54 64 74 84 94 95 96 ================================================ FILE: 58_Ptr_Increments/tests/out.input027.c ================================================ 1 2 3 4 5 6 7 8 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 ================================================ FILE: 58_Ptr_Increments/tests/out.input028.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 58_Ptr_Increments/tests/out.input029.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 58_Ptr_Increments/tests/out.input030.c ================================================ int printf(char *fmt); int open(char 
*pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input030.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 58_Ptr_Increments/tests/out.input053.c ================================================ Hello world, 23 ================================================ FILE: 58_Ptr_Increments/tests/out.input054.c ================================================ Hello world, 0 Hello world, 1 Hello world, 2 Hello world, 3 Hello world, 4 Hello world, 5 Hello world, 6 Hello world, 7 Hello world, 8 Hello world, 9 Hello world, 10 Hello world, 11 Hello world, 12 Hello world, 13 Hello world, 14 Hello world, 15 Hello world, 16 Hello world, 17 Hello world, 18 Hello world, 19 ================================================ FILE: 58_Ptr_Increments/tests/out.input055.c ================================================ Hello world Argument 0 is ./out ================================================ FILE: 58_Ptr_Increments/tests/out.input058.c ================================================ 12 99 4005 4116 4116 ================================================ FILE: 58_Ptr_Increments/tests/out.input062.c ================================================ 65 66 66 The next two depend on the endian of the platform 66 66 67 67 ================================================ FILE: 58_Ptr_Increments/tests/out.input063.c ================================================ 25 ================================================ FILE: 58_Ptr_Increments/tests/out.input067.c ================================================ 5 17 ================================================ FILE: 58_Ptr_Increments/tests/out.input070.c ================================================ 56 ================================================ FILE: 
58_Ptr_Increments/tests/out.input071.c ================================================ 0 1 2 3 4 7 8 9 10 11 12 13 14 Done ================================================ FILE: 58_Ptr_Increments/tests/out.input074.c ================================================ 100 5 7 100 100 ================================================ FILE: 58_Ptr_Increments/tests/out.input080.c ================================================ 0 1 1 3 2 5 3 7 4 9 5 11 ================================================ FILE: 58_Ptr_Increments/tests/out.input081.c ================================================ 0 1 1 3 2 5 3 7 4 9 ================================================ FILE: 58_Ptr_Increments/tests/out.input082.c ================================================ 15 >= x > 5 ================================================ FILE: 58_Ptr_Increments/tests/out.input083.c ================================================ 5 < 6 <= 10 5 < 7 <= 10 5 < 8 <= 10 5 < 9 <= 10 5 < 10 <= 10 10 < 11 ================================================ FILE: 58_Ptr_Increments/tests/out.input084.c ================================================ 2 3 f f ================================================ FILE: 58_Ptr_Increments/tests/out.input088.c ================================================ 5 6 ================================================ FILE: 58_Ptr_Increments/tests/out.input089.c ================================================ 23 H Hello world ================================================ FILE: 58_Ptr_Increments/tests/out.input090.c ================================================ 23 100 H Hello world ================================================ FILE: 58_Ptr_Increments/tests/out.input091.c ================================================ 1 2 3 4 5 1 2 3 4 5 0 0 0 0 0 ================================================ FILE: 58_Ptr_Increments/tests/out.input099.c ================================================ EOF = || && | ^ & == != , > <= >= << >> + - * / ++ -- ~ ! 
void char int long if else while for return struct union enum typedef extern break continue switch case default intlit strlit ; identifier { } ( ) [ ] , . -> : ================================================ FILE: 58_Ptr_Increments/tests/out.input100.c ================================================ Hello world 17 20 ================================================ FILE: 58_Ptr_Increments/tests/out.input101.c ================================================ 0xff 0x0 ================================================ FILE: 58_Ptr_Increments/tests/out.input106.c ================================================ 0x0 ================================================ FILE: 58_Ptr_Increments/tests/out.input107.c ================================================ fish cow NULL ================================================ FILE: 58_Ptr_Increments/tests/out.input108.c ================================================ ================================================ FILE: 58_Ptr_Increments/tests/out.input109.c ================================================ 16 ================================================ FILE: 58_Ptr_Increments/tests/out.input110.c ================================================ 18 12 45 5 ================================================ FILE: 58_Ptr_Increments/tests/out.input111.c ================================================ 2029 ================================================ FILE: 58_Ptr_Increments/tests/out.input112.c ================================================ 16 ================================================ FILE: 58_Ptr_Increments/tests/out.input113.c ================================================ fred says hello ================================================ FILE: 58_Ptr_Increments/tests/out.input114.c ================================================ J ================================================ FILE: 58_Ptr_Increments/tests/out.input115.c ================================================ 1 4 8 8 13 64 48 
================================================ FILE: 58_Ptr_Increments/tests/out.input116.c ================================================ 0 1 2 3 4 ================================================ FILE: 58_Ptr_Increments/tests/out.input117.c ================================================ Hello ================================================ FILE: 58_Ptr_Increments/tests/out.input119.c ================================================ 8 6 ================================================ FILE: 58_Ptr_Increments/tests/out.input120.c ================================================ 9 10 11 12 13 7 8 9 10 11 ================================================ FILE: 58_Ptr_Increments/tests/out.input121.c ================================================ 2 3 4 5 13 14 15 16 1000 1000 ================================================ FILE: 58_Ptr_Increments/tests/out.input122.c ================================================ x 0, y 0, x || y 0, x && y 0 x 0, y 1, x || y 1, x && y 0 x 1, y 0, x || y 1, x && y 0 x 1, y 1, x || y 1, x && y 1 ================================================ FILE: 58_Ptr_Increments/tests/out.input123.c ================================================ 0 infant composite 1 infant composite 2 infant prime 3 infant prime 4 infant composite 5 infant prime 6 infant composite 7 infant prime 8 infant composite 9 infant composite 10 infant composite 11 infant prime 12 infant composite 13 teen prime 14 teen composite 15 teen composite 16 teen composite 17 teen prime 18 teen composite 19 teen prime ================================================ FILE: 58_Ptr_Increments/tests/out.input125.c ================================================ 2008 2008 ================================================ FILE: 58_Ptr_Increments/tests/out.input127.c ================================================ 2008 2008 ================================================ FILE: 58_Ptr_Increments/tests/out.input128.c ================================================ 10 10 15 15 
20 20 ================================================ FILE: 58_Ptr_Increments/tests/out.input130.c ================================================ Hello world ================================================ FILE: 58_Ptr_Increments/tests/out.input131.c ================================================ Doing nothing... nothing done x is now 100 ================================================ FILE: 58_Ptr_Increments/tests/out.input132.c ================================================ ================================================ FILE: 58_Ptr_Increments/tests/out.input133.c ================================================ OK ================================================ FILE: 58_Ptr_Increments/tests/out.input134.c ================================================ 1st match ================================================ FILE: 58_Ptr_Increments/tests/out.input135.c ================================================ testing x ================================================ FILE: 58_Ptr_Increments/tests/out.input136.c ================================================ -35 ================================================ FILE: 58_Ptr_Increments/tests/out.input137.c ================================================ x is 36 ================================================ FILE: 58_Ptr_Increments/tests/out.input138.c ================================================ 0 0 | 0 0 1 | 0 1 0 | 0 1 1 | 1 0 0 | 0 0 1 | 1 1 0 | 1 1 1 | 1 aptr is NULL or doesn't point at 1 aptr points at 1 ================================================ FILE: 58_Ptr_Increments/tests/out.input139.c ================================================ same apparently ================================================ FILE: 58_Ptr_Increments/tests/out.input140.c ================================================ 0 1 4 9 16 5 H ================================================ FILE: 58_Ptr_Increments/tests/out.input143.c ================================================ One of the three is NULL One of the 
#!/bin/sh
# Run each test and compare
# against known good output.
#
# For every input*c file in this directory:
#   - if out.<file> exists, compile the file with ../cwj, run the
#     resulting binary and compare its stdout against out.<file>;
#   - otherwise, if err.<file> exists, compile the file and compare
#     the compiler's stderr against err.<file>;
#   - otherwise report that the test cannot be run.

# Build our compiler if needed. Only run make if the cd succeeded.
if [ ! -f ../cwj ]
then
  (cd .. && make install)
fi

# Compare the known-good file $1 against the trial file $2 and
# announce the result. Any non-zero status from cmp (1 = files
# differ, >1 = cmp itself had trouble) counts as a failure.
check_result() {
  if cmp -s "$1" "$2"
  then
    # No failure, so announce success
    echo ": OK"
  else
    # If different, announce failure
    # and print out the difference
    echo ": failed"
    diff -c "$1" "$2"
    echo
  fi
}

# Try to use each input source file
for i in input*c
do
  if [ -f "out.$i" ]
  then
    # Output file: print the test name (no newline), compile the
    # source with our compiler, run it and capture the output,
    # then compare it against the known-good output
    printf '%s' "$i"
    ../cwj -o out "$i"
    ./out > "trial.$i"
    check_result "out.$i" "trial.$i"

  elif [ -f "err.$i" ]
  then
    # Error file: compile the source and capture the error
    # messages. Compare against the known-bad output.
    printf '%s' "$i"
    ../cwj "$i" 2> "trial.$i"
    check_result "err.$i" "trial.$i"

  else
    # We can't do anything if there's no file to test against
    echo "Can't run test on $i, no output file!"
  fi

  rm -f out out.s "trial.$i"
done
-eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.o out.s "trial.$i" done ================================================ FILE: 58_Ptr_Increments/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct symtable *ctype, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->type = type; n->ctype = ctype; n->left = left; n->mid = mid; n->right = right; n->sym = sym; n->a_intvalue = intvalue; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, struct symtable *ctype, struct symtable *sym, int intvalue) { return (mkastnode(op, type, ctype, NULL, NULL, NULL, sym, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, int type, struct symtable *ctype, struct ASTnode *left, struct symtable *sym, int intvalue) { return (mkastnode(op, type, ctype, left, NULL, NULL, sym, intvalue)); } // Generate and return a new label number // just for AST dumping purposes static int dumpid = 1; static int gendumplabel(void) { return (dumpid++); } // Given an AST tree, print it out and follow the // traversal of the tree that genAST() follows void dumpAST(struct ASTnode *n, int label, int level) { int Lfalse, Lstart, Lend; int i; switch (n->op) { case A_IF: Lfalse = gendumplabel(); for (i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_IF"); if (n->right) { Lend = gendumplabel(); fprintf(stdout, ", end L%d", Lend); } fprintf(stdout, "\n"); dumpAST(n->left, Lfalse, level + 2); dumpAST(n->mid, 
NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); return; case A_WHILE: Lstart = gendumplabel(); for (i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_WHILE, start L%d\n", Lstart); Lend = gendumplabel(); dumpAST(n->left, Lend, level + 2); dumpAST(n->right, NOLABEL, level + 2); return; } // Reset level to -2 for A_GLUE if (n->op == A_GLUE) level = -2; // General AST node handling if (n->left) dumpAST(n->left, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); for (i = 0; i < level; i++) fprintf(stdout, " "); switch (n->op) { case A_GLUE: fprintf(stdout, "\n\n"); return; case A_FUNCTION: fprintf(stdout, "A_FUNCTION %s\n", n->sym->name); return; case A_ADD: fprintf(stdout, "A_ADD\n"); return; case A_SUBTRACT: fprintf(stdout, "A_SUBTRACT\n"); return; case A_MULTIPLY: fprintf(stdout, "A_MULTIPLY\n"); return; case A_DIVIDE: fprintf(stdout, "A_DIVIDE\n"); return; case A_EQ: fprintf(stdout, "A_EQ\n"); return; case A_NE: fprintf(stdout, "A_NE\n"); return; case A_LT: fprintf(stdout, "A_LE\n"); return; case A_GT: fprintf(stdout, "A_GT\n"); return; case A_LE: fprintf(stdout, "A_LE\n"); return; case A_GE: fprintf(stdout, "A_GE\n"); return; case A_INTLIT: fprintf(stdout, "A_INTLIT %d\n", n->a_intvalue); return; case A_STRLIT: fprintf(stdout, "A_STRLIT rval label L%d\n", n->a_intvalue); return; case A_IDENT: if (n->rvalue) fprintf(stdout, "A_IDENT rval %s\n", n->sym->name); else fprintf(stdout, "A_IDENT %s\n", n->sym->name); return; case A_ASSIGN: fprintf(stdout, "A_ASSIGN\n"); return; case A_WIDEN: fprintf(stdout, "A_WIDEN\n"); return; case A_RETURN: fprintf(stdout, "A_RETURN\n"); return; case A_FUNCCALL: fprintf(stdout, "A_FUNCCALL %s\n", n->sym->name); return; case A_ADDR: fprintf(stdout, "A_ADDR %s\n", n->sym->name); return; case A_DEREF: if (n->rvalue) fprintf(stdout, "A_DEREF rval\n"); else fprintf(stdout, "A_DEREF\n"); return; case A_SCALE: fprintf(stdout, "A_SCALE %d\n", n->a_size); return; case A_PREINC: 
fprintf(stdout, "A_PREINC %s\n", n->sym->name); return; case A_PREDEC: fprintf(stdout, "A_PREDEC %s\n", n->sym->name); return; case A_POSTINC: fprintf(stdout, "A_POSTINC\n"); return; case A_POSTDEC: fprintf(stdout, "A_POSTDEC\n"); return; case A_NEGATE: fprintf(stdout, "A_NEGATE\n"); return; case A_BREAK: fprintf(stdout, "A_BREAK\n"); return; case A_CONTINUE: fprintf(stdout, "A_CONTINUE\n"); return; case A_CASE: fprintf(stdout, "A_CASE %d\n", n->a_intvalue); return; case A_DEFAULT: fprintf(stdout, "A_DEFAULT\n"); return; case A_SWITCH: fprintf(stdout, "A_SWITCH\n"); return; case A_CAST: fprintf(stdout, "A_CAST %d\n", n->type); return; case A_ASPLUS: fprintf(stdout, "A_ASPLUS\n"); return; case A_ASMINUS: fprintf(stdout, "A_ASMINUS\n"); return; case A_ASSTAR: fprintf(stdout, "A_ASSTAR\n"); return; case A_ASSLASH: fprintf(stdout, "A_ASSLASH\n"); return; case A_TOBOOL: fprintf(stdout, "A_TOBOOL\n"); return; case A_LOGOR: fprintf(stdout, "A_LOGOR\n"); return; case A_LOGAND: fprintf(stdout, "A_LOGAND\n"); return; default: fatald("Unknown dumpAST operator", n->op); } } ================================================ FILE: 58_Ptr_Increments/types.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Types and type handling // Copyright (c) 2019 Warren Toomey, GPL3 // Return true if a type is an int type // of any size, false otherwise int inttype(int type) { return (((type & 0xf) == 0) && (type >= P_CHAR && type <= P_LONG)); } // Return true if a type is of pointer type int ptrtype(int type) { return ((type & 0xf) != 0); } // Given a primitive type, return // the type which is a pointer to it int pointer_to(int type) { if ((type & 0xf) == 0xf) fatald("Unrecognised in pointer_to: type", type); return (type + 1); } // Given a primitive pointer type, return // the type which it points to int value_at(int type) { if ((type & 0xf) == 0x0) fatald("Unrecognised in value_at: type", type); return (type - 1); } // Given a type 
and a composite type pointer, return // the size of this type in bytes int typesize(int type, struct symtable *ctype) { if (type == P_STRUCT || type == P_UNION) return (ctype->size); return (genprimsize(type)); } // Given an AST tree and a type which we want it to become, // possibly modify the tree by widening or scaling so that // it is compatible with this type. Return the original tree // if no changes occurred, a modified tree, or NULL if the // tree is not compatible with the given type. // If this will be part of a binary operation, the AST op is not zero. struct ASTnode *modify_type(struct ASTnode *tree, int rtype, struct symtable *rctype, int op) { int ltype; int lsize, rsize; ltype = tree->type; // For A_LOGOR and A_LOGAND, both types have to be int or pointer types if (op==A_LOGOR || op==A_LOGAND) { if (!inttype(ltype) && !ptrtype(ltype)) return(NULL); if (!inttype(ltype) && !ptrtype(rtype)) return(NULL); return (tree); } // XXX No idea on these yet if (ltype == P_STRUCT || ltype == P_UNION) fatal("Don't know how to do this yet"); if (rtype == P_STRUCT || rtype == P_UNION) fatal("Don't know how to do this yet"); // Compare scalar int types if (inttype(ltype) && inttype(rtype)) { // Both types same, nothing to do if (ltype == rtype) return (tree); // Get the sizes for each type lsize = typesize(ltype, NULL); rsize = typesize(rtype, NULL); // Tree's size is too big if (lsize > rsize) return (NULL); // Widen to the right if (rsize > lsize) return (mkastunary(A_WIDEN, rtype, NULL, tree, NULL, 0)); } // For pointers if (ptrtype(ltype) && ptrtype(rtype)) { // We can compare them if (op >= A_EQ && op <= A_GE) return (tree); // A comparison of the same type for a non-binary operation is OK, // or when the left tree is of `void *` type. 
if (op == 0 && (ltype == rtype || ltype == pointer_to(P_VOID))) return (tree); } // We can scale only on add and subtract operations if (op == A_ADD || op == A_SUBTRACT || op == A_ASPLUS || op == A_ASMINUS) { // Left is int type, right is pointer type and the size // of the original type is >1: scale the left if (inttype(ltype) && ptrtype(rtype)) { rsize = genprimsize(value_at(rtype)); if (rsize > 1) return (mkastunary(A_SCALE, rtype, rctype, tree, NULL, rsize)); else return (tree); // Size 1, no need to scale } } // If we get here, the types are not compatible return (NULL); } ================================================ FILE: 59_WDIW_pt1/Makefile ================================================ # Define the location of the include directory # and the location to install the compiler binary INCDIR=/tmp/include BINDIR=/tmp HSRCS= data.h decl.h defs.h incdir.h SRCS= cg.c decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c ARMSRCS= cg_arm.c decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c cwj: $(SRCS) $(HSRCS) echo "#define INCDIR \"$(INCDIR)\"" > incdir.h cc -o cwj -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCS) compn: $(SRCN) $(HSRCS) echo "#define __NASM__ 1" >> incdir.h cc -D__NASM__ -o compn -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCN) cwjarm: $(ARMSRCS) $(HSRCS) echo "#define INCDIR \"$(INCDIR)\"" > incdir.h cc -o cwjarm -g -Wall $(ARMSRCS) cp cwjarm cwj incdir.h: echo "#define INCDIR \"$(INCDIR)\"" > incdir.h install: cwj mkdir -p $(INCDIR) rsync -a include/. $(INCDIR) cp cwj $(BINDIR) chmod +x $(BINDIR)/cwj installn: compn mkdir -p $(INCDIR) rsync -a include/. 
$(INCDIR) cp compn $(BINDIR) chmod +x $(BINDIR)/compn clean: rm -f cwj cwj0 cwj1 cwjarm compn compn0 compn1 *.o *.s out a.out incdir.h test: install tests/runtests (cd tests; chmod +x runtests; ./runtests) armtest: cwjarm tests/runtests (cd tests; chmod +x runtests; ./runtests) testn: installn tests/runtestsn (cd tests; chmod +x runtestsn; ./runtestsn) # Try to do the triple test triple: cwj1 size cwj[01] cwj1: cwj0 $(SRCS) $(HSRCS) ./cwj0 -o cwj1 $(SRCS) cwj0: install $(SRCS) $(HSRCS) ./cwj -o cwj0 $(SRCS) # Try to do the triple test with nasm triplen: compn1 size compn[01] compn1: compn0 $(SRCN) $(HSRCS) ./compn0 -o compn1 $(SRCN) compn0: installn $(SRCN) $(HSRCS) ./compn -o compn0 $(SRCN) ================================================ FILE: 59_WDIW_pt1/Readme.md ================================================ # Part 59: Why Doesn't It Work, part 1 We've reached the **WDIW** stage: why doesn't it work? In this first part of this stage, I find a few easy to find bugs and fix them. That means there are some more subtle bugs yet to be uncovered. ## Bad Code Generation for `*argv[i]` I'm using `cwj` (the Gnu C compiled version) to build `cwj0`. The assembly code in `cwj0` is *our* assembly code, not the assembly code generated by Gnu C. So, when we run `cwj0`, any errors are because our assembly code isn't correct. The first bug I noticed was that `*argv[i]` seemed to be generating code as if it was `(*argv)[i]`, i.e. always the *i'th* character of `*argv`, not the first character at `argv[i]`. I first thought this was a parsing error, but no. It turned out that we were not setting `argv[i]` as an rvalue before we dereferenced it. I worked this out by dumping the AST trees with both `cwj` and `cwj0` and observing the differences between them. What we need to do is mark the expression *after* the `*` token as an rvalue. This is now done in `prefix()` in `expr.c`: ```c static struct ASTnode *prefix(int ptp) { struct ASTnode *tree; switch (Token.token) { ... 
case T_STAR: // Get the next token and parse it // recursively as a prefix expression. // Make it an rvalue scan(&Token); tree = prefix(ptp); tree->rvalue= 1; ``` ## Externs are Also Globals This one is going to keep biting me, I'm sure. I found another place where I wasn't treating an extern symbol as a global symbol. This was in `genAST()` in `gen.c` where we generate assignment assembly code. The fix is: ```c // Now into the assignment code // Are we assigning to an identifier or through a pointer? switch (n->right->op) { case A_IDENT: if (n->right->sym->class == C_GLOBAL || n->right->sym->class == C_EXTERN || n->right->sym->class == C_STATIC) return (cgstorglob(leftreg, n->right->sym)); else return (cgstorlocal(leftreg, n->right->sym)); ``` ## Scanning is Working At this point, the `cwj0` compiler is reading source code input but not generating any output. Here are the new `Makefile` rules to do this: ``` # Try to do the triple test triple: cwj1 cwj1: cwj0 $(SRCS) $(HSRCS) ./cwj0 -o cwj1 $(SRCS) cwj0: install $(SRCS) $(HSRCS) ./cwj -o cwj0 $(SRCS) ``` So, a `$ make triple` will build `cwj` with Gnu C, then build `cwj0` with `cwj`, and finally build `cwj1` with `cwj0`. I'll talk about this below. Right now, `cwj1` can't be created as there is no assembly output! The question is, where is the compiler getting to? To find out, I added a `printf()` to the bottom of `scan()`: ```c // We found a token t->tokstr = Tstring[t->token]; printf("Scanned %d\n", t->token); return (1); ``` With this added, I saw that both `cwj` and `cwj0` scan 50,404 tokens and the resulting token streams are identical. Thus, we can conclude that, up to `scan()`, things are working OK. However, the output of `./cwj0 -S -T cg.c` show no AST trees. If I run `gdb cwj0`, set a breakpoint in `dumpAST()` and run it with the `-S -T cg.c` arguments, then we exit before we break at `dumpAST()`. We also don't get to `function_declaration()`. So, why doesn't it work? 
Ah, I spotted a memory access to `0(%rbp)`. This should never happen as all locals are at negative locations relative to the frame pointer. In `cgaddress()` in `cg.c`, we have another missed external test. We now have: ```c int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL || sym->class == C_EXTERN || sym->class == C_STATIC) fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", sym->name, reglist[r]); else fprintf(Outfile, "\tleaq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); return (r); } ``` Damn these extern problems! Well, it's all my fault, so I need to take the blame here. ## Bad Comparisons With the above change added, we are now failing with: ``` $ ./cwj0 -S tests/input001.c invalid digit in integer literal:e on line 1 of tests/input001.c ``` This turned out to be caused by this loop in `scanint()` in `scan.c`: ```c static int scanint(int c) { int k; ... // Convert each character into an int value while ((k = chrpos("0123456789abcdef", tolower(c))) >= 0) { ``` What is happening is that the `k=` assignment is not only storing a result in memory, but it is being used as an expression. In this case the `k` result is being compared, i.e. `k >= 0`. Now, `k` is of type `int`, and we are performing this store to memory for its assignment: ``` movl %r10d, -8(%rbp) ``` When `chrpos()` returns `-1`, this gets truncated down to 32 bits (`0xffffffff`) and stored in `-8(%rbp)`, i.e. in `k`. But in the following comparison: ``` movslq -8(%rbp), %r10 # Load value back from k movq $0, %r11 # Load zero cmpq %r11, %r10 # Compare k's value against zero ``` we load the *32-bit* value of `k` into `%r10`, and now do a *64-bit* comparison. Well, as a 64-bit value, `0xffffffff` is a positive number, so the loop comparison remains true and we don't leave the loop when we should. What we should do is use a different `cmp` instruction based on the size of the operands in the comparison.
I've made this change to `cgcompare_and_set()` in `cg.c`: ```c int cgcompare_and_set(int ASTop, int r1, int r2, int type) { int size = cgprimsize(type); ... switch (size) { case 1: fprintf(Outfile, "\tcmpb\t%s, %s\n", breglist[r2], breglist[r1]); break; case 4: fprintf(Outfile, "\tcmpl\t%s, %s\n", dreglist[r2], dreglist[r1]); break; default: fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); } ... } ``` Now the correct comparison instruction is being used. There is a similar function, `cgcompare_and_jump()`, and at some stage I should refactor and merge the two functions. # Now, So Close! We are so close to passing what is informally known as the **triple test**. In the triple test, we use an existing compiler to build our compiler from source code (stage 1). Then we use this compiler to build itself (stage 2). Now, to prove that the compiler is self-compiling, we use the stage 2 compiler to build itself, resulting in the stage 3 compiler. We can now: + build `cwj` using the Gnu C compiler (stage 1) + build `cwj0` using the `cwj` compiler (stage 2) + build `cwj1` using the `cwj0` compiler (stage 3) However, the binary sizes for `cwj0` and `cwj1` don't match: ``` $ size cwj[01] text data bss dec hex filename 109636 3028 48 112712 1b848 cwj0 109476 3028 48 112552 1b7a8 cwj1 ``` and they should match *exactly*. Only when the compiler can compile itself multiple times in a row and produce the same result do we know that it is self-compiling properly. Until the results match exactly, there is some subtle behaviour difference between stages 2 and 3, and so the compiler isn't compiling itself consistently. ## Conclusion and What's Next I didn't think I'd get to the point where I can build `cwj`, `cwj0` and `cwj1` in a single step of this journey. I expected we would have a whole pile of bugs to fix before we got to this point. The next problem is to work out why the stage 2 and stage 3 compilers are different sizes. 
Looking at the `size` output, the data and bss sections are the same, but the amount of assembly code is different between the two compilers. In the next part of our compiler writing journey, we will try to do a side-by-side comparison of the assembly output between the different stages, and try to work out what is causing the difference. > P.S. In this part of the journey, I also started to add some assembly output which would allow `gdb` to see the source line number that we are stopped on. It isn't working yet, but in case you look, you will see a new function `cglinenum()` in `cg.c`. When I get it working, I'll write up some commentary on it. [Next step](../60_TripleTest/Readme.md) ================================================ FILE: 59_WDIW_pt1/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\t.text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\t.data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. 
This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch (type) { case P_CHAR: return (offset); case P_INT: case P_LONG: break; default: if (!ptrtype(type)) fatald("Bad type in cg_align:", type); } // Here we have an int or a long. Align it on a 4-byte offset // I put the generic code here so it can be reused elsewhere. alignment = 4; offset = (offset + direction * (alignment - 1)) & ~(alignment - 1); return (offset); } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. static int newlocaloffset(int size) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (size > 4) ? size : 4; return (-localOffset); } // List of available registers and their names. // We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "%r10", "%r11", "%r12", "%r13", "%r9", "%r8", "%rcx", "%rdx", "%rsi", "%rdi" }; static char *breglist[] = { "%r10b", "%r11b", "%r12b", "%r13b", "%r9b", "%r8b", "%cl", "%dl", "%sil", "%dil" }; static char *dreglist[] = { "%r10d", "%r11d", "%r12d", "%r13d", "%r9d", "%r8d", "%ecx", "%edx", "%esi", "%edi" }; // Push and pop a register on/off the stack static void pushreg(int r) { fprintf(Outfile, "\tpushq\t%s\n", reglist[r]); } static void popreg(int r) { fprintf(Outfile, "\tpopq\t%s\n", reglist[r]); } // Set all registers as available. // But if reg is positive, don't free that one. 
// Mark every allocatable register as free, except the one whose
// index equals keepreg. Callers that want everything freed pass
// NOREG (see cgpreamble() and cgfuncpostamble() below).
void freeall_registers(int keepreg) {
  int i;
  // fprintf(Outfile, "# freeing all registers\n");
  for (i = 0; i < NUMFREEREGS; i++)
    if (i != keepreg)
      freereg[i] = 1;
}

// When we need to spill a register, we choose
// the following register and then cycle through
// the remaining registers. The spillreg increments
// continually, so we need to take a modulo NUMFREEREGS
// on it.
static int spillreg = 0;

// Allocate a free register. Return the number of
// the register. If no register is free, we do not die:
// the register numbered (spillreg % NUMFREEREGS) is pushed
// on the stack and handed out again.
int alloc_register(void) {
  int reg;

  for (reg = 0; reg < NUMFREEREGS; reg++) {
    if (freereg[reg]) {
      freereg[reg] = 0;
      // fprintf(Outfile, "# allocated register %s\n", reglist[reg]);
      return (reg);
    }
  }

  // We have no registers, so we must spill one
  reg = (spillreg % NUMFREEREGS);
  spillreg++;
  // fprintf(Outfile, "# spilling reg %s\n", reglist[reg]);
  pushreg(reg);
  return (reg);
}

// Return a register to the list of available registers.
// Check to see if it's not already there.
// While any spill is outstanding (spillreg > 0), nothing is
// marked free: instead the most recent spill is undone by
// popping into register ((spillreg - 1) % NUMFREEREGS).
// NOTE(review): that register is not necessarily the reg
// argument -- confirm this is what callers expect.
static void free_register(int reg) {
  if (freereg[reg] != 0) {
    // fprintf(Outfile, "# error trying to free register %s\n", reglist[reg]);
    fatald("Error trying to free register", reg);
  }

  // If this was a spilled register, get it back
  if (spillreg > 0) {
    spillreg--;
    reg = (spillreg % NUMFREEREGS);
    // fprintf(Outfile, "# unspilling reg %s\n", reglist[reg]);
    popreg(reg);
  } else {
    // fprintf(Outfile, "# freeing reg %s\n", reglist[reg]);
    freereg[reg] = 1;
  }
}

// Spill all registers on the stack, in ascending register order
void spill_all_regs(void) {
  int i;

  for (i = 0; i < NUMFREEREGS; i++)
    pushreg(i);
}

// Unspill all registers from the stack, in descending
// register order so that it mirrors spill_all_regs()
static void unspill_all_regs(void) {
  int i;

  for (i = NUMFREEREGS - 1; i >= 0; i--)
    popreg(i);
}

// Print out the assembly preamble: free all the registers, switch
// to the text segment, emit a .file directive with the quoted
// filename, and then emit the __switch helper routine.
//
// __switch walks a table that starts with a count, followed by
// that many (value, address) pairs, followed by one final address.
// It jumps to the address paired with the first value equal to the
// expression, or to the final address if none matches.
// NOTE: the emitted banner says %rsi holds the switch table, but
// the routine itself copies the table address out of %rdx.
void cgpreamble(char *filename) {
  freeall_registers(NOREG);
  cgtextseg();
  fprintf(Outfile, "\t.file 1 ");
  fputc('"', Outfile);
  fprintf(Outfile, "%s", filename);
  fputc('"', Outfile);
  fputc('\n', Outfile);
  fprintf(Outfile,
	  "# internal switch(expr) routine\n"
	  "# %%rsi = switch table, %%rax = expr\n"
	  "# from SubC: http://www.t3x.org/subc/\n"
	  "\n"
	  "__switch:\n"
	  " pushq %%rsi\n"
	  " movq %%rdx, %%rsi\n"
	  " movq %%rax, %%rbx\n"
	  " cld\n"
	  " lodsq\n"
	  " movq %%rax, %%rcx\n"
	  "__next:\n"
	  " lodsq\n"
	  " movq %%rax, %%rdx\n"
	  " lodsq\n"
	  " cmpq %%rdx, %%rbx\n"
	  " jnz __no\n"
	  " popq %%rsi\n"
	  " jmp *%%rax\n"
	  "__no:\n"
	  " loop __next\n"
	  " lodsq\n"
	  " popq %%rsi\n"
	  " jmp *%%rax\n\n");
}

// Nothing to do
void cgpostamble() {
}

// Print out a function preamble: the function's label, the
// frame set-up, the copying of register parameters into the
// frame, and the stack adjustment that makes room for locals.
// Every parameter and local gets its st_posn filled in here.
void cgfuncpreamble(struct symtable *sym) {
  char *name = sym->name;
  struct symtable *parm, *locvar;
  int cnt;
  int paramOffset = 16;		// Any pushed params start at this stack offset
  int paramReg = FIRSTPARAMREG;	// Index to the first param register in above reg lists

  // Output in the text segment, reset local offset
  cgtextseg();
  localOffset = 0;

  // Output the function start, save the %rbp and set it from %rsp.
  // Only C_GLOBAL functions get the .globl and .type directives.
  if (sym->class == C_GLOBAL)
    fprintf(Outfile, "\t.globl\t%s\n" "\t.type\t%s, @function\n", name, name);
  fprintf(Outfile, "%s:\n" "\tpushq\t%%rbp\n" "\tmovq\t%%rsp, %%rbp\n", name);

  // Copy any in-register parameters to the stack, up to six of them
  // The remaining parameters are already on the stack
  for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) {
    if (cnt > 6) {
      parm->st_posn = paramOffset;
      paramOffset += 8;
    } else {
      parm->st_posn = newlocaloffset(parm->size);
      // paramReg walks down the register lists, one step per parameter
      cgstorlocal(paramReg--, parm);
    }
  }

  // For the remainder, if they are a parameter then they are
  // already on the stack. If only a local, make a stack position.
  for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) {
    locvar->st_posn = newlocaloffset(locvar->size);
  }

  // Align the stack pointer to be a multiple of 16
  // less than its previous value
  stackOffset = (localOffset + 15) & ~15;
  fprintf(Outfile, "\taddq\t$%d,%%rsp\n", -stackOffset);
}

// Print out a function postamble: the function's end label,
// the undoing of the preamble's stack adjustment, the frame
// tear-down and the return. All registers are then freed.
void cgfuncpostamble(struct symtable *sym) {
  cglabel(sym->st_endlabel);
  fprintf(Outfile, "\taddq\t$%d,%%rsp\n", stackOffset);
  fputs("\tpopq %rbp\n" "\tret\n", Outfile);
  freeall_registers(NOREG);
}

// Load an integer literal value into a register.
// Return the number of the register.
// For x86-64, we don't need to worry about the type.
int cgloadint(int value, int type) {
  // Get a new register
  int r = alloc_register();

  fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]);
  return (r);
}

// Load a value from a variable into a register.
// Return the number of the register. If the
// operation is pre- or post-increment/decrement,
// also perform this action.
// C_LOCAL and C_PARAM symbols are addressed relative to %rbp
// using st_posn; all other classes are addressed by name
// relative to %rip.
int cgloadvar(struct symtable *sym, int op) {
  int r, postreg, offset = 1;

  // Get a new register
  r = alloc_register();

  // If the symbol is a pointer, use the size
  // of the type that it points to as any
  // increment or decrement. If not, it's one.
  if (ptrtype(sym->type))
    offset = typesize(value_at(sym->type), sym->ctype);

  // Negate the offset for decrements
  if (op == A_PREDEC || op == A_POSTDEC)
    offset = -offset;

  // If we have a pre-operation
  if (op == A_PREINC || op == A_PREDEC) {
    // Load the symbol's address
    if (sym->class == C_LOCAL || sym->class == C_PARAM)
      fprintf(Outfile, "\tleaq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]);
    else
      fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", sym->name, reglist[r]);

    // and change the value at that address.
    // A size other than 1, 4 or 8 emits nothing.
    switch (sym->size) {
      case 1:
	fprintf(Outfile, "\taddb\t$%d,(%s)\n", offset, reglist[r]);
	break;
      case 4:
	fprintf(Outfile, "\taddl\t$%d,(%s)\n", offset, reglist[r]);
	break;
      case 8:
	fprintf(Outfile, "\taddq\t$%d,(%s)\n", offset, reglist[r]);
	break;
    }
  }

  // Now load the output register with the value.
  // 1-byte values are zero-extended (movzbq) and
  // 4-byte values are sign-extended (movslq).
  if (sym->class == C_LOCAL || sym->class == C_PARAM) {
    switch (sym->size) {
      case 1:
	fprintf(Outfile, "\tmovzbq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]);
	break;
      case 4:
	fprintf(Outfile, "\tmovslq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]);
	break;
      case 8:
	fprintf(Outfile, "\tmovq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]);
    }
  } else {
    switch (sym->size) {
      case 1:
	fprintf(Outfile, "\tmovzbq\t%s(%%rip), %s\n", sym->name, reglist[r]);
	break;
      case 4:
	fprintf(Outfile, "\tmovslq\t%s(%%rip), %s\n", sym->name, reglist[r]);
	break;
      case 8:
	fprintf(Outfile, "\tmovq\t%s(%%rip), %s\n", sym->name, reglist[r]);
    }
  }

  // If we have a post-operation, get a new register
  if (op == A_POSTINC || op == A_POSTDEC) {
    postreg = alloc_register();

    // Load the symbol's address
    if (sym->class == C_LOCAL || sym->class == C_PARAM)
      fprintf(Outfile, "\tleaq\t%d(%%rbp), %s\n", sym->st_posn, reglist[postreg]);
    else
      fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", sym->name, reglist[postreg]);

    // and change the value at that address
    switch (sym->size) {
      case 1:
	fprintf(Outfile, "\taddb\t$%d,(%s)\n", offset, reglist[postreg]);
	break;
      case 4:
	fprintf(Outfile, "\taddl\t$%d,(%s)\n", offset, reglist[postreg]);
	break;
      case 8:
	fprintf(Outfile, "\taddq\t$%d,(%s)\n", offset, reglist[postreg]);
	break;
    }

    // and free the register
    free_register(postreg);
  }

  // Return the register with the value
  return (r);
}

// Given the label number of a global string,
// load its address into a new register
int cgloadglobstr(int label) {
  // Get a new register
  int r = alloc_register();

  fprintf(Outfile, "\tleaq\tL%d(%%rip), %s\n", label, reglist[r]);
  return (r);
}

// Add two registers together and return
// the number of the register with the result.
// The result is left in r1 and r2 is freed.
int cgadd(int r1, int r2) {
  fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r2], reglist[r1]);
  free_register(r2);
  return (r1);
}

// Subtract the second register from the first and
// return the number of the register with the result.
// The result is left in r1 and r2 is freed.
int cgsub(int r1, int r2) {
  fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]);
  free_register(r2);
  return (r1);
}

// Multiply two registers together and return
// the number of the register with the result.
// The result is left in r1 and r2 is freed.
int cgmul(int r1, int r2) {
  fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r2], reglist[r1]);
  free_register(r2);
  return (r1);
}

// Divide or modulo the first register by the second and
// return the number of the register with the result.
// The dividend goes through %rax (extended with cqo). For
// A_DIVIDE the result is copied back from %rax, for any
// other op it is copied back from %rdx. r2 is freed.
int cgdivmod(int r1, int r2, int op) {
  fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]);
  fprintf(Outfile, "\tcqo\n");
  fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]);
  if (op == A_DIVIDE)
    fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]);
  else
    fprintf(Outfile, "\tmovq\t%%rdx,%s\n", reglist[r1]);
  free_register(r2);
  return (r1);
}

// Bitwise AND two registers. The result is left in r1 and r2 is freed.
int cgand(int r1, int r2) {
  fprintf(Outfile, "\tandq\t%s, %s\n", reglist[r2], reglist[r1]);
  free_register(r2);
  return (r1);
}

// Bitwise OR two registers. The result is left in r1 and r2 is freed.
int cgor(int r1, int r2) {
  fprintf(Outfile, "\torq\t%s, %s\n", reglist[r2], reglist[r1]);
  free_register(r2);
  return (r1);
}

// Bitwise XOR two registers. The result is left in r1 and r2 is freed.
int cgxor(int r1, int r2) {
  fprintf(Outfile, "\txorq\t%s, %s\n", reglist[r2], reglist[r1]);
  free_register(r2);
  return (r1);
}

// Shift r1 left by the amount in r2. The low byte of r2 is
// copied into %cl to hold the shift count. The result is
// left in r1 and r2 is freed.
int cgshl(int r1, int r2) {
  fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]);
  fprintf(Outfile, "\tshlq\t%%cl, %s\n", reglist[r1]);
  free_register(r2);
  return (r1);
}

// Shift r1 right by the amount in r2, using the shrq
// instruction with the count in %cl. The result is
// left in r1 and r2 is freed.
int cgshr(int r1, int r2) {
  fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]);
  fprintf(Outfile, "\tshrq\t%%cl, %s\n", reglist[r1]);
  free_register(r2);
  return (r1);
}

// Negate a register's value
int cgnegate(int r) {
  fprintf(Outfile, "\tnegq\t%s\n", reglist[r]);
  return (r);
}

// Invert a register's value
int cginvert(int r) {
  fprintf(Outfile, "\tnotq\t%s\n", reglist[r]);
  return (r);
}

// Logically negate a register's value: the register
// becomes 1 if it was zero, and 0 otherwise
int cglognot(int r) {
  fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]);
  fprintf(Outfile, "\tsete\t%s\n", breglist[r]);
  fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]);
  return (r);
}

// Load a boolean value (only 0 or 1)
// into the given register
void cgloadboolean(int r, int val) {
  fprintf(Outfile, "\tmovq\t$%d, %s\n", val, reglist[r]);
}

// Convert an integer value to a boolean value. Jump if
// it's an IF, WHILE, LOGAND or LOGOR operation:
// IF, WHILE and LOGAND jump to the label when the value is
// zero, LOGOR jumps when it is non-zero. For any other op
// the register is set to 0 or 1 and no jump is made.
int cgboolean(int r, int op, int label) {
  fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]);
  switch (op) {
    case A_IF:
    case A_WHILE:
    case A_LOGAND:
      fprintf(Outfile, "\tje\tL%d\n", label);
      break;
    case A_LOGOR:
      fprintf(Outfile, "\tjne\tL%d\n", label);
      break;
    default:
      fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]);
      fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]);
  }
  return (r);
}

// Call a function with the given symbol id
// Pop off any arguments pushed on the stack
// Return the register with the result.
// All NUMFREEREGS registers are popped afterwards, so this
// presumably pairs with a spill_all_regs() made by the caller
// before the arguments were set up -- TODO confirm.
int cgcall(struct symtable *sym, int numargs) {
  int outr;

  // Call the function
  fprintf(Outfile, "\tcall\t%s@PLT\n", sym->name);

  // Remove any arguments pushed on the stack
  if (numargs > 6)
    fprintf(Outfile, "\taddq\t$%d, %%rsp\n", 8 * (numargs - 6));

  // Unspill all the registers
  unspill_all_regs();

  // Get a new register and copy the return value into it
  outr = alloc_register();
  fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]);
  return (outr);
}

// Given a register
with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpushq\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmovq\t%s, %s\n", reglist[r], reglist[FIRSTPARAMREG - argposn + 1]); } free_register(r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsalq\t$%d, %s\n", val, reglist[r]); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %s(%%rip)\n", reglist[r], sym->name); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %s(%%rip)\n", breglist[r], sym->name); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %s(%%rip)\n", dreglist[r], sym->name); break; default: fatald("Bad type in cgstorglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %d(%%rbp)\n", reglist[r], sym->st_posn); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %d(%%rbp)\n", breglist[r], sym->st_posn); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %d(%%rbp)\n", dreglist[r], sym->st_posn); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size, type; int initvalue; int i; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the variable (or its elements if an 
array) // and the type of the variable if (node->stype == S_ARRAY) { size = typesize(value_at(node->type), node->ctype); type = value_at(node->type); } else { size = node->size; type = node->type; } // Generate the global identity and the label cgdataseg(); if (node->class == C_GLOBAL) fprintf(Outfile, "\t.globl\t%s\n", node->name); fprintf(Outfile, "%s:\n", node->name); // Output space for one or more elements for (i = 0; i < node->nelems; i++) { // Get any initial value initvalue = 0; if (node->initlist != NULL) initvalue = node->initlist[i]; // Generate the space for this type switch (size) { case 1: fprintf(Outfile, "\t.byte\t%d\n", initvalue); break; case 4: fprintf(Outfile, "\t.long\t%d\n", initvalue); break; case 8: // Generate the pointer to a string literal. Treat a zero value // as actually zero, not the label L0 if (node->initlist != NULL && type == pointer_to(P_CHAR) && initvalue != 0) fprintf(Outfile, "\t.quad\tL%d\n", initvalue); else fprintf(Outfile, "\t.quad\t%d\n", initvalue); break; default: for (i = 0; i < size; i++) fprintf(Outfile, "\t.byte\t0\n"); } } } // Generate a global string and its start label // Don't output the label if append is true. void cgglobstr(int l, char *strvalue, int append) { char *cptr; if (!append) cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\t.byte\t%d\n", *cptr); } } void cgglobstrend(void) { fprintf(Outfile, "\t.byte\t0\n"); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2, int type) { int size = cgprimsize(type); // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); switch (size) { case 1: fprintf(Outfile, "\tcmpb\t%s, %s\n", breglist[r2], breglist[r1]); break; case 4: fprintf(Outfile, "\tcmpl\t%s, %s\n", dreglist[r2], dreglist[r1]); break; default: fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); } fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label, int type) { int size = cgprimsize(type); // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); switch (size) { case 1: fprintf(Outfile, "\tcmpb\t%s, %s\n", breglist[r2], breglist[r1]); break; case 4: fprintf(Outfile, "\tcmpl\t%s, %s\n", dreglist[r2], dreglist[r1]); break; default: fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); } fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(NOREG); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Only return a value if we have a value to return if (reg != NOREG) { // Deal with pointers here as we can't put them in // 
the switch statement if (ptrtype(sym->type)) fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); else { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzbl\t%s, %%eax\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %%eax\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } } } cgjump(sym->st_endlabel); } // Generate code to load the address of an // identifier into a variable. Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL || sym->class == C_EXTERN || sym->class == C_STATIC) fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", sym->name, reglist[r]); else fprintf(Outfile, "\tleaq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzbq\t(%s), %s\n", reglist[r], reglist[r]); break; case 4: fprintf(Outfile, "\tmovslq\t(%s), %s\n", reglist[r], reglist[r]); break; case 8: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { // Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmovb\t%s, (%s)\n", breglist[r1], reglist[r2]); break; case 4: fprintf(Outfile, "\tmovl\t%s, (%s)\n", dreglist[r1], reglist[r2]); break; case 8: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } // Generate a switch jump table and the code to // load 
// the registers and call the switch() code.
// The table is emitted at a freshly generated label as: a count,
// then one (value, label) pair per case, then the default label.
// At toplabel the switch value is moved into %rax, the table's
// address into %rdx, and we jump to __switch.
// Note that when casecount is zero, the first slot of the caller's
// caseval[] and caselabel[] arrays is overwritten.
void cgswitch(int reg, int casecount, int toplabel,
	      int *caselabel, int *caseval, int defaultlabel) {
  int i, label;

  // Get a label for the switch table
  label = genlabel();
  cglabel(label);

  // Heuristic. If we have no cases, create one case
  // which points to the default case
  if (casecount == 0) {
    caseval[0] = 0;
    caselabel[0] = defaultlabel;
    casecount = 1;
  }

  // Generate the switch jump table.
  fprintf(Outfile, "\t.quad\t%d\n", casecount);
  for (i = 0; i < casecount; i++)
    fprintf(Outfile, "\t.quad\t%d, L%d\n", caseval[i], caselabel[i]);
  fprintf(Outfile, "\t.quad\tL%d\n", defaultlabel);

  // Load the specific registers
  cglabel(toplabel);
  fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]);
  fprintf(Outfile, "\tleaq\tL%d(%%rip), %%rdx\n", label);
  fprintf(Outfile, "\tjmp\t__switch\n");
}

// Move value between registers: r1 is the source
// and r2 is the destination
void cgmove(int r1, int r2) {
  fprintf(Outfile, "\tmovq\t%s, %s\n", reglist[r1], reglist[r2]);
}

// Emit a .loc directive for the given source line number,
// always against file number 1 and column 0
void cglinenum(int line) {
  fprintf(Outfile, "\t.loc 1 %d 0\n", line);
}

================================================
FILE: 59_WDIW_pt1/cg_arm.c
================================================
#include "defs.h"
#include "data.h"
#include "decl.h"

// Code generator for ARMv6 on Raspberry Pi
// Copyright (c) 2019 Warren Toomey, GPL3

// List of available registers and their names.
// freereg[i] is non-zero while register i is available.
static int freereg[4];
static char *reglist[4] = { "r4", "r5", "r6", "r7" };

// Set all registers as available
void freeall_registers(void) {
  freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1;
}

// Allocate a free register. Return the number of
// the register. Die if no available registers.
static int alloc_register(void) {
  for (int i = 0; i < 4; i++) {
    if (freereg[i]) {
      freereg[i] = 0;
      return (i);
    }
  }
  fatal("Out of registers");
  return (NOREG);		// Keep -Wall happy
}

// Return a register to the list of available registers.
// Check to see if it's not already there.
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // We have to store large integer literal values in memory. // Keep a list of them which will be output in the postamble #define MAXINTS 1024 int Intlist[MAXINTS]; static int Intslot = 0; // Determine the offset of a large integer // literal from the .L3 label. If the integer // isn't in the list, add it. static void set_int_offset(int val) { int offset = -1; // See if it is already there for (int i = 0; i < Intslot; i++) { if (Intlist[i] == val) { offset = 4 * i; break; } } // Not in the list, so add it if (offset == -1) { offset = 4 * Intslot; if (Intslot == MAXINTS) fatal("Out of int slots in set_int_offset()"); Intlist[Intslot++] = val; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L3+%d\n", offset); } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Print out the assembly postamble void cgpostamble() { // Print out the global variables fprintf(Outfile, ".L2:\n"); for (int i = 0; i < Globs; i++) { if (Symtable[i].stype == S_VARIABLE) fprintf(Outfile, "\t.word %s\n", Symtable[i].name); } // Print out the integer literals fprintf(Outfile, ".L3:\n"); for (int i = 0; i < Intslot; i++) { fprintf(Outfile, "\t.word %d\n", Intlist[i]); } } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, \%%function\n" "%s:\n" "\tpush\t{fp, lr}\n" "\tadd\tfp, sp, #4\n" "\tsub\tsp, sp, #8\n" "\tstr\tr0, [fp, #-8]\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Symtable[id].endlabel); fputs("\tsub\tsp, fp, #4\n" "\tpop\t{fp, pc}\n" "\t.align\t2\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. 
int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); // If the literal value is small, do it with one instruction if (value <= 1000) fprintf(Outfile, "\tmov\t%s, #%d\n", reglist[r], value); else { set_int_offset(value); fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); } return (r); } // Determine the offset of a variable from the .L2 // label. Yes, this is inefficient code. static void set_var_offset(int id) { int offset = 0; // Walk the symbol table up to id. // Find S_VARIABLEs and add on 4 until // we get to our variable for (int i = 0; i < id; i++) { if (Symtable[i].stype == S_VARIABLE) offset += 4; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L2+%d\n", offset); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tldrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s, %s\n", reglist[r1], reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\tmul\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the 
second and // return the number of the register with the result int cgdiv(int r1, int r2) { // To do a divide: r0 holds the dividend, r1 holds the divisor. // The quotient is in r0. fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r1]); fprintf(Outfile, "\tmov\tr1, %s\n", reglist[r2]); fprintf(Outfile, "\tbl\t__aeabi_idiv\n"); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r1]); free_register(r2); return (r1); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]); fprintf(Outfile, "\tbl\t%s\n", Symtable[id].name); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r]); return (r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tlsl\t%s, %s, #%d\n", reglist[r], reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tstr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgstorglob:", Symtable[id].type); } return (r); } // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { if (ptrtype(type)) return (4); switch (type) { case P_CHAR: return (1); case P_INT: case P_LONG: return (4); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Symtable[id].type); fprintf(Outfile, "\t.data\n" "\t.globl\t%s\n", Symtable[id].name); switch (typesize) { case 1: fprintf(Outfile, "%s:\t.byte\t0\n", Symtable[id].name); break; case 4: fprintf(Outfile, "%s:\t.long\t0\n", Symtable[id].name); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "moveq", "movne", "movlt", "movgt", "movle", "movge" }; // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "movne", "moveq", "movge", "movle", "movgt", "movlt" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #1\n", cmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #0\n", invcmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\tuxtb\t%s, %s\n", reglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tb\tL%d\n", l); } // List of inverted branch instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *brlist[] = { "bne", "beq", "bge", "ble", "bgt", "blt" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", brlist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[reg]); cgjump(Symtable[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. Return a new register int cgaddress(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); fprintf(Outfile, "\tmov\t%s, r3\n", reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tldrb\t%s, [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [%s]\n", reglist[r1], reglist[r2]); break; case P_INT: case P_LONG: fprintf(Outfile, "\tstr\t%s, [%s]\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 59_WDIW_pt1/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { no_seg, 
text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\tsection .text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\tsection .data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch (type) { case P_CHAR: return (offset); case P_INT: case P_LONG: break; default: if (!ptrtype(type)) fatald("Bad type in cg_align:", type); } // Here we have an int or a long. Align it on a 4-byte offset // I put the generic code here so it can be reused elsewhere. alignment = 4; offset = (offset + direction * (alignment - 1)) & ~(alignment - 1); return (offset); } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. static int newlocaloffset(int size) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (size > 4) ? size : 4; return (-localOffset); } // List of available registers and their names. 
// We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "r10", "r11", "r12", "r13", "r9", "r8", "rcx", "rdx", "rsi", "rdi" }; static char *breglist[] = { "r10b", "r11b", "r12b", "r13b", "r9b", "r8b", "cl", "dl", "sil", "dil" }; static char *dreglist[] = { "r10d", "r11d", "r12d", "r13d", "r9d", "r8d", "ecx", "edx", "esi", "edi" }; // Push and pop a register on/off the stack static void pushreg(int r) { fprintf(Outfile, "\tpush\t%s\n", reglist[r]); } static void popreg(int r) { fprintf(Outfile, "\tpop\t%s\n", reglist[r]); } // Set all registers as available. // But if reg is positive, don't free that one. void freeall_registers(int keepreg) { int i; // fprintf(Outfile, "; freeing all registers\n"); for (i = 0; i < NUMFREEREGS; i++) if (i != keepreg) freereg[i] = 1; } // When we need to spill a register, we choose // the following register and then cycle through // the remaining registers. The spillreg increments // continually, so we need to take a modulo NUMFREEREGS // on it. static int spillreg = 0; // Allocate a free register. Return the number of // the register. Die if no available registers. int alloc_register(void) { int reg; for (reg = 0; reg < NUMFREEREGS; reg++) { if (freereg[reg]) { freereg[reg] = 0; // fprintf(Outfile, "; allocated register %s\n", reglist[reg]); return (reg); } } // We have no registers, so we must spill one reg = (spillreg % NUMFREEREGS); spillreg++; // fprintf(Outfile, "; spilling reg %s\n", reglist[reg]); pushreg(reg); return (reg); } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) { //fprintf(Outfile, "# error trying to free register %s\n", reglist[reg]); fatald("Error trying to free register", reg); } // If this was a spilled register, get it back if (spillreg > 0) { spillreg--; reg = (spillreg % NUMFREEREGS); // fprintf(Outfile, "; unspilling reg %s\n", reglist[reg]); popreg(reg); } else { // fprintf(Outfile, "; freeing reg %s\n", reglist[reg]); freereg[reg] = 1; } } // Spill all registers on the stack void spill_all_regs(void) { int i; for (i = 0; i < NUMFREEREGS; i++) pushreg(i); } // Unspill all registers from the stack static void unspill_all_regs(void) { int i; for (i = NUMFREEREGS - 1; i >= 0; i--) popreg(i); } // Print out the assembly preamble void cgpreamble(char *filename) { freeall_registers(NOREG); cgtextseg(); fprintf(Outfile, ";\t%s\n", filename); fprintf(Outfile, "; internal switch(expr) routine\n" "; rsi = switch table, rax = expr\n" "; from SubC: http://www.t3x.org/subc/\n" "\n" "__switch:\n" " push rsi\n" " mov rsi, rdx\n" " mov rbx, rax\n" " cld\n" " lodsq\n" " mov rcx, rax\n" "__next:\n" " lodsq\n" " mov rdx, rax\n" " lodsq\n" " cmp rbx, rdx\n" " jnz __no\n" " pop rsi\n" " jmp rax\n" "__no:\n" " loop __next\n" " lodsq\n" " pop rsi\n" " jmp rax\n\n"); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(struct symtable *sym) { char *name = sym->name; struct symtable *parm, *locvar; int cnt; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the rsp and rbp // if (sym->class == C_GLOBAL) if(!sym->extinit) { fprintf(Outfile, "\tglobal\t%s\n", name); sym->extinit = 1; } fprintf(Outfile, "%s:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n", name); // Copy any in-register parameters to the stack, up to six of them // 
The remaining parameters are already on the stack for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) { if (cnt > 6) { parm->st_posn = paramOffset; paramOffset += 8; } else { parm->st_posn = newlocaloffset(parm->size); cgstorlocal(paramReg--, parm); } } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) { locvar->st_posn = newlocaloffset(locvar->size); } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\tadd\trsp, %d\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->st_endlabel); fprintf(Outfile, "\tadd\trsp, %d\n", stackOffset); fputs("\tpop rbp\n" "\tret\n", Outfile); freeall_registers(NOREG); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadvar(struct symtable *sym, int op) { int r, postreg, offset = 1; if(!sym->extinit) { fprintf(Outfile, "extern\t%s\n", sym->name); sym->extinit = 1; } // Get a new register r = alloc_register(); // If the symbol is a pointer, use the size // of the type that it points to as any // increment or decrement. If not, it's one. 
if (ptrtype(sym->type)) offset = typesize(value_at(sym->type), sym->ctype); // Negate the offset for decrements if (op == A_PREDEC || op == A_POSTDEC) offset = -offset; // If we have a pre-operation if (op == A_PREINC || op == A_PREDEC) { // Load the symbol's address if (sym->class == C_LOCAL || sym->class == C_PARAM) fprintf(Outfile, "\tlea\t%s, [rbp+%d]\n", reglist[r], sym->st_posn); else fprintf(Outfile, "\tlea\t%s, [%s]\n", reglist[r], sym->name); // and change the value at that address switch (sym->size) { case 1: fprintf(Outfile, "\tadd\tbyte [%s], %d\n", reglist[r], offset); break; case 4: fprintf(Outfile, "\tadd\tdword [%s], %d\n", reglist[r], offset); break; case 8: fprintf(Outfile, "\tadd\tqword [%s], %d\n", reglist[r], offset); break; } } // Now load the output register with the value if (sym->class == C_LOCAL || sym->class == C_PARAM) { switch (sym->size) { case 1: fprintf(Outfile, "\tmovzx\t%s, byte [rbp+%d]\n", reglist[r], sym->st_posn); break; case 4: fprintf(Outfile, "\tmovsxd\t%s, dword [rbp+%d]\n", reglist[r], sym->st_posn); break; case 8: fprintf(Outfile, "\tmov\t%s, [rbp+%d]\n", reglist[r], sym->st_posn); } } else { switch (sym->size) { case 1: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], sym->name); break; case 4: fprintf(Outfile, "\tmovsxd\t%s, dword [%s]\n", reglist[r], sym->name); break; case 8: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], sym->name); } } // If we have a post-operation, get a new register if (op == A_POSTINC || op == A_POSTDEC) { postreg = alloc_register(); // Load the symbol's address if (sym->class == C_LOCAL || sym->class == C_PARAM) fprintf(Outfile, "\tlea\t%s, [rbp+%d]\n", reglist[postreg], sym->st_posn); else fprintf(Outfile, "\tlea\t%s, [%s]\n", reglist[postreg], sym->name); // and change the value at that address switch (sym->size) { case 1: fprintf(Outfile, "\tadd\tbyte [%s], %d\n", reglist[postreg], offset); break; case 4: fprintf(Outfile, "\tadd\tdword [%s], %d\n", reglist[postreg], offset); 
break; case 8: fprintf(Outfile, "\tadd\tqword [%s], %d\n", reglist[postreg], offset); break; } // and free the register free_register(postreg); } // Return the register with the value return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, L%d\n", reglist[r], label); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Divide or modulo the first register by the second and // return the number of the register with the result int cgdivmod(int r1, int r2, int op) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); if (op == A_DIVIDE) fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); else fprintf(Outfile, "\tmov\t%s, rdx\n", reglist[r1]); free_register(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tand\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } int cgor(int r1, int r2) { fprintf(Outfile, "\tor\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txor\t%s, %s\n", reglist[r1], reglist[r2]); free_register(r2); return (r1); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, 
"\tshl\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshr\t%s, cl\n", reglist[r1]); free_register(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tneg\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnot\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r], breglist[r]); return (r); } // Load a boolean value (only 0 or 1) // into the given register void cgloadboolean(int r, int val) { fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], val); } // Convert an integer value to a boolean value. Jump if // it's an IF, WHILE, LOGAND or LOGOR operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); switch (op) { case A_IF: case A_WHILE: case A_LOGAND: fprintf(Outfile, "\tje\tL%d\n", label); break; case A_LOGOR: fprintf(Outfile, "\tjne\tL%d\n", label); break; default: fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, byte %s\n", reglist[r], breglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { int outr; // Call the function if(!sym->extinit) { fprintf(Outfile, "extern\t%s\n", sym->name); sym->extinit = 1; } fprintf(Outfile, "\tcall\t%s\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\tadd\trsp, %d\n", 8 * (numargs - 6)); // Unspill all the registers unspill_all_regs(); // Get a new register and copy the return value into it outr = alloc_register(); fprintf(Outfile, "\tmov\t%s, rax\n", 
reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpush\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmov\t%s, %s\n", reglist[FIRSTPARAMREG - argposn + 1], reglist[r]); } free_register(r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsal\t%s, %d\n", reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if(!sym->extinit) { fprintf(Outfile, "extern\t%s\n", sym->name); sym->extinit = 1; } if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, dreglist[r]); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\tqword\t[rbp+%d], %s\n", sym->st_posn, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\tbyte\t[rbp+%d], %s\n", sym->st_posn, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\tdword\t[rbp+%d], %s\n", sym->st_posn, dreglist[r]); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size, type; int initvalue; 
int i; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the variable (or its elements if an array) // and the type of the variable if (node->stype == S_ARRAY) { size = typesize(value_at(node->type), node->ctype); type = value_at(node->type); } else { size = node->size; type = node->type; } // Generate the global identity and the label cgdataseg(); if (node->class == C_GLOBAL) fprintf(Outfile, "\tglobal\t%s\n", node->name); if(!node->extinit) { node->extinit = 1; } fprintf(Outfile, "%s:\n", node->name); // Output space for one or more elements for (i = 0; i < node->nelems; i++) { // Get any initial value initvalue = 0; if (node->initlist != NULL) initvalue = node->initlist[i]; // Generate the space for this type // original version switch(size) { case 1: fprintf(Outfile, "\tdb\t%d\n", initvalue); break; case 4: fprintf(Outfile, "\tdd\t%d\n", initvalue); break; case 8: // Generate the pointer to a string literal. Treat a zero value // as actually zero, not the label L0 if (node->initlist != NULL && type == pointer_to(P_CHAR) && initvalue != 0) fprintf(Outfile, "\tdq\tL%d\n", initvalue); else fprintf(Outfile, "\tdq\t%d\n", initvalue); break; default: for (i = 0; i < size; i++) fprintf(Outfile, "\tdb\t0\n"); } } } // Generate a global string and its start label // Don't output the label if append is true. void cgglobstr(int l, char *strvalue, int append) { char *cptr; if (!append) cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\tdb\t%d\n", *cptr); } } void cgglobstrend(void) { fprintf(Outfile, "\tdb\t0\n"); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2, int type) { int size = cgprimsize(type); // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); switch (size) { case 1: fprintf(Outfile, "\tcmp\t%s, %s\n", breglist[r1], breglist[r2]); break; case 4: fprintf(Outfile, "\tcmp\t%s, %s\n", dreglist[r1], dreglist[r2]); break; default: fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); } fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label, int type) { int size = cgprimsize(type); // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); switch (size) { case 1: fprintf(Outfile, "\tcmp\t%s, %s\n", breglist[r1], breglist[r2]); break; case 4: fprintf(Outfile, "\tcmp\t%s, %s\n", dreglist[r1], dreglist[r2]); break; default: fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); } fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(NOREG); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Only return a value if we have a value to return if (reg != NOREG) { // Deal with pointers here as we can't put them in // the 
switch statement if (ptrtype(sym->type)) fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); else { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzx\teax, %s\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmov\teax, %s\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } } } cgjump(sym->st_endlabel); } // Generate code to load the address of an // identifier into a variable. Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (!sym->extinit) { fprintf(Outfile, "extern\t%s\n", sym->name); sym->extinit = 1; } if (sym->class == C_GLOBAL || sym->class == C_EXTERN || sym->class == C_STATIC) fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r], sym->name); else fprintf(Outfile, "\tlea\t%s, [rbp+%d]\n", reglist[r], sym->st_posn); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], reglist[r]); break; case 4: fprintf(Outfile, "\tmovsx\t%s, dword [%s]\n", reglist[r], reglist[r]); break; case 8: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { //Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmov\t[%s], byte %s\n", reglist[r2], breglist[r1]); break; case 4: fprintf(Outfile, "\tmov\t[%s], dword %s\n", reglist[r2], dreglist[r1]); break; case 8: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; default: fatald("Can't cgstoderef on type:", type); 
} return (r1); } // Generate a switch jump table and the code to // load the registers and call the switch() code void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; // Get a label for the switch table label = genlabel(); cglabel(label); // Heuristic. If we have no cases, create one case // which points to the default case if (casecount == 0) { caseval[0] = 0; caselabel[0] = defaultlabel; casecount = 1; } // Generate the switch jump table. fprintf(Outfile, "\tdq\t%d\n", casecount); for (i = 0; i < casecount; i++) fprintf(Outfile, "\tdq\t%d, L%d\n", caseval[i], caselabel[i]); fprintf(Outfile, "\tdq\tL%d\n", defaultlabel); // Load the specific registers cglabel(toplabel); fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); fprintf(Outfile, "\tmov\trdx, L%d\n", label); fprintf(Outfile, "\tjmp\t__switch\n"); } // Move value between registers void cgmove(int r1, int r2) { fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r2], reglist[r1]); } void cglinenum(int line) { //fprintf(Outfile, ";\t.loc 1 %d 0\n", line); } ================================================ FILE: 59_WDIW_pt1/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Linestart; // True if at start of a line extern_ int Putback; // Character put back by scanner extern_ struct symtable *Functionid; // Symbol ptr of the current function extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ char *Infilename; // Name of file we are parsing extern_ char *Outfilename; // Name of file we opened as Outfile extern_ struct token Token; // Last token scanned extern_ struct token Peektoken; // A look-ahead token extern_ char Text[TEXTLEN + 1]; // Last identifier scanned extern_ int Looplevel; // Depth of nested loops extern_ int Switchlevel; // Depth of nested switches 
extern char *Tstring[]; // List of token strings // Symbol table lists extern_ struct symtable *Globhead, *Globtail; // Global variables and functions extern_ struct symtable *Loclhead, *Locltail; // Local variables extern_ struct symtable *Parmhead, *Parmtail; // Local parameters extern_ struct symtable *Membhead, *Membtail; // Temp list of struct/union members extern_ struct symtable *Structhead, *Structtail; // List of struct types extern_ struct symtable *Unionhead, *Uniontail; // List of union types extern_ struct symtable *Enumhead, *Enumtail; // List of enum types and values extern_ struct symtable *Typehead, *Typetail; // List of typedefs // Command-line flags extern_ int O_dumpAST; // If true, dump the AST trees extern_ int O_dumpsym; // If true, dump the symbol table extern_ int O_keepasm; // If true, keep any assembly files extern_ int O_assemble; // If true, assemble the assembly files extern_ int O_dolink; // If true, link the object files extern_ int O_verbose; // If true, print info on compilation stages ================================================ FILE: 59_WDIW_pt1/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 static struct symtable *composite_declaration(int type); static int typedef_declaration(struct symtable **ctype); static int type_of_typedef(char *name, struct symtable **ctype); static void enum_declaration(void); // Parse the current token and return a primitive type enum value, // a pointer to any composite type and possibly modify // the class of the type. 
int parse_type(struct symtable **ctype, int *class) { int type, exstatic = 1; // See if the class has been changed to extern or static while (exstatic) { switch (Token.token) { case T_EXTERN: if (*class == C_STATIC) fatal("Illegal to have extern and static at the same time"); *class = C_EXTERN; scan(&Token); break; case T_STATIC: if (*class == C_LOCAL) fatal("Compiler doesn't support static local declarations"); if (*class == C_EXTERN) fatal("Illegal to have extern and static at the same time"); *class = C_STATIC; scan(&Token); break; default: exstatic = 0; } } // Now work on the actual type keyword switch (Token.token) { case T_VOID: type = P_VOID; scan(&Token); break; case T_CHAR: type = P_CHAR; scan(&Token); break; case T_INT: type = P_INT; scan(&Token); break; case T_LONG: type = P_LONG; scan(&Token); break; // For the following, if we have a ';' after the // parsing then there is no type, so return -1. // Example: struct x {int y; int z}; case T_STRUCT: type = P_STRUCT; *ctype = composite_declaration(P_STRUCT); if (Token.token == T_SEMI) type = -1; break; case T_UNION: type = P_UNION; *ctype = composite_declaration(P_UNION); if (Token.token == T_SEMI) type = -1; break; case T_ENUM: type = P_INT; // Enums are really ints enum_declaration(); if (Token.token == T_SEMI) type = -1; break; case T_TYPEDEF: type = typedef_declaration(ctype); if (Token.token == T_SEMI) type = -1; break; case T_IDENT: type = type_of_typedef(Text, ctype); break; default: fatals("Illegal type, token", Token.tokstr); } return (type); } // Given a type parsed by parse_type(), scan in any following // '*' tokens and return the new type int parse_stars(int type) { while (1) { if (Token.token != T_STAR) break; type = pointer_to(type); scan(&Token); } return (type); } // Parse a type which appears inside a cast int parse_cast(struct symtable **ctype) { int type, class = 0; // Get the type inside the parentheses type = parse_stars(parse_type(ctype, &class)); // Do some error checking. 
I'm sure more can be done if (type == P_STRUCT || type == P_UNION || type == P_VOID) fatal("Cannot cast to a struct, union or void type"); return (type); } // Given a type, parse an expression of literals and ensure // that the type of this expression matches the given type. // Parse any type cast that precedes the expression. // If an integer literal, return this value. // If a string literal, return the label number of the string. int parse_literal(int type) { struct ASTnode *tree; // Parse the expression and optimise the resulting AST tree tree = optimise(binexpr(0)); // If there's a cast, get the child and // mark it as having the type from the cast if (tree->op == A_CAST) { tree->left->type = tree->type; tree = tree->left; } // The tree must now have an integer or string literal if (tree->op != A_INTLIT && tree->op != A_STRLIT) fatal("Cannot initialise globals with a general expression"); // If the type is char * and if (type == pointer_to(P_CHAR)) { // We have a string literal, return the label number if (tree->op == A_STRLIT) return (tree->a_intvalue); // We have a zero int literal, so that's a NULL if (tree->op == A_INTLIT && tree->a_intvalue == 0) return (0); } // We only get here with an integer literal. The input type // is an integer type and is wide enough to hold the literal value if (inttype(type) && typesize(type, NULL) >= typesize(tree->type, NULL)) return (tree->a_intvalue); fatal("Type mismatch: literal vs. variable"); return (0); // Keep -Wall happy } // Given a pointer to a symbol that may already exist // return true if this symbol doesn't exist. 
We use // this function to convert externs into globals int is_new_symbol(struct symtable *sym, int class, int type, struct symtable *ctype) { // There is no existing symbol, thus is new if (sym==NULL) return(1); // global versus extern: if they match that it's not new // and we can convert the class to global if ((sym->class== C_GLOBAL && class== C_EXTERN) || (sym->class== C_EXTERN && class== C_GLOBAL)) { // If the types don't match, there's a problem if (type != sym->type) fatals("Type mismatch between global/extern", sym->name); // Struct/unions, also compare the ctype if (type >= P_STRUCT && ctype != sym->ctype) fatals("Type mismatch between global/extern", sym->name); // If we get to here, the types match, so mark the symbol // as global sym->class= C_GLOBAL; // Return that symbol is not new return(0); } // It must be a duplicate symbol if we get here fatals("Duplicate global variable declaration", sym->name); return(-1); // Keep -Wall happy } // Given the type, name and class of a scalar variable, // parse any initialisation value and allocate storage for it. // Return the variable's symbol table entry. 
static struct symtable *scalar_declaration(char *varname, int type, struct symtable *ctype, int class, struct ASTnode **tree) { struct symtable *sym = NULL; struct ASTnode *varnode, *exprnode; *tree = NULL; // Add this as a known scalar switch (class) { case C_STATIC: case C_EXTERN: case C_GLOBAL: // See if this variable is new or already exists sym= findglob(varname); if (is_new_symbol(sym, class, type, ctype)) sym = addglob(varname, type, ctype, S_VARIABLE, class, 1, 0); break; case C_LOCAL: sym = addlocl(varname, type, ctype, S_VARIABLE, 1); break; case C_PARAM: sym = addparm(varname, type, ctype, S_VARIABLE); break; case C_MEMBER: sym = addmemb(varname, type, ctype, S_VARIABLE, 1); break; } // The variable is being initialised if (Token.token == T_ASSIGN) { // Only possible for a global or local if (class != C_GLOBAL && class != C_LOCAL && class != C_STATIC) fatals("Variable can not be initialised", varname); scan(&Token); // Globals must be assigned a literal value if (class == C_GLOBAL || class == C_STATIC) { // Create one initial value for the variable and // parse this value sym->initlist = (int *) malloc(sizeof(int)); sym->initlist[0] = parse_literal(type); } if (class == C_LOCAL) { // Make an A_IDENT AST node with the variable varnode = mkastleaf(A_IDENT, sym->type, sym->ctype, sym, 0); // Get the expression for the assignment, make into a rvalue exprnode = binexpr(0); exprnode->rvalue = 1; // Ensure the expression's type matches the variable exprnode = modify_type(exprnode, varnode->type, varnode->ctype, 0); if (exprnode == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree *tree = mkastnode(A_ASSIGN, exprnode->type, exprnode->ctype, exprnode, NULL, varnode, NULL, 0); } } // Generate any global space if (class == C_GLOBAL || class == C_STATIC) genglobsym(sym); return (sym); } // Given the type, name and class of an variable, parse // the size of the array, if any. 
Then parse any initialisation // value and allocate storage for it. // Return the variable's symbol table entry. static struct symtable *array_declaration(char *varname, int type, struct symtable *ctype, int class) { struct symtable *sym; // New symbol table entry int nelems = -1; // Assume the number of elements won't be given int maxelems; // The maximum number of elements in the init list int *initlist; // The list of initial elements int i = 0, j; // Skip past the '[' scan(&Token); // See we have an array size if (Token.token != T_RBRACKET) { nelems = parse_literal(P_INT); if (nelems <= 0) fatald("Array size is illegal", nelems); } // Ensure we have a following ']' match(T_RBRACKET, "]"); // Add this as a known array. We treat the // array as a pointer to its elements' type switch (class) { case C_STATIC: case C_EXTERN: case C_GLOBAL: // See if this variable is new or already exists sym= findglob(varname); if (is_new_symbol(sym, class, pointer_to(type), ctype)) sym = addglob(varname, pointer_to(type), ctype, S_ARRAY, class, 0, 0); break; case C_LOCAL: sym = addlocl(varname, pointer_to(type), ctype, S_ARRAY, 0); break; default: fatal("Declaration of array parameters is not implemented"); } // Array initialisation if (Token.token == T_ASSIGN) { if (class != C_GLOBAL && class != C_STATIC) fatals("Variable can not be initialised", varname); scan(&Token); // Get the following left curly bracket match(T_LBRACE, "{"); #define TABLE_INCREMENT 10 // If the array already has nelems, allocate that many elements // in the list. Otherwise, start with TABLE_INCREMENT. 
if (nelems != -1) maxelems = nelems; else maxelems = TABLE_INCREMENT; initlist = (int *) malloc(maxelems * sizeof(int)); // Loop getting a new literal value from the list while (1) { // Check we can add the next value, then parse and add it if (nelems != -1 && i == maxelems) fatal("Too many values in initialisation list"); initlist[i++] = parse_literal(type); // Increase the list size if the original size was // not set and we have hit the end of the current list if (nelems == -1 && i == maxelems) { maxelems += TABLE_INCREMENT; initlist = (int *) realloc(initlist, maxelems * sizeof(int)); } // Leave when we hit the right curly bracket if (Token.token == T_RBRACE) { scan(&Token); break; } // Next token must be a comma, then comma(); } // Zero any unused elements in the initlist. // Attach the list to the symbol table entry for (j = i; j < sym->nelems; j++) initlist[j] = 0; if (i > nelems) nelems = i; sym->initlist = initlist; } // Set the size of the array and the number of elements // Only externs can have no elements. if (class != C_EXTERN && nelems<=0) fatals("Array must have non-zero elements", sym->name); sym->nelems = nelems; sym->size = sym->nelems * typesize(type, ctype); // Generate any global space if (class == C_GLOBAL || class == C_STATIC) genglobsym(sym); return (sym); } // Given a pointer to the new function being declared and // a possibly NULL pointer to the function's previous declaration, // parse a list of parameters and cross-check them against the // previous declaration. 
Return the count of parameters static int param_declaration_list(struct symtable *oldfuncsym, struct symtable *newfuncsym) { int type, paramcnt = 0; struct symtable *ctype; struct symtable *protoptr = NULL; struct ASTnode *unused; // Get the pointer to the first prototype parameter if (oldfuncsym != NULL) protoptr = oldfuncsym->member; // Loop getting any parameters while (Token.token != T_RPAREN) { // If the first token is 'void' if (Token.token == T_VOID) { // Peek at the next token. If a ')', the function // has no parameters, so leave the loop. scan(&Peektoken); if (Peektoken.token == T_RPAREN) { // Move the Peektoken into the Token paramcnt = 0; scan(&Token); break; } } // Get the type of the next parameter type = declaration_list(&ctype, C_PARAM, T_COMMA, T_RPAREN, &unused); if (type == -1) fatal("Bad type in parameter list"); // Ensure the type of this parameter matches the prototype if (protoptr != NULL) { if (type != protoptr->type) fatald("Type doesn't match prototype for parameter", paramcnt + 1); protoptr = protoptr->next; } paramcnt++; // Stop when we hit the right parenthesis if (Token.token == T_RPAREN) break; // We need a comma as separator comma(); } if (oldfuncsym != NULL && paramcnt != oldfuncsym->nelems) fatals("Parameter count mismatch for function", oldfuncsym->name); // Return the count of parameters return (paramcnt); } // // function_declaration: type identifier '(' parameter_list ')' ; // | type identifier '(' parameter_list ')' compound_statement ; // // Parse the declaration of function. static struct symtable *function_declaration(char *funcname, int type, struct symtable *ctype, int class) { struct ASTnode *tree, *finalstmt; struct symtable *oldfuncsym, *newfuncsym = NULL; int endlabel, paramcnt; int linenum= Line; // Text has the identifier's name. If this exists and is a // function, get the id. Otherwise, set oldfuncsym to NULL. 
if ((oldfuncsym = findsymbol(funcname)) != NULL) if (oldfuncsym->stype != S_FUNCTION) oldfuncsym = NULL; // If this is a new function declaration, get a // label-id for the end label, and add the function // to the symbol table, if (oldfuncsym == NULL) { endlabel = genlabel(); // Assumtion: functions only return scalar types, so NULL below newfuncsym = addglob(funcname, type, NULL, S_FUNCTION, class, 0, endlabel); } // Scan in the '(', any parameters and the ')'. // Pass in any existing function prototype pointer lparen(); paramcnt = param_declaration_list(oldfuncsym, newfuncsym); rparen(); // If this is a new function declaration, update the // function symbol entry with the number of parameters. // Also copy the parameter list into the function's node. if (newfuncsym) { newfuncsym->nelems = paramcnt; newfuncsym->member = Parmhead; oldfuncsym = newfuncsym; } // Clear out the parameter list Parmhead = Parmtail = NULL; // Declaration ends in a semicolon, only a prototype. if (Token.token == T_SEMI) return (oldfuncsym); // This is not just a prototype. // Set the Functionid global to the function's symbol pointer Functionid = oldfuncsym; // Get the AST tree for the compound statement and mark // that we have parsed no loops or switches yet Looplevel = 0; Switchlevel = 0; lbrace(); tree = compound_statement(0); rbrace(); // If the function type isn't P_VOID .. if (type != P_VOID) { // Error if no statements in the function if (tree == NULL) fatal("No statements in function with non-void type"); // Check that the last AST operation in the // compound statement was a return statement finalstmt = (tree->op == A_GLUE) ? 
tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); } // Build the A_FUNCTION node which has the function's symbol pointer // and the compound statement sub-tree tree = mkastunary(A_FUNCTION, type, ctype, tree, oldfuncsym, endlabel); tree->linenum= linenum; // Do optimisations on the AST tree tree = optimise(tree); // Dump the AST tree if requested if (O_dumpAST) { dumpAST(tree, NOLABEL, 0); fprintf(stdout, "\n\n"); } // Generate the assembly code for it genAST(tree, NOLABEL, NOLABEL, NOLABEL, 0); // Now free the symbols associated // with this function freeloclsyms(); return (oldfuncsym); } // Parse composite type declarations: structs or unions. // Either find an existing struct/union declaration, or build // a struct/union symbol table entry and return its pointer. static struct symtable *composite_declaration(int type) { struct symtable *ctype = NULL; struct symtable *m; struct ASTnode *unused; int offset; int t; // Skip the struct/union keyword scan(&Token); // See if there is a following struct/union name if (Token.token == T_IDENT) { // Find any matching composite type if (type == P_STRUCT) ctype = findstruct(Text); else ctype = findunion(Text); scan(&Token); } // If the next token isn't an LBRACE , this is // the usage of an existing struct/union type. // Return the pointer to the type. if (Token.token != T_LBRACE) { if (ctype == NULL) fatals("unknown struct/union type", Text); return (ctype); } // Ensure this struct/union type hasn't been // previously defined if (ctype) fatals("previously defined struct/union", Text); // Build the composite type and skip the left brace if (type == P_STRUCT) ctype = addstruct(Text); else ctype = addunion(Text); scan(&Token); // Scan in the list of members while (1) { // Get the next member. 
m is used as a dummy t = declaration_list(&m, C_MEMBER, T_SEMI, T_RBRACE, &unused); if (t == -1) fatal("Bad type in member list"); if (Token.token == T_SEMI) scan(&Token); if (Token.token == T_RBRACE) break; } // Attach to the struct type's node rbrace(); if (Membhead == NULL) fatals("No members in struct", ctype->name); ctype->member = Membhead; Membhead = Membtail = NULL; // Set the offset of the initial member // and find the first free byte after it m = ctype->member; m->st_posn = 0; offset = typesize(m->type, m->ctype); // Set the position of each successive member in the composite type // Unions are easy. For structs, align the member and find the next free byte for (m = m->next; m != NULL; m = m->next) { // Set the offset for this member if (type == P_STRUCT) m->st_posn = genalign(m->type, offset, 1); else m->st_posn = 0; // Get the offset of the next free byte after this member offset += typesize(m->type, m->ctype); } // Set the overall size of the composite type ctype->size = offset; return (ctype); } // Parse an enum declaration static void enum_declaration(void) { struct symtable *etype = NULL; char *name = NULL; int intval = 0; // Skip the enum keyword. scan(&Token); // If there's a following enum type name, get a // pointer to any existing enum type node. if (Token.token == T_IDENT) { etype = findenumtype(Text); name = strdup(Text); // As it gets tromped soon scan(&Token); } // If the next token isn't a LBRACE, check // that we have an enum type name, then return if (Token.token != T_LBRACE) { if (etype == NULL) fatals("undeclared enum type:", name); return; } // We do have an LBRACE. Skip it scan(&Token); // If we have an enum type name, ensure that it // hasn't been declared before. 
if (etype != NULL) fatals("enum type redeclared:", etype->name); else // Build an enum type node for this identifier etype = addenum(name, C_ENUMTYPE, 0); // Loop to get all the enum values while (1) { // Ensure we have an identifier // Copy it in case there's an int literal coming up ident(); name = strdup(Text); // Ensure this enum value hasn't been declared before etype = findenumval(name); if (etype != NULL) fatals("enum value redeclared:", Text); // If the next token is an '=', skip it and // get the following int literal if (Token.token == T_ASSIGN) { scan(&Token); if (Token.token != T_INTLIT) fatal("Expected int literal after '='"); intval = Token.intvalue; scan(&Token); } // Build an enum value node for this identifier. // Increment the value for the next enum identifier. etype = addenum(name, C_ENUMVAL, intval++); // Bail out on a right curly bracket, else get a comma if (Token.token == T_RBRACE) break; comma(); } scan(&Token); // Skip over the right curly bracket } // Parse a typedef declaration and return the type // and ctype that it represents static int typedef_declaration(struct symtable **ctype) { int type, class = 0; // Skip the typedef keyword. scan(&Token); // Get the actual type following the keyword type = parse_type(ctype, &class); if (class != 0) fatal("Can't have extern in a typedef declaration"); // See if the typedef identifier already exists if (findtypedef(Text) != NULL) fatals("redefinition of typedef", Text); // Get any following '*' tokens type = parse_stars(type); // It doesn't exist so add it to the typedef list addtypedef(Text, type, *ctype); scan(&Token); return (type); } // Given a typedef name, return the type it represents static int type_of_typedef(char *name, struct symtable **ctype) { struct symtable *t; // Look up the typedef in the list t = findtypedef(name); if (t == NULL) fatals("unknown type", name); scan(&Token); *ctype = t->ctype; return (t->type); } // Parse the declaration of a variable or function. 
// The type and any following '*'s have been scanned, and we // have the identifier in the Token variable. // The class argument is the variable's class. // Return a pointer to the symbol's entry in the symbol table static struct symtable *symbol_declaration(int type, struct symtable *ctype, int class, struct ASTnode **tree) { struct symtable *sym = NULL; char *varname = strdup(Text); // Ensure that we have an identifier. // We copied it above so we can scan more tokens in, e.g. // an assignment expression for a local variable. ident(); // Deal with function declarations if (Token.token == T_LPAREN) { return (function_declaration(varname, type, ctype, class)); } // See if this array or scalar variable has already been declared switch (class) { case C_EXTERN: case C_STATIC: case C_GLOBAL: case C_LOCAL: case C_PARAM: if (findlocl(varname) != NULL) fatals("Duplicate local variable declaration", varname); case C_MEMBER: if (findmember(varname) != NULL) fatals("Duplicate struct/union member declaration", varname); } // Add the array or scalar variable to the symbol table if (Token.token == T_LBRACKET) { sym = array_declaration(varname, type, ctype, class); *tree= NULL; // Local arrays are not initialised } else sym = scalar_declaration(varname, type, ctype, class, tree); return (sym); } // Parse a list of symbols where there is an initial type. // Return the type of the symbols. et1 and et2 are end tokens. int declaration_list(struct symtable **ctype, int class, int et1, int et2, struct ASTnode **gluetree) { int inittype, type; struct symtable *sym; struct ASTnode *tree; *gluetree = NULL; // Get the initial type. 
If -1, it was // a composite type definition, return this if ((inittype = parse_type(ctype, &class)) == -1) return (inittype); // Now parse the list of symbols while (1) { // See if this symbol is a pointer type = parse_stars(inittype); // Parse this symbol sym = symbol_declaration(type, *ctype, class, &tree); // We parsed a function, there is no list so leave if (sym->stype == S_FUNCTION) { if (class != C_GLOBAL && class != C_STATIC) fatal("Function definition not at global level"); return (type); } // Glue any AST tree from a local declaration // to build a sequence of assignments to perform if (*gluetree == NULL) *gluetree = tree; else *gluetree = mkastnode(A_GLUE, P_NONE, NULL, *gluetree, NULL, tree, NULL, 0); // We are at the end of the list, leave if (Token.token == et1 || Token.token == et2) return (type); // Otherwise, we need a comma as separator comma(); } return(0); // Keep -Wall happy } // Parse one or more global declarations, either // variables, functions or structs void global_declarations(void) { struct symtable *ctype= NULL; struct ASTnode *unused; while (Token.token != T_EOF) { declaration_list(&ctype, C_GLOBAL, T_SEMI, T_EOF, &unused); // Skip any semicolons and right curly brackets if (Token.token == T_SEMI) scan(&Token); } } ================================================ FILE: 59_WDIW_pt1/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c void reject_token(struct token *t); int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, int type, struct symtable *ctype, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue); struct ASTnode *mkastleaf(int op, int type, struct symtable *ctype, struct symtable *sym, int intvalue); struct ASTnode *mkastunary(int op, int type, struct symtable *ctype, struct ASTnode *left, struct symtable *sym, int intvalue); void dumpAST(struct ASTnode *n, 
int label, int level); // gen.c int genlabel(void); int genAST(struct ASTnode *n, int iflabel, int looptoplabel, int loopendlabel, int parentASTop); void genpreamble(char *filename); void genpostamble(); void genfreeregs(int keepreg); void genglobsym(struct symtable *node); int genglobstr(char *strvalue, int append); void genglobstrend(void); int genprimsize(int type); int genalign(int type, int offset, int direction); void genreturn(int reg, int id); // cg.c int cgprimsize(int type); int cgalign(int type, int offset, int direction); void cgtextseg(); void cgdataseg(); int alloc_register(void); void freeall_registers(int keepreg); void spill_all_regs(void); void cgpreamble(char *filename); void cgpostamble(); void cgfuncpreamble(struct symtable *sym); void cgfuncpostamble(struct symtable *sym); int cgloadint(int value, int type); int cgloadvar(struct symtable *sym, int op); int cgloadglobstr(int label); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdivmod(int r1, int r2, int op); int cgshlconst(int r, int val); int cgcall(struct symtable *sym, int numargs); void cgcopyarg(int r, int argposn); int cgstorglob(int r, struct symtable *sym); int cgstorlocal(int r, struct symtable *sym); void cgglobsym(struct symtable *node); void cgglobstr(int l, char *strvalue, int append); void cgglobstrend(void); int cgcompare_and_set(int ASTop, int r1, int r2, int type); int cgcompare_and_jump(int ASTop, int r1, int r2, int label, int type); void cglabel(int l); void cgjump(int l); int cgwiden(int r, int oldtype, int newtype); void cgreturn(int reg, struct symtable *sym); int cgaddress(struct symtable *sym); int cgderef(int r, int type); int cgstorderef(int r1, int r2, int type); int cgnegate(int r); int cginvert(int r); int cglognot(int r); void cgloadboolean(int r, int val); int cgboolean(int r, int op, int label); int cgand(int r1, int r2); int cgor(int r1, int r2); int cgxor(int r1, int r2); int cgshl(int r1, int r2); int cgshr(int r1, 
int r2); void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel); void cgmove(int r1, int r2); void cglinenum(int line); // expr.c struct ASTnode *expression_list(int endtoken); struct ASTnode *binexpr(int ptp); // stmt.c struct ASTnode *compound_statement(int inswitch); // misc.c void match(int t, char *what); void semi(void); void lbrace(void); void rbrace(void); void lparen(void); void rparen(void); void ident(void); void comma(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node); struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn); struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn); struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int nelems); struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype); struct symtable *addstruct(char *name); struct symtable *addunion(char *name); struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int nelems); struct symtable *addenum(char *name, int class, int value); struct symtable *addtypedef(char *name, int type, struct symtable *ctype); struct symtable *findglob(char *s); struct symtable *findlocl(char *s); struct symtable *findsymbol(char *s); struct symtable *findmember(char *s); struct symtable *findstruct(char *s); struct symtable *findunion(char *s); struct symtable *findenumtype(char *s); struct symtable *findenumval(char *s); struct symtable *findtypedef(char *s); void clear_symtable(void); void freeloclsyms(void); void freestaticsyms(void); void dumptable(struct symtable *head, char *name, int indent); void dumpsymtables(void); // decl.c int parse_type(struct symtable **ctype, int *class); int 
parse_stars(int type); int parse_cast(struct symtable **ctype); int declaration_list(struct symtable **ctype, int class, int et1, int et2, struct ASTnode **gluetree); void global_declarations(void); // types.c int inttype(int type); int ptrtype(int type); int pointer_to(int type); int value_at(int type); int typesize(int type, struct symtable *ctype); struct ASTnode *modify_type(struct ASTnode *tree, int rtype, struct symtable *rctype, int op); // opt.c struct ASTnode *optimise(struct ASTnode *n); ================================================ FILE: 59_WDIW_pt1/defs.h ================================================ #include #include #include #include #include "incdir.h" // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 enum { TEXTLEN = 512 // Length of identifiers in input }; // Commands and default filenames #define AOUT "a.out" #ifdef __NASM__ #define ASCMD "nasm -g -f elf64 -w-ptr -o " #define LDCMD "cc -g -no-pie -fno-plt -Wall -o " #else #define ASCMD "as -g -o " #define LDCMD "cc -g -o " #endif #define CPPCMD "cpp -nostdinc -isystem " // Token types enum { T_EOF, // Binary operators T_ASSIGN, T_ASPLUS, T_ASMINUS, T_ASSTAR, T_ASSLASH, T_ASMOD, T_QUESTION, T_LOGOR, T_LOGAND, T_OR, T_XOR, T_AMPER, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, T_LSHIFT, T_RSHIFT, T_PLUS, T_MINUS, T_STAR, T_SLASH, T_MOD, // Other operators T_INC, T_DEC, T_INVERT, T_LOGNOT, // Type keywords T_VOID, T_CHAR, T_INT, T_LONG, // Other keywords T_IF, T_ELSE, T_WHILE, T_FOR, T_RETURN, T_STRUCT, T_UNION, T_ENUM, T_TYPEDEF, T_EXTERN, T_BREAK, T_CONTINUE, T_SWITCH, T_CASE, T_DEFAULT, T_SIZEOF, T_STATIC, // Structural tokens T_INTLIT, T_STRLIT, T_SEMI, T_IDENT, T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, T_LBRACKET, T_RBRACKET, T_COMMA, T_DOT, T_ARROW, T_COLON }; // Token structure struct token { int token; // Token type, from the enum list above char *tokstr; // String version of the token int intvalue; // For T_INTLIT, the integer value }; // AST node types. 
The first few line up // with the related tokens enum { A_ASSIGN = 1, A_ASPLUS, A_ASMINUS, A_ASSTAR, // 1 A_ASSLASH, A_ASMOD, A_TERNARY, A_LOGOR, // 5 A_LOGAND, A_OR, A_XOR, A_AND, A_EQ, A_NE, A_LT, // 9 A_GT, A_LE, A_GE, A_LSHIFT, A_RSHIFT, // 16 A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_MOD, // 21 A_INTLIT, A_STRLIT, A_IDENT, A_GLUE, // 26 A_IF, A_WHILE, A_FUNCTION, A_WIDEN, A_RETURN, // 30 A_FUNCCALL, A_DEREF, A_ADDR, A_SCALE, // 35 A_PREINC, A_PREDEC, A_POSTINC, A_POSTDEC, // 39 A_NEGATE, A_INVERT, A_LOGNOT, A_TOBOOL, A_BREAK, // 43 A_CONTINUE, A_SWITCH, A_CASE, A_DEFAULT, A_CAST // 48 }; // Primitive types. The bottom 4 bits is an integer // value that represents the level of indirection, // e.g. 0= no pointer, 1= pointer, 2= pointer pointer etc. enum { P_NONE, P_VOID = 16, P_CHAR = 32, P_INT = 48, P_LONG = 64, P_STRUCT=80, P_UNION=96 }; // Structural types enum { S_VARIABLE, S_FUNCTION, S_ARRAY }; // Storage classes enum { C_GLOBAL = 1, // Globally visible symbol C_LOCAL, // Locally visible symbol C_PARAM, // Locally visible function parameter C_EXTERN, // External globally visible symbol C_STATIC, // Static symbol, visible in one file C_STRUCT, // A struct C_UNION, // A union C_MEMBER, // Member of a struct or union C_ENUMTYPE, // A named enumeration type C_ENUMVAL, // A named enumeration value C_TYPEDEF // A named typedef }; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol struct symtable *ctype; // If struct/union, ptr to that type int stype; // Structural type for the symbol int class; // Storage class for the symbol int size; // Total size in bytes of this symbol int nelems; // Functions: # params. 
Arrays: # elements #define st_endlabel st_posn // For functions, the end label int st_posn; // For locals, the negative offset // from the stack base pointer int *initlist; // List of initial values struct symtable *next; // Next symbol in one list struct symtable *member; // First member of a function, struct, #ifdef __NASM__ int extinit; #endif }; // union or enum // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates struct symtable *ctype; // If struct/union, ptr to that type int rvalue; // True if the node is an rvalue struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; struct symtable *sym; // For many AST nodes, the pointer to // the symbol in the symbol table #define a_intvalue a_size // For A_INTLIT, the integer value int a_size; // For A_SCALE, the size to scale by int linenum; // Line number from where this node comes }; enum { NOREG = -1, // Use NOREG when the AST generation // functions have no register to return NOLABEL = 0 // Use NOLABEL when we have no label to // pass to genAST() }; ================================================ FILE: 59_WDIW_pt1/expr.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of expressions // Copyright (c) 2019 Warren Toomey, GPL3 // expression_list: // | expression // | expression ',' expression_list // ; // Parse a list of zero or more comma-separated expressions and // return an AST composed of A_GLUE nodes with the left-hand child // being the sub-tree of previous expressions (or NULL) and the right-hand // child being the next expression. Each A_GLUE node will have size field // set to the number of expressions in the tree at this point. 
If no // expressions are parsed, NULL is returned struct ASTnode *expression_list(int endtoken) { struct ASTnode *tree = NULL; struct ASTnode *child = NULL; int exprcount = 0; // Loop until the end token while (Token.token != endtoken) { // Parse the next expression and increment the expression count child = binexpr(0); exprcount++; // Build an A_GLUE AST node with the previous tree as the left child // and the new expression as the right child. Store the expression count. tree = mkastnode(A_GLUE, P_NONE, NULL, tree, NULL, child, NULL, exprcount); // Stop when we reach the end token if (Token.token == endtoken) break; // Must have a ',' at this point match(T_COMMA, ","); } // Return the tree of expressions return (tree); } // Parse a function call and return its AST static struct ASTnode *funccall(void) { struct ASTnode *tree; struct symtable *funcptr; // Check that the identifier has been defined as a function, // then make a leaf node for it. if ((funcptr = findsymbol(Text)) == NULL || funcptr->stype != S_FUNCTION) { fatals("Undeclared function", Text); } // Get the '(' lparen(); // Parse the argument expression list tree = expression_list(T_RPAREN); // XXX Check type of each argument against the function's prototype // Build the function call AST node. Store the // function's return type as this node's type. 
// Also record the function's symbol-id tree = mkastunary(A_FUNCCALL, funcptr->type, funcptr->ctype, tree, funcptr, 0); // Get the ')' rparen(); return (tree); } // Parse the index into an array and return an AST tree for it static struct ASTnode *array_access(struct ASTnode *left) { struct ASTnode *right; // Check that the sub-tree is a pointer if (!ptrtype(left->type)) fatal("Not an array or pointer"); // Get the '[' scan(&Token); // Parse the following expression right = binexpr(0); // Get the ']' match(T_RBRACKET, "]"); // Ensure that this is of int type if (!inttype(right->type)) fatal("Array index is not of integer type"); // Make the left tree an rvalue left->rvalue = 1; // Scale the index by the size of the element's type right = modify_type(right, left->type, left->ctype, A_ADD); // Return an AST tree where the array's base has the offset added to it, // and dereference the element. Still an lvalue at this point. left = mkastnode(A_ADD, left->type, left->ctype, left, NULL, right, NULL, 0); left = mkastunary(A_DEREF, value_at(left->type), left->ctype, left, NULL, 0); return (left); } // Parse the member reference of a struct or union // and return an AST tree for it. If withpointer is true, // the access is through a pointer to the member. static struct ASTnode *member_access(struct ASTnode *left, int withpointer) { struct ASTnode *right; struct symtable *typeptr; struct symtable *m; // Check that the left AST tree is a pointer to struct or union if (withpointer && left->type != pointer_to(P_STRUCT) && left->type != pointer_to(P_UNION)) fatal("Expression is not a pointer to a struct/union"); // Or, check that the left AST tree is a struct or union. // If so, change it from an A_IDENT to an A_ADDR so that // we get the base address, not the value at this address. 
if (!withpointer) { if (left->type == P_STRUCT || left->type == P_UNION) left->op = A_ADDR; else fatal("Expression is not a struct/union"); } // Get the details of the composite type typeptr = left->ctype; // Skip the '.' or '->' token and get the member's name scan(&Token); ident(); // Find the matching member's name in the type // Die if we can't find it for (m = typeptr->member; m != NULL; m = m->next) if (!strcmp(m->name, Text)) break; if (m == NULL) fatals("No member found in struct/union: ", Text); // Make the left tree an rvalue left->rvalue = 1; // Build an A_INTLIT node with the offset right = mkastleaf(A_INTLIT, P_INT, NULL, NULL, m->st_posn); // Add the member's offset to the base of the struct/union // and dereference it. Still an lvalue at this point left = mkastnode(A_ADD, pointer_to(m->type), m->ctype, left, NULL, right, NULL, 0); left = mkastunary(A_DEREF, m->type, m->ctype, left, NULL, 0); return (left); } // Parse a parenthesised expression and // return an AST node representing it. static struct ASTnode *paren_expression(int ptp) { struct ASTnode *n; int type = 0; struct symtable *ctype = NULL; // Beginning of a parenthesised expression, skip the '('. scan(&Token); // If the token after is a type identifier, this is a cast expression switch (Token.token) { case T_IDENT: // We have to see if the identifier matches a typedef. // If not, treat it as an expression. if (findtypedef(Text) == NULL) { n = binexpr(0); // ptp is zero as expression inside ( ) break; } case T_VOID: case T_CHAR: case T_INT: case T_LONG: case T_STRUCT: case T_UNION: case T_ENUM: // Get the type inside the parentheses type = parse_cast(&ctype); // Skip the closing ')' and then parse the following expression rparen(); default: n = binexpr(ptp); // Scan in the expression. We pass in ptp // as the cast doesn't change the // expression's precedence } // We now have at least an expression in n, and possibly a non-zero type // in type if there was a cast. 
Skip the closing ')' if there was no cast. if (type == 0) rparen(); else // Otherwise, make a unary AST node for the cast n = mkastunary(A_CAST, type, ctype, n, NULL, 0); return (n); } // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(int ptp) { struct ASTnode *n; struct symtable *enumptr; struct symtable *varptr; int id; int type = 0; int size, class; struct symtable *ctype; switch (Token.token) { case T_STATIC: case T_EXTERN: fatal("Compiler doesn't support static or extern local declarations"); case T_SIZEOF: // Skip the T_SIZEOF and ensure we have a left parenthesis scan(&Token); if (Token.token != T_LPAREN) fatal("Left parenthesis expected after sizeof"); scan(&Token); // Get the type inside the parentheses type = parse_stars(parse_type(&ctype, &class)); // Get the type's size size = typesize(type, ctype); rparen(); // Make a leaf node int literal with the size return (mkastleaf(A_INTLIT, P_INT, NULL, NULL, size)); case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. // Make it a P_CHAR if it's within the P_CHAR range if (Token.intvalue >= 0 && Token.intvalue < 256) n = mkastleaf(A_INTLIT, P_CHAR, NULL, NULL, Token.intvalue); else n = mkastleaf(A_INTLIT, P_INT, NULL, NULL, Token.intvalue); break; case T_STRLIT: // For a STRLIT token, generate the assembly for it. id = genglobstr(Text, 0); // For successive STRLIT tokens, append their contents // to this one while (1) { scan(&Peektoken); if (Peektoken.token != T_STRLIT) break; genglobstr(Text, 1); scan(&Token); // To skip it properly } // Now make a leaf AST node for it. id is the string's label. genglobstrend(); n = mkastleaf(A_STRLIT, pointer_to(P_CHAR), NULL, NULL, id); break; case T_IDENT: // If the identifier matches an enum value, // return an A_INTLIT node if ((enumptr = findenumval(Text)) != NULL) { n = mkastleaf(A_INTLIT, P_INT, NULL, NULL, enumptr->st_posn); break; } // See if this identifier exists as a symbol. 
For arrays, set rvalue to 1. if ((varptr = findsymbol(Text)) == NULL) fatals("Unknown variable or function", Text); switch (varptr->stype) { case S_VARIABLE: n = mkastleaf(A_IDENT, varptr->type, varptr->ctype, varptr, 0); break; case S_ARRAY: n = mkastleaf(A_ADDR, varptr->type, varptr->ctype, varptr, 0); n->rvalue = 1; break; case S_FUNCTION: // Function call, see if the next token is a left parenthesis scan(&Token); if (Token.token != T_LPAREN) fatals("Function name used without parentheses", Text); return (funccall()); default: fatals("Identifier not a scalar or array variable", Text); } break; case T_LPAREN: return (paren_expression(ptp)); default: fatals("Expecting a primary expression, got token", Token.tokstr); } // Scan in the next token and return the leaf node scan(&Token); return (n); } // Parse a postfix expression and return // an AST node representing it. The // identifier is already in Text. static struct ASTnode *postfix(int ptp) { struct ASTnode *n; // Get the primary expression n = primary(ptp); // Loop until there are no more postfix operators while (1) { switch (Token.token) { case T_LBRACKET: // An array reference n = array_access(n); break; case T_DOT: // Access into a struct or union n = member_access(n, 0); break; case T_ARROW: // Pointer access into a struct or union n = member_access(n, 1); break; case T_INC: // Post-increment: skip over the token if (n->rvalue == 1) fatal("Cannot ++ on rvalue"); scan(&Token); // Can't do it twice if (n->op == A_POSTINC || n->op == A_POSTDEC) fatal("Cannot ++ and/or -- more than once"); // and change the AST operation n->op = A_POSTINC; break; case T_DEC: // Post-decrement: skip over the token if (n->rvalue == 1) fatal("Cannot -- on rvalue"); scan(&Token); // Can't do it twice if (n->op == A_POSTINC || n->op == A_POSTDEC) fatal("Cannot ++ and/or -- more than once"); // and change the AST operation n->op = A_POSTDEC; break; default: return (n); } } return (NULL); // Keep -Wall happy } // Convert a binary 
operator token into a binary AST operation. // We rely on a 1:1 mapping from token to AST operation static int binastop(int tokentype) { if (tokentype > T_EOF && tokentype <= T_MOD) return (tokentype); fatals("Syntax error, token", Tstring[tokentype]); return (0); // Keep -Wall happy } // Return true if a token is right-associative, // false otherwise. static int rightassoc(int tokentype) { if (tokentype >= T_ASSIGN && tokentype <= T_ASSLASH) return (1); return (0); } // Operator precedence for each token. Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 10, // T_EOF, T_ASSIGN, T_ASPLUS, 10, 10, // T_ASMINUS, T_ASSTAR, 10, 10, // T_ASSLASH, T_ASMOD, 15, // T_QUESTION, 20, 30, // T_LOGOR, T_LOGAND 40, 50, 60, // T_OR, T_XOR, T_AMPER 70, 70, // T_EQ, T_NE 80, 80, 80, 80, // T_LT, T_GT, T_LE, T_GE 90, 90, // T_LSHIFT, T_RSHIFT 100, 100, // T_PLUS, T_MINUS 110, 110, 110 // T_STAR, T_SLASH, T_MOD }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec; if (tokentype > T_MOD) fatals("Token with no precedence in op_precedence:", Tstring[tokentype]); prec = OpPrec[tokentype]; if (prec == 0) fatals("Syntax error, token", Tstring[tokentype]); return (prec); } // prefix_expression: postfix_expression // | '*' prefix_expression // | '&' prefix_expression // | '-' prefix_expression // | '++' prefix_expression // | '--' prefix_expression // ; // Parse a prefix expression and return // a sub-tree representing it. 
static struct ASTnode *prefix(int ptp) { struct ASTnode *tree; switch (Token.token) { case T_AMPER: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(ptp); // Ensure that it's an identifier if (tree->op != A_IDENT) fatal("& operator must be followed by an identifier"); // Prevent '&' being performed on an array if (tree->sym->stype == S_ARRAY) fatal("& operator cannot be performed on an array"); // Now change the operator to A_ADDR and the type to // a pointer to the original type tree->op = A_ADDR; tree->type = pointer_to(tree->type); break; case T_STAR: // Get the next token and parse it // recursively as a prefix expression. // Make it an rvalue scan(&Token); tree = prefix(ptp); tree->rvalue= 1; // Ensure the tree's type is a pointer if (!ptrtype(tree->type)) fatal("* operator must be followed by an expression of pointer type"); // Prepend an A_DEREF operation to the tree tree = mkastunary(A_DEREF, value_at(tree->type), tree->ctype, tree, NULL, 0); break; case T_MINUS: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(ptp); // Prepend a A_NEGATE operation to the tree and // make the child an rvalue. Because chars are unsigned, // also widen this if needed to int so that it's signed tree->rvalue = 1; if (tree->type == P_CHAR) tree->type = P_INT; tree = mkastunary(A_NEGATE, tree->type, tree->ctype, tree, NULL, 0); break; case T_INVERT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(ptp); // Prepend a A_INVERT operation to the tree and // make the child an rvalue. tree->rvalue = 1; tree = mkastunary(A_INVERT, tree->type, tree->ctype, tree, NULL, 0); break; case T_LOGNOT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(ptp); // Prepend a A_LOGNOT operation to the tree and // make the child an rvalue. 
tree->rvalue = 1; tree = mkastunary(A_LOGNOT, tree->type, tree->ctype, tree, NULL, 0); break; case T_INC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(ptp); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("++ operator must be followed by an identifier"); // Prepend an A_PREINC operation to the tree tree = mkastunary(A_PREINC, tree->type, tree->ctype, tree, NULL, 0); break; case T_DEC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(ptp); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("-- operator must be followed by an identifier"); // Prepend an A_PREDEC operation to the tree tree = mkastunary(A_PREDEC, tree->type, tree->ctype, tree, NULL, 0); break; default: tree = postfix(ptp); } return (tree); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; struct ASTnode *ltemp, *rtemp; int ASTop; int tokentype; // Get the tree on the left. // Fetch the next token at the same time. 
left = prefix(ptp); // If we hit one of several terminating tokens, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON || tokentype == T_RBRACE) { left->rvalue = 1; return (left); } // While the precedence of this token is more than that of the // previous token precedence, or it's right associative and // equal to the previous token's precedence while ((op_precedence(tokentype) > ptp) || (rightassoc(tokentype) && op_precedence(tokentype) == ptp)) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Determine the operation to be performed on the sub-trees ASTop = binastop(tokentype); switch (ASTop) { case A_TERNARY: // Ensure we have a ':' token, scan in the expression after it match(T_COLON, ":"); ltemp = binexpr(0); // Build and return the AST for this statement. Use the middle // expression's type as the return type. XXX We should also // consider the third expression's type. return (mkastnode (A_TERNARY, right->type, right->ctype, left, right, ltemp, NULL, 0)); case A_ASSIGN: // Assignment // Make the right tree into an rvalue right->rvalue = 1; // Ensure the right's type matches the left right = modify_type(right, left->type, left->ctype, 0); if (right == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree. However, switch // left and right around, so that the right expression's // code will be generated before the left expression ltemp = left; left = right; right = ltemp; break; default: // We are not doing a ternary or assignment, so both trees should // be rvalues. Convert both trees into rvalue if they are lvalue trees left->rvalue = 1; right->rvalue = 1; // Ensure the two types are compatible by trying // to modify each tree to match the other's type. 
ltemp = modify_type(left, right->type, right->ctype, ASTop); rtemp = modify_type(right, left->type, left->ctype, ASTop); if (ltemp == NULL && rtemp == NULL) fatal("Incompatible types in binary expression"); if (ltemp != NULL) left = ltemp; if (rtemp != NULL) right = rtemp; } // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. left = mkastnode(binastop(tokentype), left->type, left->ctype, left, NULL, right, NULL, 0); // Some operators produce an int result regardless of their operands switch (binastop(tokentype)) { case A_LOGOR: case A_LOGAND: case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: left->type = P_INT; } // Update the details of the current token. // If we hit a terminating token, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON || tokentype == T_RBRACE) { left->rvalue = 1; return (left); } } // Return the tree we have when the precedence // is the same or lower left->rvalue = 1; return (left); } ================================================ FILE: 59_WDIW_pt1/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number static int labelid = 1; int genlabel(void) { return (labelid++); } static void update_line(struct ASTnode *n) { // Output the line into the assembly if we've // changed the line number in the AST node if (n->linenum != 0 && Line != n->linenum) { Line = n->linenum; cglinenum(Line); } } // Generate the code for an IF statement // and an optional ELSE clause. static int genIF(struct ASTnode *n, int looptoplabel, int loopendlabel) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. 
// When there is no ELSE clause, Lfalse _is_ // the ending label! Lfalse = genlabel(); if (n->right) Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. genAST(n->left, Lfalse, NOLABEL, NOLABEL, n->op); genfreeregs(NOREG); // Generate the true compound statement genAST(n->mid, NOLABEL, looptoplabel, loopendlabel, n->op); genfreeregs(NOREG); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOLABEL, NOLABEL, loopendlabel, n->op); genfreeregs(NOREG); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = genlabel(); Lend = genlabel(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. genAST(n->left, Lend, Lstart, Lend, n->op); genfreeregs(NOREG); // Generate the compound statement for the body genAST(n->right, NOLABEL, Lstart, Lend, n->op); genfreeregs(NOREG); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Generate the code for a SWITCH statement static int genSWITCH(struct ASTnode *n) { int *caseval, *caselabel; int Ljumptop, Lend; int i, reg, defaultlabel = 0, casecount = 0; struct ASTnode *c; // Create arrays for the case values and associated labels. // Ensure that we have at least one position in each array. caseval = (int *) malloc((n->a_intvalue + 1) * sizeof(int)); caselabel = (int *) malloc((n->a_intvalue + 1) * sizeof(int)); // Generate labels for the top of the jump table, and the // end of the switch statement. Set a default label for // the end of the switch, in case we don't have a default. 
Ljumptop = genlabel(); Lend = genlabel(); defaultlabel = Lend; // Output the code to calculate the switch condition reg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, 0); cgjump(Ljumptop); genfreeregs(reg); // Walk the right-child linked list to // generate the code for each case for (i = 0, c = n->right; c != NULL; i++, c = c->right) { // Get a label for this case. Store it // and the case value in the arrays. // Record if it is the default case. caselabel[i] = genlabel(); caseval[i] = c->a_intvalue; cglabel(caselabel[i]); if (c->op == A_DEFAULT) defaultlabel = caselabel[i]; else casecount++; // Generate the case code. Pass in the end label for the breaks. // If case has no body, we will fall into the following body. if (c->left) genAST(c->left, NOLABEL, NOLABEL, Lend, 0); genfreeregs(NOREG); } // Ensure the last case jumps past the switch table cgjump(Lend); // Now output the switch table and the end label. cgswitch(reg, casecount, Ljumptop, caselabel, caseval, defaultlabel); cglabel(Lend); return (NOREG); } // Generate the code for an // A_LOGAND or A_LOGOR operation static int gen_logandor(struct ASTnode *n) { // Generate two labels int Lfalse = genlabel(); int Lend = genlabel(); int reg; // Generate the code for the left expression // followed by the jump to the false label reg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, 0); cgboolean(reg, n->op, Lfalse); genfreeregs(NOREG); // Generate the code for the right expression // followed by the jump to the false label reg = genAST(n->right, NOLABEL, NOLABEL, NOLABEL, 0); cgboolean(reg, n->op, Lfalse); genfreeregs(reg); // We didn't jump so set the right boolean value if (n->op == A_LOGAND) { cgloadboolean(reg, 1); cgjump(Lend); cglabel(Lfalse); cgloadboolean(reg, 0); } else { cgloadboolean(reg, 0); cgjump(Lend); cglabel(Lfalse); cgloadboolean(reg, 1); } cglabel(Lend); return (reg); } // Generate the code to copy the arguments of a // function call to its parameters, then call the // function itself. 
Return the register that holds // the function's return value. static int gen_funccall(struct ASTnode *n) { struct ASTnode *gluetree = n->left; int reg; int numargs = 0; // Save the registers before we copy the arguments spill_all_regs(); // If there is a list of arguments, walk this list // from the last argument (right-hand child) to the // first while (gluetree) { // Calculate the expression's value reg = genAST(gluetree->right, NOLABEL, NOLABEL, NOLABEL, gluetree->op); // Copy this into the n'th function parameter: size is 1, 2, 3, ... cgcopyarg(reg, gluetree->a_size); // Keep the first (highest) number of arguments if (numargs == 0) numargs = gluetree->a_size; gluetree = gluetree->left; } // Call the function, clean up the stack (based on numargs), // and return its result return (cgcall(n->sym, numargs)); } // Generate code for a ternary expression static int gen_ternary(struct ASTnode *n) { int Lfalse, Lend; int reg, expreg; // Generate two labels: one for the // false expression, and one for the // end of the overall expression Lfalse = genlabel(); Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. genAST(n->left, Lfalse, NOLABEL, NOLABEL, n->op); genfreeregs(NOREG); // Get a register to hold the result of the two expressions reg = alloc_register(); // Generate the true expression and the false label. // Move the expression result into the known register. expreg = genAST(n->mid, NOLABEL, NOLABEL, NOLABEL, n->op); cgmove(expreg, reg); // Don't free the register holding the result, though! genfreeregs(reg); cgjump(Lend); cglabel(Lfalse); // Generate the false expression and the end label. // Move the expression result into the known register. expreg = genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); cgmove(expreg, reg); // Don't free the register holding the result, though! 
genfreeregs(reg); cglabel(Lend); return (reg); } // Given an AST, an optional label, and the AST op // of the parent, generate assembly code recursively. // Return the register id with the tree's final value. int genAST(struct ASTnode *n, int iflabel, int looptoplabel, int loopendlabel, int parentASTop) { int leftreg = NOREG, rightreg = NOREG; // Empty tree, do nothing if (n == NULL) return (NOREG); // Update the line number in the output update_line(n); // We have some specific AST node handling at the top // so that we don't evaluate the child sub-trees immediately switch (n->op) { case A_IF: return (genIF(n, looptoplabel, loopendlabel)); case A_WHILE: return (genWHILE(n)); case A_SWITCH: return (genSWITCH(n)); case A_FUNCCALL: return (gen_funccall(n)); case A_TERNARY: return (gen_ternary(n)); case A_LOGOR: return (gen_logandor(n)); case A_LOGAND: return (gen_logandor(n)); case A_GLUE: // Do each child statement, and free the // registers after each child if (n->left != NULL) genAST(n->left, iflabel, looptoplabel, loopendlabel, n->op); genfreeregs(NOREG); if (n->right != NULL) genAST(n->right, iflabel, looptoplabel, loopendlabel, n->op); genfreeregs(NOREG); return (NOREG); case A_FUNCTION: // Generate the function's preamble before the code // in the child sub-tree cgfuncpreamble(n->sym); genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op); cgfuncpostamble(n->sym); return (NOREG); } // General AST node handling below // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op); if (n->right) rightreg = genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdivmod(leftreg, rightreg, A_DIVIDE)); case A_MOD: return (cgdivmod(leftreg, rightreg, A_MOD)); case A_AND: return (cgand(leftreg, rightreg)); case A_OR: return 
(cgor(leftreg, rightreg)); case A_XOR: return (cgxor(leftreg, rightreg)); case A_LSHIFT: return (cgshl(leftreg, rightreg)); case A_RSHIFT: return (cgshr(leftreg, rightreg)); case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: // If the parent AST node is an A_IF, A_WHILE or A_TERNARY, // generate a compare followed by a jump. Otherwise, compare // registers and set one to 1 or 0 based on the comparison. if (parentASTop == A_IF || parentASTop == A_WHILE || parentASTop == A_TERNARY) return (cgcompare_and_jump (n->op, leftreg, rightreg, iflabel, n->left->type)); else return (cgcompare_and_set(n->op, leftreg, rightreg, n->left->type)); case A_INTLIT: return (cgloadint(n->a_intvalue, n->type)); case A_STRLIT: return (cgloadglobstr(n->a_intvalue)); case A_IDENT: // Load our value if we are an rvalue // or we are being dereferenced if (n->rvalue || parentASTop == A_DEREF) { return (cgloadvar(n->sym, n->op)); } else return (NOREG); case A_ASPLUS: case A_ASMINUS: case A_ASSTAR: case A_ASSLASH: case A_ASMOD: case A_ASSIGN: // For the '+=' and friends operators, generate suitable code // and get the register with the result. Then take the left child, // make it the right child so that we can fall into the assignment code. switch (n->op) { case A_ASPLUS: leftreg = cgadd(leftreg, rightreg); n->right = n->left; break; case A_ASMINUS: leftreg = cgsub(leftreg, rightreg); n->right = n->left; break; case A_ASSTAR: leftreg = cgmul(leftreg, rightreg); n->right = n->left; break; case A_ASSLASH: leftreg = cgdivmod(leftreg, rightreg, A_DIVIDE); n->right = n->left; break; case A_ASMOD: leftreg = cgdivmod(leftreg, rightreg, A_MOD); n->right = n->left; break; } // Now into the assignment code // Are we assigning to an identifier or through a pointer? 
switch (n->right->op) { case A_IDENT: if (n->right->sym->class == C_GLOBAL || n->right->sym->class == C_EXTERN || n->right->sym->class == C_STATIC) return (cgstorglob(leftreg, n->right->sym)); else return (cgstorlocal(leftreg, n->right->sym)); case A_DEREF: return (cgstorderef(leftreg, rightreg, n->right->type)); default: fatald("Can't A_ASSIGN in genAST(), op", n->op); } case A_WIDEN: // Widen the child's type to the parent's type return (cgwiden(leftreg, n->left->type, n->type)); case A_RETURN: cgreturn(leftreg, Functionid); return (NOREG); case A_ADDR: return (cgaddress(n->sym)); case A_DEREF: // If we are an rvalue, dereference to get the value we point at, // otherwise leave it for A_ASSIGN to store through the pointer if (n->rvalue) return (cgderef(leftreg, n->left->type)); else return (leftreg); case A_SCALE: // Small optimisation: use shift if the // scale value is a known power of two switch (n->a_size) { case 2: return (cgshlconst(leftreg, 1)); case 4: return (cgshlconst(leftreg, 2)); case 8: return (cgshlconst(leftreg, 3)); default: // Load a register with the size and // multiply the leftreg by this size rightreg = cgloadint(n->a_size, P_INT); return (cgmul(leftreg, rightreg)); } case A_POSTINC: case A_POSTDEC: // Load and decrement the variable's value into a register // and post increment/decrement it return (cgloadvar(n->sym, n->op)); case A_PREINC: case A_PREDEC: // Load and decrement the variable's value into a register // and pre increment/decrement it return (cgloadvar(n->left->sym, n->op)); case A_NEGATE: return (cgnegate(leftreg)); case A_INVERT: return (cginvert(leftreg)); case A_LOGNOT: return (cglognot(leftreg)); case A_TOBOOL: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. 
Otherwise, set the register // to 0 or 1 based on it's zeroeness or non-zeroeness return (cgboolean(leftreg, parentASTop, iflabel)); case A_BREAK: cgjump(loopendlabel); return (NOREG); case A_CONTINUE: cgjump(looptoplabel); return (NOREG); case A_CAST: return (leftreg); // Not much to do default: fatald("Unknown AST operator", n->op); } return (NOREG); // Keep -Wall happy } void genpreamble(char *filename) { cgpreamble(filename); } void genpostamble() { cgpostamble(); } void genfreeregs(int keepreg) { freeall_registers(keepreg); } void genglobsym(struct symtable *node) { cgglobsym(node); } // Generate a global string. // If append is true, append to // previous genglobstr() call. int genglobstr(char *strvalue, int append) { int l = genlabel(); cgglobstr(l, strvalue, append); return (l); } void genglobstrend(void) { cgglobstrend(); } int genprimsize(int type) { return (cgprimsize(type)); } int genalign(int type, int offset, int direction) { return (cgalign(type, offset, direction)); } ================================================ FILE: 59_WDIW_pt1/include/ctype.h ================================================ #ifndef _CTYPE_H_ # define _CTYPE_H_ int isalnum(int c); int isalpha(int c); int iscntrl(int c); int isdigit(int c); int isgraph(int c); int islower(int c); int isprint(int c); int ispunct(int c); int isspace(int c); int isupper(int c); int isxdigit(int c); int isascii(int c); int isblank(int c); int toupper(int c); int tolower(int c); #endif // _CTYPE_H_ ================================================ FILE: 59_WDIW_pt1/include/errno.h ================================================ #ifndef _ERRNO_H_ # define _ERRNO_H_ int * __errno_location(void); #define errno (* __errno_location()) #endif // _ERRNO_H_ ================================================ FILE: 59_WDIW_pt1/include/fcntl.h ================================================ #ifndef _FCNTL_H_ # define _FCNTL_H_ #define O_RDONLY 00 #define O_WRONLY 01 #define O_RDWR 02 int open(char *pathname, 
int flags); #endif // _FCNTL_H_ ================================================ FILE: 59_WDIW_pt1/include/stddef.h ================================================ #ifndef _STDDEF_H_ # define _STDDEF_H_ #ifndef NULL # define NULL (void *)0 #endif typedef long size_t; #endif //_STDDEF_H_ ================================================ FILE: 59_WDIW_pt1/include/stdio.h ================================================ #ifndef _STDIO_H_ # define _STDIO_H_ #include #ifndef NULL # define NULL (void *)0 #endif #ifndef EOF # define EOF (-1) #endif // This FILE definition will do for now typedef char * FILE; FILE *fopen(char *pathname, char *mode); size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream); size_t fwrite(void *ptr, size_t size, size_t nmemb, FILE *stream); int fclose(FILE *stream); int printf(char *format); int fprintf(FILE *stream, char *format); int sprintf(char *str, char *format); int snprintf(char *str, size_t size, char *format); int fgetc(FILE *stream); int fputc(int c, FILE *stream); int fputs(char *s, FILE *stream); int putc(int c, FILE *stream); int putchar(int c); int puts(char *s); FILE *popen(char *command, char *type); int pclose(FILE *stream); extern FILE *stdin; extern FILE *stdout; extern FILE *stderr; #endif // _STDIO_H_ ================================================ FILE: 59_WDIW_pt1/include/stdlib.h ================================================ #ifndef _STDLIB_H_ # define _STDLIB_H_ void exit(int status); void _Exit(int status); void *malloc(int size); void free(void *ptr); void *calloc(int nmemb, int size); void *realloc(void *ptr, int size); int system(char *command); #endif // _STDLIB_H_ ================================================ FILE: 59_WDIW_pt1/include/string.h ================================================ #ifndef _STRING_H_ # define _STRING_H_ char *strdup(char *s); char *strchr(char *s, int c); char *strrchr(char *s, int c); int strcmp(char *s1, char *s2); int strncmp(char *s1, char *s2, size_t n); char 
*strerror(int errnum); #endif // _STRING_H_ ================================================ FILE: 59_WDIW_pt1/include/unistd.h ================================================ #ifndef _UNISTD_H_ # define _UNISTD_H_ void _exit(int status); int unlink(char *pathname); #endif // _UNISTD_H_ ================================================ FILE: 59_WDIW_pt1/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Given a string with a '.' and at least a 1-character suffix // after the '.', change the suffix to be the given character. // Return the new string or NULL if the original string could // not be modified char *alter_suffix(char *str, char suffix) { char *posn; char *newstr; // Clone the string if ((newstr = strdup(str)) == NULL) return (NULL); // Find the '.' if ((posn = strrchr(newstr, '.')) == NULL) return (NULL); // Ensure there is a suffix posn++; if (*posn == '\0') return (NULL); // Change the suffix and NUL-terminate the string *posn = suffix; posn++; *posn = '\0'; return (newstr); } // Given an input filename, compile that file // down to assembly code. 
Return the new file's name static char *do_compile(char *filename) { char cmd[TEXTLEN]; // Change the input file's suffix to .s Outfilename = alter_suffix(filename, 's'); if (Outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .c on the end\n", filename); exit(1); } // Generate the pre-processor command snprintf(cmd, TEXTLEN, "%s %s %s", CPPCMD, INCDIR, filename); // Open up the pre-processor pipe if ((Infile = popen(cmd, "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", filename, strerror(errno)); exit(1); } Infilename = filename; // Create the output file if ((Outfile = fopen(Outfilename, "w")) == NULL) { fprintf(stderr, "Unable to create %s: %s\n", Outfilename, strerror(errno)); exit(1); } Line = 1; // Reset the scanner Linestart = 1; Putback = '\n'; clear_symtable(); // Clear the symbol table if (O_verbose) printf("compiling %s\n", filename); scan(&Token); // Get the first token from the input Peektoken.token = 0; // and set there is no lookahead token genpreamble(filename); // Output the preamble global_declarations(); // Parse the global declarations genpostamble(); // Output the postamble fclose(Outfile); // Close the output file // Dump the symbol table if requested if (O_dumpsym) { printf("Symbols for %s\n", filename); dumpsymtables(); fprintf(stdout, "\n\n"); } freestaticsyms(); // Free any static symbols in the file return (Outfilename); } // Given an input filename, assemble that file // down to object code. 
Return the object filename char *do_assemble(char *filename) { char cmd[TEXTLEN]; int err; char *outfilename = alter_suffix(filename, 'o'); if (outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .s on the end\n", filename); exit(1); } // Build the assembly command and run it #ifdef __NASM__ char *incfilename = alter_suffix(filename, 'n'); if (incfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .s on the end\n", filename); exit(1); } sprintf(cmd, "%s %s %s", ASCMD, outfilename, filename); #else snprintf(cmd, TEXTLEN, "%s %s %s", ASCMD, outfilename, filename); #endif if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Assembly of %s failed\n", filename); exit(1); } return (outfilename); } // Given a list of object files and an output filename, // link all of the object filenames together. void do_link(char *outfilename, char **objlist) { int cnt, size = TEXTLEN; char cmd[TEXTLEN], *cptr; int err; // Start with the linker command and the output file cptr = cmd; cnt = snprintf(cptr, size, "%s %s ", LDCMD, outfilename); cptr += cnt; size -= cnt; // Now append each object file while (*objlist != NULL) { cnt = snprintf(cptr, size, "%s ", *objlist); cptr += cnt; size -= cnt; objlist++; } if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Linking failed\n"); exit(1); } } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s [-vcSTM] [-o outfile] file [file ...]\n", prog); fprintf(stderr, " -v give verbose output of the compilation stages\n"); fprintf(stderr, " -c generate object files but don't link them\n"); fprintf(stderr, " -S generate assembly files but don't link them\n"); fprintf(stderr, " -T dump the AST trees for each input file\n"); fprintf(stderr, " -M dump the symbol table for each input file\n"); fprintf(stderr, " -o outfile, produce the outfile executable file\n"); exit(1); } // Main program: check 
arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. enum { MAXOBJ = 100 }; int main(int argc, char **argv) { char *outfilename = AOUT; char *asmfile, *objfile; char *objlist[MAXOBJ]; int i, j, objcnt = 0; // Initialise our variables O_dumpAST = 0; O_dumpsym = 0; O_keepasm = 0; O_assemble = 0; O_verbose = 0; O_dolink = 1; // Scan for command-line options for (i = 1; i < argc; i++) { // No leading '-', stop scanning for options if (*argv[i] != '-') break; // For each option in this argument for (j = 1; (*argv[i] == '-') && argv[i][j]; j++) { switch (argv[i][j]) { case 'o': outfilename = argv[++i]; // Save & skip to next argument break; case 'T': O_dumpAST = 1; break; case 'M': O_dumpsym = 1; break; case 'c': O_assemble = 1; O_keepasm = 0; O_dolink = 0; break; case 'S': O_keepasm = 1; O_assemble = 0; O_dolink = 0; break; case 'v': O_verbose = 1; break; default: usage(argv[0]); } } } // Ensure we have at lease one input file argument if (i >= argc) usage(argv[0]); // Work on each input file in turn while (i < argc) { asmfile = do_compile(argv[i]); // Compile the source file if (O_dolink || O_assemble) { objfile = do_assemble(asmfile); // Assemble it to object forma if (objcnt == (MAXOBJ - 2)) { fprintf(stderr, "Too many object files for the compiler to handle\n"); exit(1); } objlist[objcnt++] = objfile; // Add the object file's name objlist[objcnt] = NULL; // to the list of object files } if (!O_keepasm) // Remove the assembly file if unlink(asmfile); // we don't need to keep it i++; } // Now link all the object files together if (O_dolink) { do_link(outfilename, objlist); // If we don't need to keep the object // files, then remove them if (!O_assemble) { for (i = 0; objlist[i] != NULL; i++) unlink(objlist[i]); } } return (0); } ================================================ FILE: 59_WDIW_pt1/misc.c ================================================ #include "defs.h" #include "data.h" 
#include "decl.h" #include #include // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current token is t, // and fetch the next token. Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Match a comma and fetch the next token void comma(void) { match(T_COMMA, "comma"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d of %s\n", s, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d of %s\n", s1, s2, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d of %s\n", s, d, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d of %s\n", s, c, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } ================================================ FILE: 59_WDIW_pt1/opt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST Tree Optimisation Code // Copyright (c) 2019 Warren Toomey, GPL3 // Fold an AST tree with a binary operator // and two A_INTLIT children. Return either // the original tree or a new leaf node. 
static struct ASTnode *fold2(struct ASTnode *n) { int val, leftval, rightval; // Get the values from each child leftval = n->left->a_intvalue; rightval = n->right->a_intvalue; // Perform some of the binary operations. // For any AST op we can't do, return // the original tree. switch (n->op) { case A_ADD: val = leftval + rightval; break; case A_SUBTRACT: val = leftval - rightval; break; case A_MULTIPLY: val = leftval * rightval; break; case A_DIVIDE: // Don't try to divide by zero. if (rightval == 0) return (n); val = leftval / rightval; break; default: return (n); } // Return a leaf node with the new value return (mkastleaf(A_INTLIT, n->type, NULL, NULL, val)); } // Fold an AST tree with a unary operator // and one INTLIT children. Return either // the original tree or a new leaf node. static struct ASTnode *fold1(struct ASTnode *n) { int val; // Get the child value. Do the // operation if recognised. // Return the new leaf node. val = n->left->a_intvalue; switch (n->op) { case A_WIDEN: break; case A_INVERT: val = ~val; break; case A_LOGNOT: val = !val; break; default: return (n); } // Return a leaf node with the new value return (mkastleaf(A_INTLIT, n->type, NULL, NULL, val)); } // Attempt to do constant folding on // the AST tree with the root node n static struct ASTnode *fold(struct ASTnode *n) { if (n == NULL) return (NULL); // Fold on the left child, then // do the same on the right child n->left = fold(n->left); n->right = fold(n->right); // If both children are A_INTLITs, do a fold2() if (n->left && n->left->op == A_INTLIT) { if (n->right && n->right->op == A_INTLIT) n = fold2(n); else // If only the left is A_INTLIT, do a fold1() n = fold1(n); } // Return the possibly modified tree return (n); } // Optimise an AST tree by // constant folding in all sub-trees struct ASTnode *optimise(struct ASTnode *n) { n = fold(n); return (n); } ================================================ FILE: 59_WDIW_pt1/scan.c ================================================ 
#include "defs.h"
#include "data.h"
#include "decl.h"

// Lexical scanning
// Copyright (c) 2019 Warren Toomey, GPL3

// Return the position of character c
// in string s, or -1 if c not found
static int chrpos(char *s, int c) {
  int i;
  for (i = 0; s[i] != '\0'; i++)
    if (s[i] == (char) c)
      return (i);
  return (-1);
}

// Get the next character from the input file.
// A '#' at the start of a line is treated as a pre-processor
// line marker: the line number and file name that follow it
// update Line and Infilename, and the rest of that line is skipped.
static int next(void) {
  int c, l;

  if (Putback) {		// Use the character put
    c = Putback;		// back if there is one
    Putback = 0;
    return (c);
  }

  c = fgetc(Infile);		// Read from input file

  while (Linestart && c == '#') {	// We've hit a pre-processor statement
    Linestart = 0;		// No longer at the start of the line
    scan(&Token);		// Get the line number into l
    if (Token.token != T_INTLIT)
      fatals("Expecting pre-processor line number, got:", Text);
    l = Token.intvalue;

    scan(&Token);		// Get the filename in Text
    if (Token.token != T_STRLIT)
      fatals("Expecting pre-processor file name, got:", Text);

    if (Text[0] != '<') {	// If this is a real filename
      if (strcmp(Text, Infilename))	// and not the one we have now
	Infilename = strdup(Text);	// save it. Then update the line num
      Line = l;
    }

    // Skip to the end of the line. Also stop on EOF: a marker on
    // the last line with no trailing newline would otherwise make
    // this loop spin forever, as fgetc() keeps returning EOF.
    while ((c = fgetc(Infile)) != '\n' && c != EOF)
      ;
    c = fgetc(Infile);		// and get the next character
    Linestart = 1;		// Now back at the start of the line
  }

  Linestart = 0;		// No longer at the start of the line
  if ('\n' == c) {
    Line++;			// Increment line count
    Linestart = 1;		// Now back at the start of the line
  }
  return (c);
}

// Put back an unwanted character
static void putback(int c) {
  Putback = c;
}

// Skip past input that we don't need to deal with,
// i.e. whitespace, newlines. Return the first
// character we do need to deal with.
static int skip(void) {
  int ch;

  // Keep reading until we see something that isn't
  // a space, tab, newline, carriage return or form feed
  do {
    ch = next();
  } while (' ' == ch || '\t' == ch || '\n' == ch || '\r' == ch || '\f' == ch);

  return (ch);
}

// Read in a hexadecimal constant from the input
static int hexchar(void) {
  int ch, value = 0, ndigits = 0;

  // Accumulate hex digits for as long as we see them
  for (ch = next(); isxdigit(ch); ch = next()) {
    value = value * 16 + chrpos("0123456789abcdef", tolower(ch));
    ndigits++;
  }

  // The character that ended the loop isn't ours, put it back
  putback(ch);

  // There has to be at least one digit, and the
  // result has to fit into a single byte
  if (ndigits == 0)
    fatal("missing digits after '\\x'");
  if (value > 255)
    fatal("value out of range after '\\x'");

  return (value);
}

// Return the next character from a character
// or string literal
static int scanch(void) {
  int ndigits, value;
  int ch = next();

  // Anything that doesn't start with a
  // backslash is just an ordinary old character
  if (ch != '\\')
    return (ch);

  // Interpret the metacharacter after the backslash
  ch = next();
  switch (ch) {
    case 'a':
      return ('\a');
    case 'b':
      return ('\b');
    case 'f':
      return ('\f');
    case 'n':
      return ('\n');
    case 'r':
      return ('\r');
    case 't':
      return ('\t');
    case 'v':
      return ('\v');
    case '\\':
      return ('\\');
    case '"':
      return ('"');
    case '\'':
      return ('\'');

    // Octal constant: consume at most three octal digits,
    // building the value up as we go.
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
      ndigits = 0;
      value = 0;
      while (ndigits < 3 && isdigit(ch) && ch < '8') {
	value = value * 8 + (ch - '0');
	ndigits++;
	ch = next();
      }
      putback(ch);		// Put back the character after the constant
      return (value);

    case 'x':
      return (hexchar());

    default:
      fatalc("unknown escape sequence", ch);
  }

  return (ch);
}

// Scan and return an integer literal
// value from the input file.
static int scanint(int c) { int k, val = 0, radix = 10; // Assume the radix is 10, but if it starts with 0 if (c == '0') { // and the next character is 'x', it's radix 16 if ((c = next()) == 'x') { radix = 16; c = next(); } else // Otherwise, it's radix 8 radix = 8; } // Convert each character into an int value while ((k = chrpos("0123456789abcdef", tolower(c))) >= 0) { if (k >= radix) fatalc("invalid digit in integer literal", c); val = val * radix + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan in a string literal from the input file, // and store it in buf[]. Return the length of // the string. static int scanstr(char *buf) { int i, c; // Loop while we have enough buffer space for (i = 0; i < TEXTLEN - 1; i++) { // Get the next char and append to buf // Return when we hit the ending double quote if ((c = scanch()) == '"') { buf[i] = 0; return (i); } buf[i] = (char)c; } // Ran out of buf[] space fatal("String literal too long"); return (0); } // Scan an identifier from the input file and // store it in buf[]. Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { fatal("Identifier too long"); } else if (i < lim - 1) { buf[i++] = (char)c; } c = next(); } // We hit a non-valid character, put it back. // NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. 
static int keyword(char *s) { switch (*s) { case 'b': if (!strcmp(s, "break")) return (T_BREAK); break; case 'c': if (!strcmp(s, "case")) return (T_CASE); if (!strcmp(s, "char")) return (T_CHAR); if (!strcmp(s, "continue")) return (T_CONTINUE); break; case 'd': if (!strcmp(s, "default")) return (T_DEFAULT); break; case 'e': if (!strcmp(s, "else")) return (T_ELSE); if (!strcmp(s, "enum")) return (T_ENUM); if (!strcmp(s, "extern")) return (T_EXTERN); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'l': if (!strcmp(s, "long")) return (T_LONG); break; case 'r': if (!strcmp(s, "return")) return (T_RETURN); break; case 's': if (!strcmp(s, "sizeof")) return (T_SIZEOF); if (!strcmp(s, "static")) return (T_STATIC); if (!strcmp(s, "struct")) return (T_STRUCT); if (!strcmp(s, "switch")) return (T_SWITCH); break; case 't': if (!strcmp(s, "typedef")) return (T_TYPEDEF); break; case 'u': if (!strcmp(s, "union")) return (T_UNION); break; case 'v': if (!strcmp(s, "void")) return (T_VOID); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; } return (0); } // List of token strings, for debugging purposes char *Tstring[] = { "EOF", "=", "+=", "-=", "*=", "/=", "%=", "?", "||", "&&", "|", "^", "&", "==", "!=", ",", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "%", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", "default", "sizeof", "static", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":" }; // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. 
int scan(struct token *t) { int c, tokentype; // If we have a lookahead token, return this token if (Peektoken.token != 0) { t->token = Peektoken.token; t->tokstr = Peektoken.tokstr; t->intvalue = Peektoken.intvalue; Peektoken.token = 0; return (1); } // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': if ((c = next()) == '+') { t->token = T_INC; } else if (c == '=') { t->token = T_ASPLUS; } else { putback(c); t->token = T_PLUS; } break; case '-': if ((c = next()) == '-') { t->token = T_DEC; } else if (c == '>') { t->token = T_ARROW; } else if (c == '=') { t->token = T_ASMINUS; } else if (isdigit(c)) { // Negative int literal t->intvalue = -scanint(c); t->token = T_INTLIT; } else { putback(c); t->token = T_MINUS; } break; case '*': if ((c = next()) == '=') { t->token = T_ASSTAR; } else { putback(c); t->token = T_STAR; } break; case '/': if ((c = next()) == '=') { t->token = T_ASSLASH; } else { putback(c); t->token = T_SLASH; } break; case '%': if ((c = next()) == '=') { t->token = T_ASMOD; } else { putback(c); t->token = T_MOD; } break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case '[': t->token = T_LBRACKET; break; case ']': t->token = T_RBRACKET; break; case '~': t->token = T_INVERT; break; case '^': t->token = T_XOR; break; case ',': t->token = T_COMMA; break; case '.': t->token = T_DOT; break; case ':': t->token = T_COLON; break; case '?': t->token = T_QUESTION; break; case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { putback(c); t->token = T_LOGNOT; } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else if (c == '<') { t->token = T_LSHIFT; } else { putback(c); t->token = T_LT; } break; case 
'>': if ((c = next()) == '=') { t->token = T_GE; } else if (c == '>') { t->token = T_RSHIFT; } else { putback(c); t->token = T_GT; } break; case '&': if ((c = next()) == '&') { t->token = T_LOGAND; } else { putback(c); t->token = T_AMPER; } break; case '|': if ((c = next()) == '|') { t->token = T_LOGOR; } else { putback(c); t->token = T_OR; } break; case '\'': // If it's a quote, scan in the // literal character value and // the trailing quote t->intvalue = scanch(); t->token = T_INTLIT; if (next() != '\'') fatal("Expected '\\'' at end of char literal"); break; case '"': // Scan in a literal string scanstr(Text); t->token = T_STRLIT; break; default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if ((tokentype = keyword(Text)) != 0) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token t->tokstr = Tstring[t->token]; return (1); } ================================================ FILE: 59_WDIW_pt1/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // Prototypes static struct ASTnode *single_statement(void); // compound_statement: // empty, i.e. 
no statement // | statement // | statement statements // ; // // statement: declaration // | expression_statement // | function_call // | if_statement // | while_statement // | for_statement // | return_statement // ; // if_statement: if_head // | if_head 'else' statement // ; // // if_head: 'if' '(' true_false_expression ')' statement ; // // Parse an IF statement including any // optional ELSE clause and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST->ctype, condAST, NULL, 0); rparen(); // Get the AST for the statement trueAST = single_statement(); // If we have an 'else', skip it // and get the AST for the statement if (Token.token == T_ELSE) { scan(&Token); falseAST = single_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, P_NONE, NULL, condAST, trueAST, falseAST, NULL, 0)); } // while_statement: 'while' '(' true_false_expression ')' statement ; // // Parse a WHILE statement and return its AST static struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST->ctype, condAST, NULL, 0); rparen(); // Get the AST for the statement. 
// Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Build and return the AST for this statement return (mkastnode(A_WHILE, P_NONE, NULL, condAST, NULL, bodyAST, NULL, 0)); } // for_statement: 'for' '(' expression_list ';' // true_false_expression ';' // expression_list ')' statement ; // // Parse a FOR statement and return its AST static struct ASTnode *for_statement(void) { struct ASTnode *condAST, *bodyAST; struct ASTnode *preopAST, *postopAST; struct ASTnode *tree; // Ensure we have 'for' '(' match(T_FOR, "for"); lparen(); // Get the pre_op expression and the ';' preopAST = expression_list(T_SEMI); semi(); // Get the condition and the ';'. // Force a non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST->ctype, condAST, NULL, 0); semi(); // Get the post_op expression and the ')' postopAST = expression_list(T_RPAREN); rparen(); // Get the statement which is the body // Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Glue the statement and the postop tree tree = mkastnode(A_GLUE, P_NONE, NULL, bodyAST, NULL, postopAST, NULL, 0); // Make a WHILE loop with the condition and this new body tree = mkastnode(A_WHILE, P_NONE, NULL, condAST, NULL, tree, NULL, 0); // And glue the preop tree to the A_WHILE tree return (mkastnode(A_GLUE, P_NONE, NULL, preopAST, NULL, tree, NULL, 0)); } // return_statement: 'return' '(' expression ')' ; // // Parse a return statement and return its AST static struct ASTnode *return_statement(void) { struct ASTnode *tree= NULL; // Ensure we have 'return' match(T_RETURN, "return"); // See if we have a return value if (Token.token == T_LPAREN) { // Can't return a value if function returns P_VOID if (Functionid->type == P_VOID) fatal("Can't return from a void function"); // Skip the left parenthesis lparen(); // Parse 
the following expression tree = binexpr(0); // Ensure this is compatible with the function's type tree = modify_type(tree, Functionid->type, Functionid->ctype, 0); if (tree == NULL) fatal("Incompatible type to return"); // Get the ')' rparen(); } // Add on the A_RETURN node tree = mkastunary(A_RETURN, P_NONE, NULL, tree, NULL, 0); // Get the ';' semi(); return (tree); } // break_statement: 'break' ; // // Parse a break statement and return its AST static struct ASTnode *break_statement(void) { if (Looplevel == 0 && Switchlevel == 0) fatal("no loop or switch to break out from"); scan(&Token); semi(); return (mkastleaf(A_BREAK, P_NONE, NULL, NULL, 0)); } // continue_statement: 'continue' ; // // Parse a continue statement and return its AST static struct ASTnode *continue_statement(void) { if (Looplevel == 0) fatal("no loop to continue to"); scan(&Token); semi(); return (mkastleaf(A_CONTINUE, P_NONE, NULL, NULL, 0)); } // Parse a switch statement and return its AST static struct ASTnode *switch_statement(void) { struct ASTnode *left, *body, *n, *c; struct ASTnode *casetree = NULL, *casetail; int inloop = 1, casecount = 0; int seendefault = 0; int ASTop, casevalue; // Skip the 'switch' and '(' scan(&Token); lparen(); // Get the switch expression, the ')' and the '{' left = binexpr(0); rparen(); lbrace(); // Ensure that this is of int type if (!inttype(left->type)) fatal("Switch expression is not of integer type"); // Build an A_SWITCH subtree with the expression as // the child n = mkastunary(A_SWITCH, P_NONE, NULL, left, NULL, 0); // Now parse the cases Switchlevel++; while (inloop) { switch (Token.token) { // Leave the loop when we hit a '}' case T_RBRACE: if (casecount == 0) fatal("No cases in switch"); inloop = 0; break; case T_CASE: case T_DEFAULT: // Ensure this isn't after a previous 'default' if (seendefault) fatal("case or default after existing default"); // Set the AST operation. 
Scan the case value if required if (Token.token == T_DEFAULT) { ASTop = A_DEFAULT; seendefault = 1; scan(&Token); } else { ASTop = A_CASE; scan(&Token); left = binexpr(0); // Ensure the case value is an integer literal if (left->op != A_INTLIT) fatal("Expecting integer literal for case value"); casevalue = left->a_intvalue; // Walk the list of existing case values to ensure // that there isn't a duplicate case value for (c = casetree; c != NULL; c = c->right) if (casevalue == c->a_intvalue) fatal("Duplicate case value"); } // Scan the ':' and increment the casecount match(T_COLON, ":"); casecount++; // If the next token is a T_CASE, the existing case will fall // into the next case. Otherwise, parse the case body. if (Token.token == T_CASE) body = NULL; else body = compound_statement(1); // Build a sub-tree with any compound statement as the left child // and link it in to the growing A_CASE tree if (casetree == NULL) { casetree = casetail = mkastunary(ASTop, P_NONE, NULL, body, NULL, casevalue); } else { casetail->right = mkastunary(ASTop, P_NONE, NULL, body, NULL, casevalue); casetail = casetail->right; } break; default: fatals("Unexpected token in switch", Token.tokstr); } } Switchlevel--; // We have a sub-tree with the cases and any default. Put the // case count into the A_SWITCH node and attach the case tree. n->a_intvalue = casecount; n->right = casetree; rbrace(); return (n); } // Parse a single statement and return its AST. static struct ASTnode *single_statement(void) { struct ASTnode *stmt; struct symtable *ctype; int linenum= Line; switch (Token.token) { case T_SEMI: // An empty statement semi(); break; case T_LBRACE: // We have a '{', so this is a compound statement lbrace(); stmt = compound_statement(0); stmt->linenum= linenum; rbrace(); return (stmt); case T_IDENT: // We have to see if the identifier matches a typedef. // If not, treat it as an expression. // Otherwise, fall down to the parse_type() call. 
if (findtypedef(Text) == NULL) { stmt = binexpr(0); stmt->linenum= linenum; semi(); return (stmt); } case T_CHAR: case T_INT: case T_LONG: case T_STRUCT: case T_UNION: case T_ENUM: case T_TYPEDEF: // The beginning of a variable declaration list. declaration_list(&ctype, C_LOCAL, T_SEMI, T_EOF, &stmt); semi(); return (stmt); // Any assignments from the declarations case T_IF: stmt= if_statement(); stmt->linenum= linenum; return(stmt); case T_WHILE: stmt= while_statement(); stmt->linenum= linenum; return(stmt); case T_FOR: stmt= for_statement(); stmt->linenum= linenum; return(stmt); case T_RETURN: stmt= return_statement(); stmt->linenum= linenum; return(stmt); case T_BREAK: stmt= break_statement(); stmt->linenum= linenum; return(stmt); case T_CONTINUE: stmt= continue_statement(); stmt->linenum= linenum; return(stmt); case T_SWITCH: stmt= switch_statement(); stmt->linenum= linenum; return(stmt); default: // For now, see if this is an expression. // This catches assignment statements. stmt = binexpr(0); stmt->linenum= linenum; semi(); return (stmt); } return (NULL); // Keep -Wall happy } // Parse a compound statement // and return its AST. If inswitch is true, // we look for a '}', 'case' or 'default' token // to end the parsing. Otherwise, look for // just a '}' to end the parsing. struct ASTnode *compound_statement(int inswitch) { struct ASTnode *left = NULL; struct ASTnode *tree; while (1) { // Leave if we've hit the end token. 
We do this first to allow // an empty compound statement if (Token.token == T_RBRACE) return (left); if (inswitch && (Token.token == T_CASE || Token.token == T_DEFAULT)) return (left); // Parse a single statement tree = single_statement(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, P_NONE, NULL, left, NULL, tree, NULL, 0); } } return (NULL); // Keep -Wall happy } ================================================ FILE: 59_WDIW_pt1/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 // Append a node to the singly-linked list pointed to by head or tail void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node) { // Check for valid pointers if (head == NULL || tail == NULL || node == NULL) fatal("Either head, tail or node is NULL in appendsym"); // Append to the list if (*tail) { (*tail)->next = node; *tail = node; } else *head = *tail = node; node->next = NULL; } // Create a symbol node to be added to a symbol table list. // Set up the node's: // + type: char, int etc. // + ctype: composite type pointer for struct/union // + structural type: var, function, array etc. 
// + size: number of elements, or endlabel: end label for a function // + posn: Position information for local symbols // Return a pointer to the new node struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn) { // Get a new node struct symtable *node = (struct symtable *) malloc(sizeof(struct symtable)); if (node == NULL) fatal("Unable to malloc a symbol table node in newsym"); // Fill in the values if (name == NULL) node->name = NULL; else node->name = strdup(name); node->type = type; node->ctype = ctype; node->stype = stype; node->class = class; node->nelems = nelems; // For pointers and integer types, set the size // of the symbol. structs and union declarations // manually set this up themselves. if (ptrtype(type) || inttype(type)) node->size = nelems * typesize(type, ctype); node->st_posn = posn; node->next = NULL; node->member = NULL; node->initlist = NULL; #ifdef __NASM__ node->extinit = 0; #endif return (node); } // Add a symbol to the global symbol list struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn) { struct symtable *sym = newsym(name, type, ctype, stype, class, nelems, posn); // For structs and unions, copy the size from the type node if (type == P_STRUCT || type == P_UNION) sym->size = ctype->size; appendsym(&Globhead, &Globtail, sym); return (sym); } // Add a symbol to the local symbol list struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int nelems) { struct symtable *sym = newsym(name, type, ctype, stype, C_LOCAL, nelems, 0); // For structs and unions, copy the size from the type node if (type == P_STRUCT || type == P_UNION) sym->size = ctype->size; appendsym(&Loclhead, &Locltail, sym); return (sym); } // Add a symbol to the parameter list struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype) { struct symtable *sym = newsym(name, type, ctype, stype, C_PARAM, 1, 0); 
appendsym(&Parmhead, &Parmtail, sym); return (sym); } // Add a symbol to the temporary member list struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int nelems) { struct symtable *sym = newsym(name, type, ctype, stype, C_MEMBER, nelems, 0); // For structs and unions, copy the size from the type node if (type == P_STRUCT || type == P_UNION) sym->size = ctype->size; appendsym(&Membhead, &Membtail, sym); return (sym); } // Add a struct to the struct list struct symtable *addstruct(char *name) { struct symtable *sym = newsym(name, P_STRUCT, NULL, 0, C_STRUCT, 0, 0); appendsym(&Structhead, &Structtail, sym); return (sym); } // Add a struct to the union list struct symtable *addunion(char *name) { struct symtable *sym = newsym(name, P_UNION, NULL, 0, C_UNION, 0, 0); appendsym(&Unionhead, &Uniontail, sym); return (sym); } // Add an enum type or value to the enum list. // Class is C_ENUMTYPE or C_ENUMVAL. // Use posn to store the int value. struct symtable *addenum(char *name, int class, int value) { struct symtable *sym = newsym(name, P_INT, NULL, 0, class, 0, value); appendsym(&Enumhead, &Enumtail, sym); return (sym); } // Add a typedef to the typedef list struct symtable *addtypedef(char *name, int type, struct symtable *ctype) { struct symtable *sym = newsym(name, type, ctype, 0, C_TYPEDEF, 0, 0); appendsym(&Typehead, &Typetail, sym); return (sym); } // Search for a symbol in a specific list. // Return a pointer to the found node or NULL if not found. // If class is not zero, also match on the given class static struct symtable *findsyminlist(char *s, struct symtable *list, int class) { for (; list != NULL; list = list->next) if ((list->name != NULL) && !strcmp(s, list->name)) if (class == 0 || class == list->class) return (list); return (NULL); } // Determine if the symbol s is in the global symbol table. // Return a pointer to the found node or NULL if not found. 
struct symtable *findglob(char *s) { return (findsyminlist(s, Globhead, 0)); } // Determine if the symbol s is in the local symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findlocl(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } return (findsyminlist(s, Loclhead, 0)); } // Determine if the symbol s is in the symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findsymbol(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } // Otherwise, try the local and global symbol lists node = findsyminlist(s, Loclhead, 0); if (node) return (node); return (findsyminlist(s, Globhead, 0)); } // Find a member in the member list // Return a pointer to the found node or NULL if not found. struct symtable *findmember(char *s) { return (findsyminlist(s, Membhead, 0)); } // Find a struct in the struct list // Return a pointer to the found node or NULL if not found. struct symtable *findstruct(char *s) { return (findsyminlist(s, Structhead, 0)); } // Find a struct in the union list // Return a pointer to the found node or NULL if not found. struct symtable *findunion(char *s) { return (findsyminlist(s, Unionhead, 0)); } // Find an enum type in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumtype(char *s) { return (findsyminlist(s, Enumhead, C_ENUMTYPE)); } // Find an enum value in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumval(char *s) { return (findsyminlist(s, Enumhead, C_ENUMVAL)); } // Find a type in the tyedef list // Return a pointer to the found node or NULL if not found. 
struct symtable *findtypedef(char *s) { return (findsyminlist(s, Typehead, 0)); } // Reset the contents of the symbol table void clear_symtable(void) { Globhead = Globtail = NULL; Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Membhead = Membtail = NULL; Structhead = Structtail = NULL; Unionhead = Uniontail = NULL; Enumhead = Enumtail = NULL; Typehead = Typetail = NULL; } // Clear all the entries in the local symbol table void freeloclsyms(void) { Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Functionid = NULL; } // Remove all static symbols from the global symbol table void freestaticsyms(void) { // g points at current node, prev at the previous one struct symtable *g, *prev = NULL; // Walk the global table looking for static entries for (g = Globhead; g != NULL; g = g->next) { if (g->class == C_STATIC) { // If there's a previous node, rearrange the prev pointer // to skip over the current node. If not, g is the head, // so do the same to Globhead if (prev != NULL) prev->next = g->next; else Globhead->next = g->next; // If g is the tail, point Globtail at the previous node // (if there is one), or Globhead if (g == Globtail) { if (prev != NULL) Globtail = prev; else Globtail = Globhead; } } } // Point prev at g before we move up to the next node prev = g; } // Dump a single symbol static void dumpsym(struct symtable *sym, int indent) { int i; for (i = 0; i < indent; i++) printf(" "); switch (sym->type & (~0xf)) { case P_VOID: printf("void "); break; case P_CHAR: printf("char "); break; case P_INT: printf("int "); break; case P_LONG: printf("long "); break; case P_STRUCT: if (sym->ctype != NULL) printf("struct %s ", sym->ctype->name); else printf("struct %s ", sym->name); break; case P_UNION: if (sym->ctype != NULL) printf("union %s ", sym->ctype->name); else printf("union %s ", sym->name); break; default: printf("unknown type "); } for (i = 0; i < (sym->type & 0xf); i++) printf("*"); printf("%s", sym->name); switch (sym->stype) { case 
S_VARIABLE: break; case S_FUNCTION: printf("()"); break; case S_ARRAY: printf("[]"); break; default: printf(" unknown stype"); } switch (sym->class) { case C_GLOBAL: printf(": global"); break; case C_LOCAL: printf(": local"); break; case C_PARAM: printf(": param"); break; case C_EXTERN: printf(": extern"); break; case C_STATIC: printf(": static"); break; case C_STRUCT: printf(": struct"); break; case C_UNION: printf(": union"); break; case C_MEMBER: printf(": member"); break; case C_ENUMTYPE: printf(": enumtype"); break; case C_ENUMVAL: printf(": enumval"); break; case C_TYPEDEF: printf(": typedef"); break; default: printf(": unknown class"); } switch (sym->stype) { case S_VARIABLE: if (sym->class == C_ENUMVAL) printf(", value %d\n", sym->st_posn); else printf(", size %d\n", sym->size); break; case S_FUNCTION: printf(", %d params\n", sym->nelems); break; case S_ARRAY: printf(", %d elems, size %d\n", sym->nelems, sym->size); break; } switch (sym->type & (~0xf)) { case P_STRUCT: case P_UNION: dumptable(sym->member, NULL, 4); } switch (sym->stype) { case S_FUNCTION: dumptable(sym->member, NULL, 4); } } // Dump one symbol table void dumptable(struct symtable *head, char *name, int indent) { struct symtable *sym; if (head != NULL && name != NULL) printf("%s\n--------\n", name); for (sym = head; sym != NULL; sym = sym->next) dumpsym(sym, indent); } void dumpsymtables(void) { dumptable(Globhead, "Global", 0); printf("\n"); dumptable(Enumhead, "Enums", 0); printf("\n"); dumptable(Typehead, "Typedefs", 0); } ================================================ FILE: 59_WDIW_pt1/tests/err.input031.c ================================================ Expecting a primary expression, got token:+ on line 5 of input031.c ================================================ FILE: 59_WDIW_pt1/tests/err.input032.c ================================================ Unknown variable or function:pizza on line 4 of input032.c ================================================ FILE: 
59_WDIW_pt1/tests/err.input033.c ================================================ Incompatible type to return on line 4 of input033.c ================================================ FILE: 59_WDIW_pt1/tests/err.input034.c ================================================ For now, declaration of non-global arrays is not implemented on line 4 of input034.c ================================================ FILE: 59_WDIW_pt1/tests/err.input035.c ================================================ Duplicate local variable declaration:a on line 4 of input035.c ================================================ FILE: 59_WDIW_pt1/tests/err.input036.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input036.c ================================================ FILE: 59_WDIW_pt1/tests/err.input037.c ================================================ Expected:comma on line 3 of input037.c ================================================ FILE: 59_WDIW_pt1/tests/err.input038.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input038.c ================================================ FILE: 59_WDIW_pt1/tests/err.input039.c ================================================ No statements in function with non-void type on line 4 of input039.c ================================================ FILE: 59_WDIW_pt1/tests/err.input040.c ================================================ No return for function with non-void type on line 4 of input040.c ================================================ FILE: 59_WDIW_pt1/tests/err.input041.c ================================================ Can't return from a void function on line 3 of input041.c ================================================ FILE: 59_WDIW_pt1/tests/err.input042.c ================================================ Unknown variable or function:fred on line 3 of input042.c ================================================ FILE: 
59_WDIW_pt1/tests/err.input043.c ================================================ Unknown variable or function:b on line 3 of input043.c ================================================ FILE: 59_WDIW_pt1/tests/err.input044.c ================================================ Unknown variable or function:z on line 3 of input044.c ================================================ FILE: 59_WDIW_pt1/tests/err.input045.c ================================================ & operator must be followed by an identifier on line 3 of input045.c ================================================ FILE: 59_WDIW_pt1/tests/err.input046.c ================================================ * operator must be followed by an expression of pointer type on line 3 of input046.c ================================================ FILE: 59_WDIW_pt1/tests/err.input047.c ================================================ ++ operator must be followed by an identifier on line 3 of input047.c ================================================ FILE: 59_WDIW_pt1/tests/err.input048.c ================================================ -- operator must be followed by an identifier on line 3 of input048.c ================================================ FILE: 59_WDIW_pt1/tests/err.input049.c ================================================ Incompatible expression in assignment on line 6 of input049.c ================================================ FILE: 59_WDIW_pt1/tests/err.input050.c ================================================ Incompatible types in binary expression on line 6 of input050.c ================================================ FILE: 59_WDIW_pt1/tests/err.input051.c ================================================ Expected '\'' at end of char literal on line 4 of input051.c ================================================ FILE: 59_WDIW_pt1/tests/err.input052.c ================================================ Unrecognised character:$ on line 5 of input052.c 
================================================ FILE: 59_WDIW_pt1/tests/err.input056.c ================================================ unknown struct/union type:var1 on line 2 of input056.c ================================================ FILE: 59_WDIW_pt1/tests/err.input057.c ================================================ previously defined struct/union:fred on line 2 of input057.c ================================================ FILE: 59_WDIW_pt1/tests/err.input059.c ================================================ Unknown variable or function:y on line 3 of input059.c ================================================ FILE: 59_WDIW_pt1/tests/err.input060.c ================================================ Expression is not a struct/union on line 3 of input060.c ================================================ FILE: 59_WDIW_pt1/tests/err.input061.c ================================================ Expression is not a pointer to a struct/union on line 3 of input061.c ================================================ FILE: 59_WDIW_pt1/tests/err.input064.c ================================================ undeclared enum type::fred on line 1 of input064.c ================================================ FILE: 59_WDIW_pt1/tests/err.input065.c ================================================ enum type redeclared::fred on line 2 of input065.c ================================================ FILE: 59_WDIW_pt1/tests/err.input066.c ================================================ enum value redeclared::z on line 2 of input066.c ================================================ FILE: 59_WDIW_pt1/tests/err.input068.c ================================================ redefinition of typedef:FOO on line 2 of input068.c ================================================ FILE: 59_WDIW_pt1/tests/err.input069.c ================================================ unknown type:FLOO on line 2 of input069.c ================================================ FILE: 
59_WDIW_pt1/tests/err.input072.c ================================================ no loop or switch to break out from on line 1 of input072.c ================================================ FILE: 59_WDIW_pt1/tests/err.input073.c ================================================ no loop to continue to on line 1 of input073.c ================================================ FILE: 59_WDIW_pt1/tests/err.input075.c ================================================ Unexpected token in switch:if on line 4 of input075.c ================================================ FILE: 59_WDIW_pt1/tests/err.input076.c ================================================ No cases in switch on line 3 of input076.c ================================================ FILE: 59_WDIW_pt1/tests/err.input077.c ================================================ case or default after existing default on line 6 of input077.c ================================================ FILE: 59_WDIW_pt1/tests/err.input078.c ================================================ case or default after existing default on line 6 of input078.c ================================================ FILE: 59_WDIW_pt1/tests/err.input079.c ================================================ Duplicate case value on line 6 of input079.c ================================================ FILE: 59_WDIW_pt1/tests/err.input085.c ================================================ Bad type in parameter list on line 1 of input085.c ================================================ FILE: 59_WDIW_pt1/tests/err.input086.c ================================================ Function definition not at global level on line 2 of input086.c ================================================ FILE: 59_WDIW_pt1/tests/err.input087.c ================================================ Bad type in member list on line 4 of input087.c ================================================ FILE: 59_WDIW_pt1/tests/err.input092.c ================================================ Type 
mismatch: literal vs. variable on line 1 of input092.c ================================================ FILE: 59_WDIW_pt1/tests/err.input093.c ================================================ Unknown variable or function:fred on line 1 of input093.c ================================================ FILE: 59_WDIW_pt1/tests/err.input094.c ================================================ Type mismatch: literal vs. variable on line 1 of input094.c ================================================ FILE: 59_WDIW_pt1/tests/err.input095.c ================================================ Variable can not be initialised:x on line 1 of input095.c ================================================ FILE: 59_WDIW_pt1/tests/err.input096.c ================================================ Array size is illegal:0 on line 1 of input096.c ================================================ FILE: 59_WDIW_pt1/tests/err.input097.c ================================================ For now, declaration of non-global arrays is not implemented on line 2 of input097.c ================================================ FILE: 59_WDIW_pt1/tests/err.input098.c ================================================ Too many values in initialisation list on line 1 of input098.c ================================================ FILE: 59_WDIW_pt1/tests/err.input102.c ================================================ Cannot cast to a struct, union or void type on line 3 of input102.c ================================================ FILE: 59_WDIW_pt1/tests/err.input103.c ================================================ Cannot cast to a struct, union or void type on line 3 of input103.c ================================================ FILE: 59_WDIW_pt1/tests/err.input104.c ================================================ Cannot cast to a struct, union or void type on line 2 of input104.c ================================================ FILE: 59_WDIW_pt1/tests/err.input105.c 
================================================ Incompatible expression in assignment on line 4 of input105.c ================================================ FILE: 59_WDIW_pt1/tests/err.input118.c ================================================ Compiler doesn't support static or extern local declarations on line 2 of input118.c ================================================ FILE: 59_WDIW_pt1/tests/err.input124.c ================================================ Cannot ++ on rvalue on line 6 of input124.c ================================================ FILE: 59_WDIW_pt1/tests/err.input126.c ================================================ Unknown variable or function:ptr on line 7 of input126.c ================================================ FILE: 59_WDIW_pt1/tests/err.input129.c ================================================ Cannot ++ and/or -- more than once on line 6 of input129.c ================================================ FILE: 59_WDIW_pt1/tests/err.input141.c ================================================ Declaration of array parameters is not implemented on line 4 of input141.c ================================================ FILE: 59_WDIW_pt1/tests/err.input142.c ================================================ Array must have non-zero elements:fred on line 1 of input142.c ================================================ FILE: 59_WDIW_pt1/tests/input001.c ================================================ int printf(char *fmt); void main() { printf("%d\n", 12 * 3); printf("%d\n", 18 - 2 * 4); printf("%d\n", 1 + 2 + 9 - 5/2 + 3*5); } ================================================ FILE: 59_WDIW_pt1/tests/input002.c ================================================ int printf(char *fmt); void main() { int fred; int jim; fred= 5; jim= 12; printf("%d\n", fred + jim); } ================================================ FILE: 59_WDIW_pt1/tests/input003.c ================================================ int printf(char *fmt); void main() { int x; x= 1; 
printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); } ================================================ FILE: 59_WDIW_pt1/tests/input004.c ================================================ int printf(char *fmt); void main() { int x; x= 7 < 9; printf("%d\n", x); x= 7 <= 9; printf("%d\n", x); x= 7 != 9; printf("%d\n", x); x= 7 == 7; printf("%d\n", x); x= 7 >= 7; printf("%d\n", x); x= 7 <= 7; printf("%d\n", x); x= 9 > 7; printf("%d\n", x); x= 9 >= 7; printf("%d\n", x); x= 9 != 7; printf("%d\n", x); } ================================================ FILE: 59_WDIW_pt1/tests/input005.c ================================================ int printf(char *fmt); void main() { int i; int j; i=6; j=12; if (i < j) { printf("%d\n", i); } else { printf("%d\n", j); } } ================================================ FILE: 59_WDIW_pt1/tests/input006.c ================================================ int printf(char *fmt); void main() { int i; i=1; while (i <= 10) { printf("%d\n", i); i= i + 1; } } ================================================ FILE: 59_WDIW_pt1/tests/input007.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 59_WDIW_pt1/tests/input008.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 59_WDIW_pt1/tests/input009.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { printf("%d\n", 2 * b - a); } } ================================================ FILE: 59_WDIW_pt1/tests/input010.c ================================================ int 
printf(char *fmt); void main() { int i; char j; j= 20; printf("%d\n", j); i= 10; printf("%d\n", i); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 2; j= j + 1) { printf("%d\n", j); } } ================================================ FILE: 59_WDIW_pt1/tests/input011.c ================================================ int printf(char *fmt); int main() { int i; char j; long k; i= 10; printf("%d\n", i); j= 20; printf("%d\n", j); k= 30; printf("%d\n", k); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 4; j= j + 1) { printf("%d\n", j); } for (k= 1; k <= 5; k= k + 1) { printf("%d\n", k); } return(i); printf("%d\n", 12345); return(3); } ================================================ FILE: 59_WDIW_pt1/tests/input012.c ================================================ int printf(char *fmt); int fred() { return(5); } void main() { int x; x= fred(2); printf("%d\n", x); } ================================================ FILE: 59_WDIW_pt1/tests/input013.c ================================================ int printf(char *fmt); int fred() { return(56); } void main() { int dummy; int result; dummy= printf("%d\n", 23); result= fred(10); dummy= printf("%d\n", result); } ================================================ FILE: 59_WDIW_pt1/tests/input014.c ================================================ int printf(char *fmt); int fred() { return(20); } int main() { int result; printf("%d\n", 10); result= fred(15); printf("%d\n", result); printf("%d\n", fred(15)+10); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input015.c ================================================ int printf(char *fmt); int main() { char a; char *b; char c; int d; int *e; int f; a= 18; printf("%d\n", a); b= &a; c= *b; printf("%d\n", c); d= 12; printf("%d\n", d); e= &d; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input016.c 
================================================ int printf(char *fmt); int c; int d; int *e; int f; int main() { c= 12; d=18; printf("%d\n", c); e= &c + 1; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input017.c ================================================ int printf(char *fmt); int main() { char a; char *b; int d; int *e; b= &a; *b= 19; printf("%d\n", a); e= &d; *e= 12; printf("%d\n", d); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input018.c ================================================ int printf(char *fmt); int main() { int a; int b; a= b= 34; printf("%d\n", a); printf("%d\n", b); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input018a.c ================================================ int printf(char *fmt); int a; int *b; char c; char *d; int main() { b= &a; *b= 15; printf("%d\n", a); d= &c; *d= 16; printf("%d\n", c); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input019.c ================================================ int printf(char *fmt); int a; int b; int c; int d; int e; int main() { a= 2; b= 4; c= 3; d= 2; e= (a+b) * (c+d); printf("%d\n", e); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input020.c ================================================ int printf(char *fmt); int a; int b[25]; int main() { b[3]= 12; a= b[3]; printf("%d\n", a); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input021.c ================================================ int printf(char *fmt); char c; char *str; int main() { c= '\n'; printf("%d\n", c); for (str= "Hello world\n"; *str != 0; str= str + 1) { printf("%c", *str); } return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input022.c ================================================ int printf(char *fmt); char a; char b; 
char c; int d; int e; int f; long g; long h; long i; int main() { b= 5; c= 7; a= b + c++; printf("%d\n", a); e= 5; f= 7; d= e + f++; printf("%d\n", d); h= 5; i= 7; g= h + i++; printf("%d\n", g); a= b-- + c; printf("%d\n", a); d= e-- + f; printf("%d\n", d); g= h-- + i; printf("%d\n", g); a= ++b + c; printf("%d\n", a); d= ++e + f; printf("%d\n", d); g= ++h + i; printf("%d\n", g); a= b * --c; printf("%d\n", a); d= e * --f; printf("%d\n", d); g= h * --i; printf("%d\n", g); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input023.c ================================================ int printf(char *fmt); char *str; int x; int main() { x= -23; printf("%d\n", x); printf("%d\n", -10 * -10); x= 1; x= ~x; printf("%d\n", x); x= 2 > 5; printf("%d\n", x); x= !x; printf("%d\n", x); x= !x; printf("%d\n", x); x= 13; if (x) { printf("%d\n", 13); } x= 0; if (!x) { printf("%d\n", 14); } for (str= "Hello world\n"; *str; str++) { printf("%c", *str); } return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input024.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { a= 42; b= 19; printf("%d\n", a & b); printf("%d\n", a | b); printf("%d\n", a ^ b); printf("%d\n", 1 << 3); printf("%d\n", 63 >> 3); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input025.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { char z; int y; int x; x= 10; y= 20; z= 30; printf("%d\n", x); printf("%d\n", y); printf("%d\n", z); a= 5; b= 15; c= 25; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input026.c ================================================ int printf(char *fmt); int main(int a, char b, long c, int d, int e, int f, int g, int h) { int i; int j; int k; a= 13; printf("%d\n", a); b= 23; 
printf("%d\n", b); c= 34; printf("%d\n", c); d= 44; printf("%d\n", d); e= 54; printf("%d\n", e); f= 64; printf("%d\n", f); g= 74; printf("%d\n", g); h= 84; printf("%d\n", h); i= 94; printf("%d\n", i); j= 95; printf("%d\n", j); k= 96; printf("%d\n", k); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input027.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int param5(int a, int b, int c, int d, int e) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param2(int a, int b) { int c; int d; int e; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param0() { int a; int b; int c; int d; int e; a= 1; b= 2; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int main() { param8(1,2,3,4,5,6,7,8); param5(1,2,3,4,5); param2(1,2); param0(); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input028.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input029.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, 
int d, int e, int f, int g, int h); int fred(int a, int b, int c); int main(); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input030.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input030.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 59_WDIW_pt1/tests/input031.c ================================================ int printf(char *fmt); int main() { int x; x= 2 + + 3 - * / ; } ================================================ FILE: 59_WDIW_pt1/tests/input032.c ================================================ int printf(char *fmt); int main() { pizza cow llama sausage; } ================================================ FILE: 59_WDIW_pt1/tests/input033.c ================================================ int printf(char *fmt); int main() { char *z; return(z); } ================================================ FILE: 59_WDIW_pt1/tests/input035.c ================================================ int printf(char *fmt); int fred(int a, int b) { int a; return(a); } ================================================ FILE: 59_WDIW_pt1/tests/input036.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c); 
================================================ FILE: 59_WDIW_pt1/tests/input037.c ================================================ int printf(char *fmt); int fred(int a, char b +, int z); ================================================ FILE: 59_WDIW_pt1/tests/input038.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c, int g); ================================================ FILE: 59_WDIW_pt1/tests/input039.c ================================================ int printf(char *fmt); int main() { int a; } ================================================ FILE: 59_WDIW_pt1/tests/input040.c ================================================ int printf(char *fmt); int main() { int a; a= 5; } ================================================ FILE: 59_WDIW_pt1/tests/input041.c ================================================ int printf(char *fmt); void fred() { return(5); } ================================================ FILE: 59_WDIW_pt1/tests/input042.c ================================================ int printf(char *fmt); int main() { fred(5); } ================================================ FILE: 59_WDIW_pt1/tests/input043.c ================================================ int printf(char *fmt); int main() { int a; a= b[4]; } ================================================ FILE: 59_WDIW_pt1/tests/input044.c ================================================ int printf(char *fmt); int main() { int a; a= z; } ================================================ FILE: 59_WDIW_pt1/tests/input045.c ================================================ int printf(char *fmt); int main() { int a; a= &5; } ================================================ FILE: 59_WDIW_pt1/tests/input046.c ================================================ int printf(char *fmt); int main() { int a; a= *5; } ================================================ FILE: 59_WDIW_pt1/tests/input047.c 
================================================ int printf(char *fmt); int main() { int a; a= ++5; } ================================================ FILE: 59_WDIW_pt1/tests/input048.c ================================================ int printf(char *fmt); int main() { int a; a= --5; } ================================================ FILE: 59_WDIW_pt1/tests/input049.c ================================================ int printf(char *fmt); int main() { int x; char y; y= x; } ================================================ FILE: 59_WDIW_pt1/tests/input050.c ================================================ int printf(char *fmt); int main() { char *a; char *b; a= a + b; } ================================================ FILE: 59_WDIW_pt1/tests/input051.c ================================================ int printf(char *fmt); int main() { char a; a= 'fred'; } ================================================ FILE: 59_WDIW_pt1/tests/input052.c ================================================ int printf(char *fmt); int main() { int a; a= $5.00; } ================================================ FILE: 59_WDIW_pt1/tests/input053.c ================================================ int printf(char *fmt); int main() { printf("Hello world, %d\n", 23); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input054.c ================================================ int printf(char *fmt); int main() { int i; for (i=0; i < 20; i++) { printf("Hello world, %d\n", i); } return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input055.c ================================================ int printf(char *fmt); int main(int argc, char **argv) { int i; char *argument; printf("Hello world\n"); for (i=0; i < argc; i++) { argument= *argv; argv= argv + 1; printf("Argument %d is %s\n", i, argument); } return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input056.c 
================================================ struct foo { int x; }; struct mary var1; ================================================ FILE: 59_WDIW_pt1/tests/input057.c ================================================ struct fred { int x; } ; struct fred { char y; } ; ================================================ FILE: 59_WDIW_pt1/tests/input058.c ================================================ int printf(char *fmt); struct fred { int x; char y; long z; }; struct fred var2; struct fred *varptr; int main() { long result; var2.x= 12; printf("%d\n", var2.x); var2.y= 'c'; printf("%d\n", var2.y); var2.z= 4005; printf("%d\n", var2.z); result= var2.x + var2.y + var2.z; printf("%d\n", result); varptr= &var2; result= varptr->x + varptr->y + varptr->z; printf("%d\n", result); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input059.c ================================================ int main() { int x; x= y.foo; } ================================================ FILE: 59_WDIW_pt1/tests/input060.c ================================================ int main() { int x; x= x.foo; } ================================================ FILE: 59_WDIW_pt1/tests/input061.c ================================================ int main() { int x; x= x->foo; } ================================================ FILE: 59_WDIW_pt1/tests/input062.c ================================================ int printf(char *fmt); union fred { char w; int x; int y; long z; }; union fred var1; union fred *varptr; int main() { var1.x= 65; printf("%d\n", var1.x); var1.x= 66; printf("%d\n", var1.x); printf("%d\n", var1.y); printf("The next two depend on the endian of the platform\n"); printf("%d\n", var1.w); printf("%d\n", var1.z); varptr= &var1; varptr->x= 67; printf("%d\n", varptr->x); printf("%d\n", varptr->y); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input063.c ================================================ int printf(char 
*fmt); enum fred { apple=1, banana, carrot, pear=10, peach, mango, papaya }; enum jane { aple=1, bnana, crrot, par=10, pech, mago, paaya }; enum fred var1; enum jane var2; enum fred var3; int main() { var1= carrot + pear + mango; printf("%d\n", var1); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input064.c ================================================ enum fred var3; ================================================ FILE: 59_WDIW_pt1/tests/input065.c ================================================ enum fred { x, y, z }; enum fred { a, b }; ================================================ FILE: 59_WDIW_pt1/tests/input066.c ================================================ enum fred { x, y, z }; enum mary { a, b, z }; ================================================ FILE: 59_WDIW_pt1/tests/input067.c ================================================ int printf(char *fmt); typedef int FOO; FOO var1; struct bar { int x; int y} ; typedef struct bar BAR; BAR var2; int main() { var1= 5; printf("%d\n", var1); var2.x= 7; var2.y= 10; printf("%d\n", var2.x + var2.y); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input068.c ================================================ typedef int FOO; typedef char FOO; ================================================ FILE: 59_WDIW_pt1/tests/input069.c ================================================ typedef int FOO; FLOO y; ================================================ FILE: 59_WDIW_pt1/tests/input070.c ================================================ #include typedef int FOO; int main() { FOO x; x= 56; printf("%d\n", x); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input071.c ================================================ #include int main() { int x; x = 0; while (x < 100) { if (x == 5) { x = x + 2; continue; } printf("%d\n", x); if (x == 14) { break; } x = x + 1; } printf("Done\n"); return (0); } 
================================================ FILE: 59_WDIW_pt1/tests/input072.c ================================================ int main() { break; } ================================================ FILE: 59_WDIW_pt1/tests/input073.c ================================================ int main() { continue; } ================================================ FILE: 59_WDIW_pt1/tests/input074.c ================================================ #include int main() { int x; int y; y= 0; for (x=0; x < 5; x++) { switch(x) { case 1: { y= 5; break; } case 2: { y= 7; break; } case 3: { y= 9; } default: { y= 100; } } printf("%d\n", y); } return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input075.c ================================================ int main() { int x; switch(x) { if (x<5); } } ================================================ FILE: 59_WDIW_pt1/tests/input076.c ================================================ int main() { int x; switch(x) { } } ================================================ FILE: 59_WDIW_pt1/tests/input077.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } case 4: { x= 2; } } } ================================================ FILE: 59_WDIW_pt1/tests/input078.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } default: { x= 2; } } } ================================================ FILE: 59_WDIW_pt1/tests/input079.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } case 2: { x= 2; } case 1: { x= 2; } default: { x= 2; } } } ================================================ FILE: 59_WDIW_pt1/tests/input080.c ================================================ #include int x; int y; int main() { for (x=0, y=1; x < 6; x++, y=y+2) { printf("%d %d\n", x, y); } return(0); } ================================================ FILE: 
59_WDIW_pt1/tests/input081.c ================================================ #include int x; int y; int main() { x= 0; y=1; for (;x<5;) { printf("%d %d\n", x, y); x=x+1; y=y+2; } return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input082.c ================================================ #include int main() { int x; x= 10; // Dangling else test if (x > 5) if (x > 15) printf("x > 15\n"); else printf("15 >= x > 5\n"); else printf("5 >= x\n"); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input083.c ================================================ #include int main() { int x; // Dangling else test. // We should not print anything for x<= 5 for (x=0; x < 12; x++) if (x > 5) if (x > 10) printf("10 < %2d\n", x); else printf(" 5 < %2d <= 10\n", x); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input084.c ================================================ #include int main() { int x, y; x=2; y=3; printf("%d %d\n", x, y); char a, *b; a= 'f'; b= &a; printf("%c %c\n", a, *b); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input085.c ================================================ int main(struct foo { int x; }; , int a) { return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input086.c ================================================ int main() { int fred() { return(5); } int x; x=2; return(x); } ================================================ FILE: 59_WDIW_pt1/tests/input087.c ================================================ struct foo { int x; int y; struct blah { int g; }; }; ================================================ FILE: 59_WDIW_pt1/tests/input088.c ================================================ #include struct foo { int x; int y; } fred, mary; struct foo james; int main() { fred.x= 5; mary.y= 6; printf("%d %d\n", fred.x, mary.y); return(0); } 
================================================ FILE: 59_WDIW_pt1/tests/input089.c ================================================ #include int x= 23; char y= 'H'; char *z= "Hello world"; int main() { printf("%d %c %s\n", x, y, z); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input090.c ================================================ #include int a = 23, b = 100; char y = 'H', *z = "Hello world"; int main() { printf("%d %d %c %s\n", a, b, y, z); return (0); } ================================================ FILE: 59_WDIW_pt1/tests/input091.c ================================================ #include int fred[] = { 1, 2, 3, 4, 5 }; int jim[10] = { 1, 2, 3, 4, 5 }; int main() { int i; for (i=0; i < 5; i++) printf("%d\n", fred[i]); for (i=0; i < 10; i++) printf("%d\n", jim[i]); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input092.c ================================================ char x= 3000; ================================================ FILE: 59_WDIW_pt1/tests/input093.c ================================================ char x= fred; ================================================ FILE: 59_WDIW_pt1/tests/input094.c ================================================ char *s= 54; ================================================ FILE: 59_WDIW_pt1/tests/input095.c ================================================ int fred(int x=2) { return(x); } ================================================ FILE: 59_WDIW_pt1/tests/input096.c ================================================ int fred[0]; ================================================ FILE: 59_WDIW_pt1/tests/input098.c ================================================ int fred[3]= { 1, 2, 3, 4, 5 }; ================================================ FILE: 59_WDIW_pt1/tests/input099.c ================================================ #include // List of token strings, for debugging purposes. 
// As yet, we can't store a NULL into the list char *Tstring[] = { "EOF", "=", "||", "&&", "|", "^", "&", "==", "!=", ",", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", "default", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":", "" }; int main() { int i; char *str; i=0; while (1) { str= Tstring[i]; if (*str == 0) break; printf("%s\n", str); i++; } return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input100.c ================================================ #include int main() { int x= 3, y=14; int z= 2 * x + y; char *str= "Hello world"; printf("%s %d %d\n", str, x+y, z); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input101.c ================================================ #include int main() { int x= 65535; char y; char *str; y= (char )x; printf("0x%x\n", y); str= (char *)0; printf("0x%lx\n", (long)str); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input102.c ================================================ int main() { struct foo { int p; }; int y= (struct foo) x; } ================================================ FILE: 59_WDIW_pt1/tests/input103.c ================================================ int main() { union foo { int p; }; int y= (union foo) x; } ================================================ FILE: 59_WDIW_pt1/tests/input104.c ================================================ int main() { int y= (void) x; } ================================================ FILE: 59_WDIW_pt1/tests/input105.c ================================================ int main() { int x; char *y; y= (char) x; } ================================================ FILE: 59_WDIW_pt1/tests/input106.c ================================================ 
#include char *y= (char *)0; int main() { printf("0x%lx\n", (long)y); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input107.c ================================================ #include char *y[] = { "fish", "cow", NULL }; char *z= NULL; int main() { int i; char *ptr; for (i=0; i < 3; i++) { ptr= y[i]; if (ptr != (char *)0) printf("%s\n", y[i]); else printf("NULL\n"); } return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input108.c ================================================ int main() { char *str= (void *)0; return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input109.c ================================================ #include int main() { int x= 17 - 1; printf("%d\n", x); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input110.c ================================================ #include int x; int y; int main() { x= 3; y= 15; y += x; printf("%d\n", y); x= 3; y= 15; y -= x; printf("%d\n", y); x= 3; y= 15; y *= x; printf("%d\n", y); x= 3; y= 15; y /= x; printf("%d\n", y); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input111.c ================================================ #include int main() { int x= 2000 + 3 + 4 * 5 + 6; printf("%d\n", x); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input112.c ================================================ #include char* y = NULL; int x= 10 + 6; int fred [ 2 + 3 ]; int main() { fred[2]= x; printf("%d\n", fred[2]); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input113.c ================================================ #include void fred(void); void fred(void) { printf("fred says hello\n"); } int main(void) { fred(); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input114.c 
================================================ #include int main() { int x= 0x4a; printf("%c\n", x); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input115.c ================================================ #include struct foo { int x; char y; long z; }; typedef struct foo blah; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol struct symtable *ctype; // If struct/union, ptr to that type int stype; // Structural type for the symbol int class; // Storage class for the symbol int size; // Total size in bytes of this symbol int nelems; // Functions: # params. Arrays: # elements #define st_endlabel st_posn // For functions, the end label int st_posn; // For locals, the negative offset // from the stack base pointer int *initlist; // List of initial values struct symtable *next; // Next symbol in one list struct symtable *member; // First member of a function, struct, }; // union or enum // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates int rvalue; // True if the node is an rvalue struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; struct symtable *sym; // For many AST nodes, the pointer to // the symbol in the symbol table #define a_intvalue a_size // For A_INTLIT, the integer value int a_size; // For A_SCALE, the size to scale by }; int main() { printf("%ld\n", sizeof(char)); printf("%ld\n", sizeof(int)); printf("%ld\n", sizeof(long)); printf("%ld\n", sizeof(char *)); printf("%ld\n", sizeof(blah)); printf("%ld\n", sizeof(struct symtable)); printf("%ld\n", sizeof(struct ASTnode)); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input116.c ================================================ #include static int counter=0; static int fred(void) { return(counter++); } int 
main(void) { int i; for (i=0; i < 5; i++) printf("%d\n", fred()); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input117.c ================================================ #include static char *fred(void) { return("Hello"); } int main(void) { printf("%s\n", fred()); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input118.c ================================================ int main(void) { static int x; } ================================================ FILE: 59_WDIW_pt1/tests/input119.c ================================================ #include int x; int y= 3; int main() { x= y != 3 ? 6 : 8; printf("%d\n", x); x= (y == 3) ? 6 : 8; printf("%d\n", x); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input120.c ================================================ #include int x; int y= 3; int main() { for (y= 0; y < 10; y++) { x= y > 4 ? y + 2 : y + 9; printf("%d\n", x); } return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input121.c ================================================ #include int x; int y= 3; int main() { for (y= 0; y < 10; y++) { x= (y < 4) ? y + 2 : (y > 7) ? 
1000 : y + 9; printf("%d\n", x); } return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input122.c ================================================ #include int x, y, z1, z2; int main() { for (x= 0; x <= 1; x++) { for (y= 0; y <= 1; y++) { z1= x || y; z2= x && y; printf("x %d, y %d, x || y %d, x && y %d\n", x, y, z1, z2); } } //z= x || y; return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input123.c ================================================ #include int main() { int x; for (x=0; x < 20; x++) switch(x) { case 2: case 3: case 5: case 7: case 11: printf("%2d infant prime\n", x); break; case 13: case 17: case 19: printf("%2d teen prime\n", x); break; case 0: case 1: case 4: case 6: case 8: case 9: case 10: case 12: printf("%2d infant composite\n", x); break; default: printf("%2d teen composite\n", x); break; } return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input124.c ================================================ #include int ary[5]; int main() { ary++; return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input125.c ================================================ #include int ary[5]; int *ptr; int x; int main() { ary[3]= 2008; ptr= ary; // Load ary's address into ptr x= ary[3]; printf("%d\n", x); x= ptr[3]; printf("%d\n", x); // Treat ptr as an array return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input126.c ================================================ #include int ary[5]; int main() { ary[3]= 2008; ptr= &ary; return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input127.c ================================================ #include int ary[5]; void fred(int *ptr) { // Receive a pointer printf("%d\n", ptr[3]); } int main() { ary[3]= 2008; printf("%d\n", ary[3]); fred(ary); // Pass ary as a pointer return(0); } 
================================================ FILE: 59_WDIW_pt1/tests/input128.c ================================================ #include <stdio.h> struct foo { int val; struct foo *next; }; struct foo head, mid, tail; int main() { struct foo *ptr; tail.val= 20; tail.next= NULL; mid.val= 15; mid.next= &tail; head.val= 10; head.next= &mid; ptr= &head; printf("%d %d\n", head.val, ptr->val); printf("%d %d\n", mid.val, ptr->next->val); printf("%d %d\n", tail.val, ptr->next->next->val); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input129.c ================================================ #include <stdio.h> int x= 6; int main() { printf("%d\n", x++ ++); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input130.c ================================================ #include <stdio.h> char *x= "foo"; int main() { printf("Hello " "world" "\n"); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input131.c ================================================ #include <stdio.h> void donothing() { } int main() { int x=0; printf("Doing nothing... 
"); donothing(); printf("nothing done\n"); while (++x < 100) ; printf("x is now %d\n", x); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input132.c ================================================ extern int fred; int fred; int mary; extern int mary; int main() { return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input133.c ================================================ #include extern int fred[]; int fred[23]; char mary[100]; extern char mary[]; void main() { printf("OK\n"); } ================================================ FILE: 59_WDIW_pt1/tests/input134.c ================================================ #include char y = 'a'; char *x; int main() { x= &y; if (x && y == 'a') printf("1st match\n"); x= NULL; if (x && y == 'a') printf("2nd match\n"); x= &y; y='b'; if (x && y == 'a') printf("3rd match\n"); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input135.c ================================================ #include void fred() { int x= 5; printf("testing x\n"); if (x > 4) return; printf("x below 5\n"); } int main() { fred(); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input136.c ================================================ #include int add(int x, int y) { return(x+y); } int main() { int result; result= 3 * add(2,3) - 5 * add(4,6); printf("%d\n", result); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input137.c ================================================ #include int a=1, b=2, c=3, d=4, e=5, f=6, g=7, h=8; int main() { int x; x= ((((((a + b) + c) + d) + e) + f) + g) + h; x= a + (b + (c + (d + (e + (f + (g + h)))))); printf("x is %d\n", x); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input138.c ================================================ #include int x, y, z; int a=1; int *aptr; int main() { // See if generic AND 
works for (x=0; x <= 1; x++) for (y=0; y <= 1; y++) { z= x && y; printf("%d %d | %d\n", x, y, z); } // See if generic AND works for (x=0; x <= 1; x++) for (y=0; y <= 1; y++) { z= x || y; printf("%d %d | %d\n", x, y, z); } // Now some lazy evaluation aptr= NULL; if (aptr && *aptr == 1) printf("aptr points at 1\n"); else printf("aptr is NULL or doesn't point at 1\n"); aptr= &a; if (aptr && *aptr == 1) printf("aptr points at 1\n"); else printf("aptr is NULL or doesn't point at 1\n"); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input139.c ================================================ #include int same(int x) { return(x); } int main() { int a= 3; if (same(a) && same(a) >= same(a)) printf("same apparently\n"); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input140.c ================================================ #include int main() { int i; int ary[5]; char z; // Write below the array z= 'H'; // Fill the array for (i=0; i < 5; i++) ary[i]= i * i; // Write above the array i=14; // Print out the array for (i=0; i < 5; i++) printf("%d\n", ary[i]); // See if either side is OK printf("%d %c\n", i, z); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input141.c ================================================ static int fred[5]; int jim; int foo(int mary[6]) { return(5); } ================================================ FILE: 59_WDIW_pt1/tests/input142.c ================================================ static int fred[]; int jim; ================================================ FILE: 59_WDIW_pt1/tests/input143.c ================================================ #include char foo; char *a, *b, *c; int main() { a= b= c= NULL; if (a==NULL || b==NULL || c==NULL) printf("One of the three is NULL\n"); a= &foo; if (a==NULL || b==NULL || c==NULL) printf("One of the three is NULL\n"); b= &foo; if (a==NULL || b==NULL || c==NULL) printf("One of the three is 
NULL\n"); c= &foo; if (a==NULL || b==NULL || c==NULL) printf("One of the three is NULL\n"); else printf("All three are non-NULL\n"); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input144.c ================================================ #include #include #include char *filename= "fred"; int main() { fprintf(stdout, "Unable to open %s: %s\n", filename, strerror(errno)); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input145.c ================================================ #include char *str= "qwertyuiop"; int list[]= {3, 5, 7, 9, 11, 13, 15}; int *lptr; int main() { printf("%c\n", *str); str= str + 1; printf("%c\n", *str); str += 1; printf("%c\n", *str); str += 1; printf("%c\n", *str); str -= 1; printf("%c\n", *str); lptr= list; printf("%d\n", *lptr); lptr= lptr + 1; printf("%d\n", *lptr); lptr += 1; printf("%d\n", *lptr); lptr += 1; printf("%d\n", *lptr); lptr -= 1; printf("%d\n", *lptr); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input146.c ================================================ #include char *str= "qwertyuiop"; int list[]= {3, 5, 7, 9, 11, 13, 15}; int *lptr; int main() { printf("%c\n", *str); str= str + 1; printf("%c\n", *str); str += 1; printf("%c\n", *str); str += 1; printf("%c\n", *str); str -= 1; printf("%c\n", *str); str++; printf("%c\n", *str); str--; printf("%c\n", *str); ++str; printf("%c\n", *str); --str; printf("%c\n\n", *str); lptr= list; printf("%d\n", *lptr); lptr= lptr + 1; printf("%d\n", *lptr); lptr += 1; printf("%d\n", *lptr); lptr += 1; printf("%d\n", *lptr); lptr -= 1; printf("%d\n", *lptr); lptr++ ; printf("%d\n", *lptr); lptr-- ; printf("%d\n", *lptr); ++lptr ; printf("%d\n", *lptr); --lptr ; printf("%d\n", *lptr); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input147.c ================================================ #include int a; int main() { printf("%d\n", 24 % 
9); printf("%d\n", 31 % 11); a= 24; a %= 9; printf("%d\n",a); a= 31; a %= 11; printf("%d\n",a); return(0); } ================================================ FILE: 59_WDIW_pt1/tests/input148.c ================================================ #include char *argv[]= { "unused", "-fish", "-cat", "owl" }; int argc= 4; int main() { int i; for (i = 1; i < argc; i++) { printf("i is %d\n", i); if (*argv[i] != '-') break; } while (i < argc) { printf("leftover %s\n", argv[i]); i++; } return (0); } ================================================ FILE: 59_WDIW_pt1/tests/mktests ================================================ #!/bin/sh # Make the output files for each test # Build our compiler if needed if [ ! -f ../cwj ] then (cd ..; make install) fi for i in input*c do if [ ! -f "out.$i" -a ! -f "err.$i" ] then ../cwj -o out $i 2> "err.$i" # If the err file is empty if [ ! -s "err.$i" ] then rm -f "err.$i" cc -o out $i ./out > "out.$i" fi fi rm -f out out.s done ================================================ FILE: 59_WDIW_pt1/tests/out.input001.c ================================================ 36 10 25 ================================================ FILE: 59_WDIW_pt1/tests/out.input002.c ================================================ 17 ================================================ FILE: 59_WDIW_pt1/tests/out.input003.c ================================================ 1 2 3 4 5 ================================================ FILE: 59_WDIW_pt1/tests/out.input004.c ================================================ 1 1 1 1 1 1 1 1 1 ================================================ FILE: 59_WDIW_pt1/tests/out.input005.c ================================================ 6 ================================================ FILE: 59_WDIW_pt1/tests/out.input006.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 59_WDIW_pt1/tests/out.input007.c ================================================ 1 2 3 4 5 
6 7 8 9 10 ================================================ FILE: 59_WDIW_pt1/tests/out.input008.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 59_WDIW_pt1/tests/out.input009.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 59_WDIW_pt1/tests/out.input010.c ================================================ 20 10 1 2 3 4 5 253 254 255 0 1 ================================================ FILE: 59_WDIW_pt1/tests/out.input011.c ================================================ 10 20 30 1 2 3 4 5 253 254 255 0 1 2 3 1 2 3 4 5 ================================================ FILE: 59_WDIW_pt1/tests/out.input012.c ================================================ 5 ================================================ FILE: 59_WDIW_pt1/tests/out.input013.c ================================================ 23 56 ================================================ FILE: 59_WDIW_pt1/tests/out.input014.c ================================================ 10 20 30 ================================================ FILE: 59_WDIW_pt1/tests/out.input015.c ================================================ 18 18 12 12 ================================================ FILE: 59_WDIW_pt1/tests/out.input016.c ================================================ 12 18 ================================================ FILE: 59_WDIW_pt1/tests/out.input017.c ================================================ 19 12 ================================================ FILE: 59_WDIW_pt1/tests/out.input018.c ================================================ 34 34 ================================================ FILE: 59_WDIW_pt1/tests/out.input018a.c ================================================ 15 16 ================================================ FILE: 59_WDIW_pt1/tests/out.input019.c ================================================ 30 
================================================ FILE: 59_WDIW_pt1/tests/out.input020.c ================================================ 12 ================================================ FILE: 59_WDIW_pt1/tests/out.input021.c ================================================ 10 Hello world ================================================ FILE: 59_WDIW_pt1/tests/out.input022.c ================================================ 12 12 12 13 13 13 13 13 13 35 35 35 ================================================ FILE: 59_WDIW_pt1/tests/out.input023.c ================================================ -23 100 -2 0 1 0 13 14 Hello world ================================================ FILE: 59_WDIW_pt1/tests/out.input024.c ================================================ 2 59 57 8 7 ================================================ FILE: 59_WDIW_pt1/tests/out.input025.c ================================================ 10 20 30 5 15 25 ================================================ FILE: 59_WDIW_pt1/tests/out.input026.c ================================================ 13 23 34 44 54 64 74 84 94 95 96 ================================================ FILE: 59_WDIW_pt1/tests/out.input027.c ================================================ 1 2 3 4 5 6 7 8 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 ================================================ FILE: 59_WDIW_pt1/tests/out.input028.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 59_WDIW_pt1/tests/out.input029.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 59_WDIW_pt1/tests/out.input030.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input030.c", 0); if (zin == 
-1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 59_WDIW_pt1/tests/out.input053.c ================================================ Hello world, 23 ================================================ FILE: 59_WDIW_pt1/tests/out.input054.c ================================================ Hello world, 0 Hello world, 1 Hello world, 2 Hello world, 3 Hello world, 4 Hello world, 5 Hello world, 6 Hello world, 7 Hello world, 8 Hello world, 9 Hello world, 10 Hello world, 11 Hello world, 12 Hello world, 13 Hello world, 14 Hello world, 15 Hello world, 16 Hello world, 17 Hello world, 18 Hello world, 19 ================================================ FILE: 59_WDIW_pt1/tests/out.input055.c ================================================ Hello world Argument 0 is ./out ================================================ FILE: 59_WDIW_pt1/tests/out.input058.c ================================================ 12 99 4005 4116 4116 ================================================ FILE: 59_WDIW_pt1/tests/out.input062.c ================================================ 65 66 66 The next two depend on the endian of the platform 66 66 67 67 ================================================ FILE: 59_WDIW_pt1/tests/out.input063.c ================================================ 25 ================================================ FILE: 59_WDIW_pt1/tests/out.input067.c ================================================ 5 17 ================================================ FILE: 59_WDIW_pt1/tests/out.input070.c ================================================ 56 ================================================ FILE: 59_WDIW_pt1/tests/out.input071.c ================================================ 0 1 2 3 4 7 8 9 10 11 12 13 14 Done ================================================ FILE: 59_WDIW_pt1/tests/out.input074.c ================================================ 100 5 7 100 100 
================================================ FILE: 59_WDIW_pt1/tests/out.input080.c ================================================ 0 1 1 3 2 5 3 7 4 9 5 11 ================================================ FILE: 59_WDIW_pt1/tests/out.input081.c ================================================ 0 1 1 3 2 5 3 7 4 9 ================================================ FILE: 59_WDIW_pt1/tests/out.input082.c ================================================ 15 >= x > 5 ================================================ FILE: 59_WDIW_pt1/tests/out.input083.c ================================================ 5 < 6 <= 10 5 < 7 <= 10 5 < 8 <= 10 5 < 9 <= 10 5 < 10 <= 10 10 < 11 ================================================ FILE: 59_WDIW_pt1/tests/out.input084.c ================================================ 2 3 f f ================================================ FILE: 59_WDIW_pt1/tests/out.input088.c ================================================ 5 6 ================================================ FILE: 59_WDIW_pt1/tests/out.input089.c ================================================ 23 H Hello world ================================================ FILE: 59_WDIW_pt1/tests/out.input090.c ================================================ 23 100 H Hello world ================================================ FILE: 59_WDIW_pt1/tests/out.input091.c ================================================ 1 2 3 4 5 1 2 3 4 5 0 0 0 0 0 ================================================ FILE: 59_WDIW_pt1/tests/out.input099.c ================================================ EOF = || && | ^ & == != , > <= >= << >> + - * / ++ -- ~ ! void char int long if else while for return struct union enum typedef extern break continue switch case default intlit strlit ; identifier { } ( ) [ ] , . 
-> : ================================================ FILE: 59_WDIW_pt1/tests/out.input100.c ================================================ Hello world 17 20 ================================================ FILE: 59_WDIW_pt1/tests/out.input101.c ================================================ 0xff 0x0 ================================================ FILE: 59_WDIW_pt1/tests/out.input106.c ================================================ 0x0 ================================================ FILE: 59_WDIW_pt1/tests/out.input107.c ================================================ fish cow NULL ================================================ FILE: 59_WDIW_pt1/tests/out.input108.c ================================================ ================================================ FILE: 59_WDIW_pt1/tests/out.input109.c ================================================ 16 ================================================ FILE: 59_WDIW_pt1/tests/out.input110.c ================================================ 18 12 45 5 ================================================ FILE: 59_WDIW_pt1/tests/out.input111.c ================================================ 2029 ================================================ FILE: 59_WDIW_pt1/tests/out.input112.c ================================================ 16 ================================================ FILE: 59_WDIW_pt1/tests/out.input113.c ================================================ fred says hello ================================================ FILE: 59_WDIW_pt1/tests/out.input114.c ================================================ J ================================================ FILE: 59_WDIW_pt1/tests/out.input115.c ================================================ 1 4 8 8 13 64 48 ================================================ FILE: 59_WDIW_pt1/tests/out.input116.c ================================================ 0 1 2 3 4 ================================================ FILE: 59_WDIW_pt1/tests/out.input117.c 
================================================ Hello ================================================ FILE: 59_WDIW_pt1/tests/out.input119.c ================================================ 8 6 ================================================ FILE: 59_WDIW_pt1/tests/out.input120.c ================================================ 9 10 11 12 13 7 8 9 10 11 ================================================ FILE: 59_WDIW_pt1/tests/out.input121.c ================================================ 2 3 4 5 13 14 15 16 1000 1000 ================================================ FILE: 59_WDIW_pt1/tests/out.input122.c ================================================ x 0, y 0, x || y 0, x && y 0 x 0, y 1, x || y 1, x && y 0 x 1, y 0, x || y 1, x && y 0 x 1, y 1, x || y 1, x && y 1 ================================================ FILE: 59_WDIW_pt1/tests/out.input123.c ================================================ 0 infant composite 1 infant composite 2 infant prime 3 infant prime 4 infant composite 5 infant prime 6 infant composite 7 infant prime 8 infant composite 9 infant composite 10 infant composite 11 infant prime 12 infant composite 13 teen prime 14 teen composite 15 teen composite 16 teen composite 17 teen prime 18 teen composite 19 teen prime ================================================ FILE: 59_WDIW_pt1/tests/out.input125.c ================================================ 2008 2008 ================================================ FILE: 59_WDIW_pt1/tests/out.input127.c ================================================ 2008 2008 ================================================ FILE: 59_WDIW_pt1/tests/out.input128.c ================================================ 10 10 15 15 20 20 ================================================ FILE: 59_WDIW_pt1/tests/out.input130.c ================================================ Hello world ================================================ FILE: 59_WDIW_pt1/tests/out.input131.c ================================================ 
Doing nothing... nothing done x is now 100 ================================================ FILE: 59_WDIW_pt1/tests/out.input132.c ================================================ ================================================ FILE: 59_WDIW_pt1/tests/out.input133.c ================================================ OK ================================================ FILE: 59_WDIW_pt1/tests/out.input134.c ================================================ 1st match ================================================ FILE: 59_WDIW_pt1/tests/out.input135.c ================================================ testing x ================================================ FILE: 59_WDIW_pt1/tests/out.input136.c ================================================ -35 ================================================ FILE: 59_WDIW_pt1/tests/out.input137.c ================================================ x is 36 ================================================ FILE: 59_WDIW_pt1/tests/out.input138.c ================================================ 0 0 | 0 0 1 | 0 1 0 | 0 1 1 | 1 0 0 | 0 0 1 | 1 1 0 | 1 1 1 | 1 aptr is NULL or doesn't point at 1 aptr points at 1 ================================================ FILE: 59_WDIW_pt1/tests/out.input139.c ================================================ same apparently ================================================ FILE: 59_WDIW_pt1/tests/out.input140.c ================================================ 0 1 4 9 16 5 H ================================================ FILE: 59_WDIW_pt1/tests/out.input143.c ================================================ One of the three is NULL One of the three is NULL One of the three is NULL All three are non-NULL ================================================ FILE: 59_WDIW_pt1/tests/out.input144.c ================================================ Unable to open fred: Success ================================================ FILE: 59_WDIW_pt1/tests/out.input145.c ================================================ q w 
e r e 3 5 7 9 7 ================================================ FILE: 59_WDIW_pt1/tests/out.input146.c ================================================ q w e r e r e r e 3 5 7 9 7 9 7 9 7 ================================================ FILE: 59_WDIW_pt1/tests/out.input147.c ================================================ 6 9 6 9 ================================================ FILE: 59_WDIW_pt1/tests/out.input148.c ================================================ i is 1 i is 2 i is 3 leftover owl ================================================ FILE: 59_WDIW_pt1/tests/runtests ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! -f ../cwj ] then (cd ..; make install) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" # Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../cwj -o out $i ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../cwj $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" 
-eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.s "trial.$i" done ================================================ FILE: 59_WDIW_pt1/tests/runtests0 ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! -f ../cwj ] then (cd ..; make install; make cwj0) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" # Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../cwj0 -o out $i ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../cwj0 $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.s "trial.$i" done ================================================ FILE: 59_WDIW_pt1/tests/runtestsn ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! -f ../compn ] then (cd ..; make install) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do bn=$(echo $i | cut -d. -f1) if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" 
# Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../compn -o out $i ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../compn $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.o out.s ${bn}.s "trial.$i" done ================================================ FILE: 59_WDIW_pt1/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct symtable *ctype, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->type = type; n->ctype = ctype; n->left = left; n->mid = mid; n->right = right; n->sym = sym; n->a_intvalue = intvalue; n->linenum= 0; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, struct symtable *ctype, struct symtable *sym, int intvalue) { return (mkastnode(op, type, ctype, NULL, NULL, NULL, sym, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, int type, struct symtable 
*ctype, struct ASTnode *left, struct symtable *sym, int intvalue) { return (mkastnode(op, type, ctype, left, NULL, NULL, sym, intvalue)); } // Generate and return a new label number // just for AST dumping purposes static int dumpid = 1; static int gendumplabel(void) { return (dumpid++); } // Given an AST tree, print it out and follow the // traversal of the tree that genAST() follows void dumpAST(struct ASTnode *n, int label, int level) { int Lfalse, Lstart, Lend; int i; switch (n->op) { case A_IF: Lfalse = gendumplabel(); for (i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_IF"); if (n->right) { Lend = gendumplabel(); fprintf(stdout, ", end L%d", Lend); } fprintf(stdout, "\n"); dumpAST(n->left, Lfalse, level + 2); dumpAST(n->mid, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); return; case A_WHILE: Lstart = gendumplabel(); for (i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_WHILE, start L%d\n", Lstart); Lend = gendumplabel(); dumpAST(n->left, Lend, level + 2); dumpAST(n->right, NOLABEL, level + 2); return; } // Reset level to -2 for A_GLUE if (n->op == A_GLUE) level = -2; // General AST node handling if (n->left) dumpAST(n->left, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); for (i = 0; i < level; i++) fprintf(stdout, " "); switch (n->op) { case A_GLUE: fprintf(stdout, "\n\n"); return; case A_FUNCTION: fprintf(stdout, "A_FUNCTION %s\n", n->sym->name); return; case A_ADD: fprintf(stdout, "A_ADD\n"); return; case A_SUBTRACT: fprintf(stdout, "A_SUBTRACT\n"); return; case A_MULTIPLY: fprintf(stdout, "A_MULTIPLY\n"); return; case A_DIVIDE: fprintf(stdout, "A_DIVIDE\n"); return; case A_EQ: fprintf(stdout, "A_EQ\n"); return; case A_NE: fprintf(stdout, "A_NE\n"); return; case A_LT: fprintf(stdout, "A_LT\n"); return; case A_GT: fprintf(stdout, "A_GT\n"); return; case A_LE: fprintf(stdout, "A_LE\n"); return; case A_GE: fprintf(stdout, "A_GE\n"); return; case A_INTLIT: fprintf(stdout,
"A_INTLIT %d\n", n->a_intvalue); return; case A_STRLIT: fprintf(stdout, "A_STRLIT rval label L%d\n", n->a_intvalue); return; case A_IDENT: if (n->rvalue) fprintf(stdout, "A_IDENT rval %s\n", n->sym->name); else fprintf(stdout, "A_IDENT %s\n", n->sym->name); return; case A_ASSIGN: fprintf(stdout, "A_ASSIGN\n"); return; case A_WIDEN: fprintf(stdout, "A_WIDEN\n"); return; case A_RETURN: fprintf(stdout, "A_RETURN\n"); return; case A_FUNCCALL: fprintf(stdout, "A_FUNCCALL %s\n", n->sym->name); return; case A_ADDR: fprintf(stdout, "A_ADDR %s\n", n->sym->name); return; case A_DEREF: if (n->rvalue) fprintf(stdout, "A_DEREF rval\n"); else fprintf(stdout, "A_DEREF\n"); return; case A_SCALE: fprintf(stdout, "A_SCALE %d\n", n->a_size); return; case A_PREINC: fprintf(stdout, "A_PREINC %s\n", n->sym->name); return; case A_PREDEC: fprintf(stdout, "A_PREDEC %s\n", n->sym->name); return; case A_POSTINC: fprintf(stdout, "A_POSTINC\n"); return; case A_POSTDEC: fprintf(stdout, "A_POSTDEC\n"); return; case A_NEGATE: fprintf(stdout, "A_NEGATE\n"); return; case A_BREAK: fprintf(stdout, "A_BREAK\n"); return; case A_CONTINUE: fprintf(stdout, "A_CONTINUE\n"); return; case A_CASE: fprintf(stdout, "A_CASE %d\n", n->a_intvalue); return; case A_DEFAULT: fprintf(stdout, "A_DEFAULT\n"); return; case A_SWITCH: fprintf(stdout, "A_SWITCH\n"); return; case A_CAST: fprintf(stdout, "A_CAST %d\n", n->type); return; case A_ASPLUS: fprintf(stdout, "A_ASPLUS\n"); return; case A_ASMINUS: fprintf(stdout, "A_ASMINUS\n"); return; case A_ASSTAR: fprintf(stdout, "A_ASSTAR\n"); return; case A_ASSLASH: fprintf(stdout, "A_ASSLASH\n"); return; case A_TOBOOL: fprintf(stdout, "A_TOBOOL\n"); return; case A_LOGOR: fprintf(stdout, "A_LOGOR\n"); return; case A_LOGAND: fprintf(stdout, "A_LOGAND\n"); return; case A_AND: fprintf(stdout, "A_AND\n"); return; case A_ASMOD: fprintf(stdout, "A_ASMOD\n"); return; case A_INVERT: fprintf(stdout, "A_INVERT\n"); return; case A_LOGNOT: fprintf(stdout, "A_LOGNOT\n"); return; case 
A_LSHIFT: fprintf(stdout, "A_LSHIFT\n"); return; case A_MOD: fprintf(stdout, "A_MOD\n"); return; case A_OR: fprintf(stdout, "A_OR\n"); return; case A_RSHIFT: fprintf(stdout, "A_RSHIFT\n"); return; case A_TERNARY: fprintf(stdout, "A_TERNARY\n"); return; case A_XOR: fprintf(stdout, "A_XOR\n"); return; default: fatald("Unknown dumpAST operator", n->op); } } ================================================ FILE: 59_WDIW_pt1/types.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Types and type handling // Copyright (c) 2019 Warren Toomey, GPL3 // Return true if a type is an int type // of any size, false otherwise int inttype(int type) { return (((type & 0xf) == 0) && (type >= P_CHAR && type <= P_LONG)); } // Return true if a type is of pointer type int ptrtype(int type) { return ((type & 0xf) != 0); } // Given a primitive type, return // the type which is a pointer to it int pointer_to(int type) { if ((type & 0xf) == 0xf) fatald("Unrecognised in pointer_to: type", type); return (type + 1); } // Given a primitive pointer type, return // the type which it points to int value_at(int type) { if ((type & 0xf) == 0x0) fatald("Unrecognised in value_at: type", type); return (type - 1); } // Given a type and a composite type pointer, return // the size of this type in bytes int typesize(int type, struct symtable *ctype) { if (type == P_STRUCT || type == P_UNION) return (ctype->size); return (genprimsize(type)); } // Given an AST tree and a type which we want it to become, // possibly modify the tree by widening or scaling so that // it is compatible with this type. Return the original tree // if no changes occurred, a modified tree, or NULL if the // tree is not compatible with the given type. // If this will be part of a binary operation, the AST op is not zero. 
struct ASTnode *modify_type(struct ASTnode *tree, int rtype, struct symtable *rctype, int op) { int ltype; int lsize, rsize; ltype = tree->type; // For A_LOGOR and A_LOGAND, both types have to be int or pointer types if (op==A_LOGOR || op==A_LOGAND) { if (!inttype(ltype) && !ptrtype(ltype)) return(NULL); if (!inttype(rtype) && !ptrtype(rtype)) return(NULL); return (tree); } // XXX No idea on these yet if (ltype == P_STRUCT || ltype == P_UNION) fatal("Don't know how to do this yet"); if (rtype == P_STRUCT || rtype == P_UNION) fatal("Don't know how to do this yet"); // Compare scalar int types if (inttype(ltype) && inttype(rtype)) { // Both types same, nothing to do if (ltype == rtype) return (tree); // Get the sizes for each type lsize = typesize(ltype, NULL); rsize = typesize(rtype, NULL); // Tree's size is too big if (lsize > rsize) return (NULL); // Widen to the right if (rsize > lsize) return (mkastunary(A_WIDEN, rtype, NULL, tree, NULL, 0)); } // For pointers if (ptrtype(ltype) && ptrtype(rtype)) { // We can compare them if (op >= A_EQ && op <= A_GE) return (tree); // A comparison of the same type for a non-binary operation is OK, // or when the left tree is of `void *` type.
if (op == 0 && (ltype == rtype || ltype == pointer_to(P_VOID))) return (tree); } // We can scale only on add and subtract operations if (op == A_ADD || op == A_SUBTRACT || op == A_ASPLUS || op == A_ASMINUS) { // Left is int type, right is pointer type and the size // of the original type is >1: scale the left if (inttype(ltype) && ptrtype(rtype)) { rsize = genprimsize(value_at(rtype)); if (rsize > 1) return (mkastunary(A_SCALE, rtype, rctype, tree, NULL, rsize)); else return (tree); // Size 1, no need to scale } } // If we get here, the types are not compatible return (NULL); } ================================================ FILE: 60_TripleTest/Makefile ================================================ # Define the location of the include directory # and the location to install the compiler binary INCDIR=/tmp/include BINDIR=/tmp HSRCS= data.h decl.h defs.h incdir.h SRCS= cg.c decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c SRCN= cgn.c decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c ARMSRCS= cg_arm.c decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c cwj: $(SRCS) $(HSRCS) echo "#define INCDIR \"$(INCDIR)\"" > incdir.h cc -o cwj -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCS) compn: $(SRCN) $(HSRCS) echo "#define __NASM__ 1" >> incdir.h cc -D__NASM__ -o compn -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCN) cwjarm: $(ARMSRCS) $(HSRCS) echo "#define INCDIR \"$(INCDIR)\"" > incdir.h cc -o cwjarm -g -Wall $(ARMSRCS) cp cwjarm cwj incdir.h: echo "#define INCDIR \"$(INCDIR)\"" > incdir.h install: cwj mkdir -p $(INCDIR) rsync -a include/. $(INCDIR) cp cwj $(BINDIR) chmod +x $(BINDIR)/cwj installn: compn mkdir -p $(INCDIR) rsync -a include/. 
$(INCDIR) cp compn $(BINDIR) chmod +x $(BINDIR)/compn clean: rm -f cwj cwj[0-9] cwjarm compn compn[0-9] *.o *.s out a.out incdir.h test: install tests/runtests (cd tests; chmod +x runtests; ./runtests) # Run the tests with the # compiler that compiled itself test0: install tests/runtests0 cwj0 (cd tests; chmod +x runtests0; ./runtests0) # Run the tests with the # compiler that compiled itself test0n: install tests/runtests0n compn0 (cd tests; chmod +x runtests0n; ./runtests0n) armtest: cwjarm tests/runtests (cd tests; chmod +x runtests; ./runtests) testn: installn tests/runtestsn (cd tests; chmod +x runtestsn; ./runtestsn) # Try to do the triple test triple: cwj1 size cwj[01] # Paranoid: quadruple test quad: cwj2 size cwj[012] cwj2: cwj1 $(SRCS) $(HSRCS) ./cwj1 -o cwj2 $(SRCS) cwj1: cwj0 $(SRCS) $(HSRCS) ./cwj0 -o cwj1 $(SRCS) cwj0: install $(SRCS) $(HSRCS) ./cwj -o cwj0 $(SRCS) # Try to do the triple test with nasm triplen: compn1 size compn[01] quadn: compn2 size compn[012] compn2: compn1 $(SRCN) $(HSRCS) ./compn1 -o compn2 $(SRCN) compn1: compn0 $(SRCN) $(HSRCS) ./compn0 -o compn1 $(SRCN) compn0: installn $(SRCN) $(HSRCS) echo "#define __NASM__ 1" >> incdir.h ./compn -o compn0 $(SRCN) ================================================ FILE: 60_TripleTest/Readme.md ================================================ # Part 60: Passing the Triple Test In this part of our compiler writing journey, we will get the compiler to pass the triple test! How do I know? I've just got it to pass the triple test by changing a few source code lines in the compiler. But I don't yet know why the original lines are not working. So, this part will be an investigation where we gather the clues, deduce the problem, fix it and finally get the compiler to pass the triple test properly. Or, so I hope! ## The First Piece of Evidence We now have three compiler binaries: 1. `cwj`, built with the Gnu C compiler, 2. `cwj0`, built with the `cwj` compiler, and 3.
`cwj1`, built with the `cwj0` compiler The last two should be identical but they are not. Thus, `cwj0` isn't generating the right assembly output, and this is because of a flaw in the compiler's source code. How can we narrow the problem down? Well, we have a pile of test programs in the `tests/` directory. Let's run `cwj` and `cwj0` over all these tests and see if there's a difference. Yes there is, with `tests/input002.c`: ``` $ ./cwj -o z tests/input002.c ; ./z 17 $ ./cwj0 -o z tests/input002.c ; ./z 24 ``` ## What's The Problem? So, `cwj0` is producing incorrect assembly output. Let's start with the test source code: ```c void main() { int fred; int jim; fred= 5; jim= 12; printf("%d\n", fred + jim); } ``` We have two local variables, `fred` and `jim`. The two compilers produce assembly code with these differences: ``` 42c42 < movl %r10d, -4(%rbp) --- > movl %r10d, -8(%rbp) 51c51 < movslq -4(%rbp), %r10 --- > movslq -8(%rbp), %r10 ``` Hmm, the second compiler is calculating the offset of `fred` incorrectly. The first compiler is correctly calculating the offset as `-4` below the frame pointer. The second compiler is calculating the offset as `-8` below the frame pointer. ## What's Causing the Problem? These offsets are being calculated by the function `newlocaloffset()` in `cg.c`: ```c // Create the position of a new local variable. static int localOffset; static int newlocaloffset(int size) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (size > 4) ? size : 4; return (-localOffset); } ``` At the start of each function, `localOffset` is set to zero. As we create local variables, we get the size of each one, pass it to `newlocaloffset()` and get back the offset. Both `fred` and `jim` local variables are `int`s, which are size 4. Therefore, their offsets should be `-4` and `-8`. ## More Evidence, Please Let's abstract `newlocaloffset()` into a separate source file, `z.c` (my "go to" temporary file name) and compile it. 
The source file is: ```c static int localOffset=0; static int newlocaloffset(int size) { localOffset += (size > 4) ? size : 4; return (-localOffset); } ``` And here is the output assembly with my comments: ``` .data localOffset: .long 0 .text newlocaloffset: pushq %rbp movq %rsp, %rbp # Set up the stack and movl %edi, -4(%rbp) # frame pointers addq $-16,%rsp movslq localOffset(%rip), %r10 # Get localOffset into %r10 # in preparation for the += movslq -4(%rbp), %r11 # Get size into %r11 movq $4, %r12 # Get 4 into %r12 cmpl %r12d, %r11d # Compare them jle L2 # Jump if size <= 4 movslq -4(%rbp), %r11 movq %r11, %r10 # Get size into %r10 jmp L3 # and jump to L3 L2: movq $4, %r11 # Otherwise get 4 movq %r11, %r10 # into %r10 L3: addq %r10, %r10 # Add the += expression to the # cached copy of localOffset movl %r10d, localOffset(%rip) # Save %r10 into localOffset movslq localOffset(%rip), %r10 negq %r10 # Negate localOffset movl %r10d, %eax # Set up the return value jmp L1 L1: addq $16,%rsp # Restore the stack and popq %rbp # frame pointers ret # and return ``` Hmm, the code is trying to do `localOffset += expression`, and we have a copy of `localOffset` cached in `%r10`. However, the expression itself also uses `%r10`, thus destroying the cached version of `localOffset`. The `addq %r10, %r10`, in particular, is just wrong: it should be adding two different registers.
## Passing the Triple Test by Cheating We can pass the triple test by rewriting the source code to `newlocaloffset()`: ```c static int newlocaloffset(int size) { if (size > 4) localOffset= localOffset + size; else localOffset= localOffset + 4; return (-localOffset); } ``` When we now do: ``` $ make triple cc -Wall -o cwj cg.c decl.c expr.c gen.c main.c misc.c opt.c scan.c stmt.c sym.c tree.c types.c ./cwj -o cwj0 cg.c decl.c expr.c gen.c main.c misc.c opt.c scan.c stmt.c sym.c tree.c types.c ./cwj0 -o cwj1 cg.c decl.c expr.c gen.c main.c misc.c opt.c scan.c stmt.c sym.c tree.c types.c size cwj[01] text data bss dec hex filename 109652 3028 48 112728 1b858 cwj0 109652 3028 48 112728 1b858 cwj1 ``` the last two compiler binaries are 100% identical. But this hides the fact that the original `newlocaloffset()` source code should work but it doesn't. Why are we reallocating `%r10` when we know that it is allocated? ## A Possible Culprit I added back in to `cg.c` the `printf()` lines to see when registers were being allocated and freed. I noticed that, after these assembly lines: ``` movslq -4(%rbp), %r11 # Get size into %r11 movq $4, %r12 # Get 4 into %r12 cmpl %r12d, %r11d # Compare them jle L2 # Jump if size <= 4 ``` all the registers are freed, even though `%r10` holds the cached copy of `localOffset`. Which function is generating these lines and freeing the registers? The answer is: ```c // Compare two registers and jump if false.
int cgcompare_and_jump(int ASTop, int r1, int r2, int label, int type) { int size = cgprimsize(type); // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); switch (size) { case 1: fprintf(Outfile, "\tcmpb\t%s, %s\n", breglist[r2], breglist[r1]); break; case 4: fprintf(Outfile, "\tcmpl\t%s, %s\n", dreglist[r2], dreglist[r1]); break; default: fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); } fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); freeall_registers(NOREG); return (NOREG); } ``` Looking at the code, we can definitely free `r1` and `r2`, so let's try that instead of freeing all the registers. Yes, that helps, and all our regression tests still pass. However, another function is also freeing all the registers. It's time to use `gdb` and follow the execution. ## The Real Culprit It looks like the real culprit is that I forgot that many operations can be part of an expression, and I can't free all registers until the expression's result is either used or discarded. As I looked at the execution with `gdb`, I saw that the code that deals with ternary operators is freeing registers, even though this may only be part of a bigger expression with registers already allocated (in `gen.c`): ```c static int gen_ternary(struct ASTnode *n) { ... // Generate the condition code genAST(n->left, Lfalse, NOLABEL, NOLABEL, n->op); genfreeregs(NOREG); // HERE // Get a register to hold the result of the two expressions reg = alloc_register(); // Generate the true expression and the false label. // Move the expression result into the known register. // Don't free the register holding the result, though! expreg = genAST(n->mid, NOLABEL, NOLABEL, NOLABEL, n->op); cgmove(expreg, reg); genfreeregs(reg); // HERE // Generate the false expression and the end label. // Move the expression result into the known register. // Don't free the register holding the result, though! 
expreg = genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); cgmove(expreg, reg); genfreeregs(reg); // HERE ... } ``` Looking through `cg.c`, all the functions in there free registers that are no longer used, so I think that we can lose the `genfreeregs()` straight after the generation of the condition code. Next up, once we move the true expression's value in the register reserved for the ternary result, we can free `expreg`. Ditto for the false expression's value. To make this happen, I've made a previously-static function in `cg.c` global and renamed it: ```c // Return a register to the list of available registers. // Check to see if it's not already there. void cgfreereg(int reg) { ... } ``` We can now rewrite the ternary handling code in `gen.c`: ```c static int gen_ternary(struct ASTnode *n) { ... // Generate the condition code followed // by a jump to the false label. genAST(n->left, Lfalse, NOLABEL, NOLABEL, n->op); // Get a register to hold the result of the two expressions reg = alloc_register(); // Generate the true expression and the false label. // Move the expression result into the known register. expreg = genAST(n->mid, NOLABEL, NOLABEL, NOLABEL, n->op); cgmove(expreg, reg); cgfreereg(expreg); ... // Generate the false expression and the end label. // Move the expression result into the known register. expreg = genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); cgmove(expreg, reg); cgfreereg(expreg); ... } ``` With this change, the compiler now passes several tests: + the triple test: `$ make triple` + a quadruple test where we do one more compiler compilation: ``` $ make quad ... 
./cwj -o cwj0 cg.c decl.c expr.c gen.c main.c misc.c opt.c scan.c stmt.c sym.c tree.c types.c ./cwj0 -o cwj1 cg.c decl.c expr.c gen.c main.c misc.c opt.c scan.c stmt.c sym.c tree.c types.c ./cwj1 -o cwj2 cg.c decl.c expr.c gen.c main.c misc.c opt.c scan.c stmt.c sym.c tree.c types.c size cwj[012] text data bss dec hex filename 109636 3028 48 112712 1b848 cwj0 109636 3028 48 112712 1b848 cwj1 109636 3028 48 112712 1b848 cwj2 ``` + the regression tests with the Gnu C compiled compiler: `$ make test` + the regression tests with our compiler compiled with itself: `$ make test0` That feels very satisfying. ## Conclusion and What's Next I've reached the original goal of this journey: to write a self-compiling compiler. It's taken 60 parts, 5,700 lines of code, 149 regression tests and 108,000 words in the *Readme* files. That said, this doesn't have to be the end of the journey. There is still a lot of work that could be done to the compiler to make it more production ready. However, I've been working sporadically on this for about two months now, so I feel like I can (at least) have a small break. In the next part of our compiler writing journey, I will outline what more can be done with our compiler. Perhaps I'll do some of these things; perhaps you will. [Next step](../61_What_Next/Readme.md) ================================================ FILE: 60_TripleTest/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section we are outputting in to enum { no_seg, text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\t.text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\t.data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes.
int cgprimsize(int type) {
  int bytes = 0;		// Stays 0 only if fatald() ever returns

  // Every pointer is eight bytes wide; otherwise go by the scalar type
  if (ptrtype(type)) {
    bytes = 8;
  } else if (type == P_CHAR) {
    bytes = 1;
  } else if (type == P_INT) {
    bytes = 4;
  } else if (type == P_LONG) {
    bytes = 8;
  } else {
    fatald("Bad type in cgprimsize:", type);
  }
  return (bytes);
}

// Work out an aligned memory offset for a scalar type.
// 'offset' is a position not yet given to anything and
// 'direction' is 1 to move upwards or -1 to move downwards.
// The result is either the offset unchanged (chars) or the
// offset moved in that direction to a 4-byte boundary.
int cgalign(int type, int offset, int direction) {
  int mask;

  // Chars can live at any offset
  if (type == P_CHAR)
    return (offset);

  // Only ints, longs and pointers are acceptable from here on
  if (type != P_INT && type != P_LONG && !ptrtype(type))
    fatald("Bad type in cg_align:", type);

  // x86-64 doesn't demand it, but keep these on a 4-byte boundary.
  // The rounding is generic: mask is (alignment - 1).
  mask = 4 - 1;
  return ((offset + direction * mask) & ~mask);
}

// Running total of the bytes handed out to locals in the current
// function. Kept positive so that rounding the stack pointer is easy.
static int localOffset;
static int stackOffset;

// Reserve stack room for a new local variable and return
// its (negative) position relative to the base pointer.
static int newlocaloffset(int size) {
  int step = 4;			// Never hand out less than four bytes

  if (size > 4)
    step = size;
  localOffset = localOffset + step;
  return (-localOffset);
}

// List of available registers and their names.
// We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "%r10", "%r11", "%r12", "%r13", "%r9", "%r8", "%rcx", "%rdx", "%rsi", "%rdi" }; static char *breglist[] = { "%r10b", "%r11b", "%r12b", "%r13b", "%r9b", "%r8b", "%cl", "%dl", "%sil", "%dil" }; static char *dreglist[] = { "%r10d", "%r11d", "%r12d", "%r13d", "%r9d", "%r8d", "%ecx", "%edx", "%esi", "%edi" }; // Push and pop a register on/off the stack static void pushreg(int r) { fprintf(Outfile, "\tpushq\t%s\n", reglist[r]); } static void popreg(int r) { fprintf(Outfile, "\tpopq\t%s\n", reglist[r]); } // Set all registers as available. // But if reg is positive, don't free that one. void freeall_registers(int keepreg) { int i; // fprintf(Outfile, "# freeing all registers\n"); for (i = 0; i < NUMFREEREGS; i++) if (i != keepreg) freereg[i] = 1; } // When we need to spill a register, we choose // the following register and then cycle through // the remaining registers. The spillreg increments // continually, so we need to take a modulo NUMFREEREGS // on it. static int spillreg = 0; // Allocate a free register. Return the number of // the register. Die if no available registers. int alloc_register(void) { int reg; for (reg = 0; reg < NUMFREEREGS; reg++) { if (freereg[reg]) { freereg[reg] = 0; // fprintf(Outfile, "# allocated register %s\n", reglist[reg]); return (reg); } } // We have no registers, so we must spill one reg = (spillreg % NUMFREEREGS); spillreg++; // fprintf(Outfile, "# spilling reg %s\n", reglist[reg]); pushreg(reg); return (reg); } // Return a register to the list of available registers. // Check to see if it's not already there. 
void cgfreereg(int reg) {
  // Freeing a register that is already marked free is an internal error
  if (freereg[reg] != 0) {
    // fprintf(Outfile, "# error trying to free register %s\n", reglist[reg]);
    fatald("Error trying to free register", reg);
  }
  // If this was a spilled register, get it back.
  // Note that while any spill is outstanding, the register popped is
  // the most recently spilled one (spillreg counts the spills), which
  // is not necessarily the 'reg' passed in, and 'reg' stays allocated.
  if (spillreg > 0) {
    spillreg--;
    reg = (spillreg % NUMFREEREGS);
    // fprintf(Outfile, "# unspilling reg %s\n", reglist[reg]);
    popreg(reg);
  } else {
    // fprintf(Outfile, "# freeing reg %s\n", reglist[reg]);
    freereg[reg] = 1;
  }
}

// Spill all registers on the stack:
// pushes registers 0 .. NUMFREEREGS-1 in ascending order
void spill_all_regs(void) {
  int i;

  for (i = 0; i < NUMFREEREGS; i++)
    pushreg(i);
}

// Unspill all registers from the stack:
// pops in descending order, the reverse of spill_all_regs()
static void unspill_all_regs(void) {
  int i;

  for (i = NUMFREEREGS - 1; i >= 0; i--)
    popreg(i);
}

// Print out the assembly preamble: mark all registers free, switch to
// the text segment, emit a .file directive naming 'filename' and then
// the hand-written __switch helper that cgswitch() jumps to.
void cgpreamble(char *filename) {
  freeall_registers(NOREG);
  cgtextseg();
  // The double quotes around the filename are written with fputc()
  fprintf(Outfile, "\t.file 1 ");
  fputc('"', Outfile);
  fprintf(Outfile, "%s", filename);
  fputc('"', Outfile);
  fputc('\n', Outfile);
  fprintf(Outfile,
	  "# internal switch(expr) routine\n"
	  "# %%rsi = switch table, %%rax = expr\n"
	  "# from SubC: http://www.t3x.org/subc/\n"
	  "\n"
	  "__switch:\n"
	  " pushq %%rsi\n"
	  " movq %%rdx, %%rsi\n"
	  " movq %%rax, %%rbx\n"
	  " cld\n"
	  " lodsq\n"
	  " movq %%rax, %%rcx\n"
	  "__next:\n"
	  " lodsq\n"
	  " movq %%rax, %%rdx\n"
	  " lodsq\n"
	  " cmpq %%rdx, %%rbx\n"
	  " jnz __no\n"
	  " popq %%rsi\n"
	  " jmp *%%rax\n"
	  "__no:\n"
	  " loop __next\n"
	  " lodsq\n"
	  " popq %%rsi\n"
	  " jmp *%%rax\n\n");
}

// Nothing to do
void cgpostamble() {
}

// Print out a function preamble: the label, the frame set-up, the
// copying of register parameters into stack slots, and the stack
// adjustment that makes room for parameters and locals.
void cgfuncpreamble(struct symtable *sym) {
  char *name = sym->name;
  struct symtable *parm, *locvar;
  int cnt;
  int paramOffset = 16;		// Any pushed params start at this stack offset
  int paramReg = FIRSTPARAMREG;	// Index to the first param register in above reg lists

  // Output in the text segment, reset local offset
  cgtextseg();
  localOffset = 0;

  // Output the function start, save the caller's %rbp
  // and make %rbp the frame pointer for this function.
  // Only C_GLOBAL functions get the .globl/.type directives.
  if (sym->class == C_GLOBAL)
    fprintf(Outfile, "\t.globl\t%s\n" "\t.type\t%s, @function\n", name, name);
  fprintf(Outfile, "%s:\n" "\tpushq\t%%rbp\n" "\tmovq\t%%rsp, %%rbp\n", name);

  // Copy any in-register parameters to the stack, up to six of them
  // The remaining parameters are already on the stack
  for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) {
    if (cnt > 6) {
      // Seventh and later: positive offsets from %rbp, 8 bytes apart
      parm->st_posn = paramOffset;
      paramOffset += 8;
    } else {
      // First six: give each a local slot and store its register there.
      // paramReg walks down the register lists from FIRSTPARAMREG.
      parm->st_posn = newlocaloffset(parm->size);
      cgstorlocal(paramReg--, parm);
    }
  }

  // For the remainder, if they are a parameter then they are
  // already on the stack. If only a local, make a stack position.
  for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) {
    locvar->st_posn = newlocaloffset(locvar->size);
  }

  // Align the stack pointer to be a multiple of 16
  // less than its previous value
  stackOffset = (localOffset + 15) & ~15;
  fprintf(Outfile, "\taddq\t$%d,%%rsp\n", -stackOffset);
}

// Print out a function postamble: the end label, the undoing of the
// stack adjustment, the frame pointer restore and the return.
// All registers are marked free afterwards.
void cgfuncpostamble(struct symtable *sym) {
  cglabel(sym->st_endlabel);
  fprintf(Outfile, "\taddq\t$%d,%%rsp\n", stackOffset);
  fputs("\tpopq %rbp\n" "\tret\n", Outfile);
  freeall_registers(NOREG);
}

// Load an integer literal value into a register.
// Return the number of the register.
// For x86-64, we don't need to worry about the type.
int cgloadint(int value, int type) {
  // Get a new register
  int r = alloc_register();

  fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]);
  return (r);
}

// Load a value from a variable into a register.
// Return the number of the register. If the
// operation is pre- or post-increment/decrement,
// also perform this action.
int cgloadvar(struct symtable *sym, int op) {
  int r, postreg, offset = 1;

  // Get a new register
  r = alloc_register();

  // If the symbol is a pointer, use the size
  // of the type that it points to as any
  // increment or decrement. If not, it's one.
  if (ptrtype(sym->type))
    offset = typesize(value_at(sym->type), sym->ctype);

  // Negate the offset for decrements
  if (op == A_PREDEC || op == A_POSTDEC)
    offset = -offset;

  // If we have a pre-operation, update the variable in
  // memory first so that the load below sees the new value
  if (op == A_PREINC || op == A_PREDEC) {
    // Load the symbol's address
    if (sym->class == C_LOCAL || sym->class == C_PARAM)
      fprintf(Outfile, "\tleaq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]);
    else
      fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", sym->name, reglist[r]);

    // and change the value at that address
    switch (sym->size) {
      case 1:
	fprintf(Outfile, "\taddb\t$%d,(%s)\n", offset, reglist[r]);
	break;
      case 4:
	fprintf(Outfile, "\taddl\t$%d,(%s)\n", offset, reglist[r]);
	break;
      case 8:
	fprintf(Outfile, "\taddq\t$%d,(%s)\n", offset, reglist[r]);
	break;
    }
  }

  // Now load the output register with the value.
  // Locals and parameters are addressed off %rbp, everything
  // else by name relative to %rip. One-byte values are
  // zero-extended (movzbq), four-byte values sign-extended (movslq).
  if (sym->class == C_LOCAL || sym->class == C_PARAM) {
    switch (sym->size) {
      case 1:
	fprintf(Outfile, "\tmovzbq\t%d(%%rbp), %s\n", sym->st_posn,
		reglist[r]);
	break;
      case 4:
	fprintf(Outfile, "\tmovslq\t%d(%%rbp), %s\n", sym->st_posn,
		reglist[r]);
	break;
      case 8:
	fprintf(Outfile, "\tmovq\t%d(%%rbp), %s\n", sym->st_posn,
		reglist[r]);
    }
  } else {
    switch (sym->size) {
      case 1:
	fprintf(Outfile, "\tmovzbq\t%s(%%rip), %s\n", sym->name, reglist[r]);
	break;
      case 4:
	fprintf(Outfile, "\tmovslq\t%s(%%rip), %s\n", sym->name, reglist[r]);
	break;
      case 8:
	fprintf(Outfile, "\tmovq\t%s(%%rip), %s\n", sym->name, reglist[r]);
    }
  }

  // If we have a post-operation, get a new register.
  // The value was loaded above, so r keeps the old value
  // while the variable in memory is updated through postreg.
  if (op == A_POSTINC || op == A_POSTDEC) {
    postreg = alloc_register();

    // Load the symbol's address
    if (sym->class == C_LOCAL || sym->class == C_PARAM)
      fprintf(Outfile, "\tleaq\t%d(%%rbp), %s\n", sym->st_posn,
	      reglist[postreg]);
    else
      fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", sym->name,
	      reglist[postreg]);

    // and change the value at that address
    switch (sym->size) {
      case 1:
	fprintf(Outfile, "\taddb\t$%d,(%s)\n", offset, reglist[postreg]);
	break;
      case 4:
	fprintf(Outfile, "\taddl\t$%d,(%s)\n", offset, reglist[postreg]);
	break;
      case 8:
	fprintf(Outfile, "\taddq\t$%d,(%s)\n", offset, reglist[postreg]);
	break;
    }

    // and free the register
    cgfreereg(postreg);
  }

  // Return the register with the value
  return (r);
}

// Given the label number of a global string,
// load its address into a new register
int cgloadglobstr(int label) {
  // Get a new register
  int r = alloc_register();

  fprintf(Outfile, "\tleaq\tL%d(%%rip), %s\n", label, reglist[r]);
  return (r);
}

// Add two registers together and return
// the number of the register with the result.
// The result is left in r1 and r2 is freed.
int cgadd(int r1, int r2) {
  fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r2], reglist[r1]);
  cgfreereg(r2);
  return (r1);
}

// Subtract the second register from the first and
// return the number of the register with the result.
// The result is left in r1 and r2 is freed.
int cgsub(int r1, int r2) {
  fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]);
  cgfreereg(r2);
  return (r1);
}

// Multiply two registers together and return
// the number of the register with the result.
// The result is left in r1 and r2 is freed.
int cgmul(int r1, int r2) {
  fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r2], reglist[r1]);
  cgfreereg(r2);
  return (r1);
}

// Divide or modulo the first register by the second and
// return the number of the register with the result.
// The dividend goes through %rax (sign-extended into %rdx by cqo);
// the quotient is then taken from %rax for A_DIVIDE, otherwise
// the remainder is taken from %rdx. r2 is freed.
int cgdivmod(int r1, int r2, int op) {
  fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]);
  fprintf(Outfile, "\tcqo\n");
  fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]);
  if (op == A_DIVIDE)
    fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]);
  else
    fprintf(Outfile, "\tmovq\t%%rdx,%s\n", reglist[r1]);
  cgfreereg(r2);
  return (r1);
}

// Bitwise AND r2 into r1, free r2 and return r1
int cgand(int r1, int r2) {
  fprintf(Outfile, "\tandq\t%s, %s\n", reglist[r2], reglist[r1]);
  cgfreereg(r2);
  return (r1);
}

// Bitwise OR r2 into r1, free r2 and return r1
int cgor(int r1, int r2) {
  fprintf(Outfile, "\torq\t%s, %s\n", reglist[r2], reglist[r1]);
  cgfreereg(r2);
  return (r1);
}

// Bitwise XOR r2 into r1, free r2 and return r1
int cgxor(int r1, int r2) {
  fprintf(Outfile, "\txorq\t%s, %s\n", reglist[r2], reglist[r1]);
  cgfreereg(r2);
  return (r1);
}

// Shift r1 left by the amount in r2, free r2 and return r1.
// The shift count is first copied into %cl.
int cgshl(int r1, int r2) {
  fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]);
  fprintf(Outfile, "\tshlq\t%%cl, %s\n", reglist[r1]);
  cgfreereg(r2);
  return (r1);
}

// Shift r1 right by the amount in r2, free r2 and return r1.
// The shift count is first copied into %cl.
int cgshr(int r1, int r2) {
  fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]);
  fprintf(Outfile, "\tshrq\t%%cl, %s\n", reglist[r1]);
  cgfreereg(r2);
  return (r1);
}

// Negate a register's value
int cgnegate(int r) {
  fprintf(Outfile, "\tnegq\t%s\n", reglist[r]);
  return (r);
}

// Invert a register's value
int cginvert(int r) {
  fprintf(Outfile, "\tnotq\t%s\n", reglist[r]);
  return (r);
}

// Logically negate a register's value:
// the register ends up as 1 if it was zero, otherwise 0
int cglognot(int r) {
  fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]);
  fprintf(Outfile, "\tsete\t%s\n", breglist[r]);
  fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]);
  return (r);
}

// Load a boolean value (only 0 or 1)
// into the given register
void cgloadboolean(int r, int val) {
  fprintf(Outfile, "\tmovq\t$%d, %s\n", val, reglist[r]);
}

// Convert an integer value to a boolean value. Jump if
// it's an IF, WHILE, LOGAND or LOGOR operation:
// IF, WHILE and LOGAND jump to the label when the value is zero,
// LOGOR jumps when it is non-zero. For any other operation the
// register itself is set to 0 or 1.
int cgboolean(int r, int op, int label) {
  fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]);
  switch (op) {
    case A_IF:
    case A_WHILE:
    case A_LOGAND:
      fprintf(Outfile, "\tje\tL%d\n", label);
      break;
    case A_LOGOR:
      fprintf(Outfile, "\tjne\tL%d\n", label);
      break;
    default:
      fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]);
      fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]);
  }
  return (r);
}

// Call a function with the given symbol id
// Pop off any arguments pushed on the stack
// Return the register with the result
int cgcall(struct symtable *sym, int numargs) {
  int outr;

  // Call the function
  fprintf(Outfile, "\tcall\t%s@PLT\n", sym->name);

  // Remove any arguments pushed on the stack:
  // each argument beyond the sixth took eight bytes
  if (numargs > 6)
    fprintf(Outfile, "\taddq\t$%d, %%rsp\n", 8 * (numargs - 6));

  // Unspill all the registers
  // NOTE(review): presumably pairs with a spill_all_regs() done by the
  // caller before the arguments were set up -- confirm in gen.c
  unspill_all_regs();

  // Get a new register and copy the return value into it
  outr = alloc_register();
  fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]);
  return (outr);
}

// Given a register with an argument value,
// copy this argument
into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpushq\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmovq\t%s, %s\n", reglist[r], reglist[FIRSTPARAMREG - argposn + 1]); } cgfreereg(r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsalq\t$%d, %s\n", val, reglist[r]); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %s(%%rip)\n", reglist[r], sym->name); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %s(%%rip)\n", breglist[r], sym->name); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %s(%%rip)\n", dreglist[r], sym->name); break; default: fatald("Bad type in cgstorglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %d(%%rbp)\n", reglist[r], sym->st_posn); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %d(%%rbp)\n", breglist[r], sym->st_posn); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %d(%%rbp)\n", dreglist[r], sym->st_posn); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size, type; int initvalue; int i; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the variable (or its elements if an array) // and the type of the variable if (node->stype 
== S_ARRAY) { size = typesize(value_at(node->type), node->ctype); type = value_at(node->type); } else { size = node->size; type = node->type; } // Generate the global identity and the label cgdataseg(); if (node->class == C_GLOBAL) fprintf(Outfile, "\t.globl\t%s\n", node->name); fprintf(Outfile, "%s:\n", node->name); // Output space for one or more elements for (i = 0; i < node->nelems; i++) { // Get any initial value initvalue = 0; if (node->initlist != NULL) initvalue = node->initlist[i]; // Generate the space for this type switch (size) { case 1: fprintf(Outfile, "\t.byte\t%d\n", initvalue); break; case 4: fprintf(Outfile, "\t.long\t%d\n", initvalue); break; case 8: // Generate the pointer to a string literal. Treat a zero value // as actually zero, not the label L0 if (node->initlist != NULL && type == pointer_to(P_CHAR) && initvalue != 0) fprintf(Outfile, "\t.quad\tL%d\n", initvalue); else fprintf(Outfile, "\t.quad\t%d\n", initvalue); break; default: for (i = 0; i < size; i++) fprintf(Outfile, "\t.byte\t0\n"); } } } // Generate a global string and its start label // Don't output the label if append is true. void cgglobstr(int l, char *strvalue, int append) { char *cptr; if (!append) cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\t.byte\t%d\n", *cptr); } } void cgglobstrend(void) { fprintf(Outfile, "\t.byte\t0\n"); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2, int type) { int size = cgprimsize(type); // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); switch (size) { case 1: fprintf(Outfile, "\tcmpb\t%s, %s\n", breglist[r2], breglist[r1]); break; case 4: fprintf(Outfile, "\tcmpl\t%s, %s\n", dreglist[r2], dreglist[r1]); break; default: fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); } fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); cgfreereg(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label, int type) { int size = cgprimsize(type); // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); switch (size) { case 1: fprintf(Outfile, "\tcmpb\t%s, %s\n", breglist[r2], breglist[r1]); break; case 4: fprintf(Outfile, "\tcmpl\t%s, %s\n", dreglist[r2], dreglist[r1]); break; default: fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); } fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); cgfreereg(r1); cgfreereg(r2); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Only return a value if we have a value to return if (reg != NOREG) { // Deal with pointers here as we can't put them in // 
the switch statement if (ptrtype(sym->type)) fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); else { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzbl\t%s, %%eax\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %%eax\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } } } cgjump(sym->st_endlabel); } // Generate code to load the address of an // identifier into a variable. Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (sym->class == C_GLOBAL || sym->class == C_EXTERN || sym->class == C_STATIC) fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", sym->name, reglist[r]); else fprintf(Outfile, "\tleaq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzbq\t(%s), %s\n", reglist[r], reglist[r]); break; case 4: fprintf(Outfile, "\tmovslq\t(%s), %s\n", reglist[r], reglist[r]); break; case 8: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { // Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmovb\t%s, (%s)\n", breglist[r1], reglist[r2]); break; case 4: fprintf(Outfile, "\tmovl\t%s, (%s)\n", dreglist[r1], reglist[r2]); break; case 8: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } // Generate a switch jump table and the code to // load 
the registers and call the switch() code void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; // Get a label for the switch table label = genlabel(); cglabel(label); // Heuristic. If we have no cases, create one case // which points to the default case if (casecount == 0) { caseval[0] = 0; caselabel[0] = defaultlabel; casecount = 1; } // Generate the switch jump table. fprintf(Outfile, "\t.quad\t%d\n", casecount); for (i = 0; i < casecount; i++) fprintf(Outfile, "\t.quad\t%d, L%d\n", caseval[i], caselabel[i]); fprintf(Outfile, "\t.quad\tL%d\n", defaultlabel); // Load the specific registers cglabel(toplabel); fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); fprintf(Outfile, "\tleaq\tL%d(%%rip), %%rdx\n", label); fprintf(Outfile, "\tjmp\t__switch\n"); } // Move value between registers void cgmove(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s, %s\n", reglist[r1], reglist[r2]); } void cglinenum(int line) { fprintf(Outfile, "\t.loc 1 %d 0\n", line); } ================================================ FILE: 60_TripleTest/cg_arm.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for ARMv6 on Raspberry Pi // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. static int freereg[4]; static char *reglist[4] = { "r4", "r5", "r6", "r7" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. 
static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // We have to store large integer literal values in memory. // Keep a list of them which will be output in the postamble #define MAXINTS 1024 int Intlist[MAXINTS]; static int Intslot = 0; // Determine the offset of a large integer // literal from the .L3 label. If the integer // isn't in the list, add it. static void set_int_offset(int val) { int offset = -1; // See if it is already there for (int i = 0; i < Intslot; i++) { if (Intlist[i] == val) { offset = 4 * i; break; } } // Not in the list, so add it if (offset == -1) { offset = 4 * Intslot; if (Intslot == MAXINTS) fatal("Out of int slots in set_int_offset()"); Intlist[Intslot++] = val; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L3+%d\n", offset); } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Print out the assembly postamble void cgpostamble() { // Print out the global variables fprintf(Outfile, ".L2:\n"); for (int i = 0; i < Globs; i++) { if (Symtable[i].stype == S_VARIABLE) fprintf(Outfile, "\t.word %s\n", Symtable[i].name); } // Print out the integer literals fprintf(Outfile, ".L3:\n"); for (int i = 0; i < Intslot; i++) { fprintf(Outfile, "\t.word %d\n", Intlist[i]); } } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, \%%function\n" "%s:\n" "\tpush\t{fp, lr}\n" "\tadd\tfp, sp, #4\n" "\tsub\tsp, sp, #8\n" "\tstr\tr0, [fp, #-8]\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Symtable[id].endlabel); fputs("\tsub\tsp, fp, #4\n" "\tpop\t{fp, pc}\n" "\t.align\t2\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. 
int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); // If the literal value is small, do it with one instruction if (value <= 1000) fprintf(Outfile, "\tmov\t%s, #%d\n", reglist[r], value); else { set_int_offset(value); fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); } return (r); } // Determine the offset of a variable from the .L2 // label. Yes, this is inefficient code. static void set_var_offset(int id) { int offset = 0; // Walk the symbol table up to id. // Find S_VARIABLEs and add on 4 until // we get to our variable for (int i = 0; i < id; i++) { if (Symtable[i].stype == S_VARIABLE) offset += 4; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L2+%d\n", offset); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tldrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s, %s\n", reglist[r1], reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\tmul\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the 
second and // return the number of the register with the result int cgdiv(int r1, int r2) { // To do a divide: r0 holds the dividend, r1 holds the divisor. // The quotient is in r0. fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r1]); fprintf(Outfile, "\tmov\tr1, %s\n", reglist[r2]); fprintf(Outfile, "\tbl\t__aeabi_idiv\n"); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r1]); free_register(r2); return (r1); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]); fprintf(Outfile, "\tbl\t%s\n", Symtable[id].name); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r]); return (r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tlsl\t%s, %s, #%d\n", reglist[r], reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tstr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgstorglob:", Symtable[id].type); } return (r); } // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { if (ptrtype(type)) return (4); switch (type) { case P_CHAR: return (1); case P_INT: case P_LONG: return (4); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Symtable[id].type); fprintf(Outfile, "\t.data\n" "\t.globl\t%s\n", Symtable[id].name); switch (typesize) { case 1: fprintf(Outfile, "%s:\t.byte\t0\n", Symtable[id].name); break; case 4: fprintf(Outfile, "%s:\t.long\t0\n", Symtable[id].name); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "moveq", "movne", "movlt", "movgt", "movle", "movge" }; // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "movne", "moveq", "movge", "movle", "movgt", "movlt" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #1\n", cmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #0\n", invcmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\tuxtb\t%s, %s\n", reglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tb\tL%d\n", l); } // List of inverted branch instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *brlist[] = { "bne", "beq", "bge", "ble", "bgt", "blt" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", brlist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[reg]); cgjump(Symtable[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. Return a new register int cgaddress(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); fprintf(Outfile, "\tmov\t%s, r3\n", reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tldrb\t%s, [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [%s]\n", reglist[r1], reglist[r2]); break; case P_INT: case P_LONG: fprintf(Outfile, "\tstr\t%s, [%s]\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 60_TripleTest/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { no_seg, 
text_seg, data_seg } currSeg = no_seg; void cgtextseg() { if (currSeg != text_seg) { fputs("\tsection .text\n", Outfile); currSeg = text_seg; } } void cgdataseg() { if (currSeg != data_seg) { fputs("\tsection .data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch (type) { case P_CHAR: return (offset); case P_INT: case P_LONG: break; default: if (!ptrtype(type)) fatald("Bad type in cg_align:", type); } // Here we have an int or a long. Align it on a 4-byte offset // I put the generic code here so it can be reused elsewhere. alignment = 4; offset = (offset + direction * (alignment - 1)) & ~(alignment - 1); return (offset); } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; static int stackOffset; // Create the position of a new local variable. static int newlocaloffset(int size) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (size > 4) ? size : 4; return (-localOffset); } // List of available registers and their names. 
// We need a list of byte and doubleword registers, too // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "r10", "r11", "r12", "r13", "r9", "r8", "rcx", "rdx", "rsi", "rdi" }; static char *breglist[] = { "r10b", "r11b", "r12b", "r13b", "r9b", "r8b", "cl", "dl", "sil", "dil" }; static char *dreglist[] = { "r10d", "r11d", "r12d", "r13d", "r9d", "r8d", "ecx", "edx", "esi", "edi" }; // Push and pop a register on/off the stack static void pushreg(int r) { fprintf(Outfile, "\tpush\t%s\n", reglist[r]); } static void popreg(int r) { fprintf(Outfile, "\tpop\t%s\n", reglist[r]); } // Set all registers as available. // But if reg is positive, don't free that one. void freeall_registers(int keepreg) { int i; // fprintf(Outfile, "; freeing all registers\n"); for (i = 0; i < NUMFREEREGS; i++) if (i != keepreg) freereg[i] = 1; } // When we need to spill a register, we choose // the following register and then cycle through // the remaining registers. The spillreg increments // continually, so we need to take a modulo NUMFREEREGS // on it. static int spillreg = 0; // Allocate a free register. Return the number of // the register. Die if no available registers. int alloc_register(void) { int reg; for (reg = 0; reg < NUMFREEREGS; reg++) { if (freereg[reg]) { freereg[reg] = 0; // fprintf(Outfile, "; allocated register %s\n", reglist[reg]); return (reg); } } // We have no registers, so we must spill one reg = (spillreg % NUMFREEREGS); spillreg++; // fprintf(Outfile, "; spilling reg %s\n", reglist[reg]); pushreg(reg); return (reg); } // Return a register to the list of available registers. // Check to see if it's not already there. 
void cgfreereg(int reg) { if (freereg[reg] != 0) { //fprintf(Outfile, "# error trying to free register %s\n", reglist[reg]); fatald("Error trying to free register", reg); } // If this was a spilled register, get it back if (spillreg > 0) { spillreg--; reg = (spillreg % NUMFREEREGS); // fprintf(Outfile, "; unspilling reg %s\n", reglist[reg]); popreg(reg); } else { // fprintf(Outfile, "; freeing reg %s\n", reglist[reg]); freereg[reg] = 1; } } // Spill all registers on the stack void spill_all_regs(void) { int i; for (i = 0; i < NUMFREEREGS; i++) pushreg(i); } // Unspill all registers from the stack static void unspill_all_regs(void) { int i; for (i = NUMFREEREGS - 1; i >= 0; i--) popreg(i); } // Print out the assembly preamble void cgpreamble(char *filename) { freeall_registers(NOREG); cgtextseg(); fprintf(Outfile, ";\t%s\n", filename); fprintf(Outfile, "; internal switch(expr) routine\n" "; rsi = switch table, rax = expr\n" "; from SubC: http://www.t3x.org/subc/\n" "\n" "__switch:\n" " push rsi\n" " mov rsi, rdx\n" " mov rbx, rax\n" " cld\n" " lodsq\n" " mov rcx, rax\n" "__next:\n" " lodsq\n" " mov rdx, rax\n" " lodsq\n" " cmp rbx, rdx\n" " jnz __no\n" " pop rsi\n" " jmp rax\n" "__no:\n" " loop __next\n" " lodsq\n" " pop rsi\n" " jmp rax\n\n"); } // Nothing to do void cgpostamble() { } // Print out a function preamble void cgfuncpreamble(struct symtable *sym) { char *name = sym->name; struct symtable *parm, *locvar; int cnt; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the rsp and rbp // if (sym->class == C_GLOBAL) if(!sym->extinit) { fprintf(Outfile, "\tglobal\t%s\n", name); sym->extinit = 1; } fprintf(Outfile, "%s:\n" "\tpush\trbp\n" "\tmov\trbp, rsp\n", name); // Copy any in-register parameters to the stack, up to six of them // The 
remaining parameters are already on the stack for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) { if (cnt > 6) { parm->st_posn = paramOffset; paramOffset += 8; } else { parm->st_posn = newlocaloffset(parm->size); cgstorlocal(paramReg--, parm); } } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) { locvar->st_posn = newlocaloffset(locvar->size); } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\tadd\trsp, %d\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->st_endlabel); fprintf(Outfile, "\tadd\trsp, %d\n", stackOffset); fputs("\tpop rbp\n" "\tret\n", Outfile); freeall_registers(NOREG); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value); return (r); } // Load a value from a variable into a register. // Return the number of the register. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadvar(struct symtable *sym, int op) { int r, postreg, offset = 1; if(!sym->extinit) { fprintf(Outfile, "extern\t%s\n", sym->name); sym->extinit = 1; } // Get a new register r = alloc_register(); // If the symbol is a pointer, use the size // of the type that it points to as any // increment or decrement. If not, it's one. 
if (ptrtype(sym->type)) offset = typesize(value_at(sym->type), sym->ctype); // Negate the offset for decrements if (op == A_PREDEC || op == A_POSTDEC) offset = -offset; // If we have a pre-operation if (op == A_PREINC || op == A_PREDEC) { // Load the symbol's address if (sym->class == C_LOCAL || sym->class == C_PARAM) fprintf(Outfile, "\tlea\t%s, [rbp+%d]\n", reglist[r], sym->st_posn); else fprintf(Outfile, "\tlea\t%s, [%s]\n", reglist[r], sym->name); // and change the value at that address switch (sym->size) { case 1: fprintf(Outfile, "\tadd\tbyte [%s], %d\n", reglist[r], offset); break; case 4: fprintf(Outfile, "\tadd\tdword [%s], %d\n", reglist[r], offset); break; case 8: fprintf(Outfile, "\tadd\tqword [%s], %d\n", reglist[r], offset); break; } } // Now load the output register with the value if (sym->class == C_LOCAL || sym->class == C_PARAM) { switch (sym->size) { case 1: fprintf(Outfile, "\tmovzx\t%s, byte [rbp+%d]\n", reglist[r], sym->st_posn); break; case 4: fprintf(Outfile, "\tmovsxd\t%s, dword [rbp+%d]\n", reglist[r], sym->st_posn); break; case 8: fprintf(Outfile, "\tmov\t%s, [rbp+%d]\n", reglist[r], sym->st_posn); } } else { switch (sym->size) { case 1: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], sym->name); break; case 4: fprintf(Outfile, "\tmovsxd\t%s, dword [%s]\n", reglist[r], sym->name); break; case 8: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], sym->name); } } // If we have a post-operation, get a new register if (op == A_POSTINC || op == A_POSTDEC) { postreg = alloc_register(); // Load the symbol's address if (sym->class == C_LOCAL || sym->class == C_PARAM) fprintf(Outfile, "\tlea\t%s, [rbp+%d]\n", reglist[postreg], sym->st_posn); else fprintf(Outfile, "\tlea\t%s, [%s]\n", reglist[postreg], sym->name); // and change the value at that address switch (sym->size) { case 1: fprintf(Outfile, "\tadd\tbyte [%s], %d\n", reglist[postreg], offset); break; case 4: fprintf(Outfile, "\tadd\tdword [%s], %d\n", reglist[postreg], offset); 
break; case 8: fprintf(Outfile, "\tadd\tqword [%s], %d\n", reglist[postreg], offset); break; } // and free the register cgfreereg(postreg); } // Return the register with the value return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int label) { // Get a new register int r = alloc_register(); fprintf(Outfile, "\tmov\t%s, L%d\n", reglist[r], label); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r1], reglist[r2]); cgfreereg(r2); return (r1); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]); cgfreereg(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timul\t%s, %s\n", reglist[r1], reglist[r2]); cgfreereg(r2); return (r1); } // Divide or modulo the first register by the second and // return the number of the register with the result int cgdivmod(int r1, int r2, int op) { fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]); if (op == A_DIVIDE) fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]); else fprintf(Outfile, "\tmov\t%s, rdx\n", reglist[r1]); cgfreereg(r2); return (r1); } int cgand(int r1, int r2) { fprintf(Outfile, "\tand\t%s, %s\n", reglist[r1], reglist[r2]); cgfreereg(r2); return (r1); } int cgor(int r1, int r2) { fprintf(Outfile, "\tor\t%s, %s\n", reglist[r1], reglist[r2]); cgfreereg(r2); return (r1); } int cgxor(int r1, int r2) { fprintf(Outfile, "\txor\t%s, %s\n", reglist[r1], reglist[r2]); cgfreereg(r2); return (r1); } int cgshl(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshl\t%s, cl\n", reglist[r1]); 
cgfreereg(r2); return (r1); } int cgshr(int r1, int r2) { fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]); fprintf(Outfile, "\tshr\t%s, cl\n", reglist[r1]); cgfreereg(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tneg\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnot\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r], breglist[r]); return (r); } // Load a boolean value (only 0 or 1) // into the given register void cgloadboolean(int r, int val) { fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], val); } // Convert an integer value to a boolean value. Jump if // it's an IF, WHILE, LOGAND or LOGOR operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); switch (op) { case A_IF: case A_WHILE: case A_LOGAND: fprintf(Outfile, "\tje\tL%d\n", label); break; case A_LOGOR: fprintf(Outfile, "\tjne\tL%d\n", label); break; default: fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzx\t%s, byte %s\n", reglist[r], breglist[r]); } return (r); } // Call a function with the given symbol id // Pop off any arguments pushed on the stack // Return the register with the result int cgcall(struct symtable *sym, int numargs) { int outr; // Call the function if(!sym->extinit) { fprintf(Outfile, "extern\t%s\n", sym->name); sym->extinit = 1; } fprintf(Outfile, "\tcall\t%s\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\tadd\trsp, %d\n", 8 * (numargs - 6)); // Unspill all the registers unspill_all_regs(); // Get a new register and copy the return value into it outr = alloc_register(); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[outr]); return (outr); } // Given a 
register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function // call. Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpush\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmov\t%s, %s\n", reglist[FIRSTPARAMREG - argposn + 1], reglist[r]); } cgfreereg(r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsal\t%s, %d\n", reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if(!sym->extinit) { fprintf(Outfile, "extern\t%s\n", sym->name); sym->extinit = 1; } if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, dreglist[r]); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\tqword\t[rbp+%d], %s\n", sym->st_posn, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\tbyte\t[rbp+%d], %s\n", sym->st_posn, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\tdword\t[rbp+%d], %s\n", sym->st_posn, dreglist[r]); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size, type; int initvalue; int i; if (node == NULL) return; if (node->stype 
== S_FUNCTION) return; // Get the size of the variable (or its elements if an array) // and the type of the variable if (node->stype == S_ARRAY) { size = typesize(value_at(node->type), node->ctype); type = value_at(node->type); } else { size = node->size; type = node->type; } // Generate the global identity and the label cgdataseg(); if (node->class == C_GLOBAL) fprintf(Outfile, "\tglobal\t%s\n", node->name); if(!node->extinit) { node->extinit = 1; } fprintf(Outfile, "%s:\n", node->name); // Output space for one or more elements for (i = 0; i < node->nelems; i++) { // Get any initial value initvalue = 0; if (node->initlist != NULL) initvalue = node->initlist[i]; // Generate the space for this type // original version switch(size) { case 1: fprintf(Outfile, "\tdb\t%d\n", initvalue); break; case 4: fprintf(Outfile, "\tdd\t%d\n", initvalue); break; case 8: // Generate the pointer to a string literal. Treat a zero value // as actually zero, not the label L0 if (node->initlist != NULL && type == pointer_to(P_CHAR) && initvalue != 0) fprintf(Outfile, "\tdq\tL%d\n", initvalue); else fprintf(Outfile, "\tdq\t%d\n", initvalue); break; default: for (i = 0; i < size; i++) fprintf(Outfile, "\tdb\t0\n"); } } } // Generate a global string and its start label // Don't output the label if append is true. void cgglobstr(int l, char *strvalue, int append) { char *cptr; if (!append) cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\tdb\t%d\n", *cptr); } } void cgglobstrend(void) { fprintf(Outfile, "\tdb\t0\n"); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2, int type) { int size = cgprimsize(type); // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); switch (size) { case 1: fprintf(Outfile, "\tcmp\t%s, %s\n", breglist[r1], breglist[r2]); break; case 4: fprintf(Outfile, "\tcmp\t%s, %s\n", dreglist[r1], dreglist[r2]); break; default: fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); } fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); cgfreereg(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label, int type) { int size = cgprimsize(type); // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); switch (size) { case 1: fprintf(Outfile, "\tcmp\t%s, %s\n", breglist[r1], breglist[r2]); break; case 4: fprintf(Outfile, "\tcmp\t%s, %s\n", dreglist[r1], dreglist[r2]); break; default: fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); } fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); cgfreereg(r1); cgfreereg(r2); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Only return a value if we have a value to return if (reg != NOREG) { // Deal with pointers here as we can't put them in // the 
switch statement if (ptrtype(sym->type)) fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); else { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzx\teax, %s\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmov\teax, %s\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } } } cgjump(sym->st_endlabel); } // Generate code to load the address of an // identifier into a variable. Return a new register int cgaddress(struct symtable *sym) { int r = alloc_register(); if (!sym->extinit) { fprintf(Outfile, "extern\t%s\n", sym->name); sym->extinit = 1; } if (sym->class == C_GLOBAL || sym->class == C_EXTERN || sym->class == C_STATIC) fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r], sym->name); else fprintf(Outfile, "\tlea\t%s, [rbp+%d]\n", reglist[r], sym->st_posn); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], reglist[r]); break; case 4: fprintf(Outfile, "\tmovsx\t%s, dword [%s]\n", reglist[r], reglist[r]); break; case 8: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { //Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmov\t[%s], byte %s\n", reglist[r2], breglist[r1]); break; case 4: fprintf(Outfile, "\tmov\t[%s], dword %s\n", reglist[r2], dreglist[r1]); break; case 8: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; default: fatald("Can't cgstoderef on type:", type); 
} return (r1); } // Generate a switch jump table and the code to // load the registers and call the switch() code void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; // Get a label for the switch table label = genlabel(); cglabel(label); // Heuristic. If we have no cases, create one case // which points to the default case if (casecount == 0) { caseval[0] = 0; caselabel[0] = defaultlabel; casecount = 1; } // Generate the switch jump table. fprintf(Outfile, "\tdq\t%d\n", casecount); for (i = 0; i < casecount; i++) fprintf(Outfile, "\tdq\t%d, L%d\n", caseval[i], caselabel[i]); fprintf(Outfile, "\tdq\tL%d\n", defaultlabel); // Load the specific registers cglabel(toplabel); fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); fprintf(Outfile, "\tmov\trdx, L%d\n", label); fprintf(Outfile, "\tjmp\t__switch\n"); } // Move value between registers void cgmove(int r1, int r2) { fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r2], reglist[r1]); } void cglinenum(int line) { //fprintf(Outfile, ";\t.loc 1 %d 0\n", line); } ================================================ FILE: 60_TripleTest/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Linestart; // True if at start of a line extern_ int Putback; // Character put back by scanner extern_ struct symtable *Functionid; // Symbol ptr of the current function extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ char *Infilename; // Name of file we are parsing extern_ char *Outfilename; // Name of file we opened as Outfile extern_ struct token Token; // Last token scanned extern_ struct token Peektoken; // A look-ahead token extern_ char Text[TEXTLEN + 1]; // Last identifier scanned extern_ int Looplevel; // Depth of nested loops extern_ int Switchlevel; // Depth of nested switches 
extern char *Tstring[]; // List of token strings // Symbol table lists extern_ struct symtable *Globhead, *Globtail; // Global variables and functions extern_ struct symtable *Loclhead, *Locltail; // Local variables extern_ struct symtable *Parmhead, *Parmtail; // Local parameters extern_ struct symtable *Membhead, *Membtail; // Temp list of struct/union members extern_ struct symtable *Structhead, *Structtail; // List of struct types extern_ struct symtable *Unionhead, *Uniontail; // List of union types extern_ struct symtable *Enumhead, *Enumtail; // List of enum types and values extern_ struct symtable *Typehead, *Typetail; // List of typedefs // Command-line flags extern_ int O_dumpAST; // If true, dump the AST trees extern_ int O_dumpsym; // If true, dump the symbol table extern_ int O_keepasm; // If true, keep any assembly files extern_ int O_assemble; // If true, assemble the assembly files extern_ int O_dolink; // If true, link the object files extern_ int O_verbose; // If true, print info on compilation stages ================================================ FILE: 60_TripleTest/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 static struct symtable *composite_declaration(int type); static int typedef_declaration(struct symtable **ctype); static int type_of_typedef(char *name, struct symtable **ctype); static void enum_declaration(void); // Parse the current token and return a primitive type enum value, // a pointer to any composite type and possibly modify // the class of the type. 
int parse_type(struct symtable **ctype, int *class) { int type, exstatic = 1; // See if the class has been changed to extern or static while (exstatic) { switch (Token.token) { case T_EXTERN: if (*class == C_STATIC) fatal("Illegal to have extern and static at the same time"); *class = C_EXTERN; scan(&Token); break; case T_STATIC: if (*class == C_LOCAL) fatal("Compiler doesn't support static local declarations"); if (*class == C_EXTERN) fatal("Illegal to have extern and static at the same time"); *class = C_STATIC; scan(&Token); break; default: exstatic = 0; } } // Now work on the actual type keyword switch (Token.token) { case T_VOID: type = P_VOID; scan(&Token); break; case T_CHAR: type = P_CHAR; scan(&Token); break; case T_INT: type = P_INT; scan(&Token); break; case T_LONG: type = P_LONG; scan(&Token); break; // For the following, if we have a ';' after the // parsing then there is no type, so return -1. // Example: struct x {int y; int z}; case T_STRUCT: type = P_STRUCT; *ctype = composite_declaration(P_STRUCT); if (Token.token == T_SEMI) type = -1; break; case T_UNION: type = P_UNION; *ctype = composite_declaration(P_UNION); if (Token.token == T_SEMI) type = -1; break; case T_ENUM: type = P_INT; // Enums are really ints enum_declaration(); if (Token.token == T_SEMI) type = -1; break; case T_TYPEDEF: type = typedef_declaration(ctype); if (Token.token == T_SEMI) type = -1; break; case T_IDENT: type = type_of_typedef(Text, ctype); break; default: fatals("Illegal type, token", Token.tokstr); } return (type); } // Given a type parsed by parse_type(), scan in any following // '*' tokens and return the new type int parse_stars(int type) { while (1) { if (Token.token != T_STAR) break; type = pointer_to(type); scan(&Token); } return (type); } // Parse a type which appears inside a cast int parse_cast(struct symtable **ctype) { int type, class = 0; // Get the type inside the parentheses type = parse_stars(parse_type(ctype, &class)); // Do some error checking. 
I'm sure more can be done if (type == P_STRUCT || type == P_UNION || type == P_VOID) fatal("Cannot cast to a struct, union or void type"); return (type); } // Given a type, parse an expression of literals and ensure // that the type of this expression matches the given type. // Parse any type cast that precedes the expression. // If an integer literal, return this value. // If a string literal, return the label number of the string. int parse_literal(int type) { struct ASTnode *tree; // Parse the expression and optimise the resulting AST tree tree = optimise(binexpr(0)); // If there's a cast, get the child and // mark it as having the type from the cast if (tree->op == A_CAST) { tree->left->type = tree->type; tree = tree->left; } // The tree must now have an integer or string literal if (tree->op != A_INTLIT && tree->op != A_STRLIT) fatal("Cannot initialise globals with a general expression"); // If the type is char * and if (type == pointer_to(P_CHAR)) { // We have a string literal, return the label number if (tree->op == A_STRLIT) return (tree->a_intvalue); // We have a zero int literal, so that's a NULL if (tree->op == A_INTLIT && tree->a_intvalue == 0) return (0); } // We only get here with an integer literal. The input type // is an integer type and is wide enough to hold the literal value if (inttype(type) && typesize(type, NULL) >= typesize(tree->type, NULL)) return (tree->a_intvalue); fatal("Type mismatch: literal vs. variable"); return (0); // Keep -Wall happy } // Given a pointer to a symbol that may already exist // return true if this symbol doesn't exist. 
We use // this function to convert externs into globals int is_new_symbol(struct symtable *sym, int class, int type, struct symtable *ctype) { // There is no existing symbol, thus is new if (sym==NULL) return(1); // global versus extern: if they match that it's not new // and we can convert the class to global if ((sym->class== C_GLOBAL && class== C_EXTERN) || (sym->class== C_EXTERN && class== C_GLOBAL)) { // If the types don't match, there's a problem if (type != sym->type) fatals("Type mismatch between global/extern", sym->name); // Struct/unions, also compare the ctype if (type >= P_STRUCT && ctype != sym->ctype) fatals("Type mismatch between global/extern", sym->name); // If we get to here, the types match, so mark the symbol // as global sym->class= C_GLOBAL; // Return that symbol is not new return(0); } // It must be a duplicate symbol if we get here fatals("Duplicate global variable declaration", sym->name); return(-1); // Keep -Wall happy } // Given the type, name and class of a scalar variable, // parse any initialisation value and allocate storage for it. // Return the variable's symbol table entry. 
static struct symtable *scalar_declaration(char *varname, int type, struct symtable *ctype, int class, struct ASTnode **tree) { struct symtable *sym = NULL; struct ASTnode *varnode, *exprnode; *tree = NULL; // Add this as a known scalar switch (class) { case C_STATIC: case C_EXTERN: case C_GLOBAL: // See if this variable is new or already exists sym= findglob(varname); if (is_new_symbol(sym, class, type, ctype)) sym = addglob(varname, type, ctype, S_VARIABLE, class, 1, 0); break; case C_LOCAL: sym = addlocl(varname, type, ctype, S_VARIABLE, 1); break; case C_PARAM: sym = addparm(varname, type, ctype, S_VARIABLE); break; case C_MEMBER: sym = addmemb(varname, type, ctype, S_VARIABLE, 1); break; } // The variable is being initialised if (Token.token == T_ASSIGN) { // Only possible for a global or local if (class != C_GLOBAL && class != C_LOCAL && class != C_STATIC) fatals("Variable can not be initialised", varname); scan(&Token); // Globals must be assigned a literal value if (class == C_GLOBAL || class == C_STATIC) { // Create one initial value for the variable and // parse this value sym->initlist = (int *) malloc(sizeof(int)); sym->initlist[0] = parse_literal(type); } if (class == C_LOCAL) { // Make an A_IDENT AST node with the variable varnode = mkastleaf(A_IDENT, sym->type, sym->ctype, sym, 0); // Get the expression for the assignment, make into a rvalue exprnode = binexpr(0); exprnode->rvalue = 1; // Ensure the expression's type matches the variable exprnode = modify_type(exprnode, varnode->type, varnode->ctype, 0); if (exprnode == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree *tree = mkastnode(A_ASSIGN, exprnode->type, exprnode->ctype, exprnode, NULL, varnode, NULL, 0); } } // Generate any global space if (class == C_GLOBAL || class == C_STATIC) genglobsym(sym); return (sym); } // Given the type, name and class of an variable, parse // the size of the array, if any. 
Then parse any initialisation // value and allocate storage for it. // Return the variable's symbol table entry. static struct symtable *array_declaration(char *varname, int type, struct symtable *ctype, int class) { struct symtable *sym; // New symbol table entry int nelems = -1; // Assume the number of elements won't be given int maxelems; // The maximum number of elements in the init list int *initlist; // The list of initial elements int i = 0, j; // Skip past the '[' scan(&Token); // See we have an array size if (Token.token != T_RBRACKET) { nelems = parse_literal(P_INT); if (nelems <= 0) fatald("Array size is illegal", nelems); } // Ensure we have a following ']' match(T_RBRACKET, "]"); // Add this as a known array. We treat the // array as a pointer to its elements' type switch (class) { case C_STATIC: case C_EXTERN: case C_GLOBAL: // See if this variable is new or already exists sym= findglob(varname); if (is_new_symbol(sym, class, pointer_to(type), ctype)) sym = addglob(varname, pointer_to(type), ctype, S_ARRAY, class, 0, 0); break; case C_LOCAL: sym = addlocl(varname, pointer_to(type), ctype, S_ARRAY, 0); break; default: fatal("Declaration of array parameters is not implemented"); } // Array initialisation if (Token.token == T_ASSIGN) { if (class != C_GLOBAL && class != C_STATIC) fatals("Variable can not be initialised", varname); scan(&Token); // Get the following left curly bracket match(T_LBRACE, "{"); #define TABLE_INCREMENT 10 // If the array already has nelems, allocate that many elements // in the list. Otherwise, start with TABLE_INCREMENT. 
if (nelems != -1) maxelems = nelems; else maxelems = TABLE_INCREMENT; initlist = (int *) malloc(maxelems * sizeof(int)); // Loop getting a new literal value from the list while (1) { // Check we can add the next value, then parse and add it if (nelems != -1 && i == maxelems) fatal("Too many values in initialisation list"); initlist[i++] = parse_literal(type); // Increase the list size if the original size was // not set and we have hit the end of the current list if (nelems == -1 && i == maxelems) { maxelems += TABLE_INCREMENT; initlist = (int *) realloc(initlist, maxelems * sizeof(int)); } // Leave when we hit the right curly bracket if (Token.token == T_RBRACE) { scan(&Token); break; } // Next token must be a comma, then comma(); } // Zero any unused elements in the initlist. // Attach the list to the symbol table entry for (j = i; j < sym->nelems; j++) initlist[j] = 0; if (i > nelems) nelems = i; sym->initlist = initlist; } // Set the size of the array and the number of elements // Only externs can have no elements. if (class != C_EXTERN && nelems<=0) fatals("Array must have non-zero elements", sym->name); sym->nelems = nelems; sym->size = sym->nelems * typesize(type, ctype); // Generate any global space if (class == C_GLOBAL || class == C_STATIC) genglobsym(sym); return (sym); } // Given a pointer to the new function being declared and // a possibly NULL pointer to the function's previous declaration, // parse a list of parameters and cross-check them against the // previous declaration. 
Return the count of parameters static int param_declaration_list(struct symtable *oldfuncsym, struct symtable *newfuncsym) { int type, paramcnt = 0; struct symtable *ctype; struct symtable *protoptr = NULL; struct ASTnode *unused; // Get the pointer to the first prototype parameter if (oldfuncsym != NULL) protoptr = oldfuncsym->member; // Loop getting any parameters while (Token.token != T_RPAREN) { // If the first token is 'void' if (Token.token == T_VOID) { // Peek at the next token. If a ')', the function // has no parameters, so leave the loop. scan(&Peektoken); if (Peektoken.token == T_RPAREN) { // Move the Peektoken into the Token paramcnt = 0; scan(&Token); break; } } // Get the type of the next parameter type = declaration_list(&ctype, C_PARAM, T_COMMA, T_RPAREN, &unused); if (type == -1) fatal("Bad type in parameter list"); // Ensure the type of this parameter matches the prototype if (protoptr != NULL) { if (type != protoptr->type) fatald("Type doesn't match prototype for parameter", paramcnt + 1); protoptr = protoptr->next; } paramcnt++; // Stop when we hit the right parenthesis if (Token.token == T_RPAREN) break; // We need a comma as separator comma(); } if (oldfuncsym != NULL && paramcnt != oldfuncsym->nelems) fatals("Parameter count mismatch for function", oldfuncsym->name); // Return the count of parameters return (paramcnt); } // // function_declaration: type identifier '(' parameter_list ')' ; // | type identifier '(' parameter_list ')' compound_statement ; // // Parse the declaration of function. static struct symtable *function_declaration(char *funcname, int type, struct symtable *ctype, int class) { struct ASTnode *tree, *finalstmt; struct symtable *oldfuncsym, *newfuncsym = NULL; int endlabel, paramcnt; int linenum= Line; // Text has the identifier's name. If this exists and is a // function, get the id. Otherwise, set oldfuncsym to NULL. 
if ((oldfuncsym = findsymbol(funcname)) != NULL) if (oldfuncsym->stype != S_FUNCTION) oldfuncsym = NULL; // If this is a new function declaration, get a // label-id for the end label, and add the function // to the symbol table, if (oldfuncsym == NULL) { endlabel = genlabel(); // Assumtion: functions only return scalar types, so NULL below newfuncsym = addglob(funcname, type, NULL, S_FUNCTION, class, 0, endlabel); } // Scan in the '(', any parameters and the ')'. // Pass in any existing function prototype pointer lparen(); paramcnt = param_declaration_list(oldfuncsym, newfuncsym); rparen(); // If this is a new function declaration, update the // function symbol entry with the number of parameters. // Also copy the parameter list into the function's node. if (newfuncsym) { newfuncsym->nelems = paramcnt; newfuncsym->member = Parmhead; oldfuncsym = newfuncsym; } // Clear out the parameter list Parmhead = Parmtail = NULL; // Declaration ends in a semicolon, only a prototype. if (Token.token == T_SEMI) return (oldfuncsym); // This is not just a prototype. // Set the Functionid global to the function's symbol pointer Functionid = oldfuncsym; // Get the AST tree for the compound statement and mark // that we have parsed no loops or switches yet Looplevel = 0; Switchlevel = 0; lbrace(); tree = compound_statement(0); rbrace(); // If the function type isn't P_VOID .. if (type != P_VOID) { // Error if no statements in the function if (tree == NULL) fatal("No statements in function with non-void type"); // Check that the last AST operation in the // compound statement was a return statement finalstmt = (tree->op == A_GLUE) ? 
tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); } // Build the A_FUNCTION node which has the function's symbol pointer // and the compound statement sub-tree tree = mkastunary(A_FUNCTION, type, ctype, tree, oldfuncsym, endlabel); tree->linenum= linenum; // Do optimisations on the AST tree tree = optimise(tree); // Dump the AST tree if requested if (O_dumpAST) { dumpAST(tree, NOLABEL, 0); fprintf(stdout, "\n\n"); } // Generate the assembly code for it genAST(tree, NOLABEL, NOLABEL, NOLABEL, 0); // Now free the symbols associated // with this function freeloclsyms(); return (oldfuncsym); } // Parse composite type declarations: structs or unions. // Either find an existing struct/union declaration, or build // a struct/union symbol table entry and return its pointer. static struct symtable *composite_declaration(int type) { struct symtable *ctype = NULL; struct symtable *m; struct ASTnode *unused; int offset; int t; // Skip the struct/union keyword scan(&Token); // See if there is a following struct/union name if (Token.token == T_IDENT) { // Find any matching composite type if (type == P_STRUCT) ctype = findstruct(Text); else ctype = findunion(Text); scan(&Token); } // If the next token isn't an LBRACE , this is // the usage of an existing struct/union type. // Return the pointer to the type. if (Token.token != T_LBRACE) { if (ctype == NULL) fatals("unknown struct/union type", Text); return (ctype); } // Ensure this struct/union type hasn't been // previously defined if (ctype) fatals("previously defined struct/union", Text); // Build the composite type and skip the left brace if (type == P_STRUCT) ctype = addstruct(Text); else ctype = addunion(Text); scan(&Token); // Scan in the list of members while (1) { // Get the next member. 
m is used as a dummy t = declaration_list(&m, C_MEMBER, T_SEMI, T_RBRACE, &unused); if (t == -1) fatal("Bad type in member list"); if (Token.token == T_SEMI) scan(&Token); if (Token.token == T_RBRACE) break; } // Attach to the struct type's node rbrace(); if (Membhead == NULL) fatals("No members in struct", ctype->name); ctype->member = Membhead; Membhead = Membtail = NULL; // Set the offset of the initial member // and find the first free byte after it m = ctype->member; m->st_posn = 0; offset = typesize(m->type, m->ctype); // Set the position of each successive member in the composite type // Unions are easy. For structs, align the member and find the next free byte for (m = m->next; m != NULL; m = m->next) { // Set the offset for this member if (type == P_STRUCT) m->st_posn = genalign(m->type, offset, 1); else m->st_posn = 0; // Get the offset of the next free byte after this member offset += typesize(m->type, m->ctype); } // Set the overall size of the composite type ctype->size = offset; return (ctype); } // Parse an enum declaration static void enum_declaration(void) { struct symtable *etype = NULL; char *name = NULL; int intval = 0; // Skip the enum keyword. scan(&Token); // If there's a following enum type name, get a // pointer to any existing enum type node. if (Token.token == T_IDENT) { etype = findenumtype(Text); name = strdup(Text); // As it gets tromped soon scan(&Token); } // If the next token isn't a LBRACE, check // that we have an enum type name, then return if (Token.token != T_LBRACE) { if (etype == NULL) fatals("undeclared enum type:", name); return; } // We do have an LBRACE. Skip it scan(&Token); // If we have an enum type name, ensure that it // hasn't been declared before. 
if (etype != NULL) fatals("enum type redeclared:", etype->name); else // Build an enum type node for this identifier etype = addenum(name, C_ENUMTYPE, 0); // Loop to get all the enum values while (1) { // Ensure we have an identifier // Copy it in case there's an int literal coming up ident(); name = strdup(Text); // Ensure this enum value hasn't been declared before etype = findenumval(name); if (etype != NULL) fatals("enum value redeclared:", Text); // If the next token is an '=', skip it and // get the following int literal if (Token.token == T_ASSIGN) { scan(&Token); if (Token.token != T_INTLIT) fatal("Expected int literal after '='"); intval = Token.intvalue; scan(&Token); } // Build an enum value node for this identifier. // Increment the value for the next enum identifier. etype = addenum(name, C_ENUMVAL, intval++); // Bail out on a right curly bracket, else get a comma if (Token.token == T_RBRACE) break; comma(); } scan(&Token); // Skip over the right curly bracket } // Parse a typedef declaration and return the type // and ctype that it represents static int typedef_declaration(struct symtable **ctype) { int type, class = 0; // Skip the typedef keyword. scan(&Token); // Get the actual type following the keyword type = parse_type(ctype, &class); if (class != 0) fatal("Can't have extern in a typedef declaration"); // See if the typedef identifier already exists if (findtypedef(Text) != NULL) fatals("redefinition of typedef", Text); // Get any following '*' tokens type = parse_stars(type); // It doesn't exist so add it to the typedef list addtypedef(Text, type, *ctype); scan(&Token); return (type); } // Given a typedef name, return the type it represents static int type_of_typedef(char *name, struct symtable **ctype) { struct symtable *t; // Look up the typedef in the list t = findtypedef(name); if (t == NULL) fatals("unknown type", name); scan(&Token); *ctype = t->ctype; return (t->type); } // Parse the declaration of a variable or function. 
// The type and any following '*'s have been scanned, and we // have the identifier in the Token variable. // The class argument is the variable's class. // Return a pointer to the symbol's entry in the symbol table static struct symtable *symbol_declaration(int type, struct symtable *ctype, int class, struct ASTnode **tree) { struct symtable *sym = NULL; char *varname = strdup(Text); // Ensure that we have an identifier. // We copied it above so we can scan more tokens in, e.g. // an assignment expression for a local variable. ident(); // Deal with function declarations if (Token.token == T_LPAREN) { return (function_declaration(varname, type, ctype, class)); } // See if this array or scalar variable has already been declared switch (class) { case C_EXTERN: case C_STATIC: case C_GLOBAL: case C_LOCAL: case C_PARAM: if (findlocl(varname) != NULL) fatals("Duplicate local variable declaration", varname); case C_MEMBER: if (findmember(varname) != NULL) fatals("Duplicate struct/union member declaration", varname); } // Add the array or scalar variable to the symbol table if (Token.token == T_LBRACKET) { sym = array_declaration(varname, type, ctype, class); *tree= NULL; // Local arrays are not initialised } else sym = scalar_declaration(varname, type, ctype, class, tree); return (sym); } // Parse a list of symbols where there is an initial type. // Return the type of the symbols. et1 and et2 are end tokens. int declaration_list(struct symtable **ctype, int class, int et1, int et2, struct ASTnode **gluetree) { int inittype, type; struct symtable *sym; struct ASTnode *tree; *gluetree = NULL; // Get the initial type. 
If -1, it was // a composite type definition, return this if ((inittype = parse_type(ctype, &class)) == -1) return (inittype); // Now parse the list of symbols while (1) { // See if this symbol is a pointer type = parse_stars(inittype); // Parse this symbol sym = symbol_declaration(type, *ctype, class, &tree); // We parsed a function, there is no list so leave if (sym->stype == S_FUNCTION) { if (class != C_GLOBAL && class != C_STATIC) fatal("Function definition not at global level"); return (type); } // Glue any AST tree from a local declaration // to build a sequence of assignments to perform if (*gluetree == NULL) *gluetree = tree; else *gluetree = mkastnode(A_GLUE, P_NONE, NULL, *gluetree, NULL, tree, NULL, 0); // We are at the end of the list, leave if (Token.token == et1 || Token.token == et2) return (type); // Otherwise, we need a comma as separator comma(); } return(0); // Keep -Wall happy } // Parse one or more global declarations, either // variables, functions or structs void global_declarations(void) { struct symtable *ctype= NULL; struct ASTnode *unused; while (Token.token != T_EOF) { declaration_list(&ctype, C_GLOBAL, T_SEMI, T_EOF, &unused); // Skip any semicolons and right curly brackets if (Token.token == T_SEMI) scan(&Token); } } ================================================ FILE: 60_TripleTest/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c void reject_token(struct token *t); int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, int type, struct symtable *ctype, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue); struct ASTnode *mkastleaf(int op, int type, struct symtable *ctype, struct symtable *sym, int intvalue); struct ASTnode *mkastunary(int op, int type, struct symtable *ctype, struct ASTnode *left, struct symtable *sym, int intvalue); void dumpAST(struct ASTnode *n, 
int label, int level); // gen.c int genlabel(void); int genAST(struct ASTnode *n, int iflabel, int looptoplabel, int loopendlabel, int parentASTop); void genpreamble(char *filename); void genpostamble(); void genfreeregs(int keepreg); void genglobsym(struct symtable *node); int genglobstr(char *strvalue, int append); void genglobstrend(void); int genprimsize(int type); int genalign(int type, int offset, int direction); void genreturn(int reg, int id); // cg.c int cgprimsize(int type); int cgalign(int type, int offset, int direction); void cgtextseg(); void cgdataseg(); int alloc_register(void); void freeall_registers(int keepreg); void cgfreereg(int reg); void spill_all_regs(void); void cgpreamble(char *filename); void cgpostamble(); void cgfuncpreamble(struct symtable *sym); void cgfuncpostamble(struct symtable *sym); int cgloadint(int value, int type); int cgloadvar(struct symtable *sym, int op); int cgloadglobstr(int label); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdivmod(int r1, int r2, int op); int cgshlconst(int r, int val); int cgcall(struct symtable *sym, int numargs); void cgcopyarg(int r, int argposn); int cgstorglob(int r, struct symtable *sym); int cgstorlocal(int r, struct symtable *sym); void cgglobsym(struct symtable *node); void cgglobstr(int l, char *strvalue, int append); void cgglobstrend(void); int cgcompare_and_set(int ASTop, int r1, int r2, int type); int cgcompare_and_jump(int ASTop, int r1, int r2, int label, int type); void cglabel(int l); void cgjump(int l); int cgwiden(int r, int oldtype, int newtype); void cgreturn(int reg, struct symtable *sym); int cgaddress(struct symtable *sym); int cgderef(int r, int type); int cgstorderef(int r1, int r2, int type); int cgnegate(int r); int cginvert(int r); int cglognot(int r); void cgloadboolean(int r, int val); int cgboolean(int r, int op, int label); int cgand(int r1, int r2); int cgor(int r1, int r2); int cgxor(int r1, int r2); int cgshl(int r1, int 
r2); int cgshr(int r1, int r2); void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel); void cgmove(int r1, int r2); void cglinenum(int line); // expr.c struct ASTnode *expression_list(int endtoken); struct ASTnode *binexpr(int ptp); // stmt.c struct ASTnode *compound_statement(int inswitch); // misc.c void match(int t, char *what); void semi(void); void lbrace(void); void rbrace(void); void lparen(void); void rparen(void); void ident(void); void comma(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node); struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn); struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn); struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int nelems); struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype); struct symtable *addstruct(char *name); struct symtable *addunion(char *name); struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int nelems); struct symtable *addenum(char *name, int class, int value); struct symtable *addtypedef(char *name, int type, struct symtable *ctype); struct symtable *findglob(char *s); struct symtable *findlocl(char *s); struct symtable *findsymbol(char *s); struct symtable *findmember(char *s); struct symtable *findstruct(char *s); struct symtable *findunion(char *s); struct symtable *findenumtype(char *s); struct symtable *findenumval(char *s); struct symtable *findtypedef(char *s); void clear_symtable(void); void freeloclsyms(void); void freestaticsyms(void); void dumptable(struct symtable *head, char *name, int indent); void dumpsymtables(void); // decl.c int parse_type(struct symtable **ctype, int 
*class); int parse_stars(int type); int parse_cast(struct symtable **ctype); int declaration_list(struct symtable **ctype, int class, int et1, int et2, struct ASTnode **gluetree); void global_declarations(void); // types.c int inttype(int type); int ptrtype(int type); int pointer_to(int type); int value_at(int type); int typesize(int type, struct symtable *ctype); struct ASTnode *modify_type(struct ASTnode *tree, int rtype, struct symtable *rctype, int op); // opt.c struct ASTnode *optimise(struct ASTnode *n); ================================================ FILE: 60_TripleTest/defs.h ================================================ #include #include #include #include #include "incdir.h" // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 enum { TEXTLEN = 512 // Length of identifiers in input }; // Commands and default filenames #define AOUT "a.out" #ifdef __NASM__ #define ASCMD "nasm -g -f elf64 -w-ptr -o " #define LDCMD "cc -g -no-pie -fno-plt -Wall -o " #else #define ASCMD "as -g -o " #define LDCMD "cc -g -o " #endif #define CPPCMD "cpp -nostdinc -isystem " // Token types enum { T_EOF, // Binary operators T_ASSIGN, T_ASPLUS, T_ASMINUS, T_ASSTAR, T_ASSLASH, T_ASMOD, T_QUESTION, T_LOGOR, T_LOGAND, T_OR, T_XOR, T_AMPER, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, T_LSHIFT, T_RSHIFT, T_PLUS, T_MINUS, T_STAR, T_SLASH, T_MOD, // Other operators T_INC, T_DEC, T_INVERT, T_LOGNOT, // Type keywords T_VOID, T_CHAR, T_INT, T_LONG, // Other keywords T_IF, T_ELSE, T_WHILE, T_FOR, T_RETURN, T_STRUCT, T_UNION, T_ENUM, T_TYPEDEF, T_EXTERN, T_BREAK, T_CONTINUE, T_SWITCH, T_CASE, T_DEFAULT, T_SIZEOF, T_STATIC, // Structural tokens T_INTLIT, T_STRLIT, T_SEMI, T_IDENT, T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, T_LBRACKET, T_RBRACKET, T_COMMA, T_DOT, T_ARROW, T_COLON }; // Token structure struct token { int token; // Token type, from the enum list above char *tokstr; // String version of the token int intvalue; // For T_INTLIT, the integer value }; // AST node types. 
// The first few line up
// with the related tokens.
// The number at the end of each row is the enum value
// of the first name on that row.
enum {
  A_ASSIGN = 1, A_ASPLUS, A_ASMINUS, A_ASSTAR,                  //  1
  A_ASSLASH, A_ASMOD, A_TERNARY, A_LOGOR,                       //  5
  A_LOGAND, A_OR, A_XOR, A_AND, A_EQ, A_NE, A_LT,               //  9
  A_GT, A_LE, A_GE, A_LSHIFT, A_RSHIFT,                         // 16
  A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_MOD,               // 21
  A_INTLIT, A_STRLIT, A_IDENT, A_GLUE,                          // 26
  A_IF, A_WHILE, A_FUNCTION, A_WIDEN, A_RETURN,                 // 30
  A_FUNCCALL, A_DEREF, A_ADDR, A_SCALE,                         // 35
  A_PREINC, A_PREDEC, A_POSTINC, A_POSTDEC,                     // 39
  A_NEGATE, A_INVERT, A_LOGNOT, A_TOBOOL, A_BREAK,              // 43
  A_CONTINUE, A_SWITCH, A_CASE, A_DEFAULT, A_CAST               // 48
};

// Primitive types. The bottom 4 bits is an integer
// value that represents the level of indirection,
// e.g. 0= no pointer, 1= pointer, 2= pointer pointer etc.
// The base types are therefore spaced 16 apart.
enum {
  P_NONE, P_VOID = 16, P_CHAR = 32, P_INT = 48, P_LONG = 64,
  P_STRUCT=80, P_UNION=96
};

// Structural types
enum {
  S_VARIABLE, S_FUNCTION, S_ARRAY
};

// Storage classes
enum {
  C_GLOBAL = 1,                 // Globally visible symbol
  C_LOCAL,                      // Locally visible symbol
  C_PARAM,                      // Locally visible function parameter
  C_EXTERN,                     // External globally visible symbol
  C_STATIC,                     // Static symbol, visible in one file
  C_STRUCT,                     // A struct
  C_UNION,                      // A union
  C_MEMBER,                     // Member of a struct or union
  C_ENUMTYPE,                   // A named enumeration type
  C_ENUMVAL,                    // A named enumeration value
  C_TYPEDEF                     // A named typedef
};

// Symbol table structure
struct symtable {
  char *name;                   // Name of a symbol
  int type;                     // Primitive type for the symbol
  struct symtable *ctype;       // If struct/union, ptr to that type
  int stype;                    // Structural type for the symbol
  int class;                    // Storage class for the symbol
  int size;                     // Total size in bytes of this symbol
  int nelems;                   // Functions: # params.
// Arrays: # elements
#define st_endlabel st_posn     // For functions, the end label
  int st_posn;                  // For locals, the negative offset
                                // from the stack base pointer
  int *initlist;                // List of initial values
  struct symtable *next;        // Next symbol in one list
  struct symtable *member;      // First member of a function, struct,
                                // union or enum
#ifdef __NASM__
  int extinit;
#endif
};

// Abstract Syntax Tree structure
struct ASTnode {
  int op;                       // "Operation" to be performed on this tree
  int type;                     // Type of any expression this tree generates
  struct symtable *ctype;       // If struct/union, ptr to that type
  int rvalue;                   // True if the node is an rvalue
  struct ASTnode *left;         // Left, middle and right child trees
  struct ASTnode *mid;
  struct ASTnode *right;
  struct symtable *sym;         // For many AST nodes, the pointer to
                                // the symbol in the symbol table
#define a_intvalue a_size       // For A_INTLIT, the integer value
  int a_size;                   // For A_SCALE, the size to scale by
  int linenum;                  // Line number from where this node comes
};

enum {
  NOREG = -1,                   // Use NOREG when the AST generation
                                // functions have no register to return
  NOLABEL = 0                   // Use NOLABEL when we have no label to
                                // pass to genAST()
};

================================================
FILE: 60_TripleTest/expr.c
================================================
#include "defs.h"
#include "data.h"
#include "decl.h"

// Parsing of expressions
// Copyright (c) 2019 Warren Toomey, GPL3

// expression_list:
//        | expression
//        | expression ',' expression_list
//        ;

// Parse a list of zero or more comma-separated expressions and
// return an AST composed of A_GLUE nodes with the left-hand child
// being the sub-tree of previous expressions (or NULL) and the right-hand
// child being the next expression. Each A_GLUE node will have size field
// set to the number of expressions in the tree at this point.
// If no expressions are parsed, NULL is returned.
// endtoken is the token type that ends the list. This function only
// tests for it: on return it is still the current token, so the
// caller is the one that has to consume it.
struct ASTnode *expression_list(int endtoken) {
  struct ASTnode *tree = NULL;
  struct ASTnode *child = NULL;
  int exprcount = 0;

  // Loop until the end token
  while (Token.token != endtoken) {

    // Parse the next expression and increment the expression count
    child = binexpr(0);
    exprcount++;

    // Build an A_GLUE AST node with the previous tree as the left child
    // and the new expression as the right child. Store the expression count.
    tree = mkastnode(A_GLUE, P_NONE, NULL, tree, NULL, child, NULL, exprcount);

    // Stop when we reach the end token
    if (Token.token == endtoken)
      break;

    // Must have a ',' at this point
    match(T_COMMA, ",");
  }

  // Return the tree of expressions
  return (tree);
}

// Parse a function call and return its AST.
// The function's name is expected in Text, and the '(' is
// still the current token when we are called.
static struct ASTnode *funccall(void) {
  struct ASTnode *tree;
  struct symtable *funcptr;

  // Check that the identifier has been defined as a function,
  // then make a leaf node for it.
  if ((funcptr = findsymbol(Text)) == NULL || funcptr->stype != S_FUNCTION) {
    fatals("Undeclared function", Text);
  }

  // Get the '('
  lparen();

  // Parse the argument expression list
  tree = expression_list(T_RPAREN);

  // XXX Check type of each argument against the function's prototype

  // Build the function call AST node. Store the
  // function's return type as this node's type.
  // Also record the function's symbol-id
  tree = mkastunary(A_FUNCCALL, funcptr->type, funcptr->ctype, tree, funcptr, 0);

  // Get the ')'
  rparen();
  return (tree);
}

// Parse the index into an array and return an AST tree for it.
// left is the already-parsed base expression; the current token is the '['.
static struct ASTnode *array_access(struct ASTnode *left) {
  struct ASTnode *right;

  // Check that the sub-tree is a pointer
  if (!ptrtype(left->type))
    fatal("Not an array or pointer");

  // Get the '['
  scan(&Token);

  // Parse the following expression
  right = binexpr(0);

  // Get the ']'
  match(T_RBRACKET, "]");

  // Ensure that this is of int type
  if (!inttype(right->type))
    fatal("Array index is not of integer type");

  // Make the left tree an rvalue
  left->rvalue = 1;

  // Scale the index by the size of the element's type
  right = modify_type(right, left->type, left->ctype, A_ADD);

  // Return an AST tree where the array's base has the offset added to it,
  // and dereference the element. Still an lvalue at this point.
  left = mkastnode(A_ADD, left->type, left->ctype, left, NULL, right, NULL, 0);
  left = mkastunary(A_DEREF, value_at(left->type), left->ctype, left, NULL, 0);
  return (left);
}

// Parse the member reference of a struct or union
// and return an AST tree for it. If withpointer is true,
// the access is through a pointer to the member.
static struct ASTnode *member_access(struct ASTnode *left, int withpointer) {
  struct ASTnode *right;
  struct symtable *typeptr;
  struct symtable *m;

  // Check that the left AST tree is a pointer to struct or union
  if (withpointer && left->type != pointer_to(P_STRUCT)
      && left->type != pointer_to(P_UNION))
    fatal("Expression is not a pointer to a struct/union");

  // Or, check that the left AST tree is a struct or union.
  // If so, change it from an A_IDENT to an A_ADDR so that
  // we get the base address, not the value at this address.
  if (!withpointer) {
    if (left->type == P_STRUCT || left->type == P_UNION)
      left->op = A_ADDR;
    else
      fatal("Expression is not a struct/union");
  }

  // Get the details of the composite type
  typeptr = left->ctype;

  // Skip the '.' or '->' token and get the member's name
  scan(&Token);
  ident();

  // Find the matching member's name in the type
  // Die if we can't find it
  for (m = typeptr->member; m != NULL; m = m->next)
    if (!strcmp(m->name, Text))
      break;
  if (m == NULL)
    fatals("No member found in struct/union: ", Text);

  // Make the left tree an rvalue
  left->rvalue = 1;

  // Build an A_INTLIT node with the offset
  right = mkastleaf(A_INTLIT, P_INT, NULL, NULL, m->st_posn);

  // Add the member's offset to the base of the struct/union
  // and dereference it. Still an lvalue at this point
  left = mkastnode(A_ADD, pointer_to(m->type), m->ctype, left, NULL, right, NULL, 0);
  left = mkastunary(A_DEREF, m->type, m->ctype, left, NULL, 0);
  return (left);
}

// Parse a parenthesised expression and
// return an AST node representing it.
// ptp is the precedence of the previous token; it is only
// passed on when the parentheses turn out to hold a cast.
static struct ASTnode *paren_expression(int ptp) {
  struct ASTnode *n;
  int type = 0;
  struct symtable *ctype = NULL;

  // Beginning of a parenthesised expression, skip the '('.
  scan(&Token);

  // If the token after is a type identifier, this is a cast expression
  switch (Token.token) {
    case T_IDENT:
      // We have to see if the identifier matches a typedef.
      // If not, treat it as an expression.
      if (findtypedef(Text) == NULL) {
        n = binexpr(0);         // ptp is zero as expression inside ( )
        break;
      }
      // The identifier names a typedef: deliberately fall
      // through and parse it as the type of a cast
    case T_VOID:
    case T_CHAR:
    case T_INT:
    case T_LONG:
    case T_STRUCT:
    case T_UNION:
    case T_ENUM:
      // Get the type inside the parentheses
      type = parse_cast(&ctype);

      // Skip the closing ')' and then parse the following expression
      rparen();
      // Deliberate fall through into default to parse the cast's operand

    default:
      n = binexpr(ptp);         // Scan in the expression. We pass in ptp
                                // as the cast doesn't change the
                                // expression's precedence
  }

  // We now have at least an expression in n, and possibly a non-zero type
  // in type if there was a cast. Skip the closing ')' if there was no cast.
  if (type == 0)
    rparen();
  else
    // Otherwise, make a unary AST node for the cast
    n = mkastunary(A_CAST, type, ctype, n, NULL, 0);
  return (n);
}

// Parse a primary factor and return an
// AST node representing it.
static struct ASTnode *primary(int ptp) {
  struct ASTnode *n;
  struct symtable *enumptr;
  struct symtable *varptr;
  int id;
  int type = 0;
  int size, class;
  struct symtable *ctype;

  switch (Token.token) {
    case T_STATIC:
    case T_EXTERN:
      // NOTE(review): there is no break after this call, so this
      // presumably relies on fatal() not returning -- confirm
      fatal("Compiler doesn't support static or extern local declarations");
    case T_SIZEOF:
      // Skip the T_SIZEOF and ensure we have a left parenthesis
      scan(&Token);
      if (Token.token != T_LPAREN)
        fatal("Left parenthesis expected after sizeof");
      scan(&Token);

      // Get the type inside the parentheses
      type = parse_stars(parse_type(&ctype, &class));

      // Get the type's size
      size = typesize(type, ctype);
      rparen();

      // Make a leaf node int literal with the size
      return (mkastleaf(A_INTLIT, P_INT, NULL, NULL, size));

    case T_INTLIT:
      // For an INTLIT token, make a leaf AST node for it.
      // Make it a P_CHAR if it's within the P_CHAR range
      if (Token.intvalue >= 0 && Token.intvalue < 256)
        n = mkastleaf(A_INTLIT, P_CHAR, NULL, NULL, Token.intvalue);
      else
        n = mkastleaf(A_INTLIT, P_INT, NULL, NULL, Token.intvalue);
      break;

    case T_STRLIT:
      // For a STRLIT token, generate the assembly for it.
      id = genglobstr(Text, 0);

      // For successive STRLIT tokens, append their contents
      // to this one
      while (1) {
        scan(&Peektoken);
        if (Peektoken.token != T_STRLIT)
          break;
        genglobstr(Text, 1);
        scan(&Token);           // To skip it properly
      }

      // Now make a leaf AST node for it. id is the string's label.
      genglobstrend();
      n = mkastleaf(A_STRLIT, pointer_to(P_CHAR), NULL, NULL, id);
      break;

    case T_IDENT:
      // If the identifier matches an enum value,
      // return an A_INTLIT node
      if ((enumptr = findenumval(Text)) != NULL) {
        n = mkastleaf(A_INTLIT, P_INT, NULL, NULL, enumptr->st_posn);
        break;
      }

      // See if this identifier exists as a symbol. For arrays, set rvalue to 1.
      if ((varptr = findsymbol(Text)) == NULL)
        fatals("Unknown variable or function", Text);
      switch (varptr->stype) {
        case S_VARIABLE:
          n = mkastleaf(A_IDENT, varptr->type, varptr->ctype, varptr, 0);
          break;
        case S_ARRAY:
          n = mkastleaf(A_ADDR, varptr->type, varptr->ctype, varptr, 0);
          n->rvalue = 1;
          break;
        case S_FUNCTION:
          // Function call, see if the next token is a left parenthesis
          scan(&Token);
          if (Token.token != T_LPAREN)
            fatals("Function name used without parentheses", Text);
          return (funccall());
        default:
          fatals("Identifier not a scalar or array variable", Text);
      }

      break;

    case T_LPAREN:
      return (paren_expression(ptp));

    default:
      fatals("Expecting a primary expression, got token", Token.tokstr);
  }

  // Scan in the next token and return the leaf node
  scan(&Token);
  return (n);
}

// Parse a postfix expression and return
// an AST node representing it. The
// identifier is already in Text.
static struct ASTnode *postfix(int ptp) {
  struct ASTnode *n;

  // Get the primary expression
  n = primary(ptp);

  // Loop until there are no more postfix operators
  while (1) {
    switch (Token.token) {
      case T_LBRACKET:
        // An array reference
        n = array_access(n);
        break;

      case T_DOT:
        // Access into a struct or union
        n = member_access(n, 0);
        break;

      case T_ARROW:
        // Pointer access into a struct or union
        n = member_access(n, 1);
        break;

      case T_INC:
        // Post-increment: skip over the token
        if (n->rvalue == 1)
          fatal("Cannot ++ on rvalue");
        scan(&Token);

        // Can't do it twice
        if (n->op == A_POSTINC || n->op == A_POSTDEC)
          fatal("Cannot ++ and/or -- more than once");

        // and change the AST operation
        n->op = A_POSTINC;
        break;

      case T_DEC:
        // Post-decrement: skip over the token
        if (n->rvalue == 1)
          fatal("Cannot -- on rvalue");
        scan(&Token);

        // Can't do it twice
        if (n->op == A_POSTINC || n->op == A_POSTDEC)
          fatal("Cannot ++ and/or -- more than once");

        // and change the AST operation
        n->op = A_POSTDEC;
        break;

      default:
        // Not a postfix operator: the expression built so far is complete
        return (n);
    }
  }

  return (NULL);                // Keep -Wall happy
}

// Convert a binary
operator token into a binary AST operation. // We rely on a 1:1 mapping from token to AST operation static int binastop(int tokentype) { if (tokentype > T_EOF && tokentype <= T_MOD) return (tokentype); fatals("Syntax error, token", Tstring[tokentype]); return (0); // Keep -Wall happy } // Return true if a token is right-associative, // false otherwise. static int rightassoc(int tokentype) { if (tokentype >= T_ASSIGN && tokentype <= T_ASSLASH) return (1); return (0); } // Operator precedence for each token. Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 10, // T_EOF, T_ASSIGN, T_ASPLUS, 10, 10, // T_ASMINUS, T_ASSTAR, 10, 10, // T_ASSLASH, T_ASMOD, 15, // T_QUESTION, 20, 30, // T_LOGOR, T_LOGAND 40, 50, 60, // T_OR, T_XOR, T_AMPER 70, 70, // T_EQ, T_NE 80, 80, 80, 80, // T_LT, T_GT, T_LE, T_GE 90, 90, // T_LSHIFT, T_RSHIFT 100, 100, // T_PLUS, T_MINUS 110, 110, 110 // T_STAR, T_SLASH, T_MOD }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec; if (tokentype > T_MOD) fatals("Token with no precedence in op_precedence:", Tstring[tokentype]); prec = OpPrec[tokentype]; if (prec == 0) fatals("Syntax error, token", Tstring[tokentype]); return (prec); } // prefix_expression: postfix_expression // | '*' prefix_expression // | '&' prefix_expression // | '-' prefix_expression // | '++' prefix_expression // | '--' prefix_expression // ; // Parse a prefix expression and return // a sub-tree representing it. 
static struct ASTnode *prefix(int ptp) { struct ASTnode *tree; switch (Token.token) { case T_AMPER: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(ptp); // Ensure that it's an identifier if (tree->op != A_IDENT) fatal("& operator must be followed by an identifier"); // Prevent '&' being performed on an array if (tree->sym->stype == S_ARRAY) fatal("& operator cannot be performed on an array"); // Now change the operator to A_ADDR and the type to // a pointer to the original type tree->op = A_ADDR; tree->type = pointer_to(tree->type); break; case T_STAR: // Get the next token and parse it // recursively as a prefix expression. // Make it an rvalue scan(&Token); tree = prefix(ptp); tree->rvalue= 1; // Ensure the tree's type is a pointer if (!ptrtype(tree->type)) fatal("* operator must be followed by an expression of pointer type"); // Prepend an A_DEREF operation to the tree tree = mkastunary(A_DEREF, value_at(tree->type), tree->ctype, tree, NULL, 0); break; case T_MINUS: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(ptp); // Prepend a A_NEGATE operation to the tree and // make the child an rvalue. Because chars are unsigned, // also widen this if needed to int so that it's signed tree->rvalue = 1; if (tree->type == P_CHAR) tree->type = P_INT; tree = mkastunary(A_NEGATE, tree->type, tree->ctype, tree, NULL, 0); break; case T_INVERT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(ptp); // Prepend a A_INVERT operation to the tree and // make the child an rvalue. tree->rvalue = 1; tree = mkastunary(A_INVERT, tree->type, tree->ctype, tree, NULL, 0); break; case T_LOGNOT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(ptp); // Prepend a A_LOGNOT operation to the tree and // make the child an rvalue. 
tree->rvalue = 1; tree = mkastunary(A_LOGNOT, tree->type, tree->ctype, tree, NULL, 0); break; case T_INC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(ptp); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("++ operator must be followed by an identifier"); // Prepend an A_PREINC operation to the tree tree = mkastunary(A_PREINC, tree->type, tree->ctype, tree, NULL, 0); break; case T_DEC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(ptp); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("-- operator must be followed by an identifier"); // Prepend an A_PREDEC operation to the tree tree = mkastunary(A_PREDEC, tree->type, tree->ctype, tree, NULL, 0); break; default: tree = postfix(ptp); } return (tree); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; struct ASTnode *ltemp, *rtemp; int ASTop; int tokentype; // Get the tree on the left. // Fetch the next token at the same time. 
left = prefix(ptp); // If we hit one of several terminating tokens, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON || tokentype == T_RBRACE) { left->rvalue = 1; return (left); } // While the precedence of this token is more than that of the // previous token precedence, or it's right associative and // equal to the previous token's precedence while ((op_precedence(tokentype) > ptp) || (rightassoc(tokentype) && op_precedence(tokentype) == ptp)) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Determine the operation to be performed on the sub-trees ASTop = binastop(tokentype); switch (ASTop) { case A_TERNARY: // Ensure we have a ':' token, scan in the expression after it match(T_COLON, ":"); ltemp = binexpr(0); // Build and return the AST for this statement. Use the middle // expression's type as the return type. XXX We should also // consider the third expression's type. return (mkastnode (A_TERNARY, right->type, right->ctype, left, right, ltemp, NULL, 0)); case A_ASSIGN: // Assignment // Make the right tree into an rvalue right->rvalue = 1; // Ensure the right's type matches the left right = modify_type(right, left->type, left->ctype, 0); if (right == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree. However, switch // left and right around, so that the right expression's // code will be generated before the left expression ltemp = left; left = right; right = ltemp; break; default: // We are not doing a ternary or assignment, so both trees should // be rvalues. Convert both trees into rvalue if they are lvalue trees left->rvalue = 1; right->rvalue = 1; // Ensure the two types are compatible by trying // to modify each tree to match the other's type. 
ltemp = modify_type(left, right->type, right->ctype, ASTop); rtemp = modify_type(right, left->type, left->ctype, ASTop); if (ltemp == NULL && rtemp == NULL) fatal("Incompatible types in binary expression"); if (ltemp != NULL) left = ltemp; if (rtemp != NULL) right = rtemp; } // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. left = mkastnode(binastop(tokentype), left->type, left->ctype, left, NULL, right, NULL, 0); // Some operators produce an int result regardless of their operands switch (binastop(tokentype)) { case A_LOGOR: case A_LOGAND: case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: left->type = P_INT; } // Update the details of the current token. // If we hit a terminating token, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON || tokentype == T_RBRACE) { left->rvalue = 1; return (left); } } // Return the tree we have when the precedence // is the same or lower left->rvalue = 1; return (left); } ================================================ FILE: 60_TripleTest/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number static int labelid = 1; int genlabel(void) { return (labelid++); } static void update_line(struct ASTnode *n) { // Output the line into the assembly if we've // changed the line number in the AST node if (n->linenum != 0 && Line != n->linenum) { Line = n->linenum; cglinenum(Line); } } // Generate the code for an IF statement // and an optional ELSE clause. static int genIF(struct ASTnode *n, int looptoplabel, int loopendlabel) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. 
// When there is no ELSE clause, Lfalse _is_ // the ending label! Lfalse = genlabel(); if (n->right) Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. genAST(n->left, Lfalse, NOLABEL, NOLABEL, n->op); genfreeregs(NOREG); // Generate the true compound statement genAST(n->mid, NOLABEL, looptoplabel, loopendlabel, n->op); genfreeregs(NOREG); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOLABEL, NOLABEL, loopendlabel, n->op); genfreeregs(NOREG); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = genlabel(); Lend = genlabel(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. genAST(n->left, Lend, Lstart, Lend, n->op); genfreeregs(NOREG); // Generate the compound statement for the body genAST(n->right, NOLABEL, Lstart, Lend, n->op); genfreeregs(NOREG); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Generate the code for a SWITCH statement static int genSWITCH(struct ASTnode *n) { int *caseval, *caselabel; int Ljumptop, Lend; int i, reg, defaultlabel = 0, casecount = 0; struct ASTnode *c; // Create arrays for the case values and associated labels. // Ensure that we have at least one position in each array. caseval = (int *) malloc((n->a_intvalue + 1) * sizeof(int)); caselabel = (int *) malloc((n->a_intvalue + 1) * sizeof(int)); // Generate labels for the top of the jump table, and the // end of the switch statement. Set a default label for // the end of the switch, in case we don't have a default. 
Ljumptop = genlabel(); Lend = genlabel(); defaultlabel = Lend; // Output the code to calculate the switch condition reg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, 0); cgjump(Ljumptop); genfreeregs(reg); // Walk the right-child linked list to // generate the code for each case for (i = 0, c = n->right; c != NULL; i++, c = c->right) { // Get a label for this case. Store it // and the case value in the arrays. // Record if it is the default case. caselabel[i] = genlabel(); caseval[i] = c->a_intvalue; cglabel(caselabel[i]); if (c->op == A_DEFAULT) defaultlabel = caselabel[i]; else casecount++; // Generate the case code. Pass in the end label for the breaks. // If case has no body, we will fall into the following body. if (c->left) genAST(c->left, NOLABEL, NOLABEL, Lend, 0); genfreeregs(NOREG); } // Ensure the last case jumps past the switch table cgjump(Lend); // Now output the switch table and the end label. cgswitch(reg, casecount, Ljumptop, caselabel, caseval, defaultlabel); cglabel(Lend); return (NOREG); } // Generate the code for an // A_LOGAND or A_LOGOR operation static int gen_logandor(struct ASTnode *n) { // Generate two labels int Lfalse = genlabel(); int Lend = genlabel(); int reg; // Generate the code for the left expression // followed by the jump to the false label reg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, 0); cgboolean(reg, n->op, Lfalse); genfreeregs(NOREG); // Generate the code for the right expression // followed by the jump to the false label reg = genAST(n->right, NOLABEL, NOLABEL, NOLABEL, 0); cgboolean(reg, n->op, Lfalse); genfreeregs(reg); // We didn't jump so set the right boolean value if (n->op == A_LOGAND) { cgloadboolean(reg, 1); cgjump(Lend); cglabel(Lfalse); cgloadboolean(reg, 0); } else { cgloadboolean(reg, 0); cgjump(Lend); cglabel(Lfalse); cgloadboolean(reg, 1); } cglabel(Lend); return (reg); } // Generate the code to copy the arguments of a // function call to its parameters, then call the // function itself. 
Return the register that holds // the function's return value. static int gen_funccall(struct ASTnode *n) { struct ASTnode *gluetree = n->left; int reg; int numargs = 0; // Save the registers before we copy the arguments spill_all_regs(); // If there is a list of arguments, walk this list // from the last argument (right-hand child) to the // first while (gluetree) { // Calculate the expression's value reg = genAST(gluetree->right, NOLABEL, NOLABEL, NOLABEL, gluetree->op); // Copy this into the n'th function parameter: size is 1, 2, 3, ... cgcopyarg(reg, gluetree->a_size); // Keep the first (highest) number of arguments if (numargs == 0) numargs = gluetree->a_size; gluetree = gluetree->left; } // Call the function, clean up the stack (based on numargs), // and return its result return (cgcall(n->sym, numargs)); } // Generate code for a ternary expression static int gen_ternary(struct ASTnode *n) { int Lfalse, Lend; int reg, expreg; // Generate two labels: one for the // false expression, and one for the // end of the overall expression Lfalse = genlabel(); Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. genAST(n->left, Lfalse, NOLABEL, NOLABEL, n->op); // genfreeregs(NOREG); // Get a register to hold the result of the two expressions reg = alloc_register(); // Generate the true expression and the false label. // Move the expression result into the known register. expreg = genAST(n->mid, NOLABEL, NOLABEL, NOLABEL, n->op); cgmove(expreg, reg); cgfreereg(expreg); cgjump(Lend); cglabel(Lfalse); // Generate the false expression and the end label. // Move the expression result into the known register. expreg = genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); cgmove(expreg, reg); cgfreereg(expreg); cglabel(Lend); return (reg); } // Given an AST, an optional label, and the AST op // of the parent, generate assembly code recursively. // Return the register id with the tree's final value. 
int genAST(struct ASTnode *n, int iflabel, int looptoplabel, int loopendlabel, int parentASTop) { int leftreg = NOREG, rightreg = NOREG; // Empty tree, do nothing if (n == NULL) return (NOREG); // Update the line number in the output update_line(n); // We have some specific AST node handling at the top // so that we don't evaluate the child sub-trees immediately switch (n->op) { case A_IF: return (genIF(n, looptoplabel, loopendlabel)); case A_WHILE: return (genWHILE(n)); case A_SWITCH: return (genSWITCH(n)); case A_FUNCCALL: return (gen_funccall(n)); case A_TERNARY: return (gen_ternary(n)); case A_LOGOR: return (gen_logandor(n)); case A_LOGAND: return (gen_logandor(n)); case A_GLUE: // Do each child statement, and free the // registers after each child if (n->left != NULL) genAST(n->left, iflabel, looptoplabel, loopendlabel, n->op); genfreeregs(NOREG); if (n->right != NULL) genAST(n->right, iflabel, looptoplabel, loopendlabel, n->op); genfreeregs(NOREG); return (NOREG); case A_FUNCTION: // Generate the function's preamble before the code // in the child sub-tree cgfuncpreamble(n->sym); genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op); cgfuncpostamble(n->sym); return (NOREG); } // General AST node handling below // Get the left and right sub-tree values if (n->left) leftreg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op); if (n->right) rightreg = genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg)); case A_SUBTRACT: return (cgsub(leftreg, rightreg)); case A_MULTIPLY: return (cgmul(leftreg, rightreg)); case A_DIVIDE: return (cgdivmod(leftreg, rightreg, A_DIVIDE)); case A_MOD: return (cgdivmod(leftreg, rightreg, A_MOD)); case A_AND: return (cgand(leftreg, rightreg)); case A_OR: return (cgor(leftreg, rightreg)); case A_XOR: return (cgxor(leftreg, rightreg)); case A_LSHIFT: return (cgshl(leftreg, rightreg)); case A_RSHIFT: return (cgshr(leftreg, rightreg)); case A_EQ: case A_NE: case A_LT: case A_GT: 
case A_LE: case A_GE: // If the parent AST node is an A_IF, A_WHILE or A_TERNARY, // generate a compare followed by a jump. Otherwise, compare // registers and set one to 1 or 0 based on the comparison. if (parentASTop == A_IF || parentASTop == A_WHILE || parentASTop == A_TERNARY) return (cgcompare_and_jump (n->op, leftreg, rightreg, iflabel, n->left->type)); else return (cgcompare_and_set(n->op, leftreg, rightreg, n->left->type)); case A_INTLIT: return (cgloadint(n->a_intvalue, n->type)); case A_STRLIT: return (cgloadglobstr(n->a_intvalue)); case A_IDENT: // Load our value if we are an rvalue // or we are being dereferenced if (n->rvalue || parentASTop == A_DEREF) { return (cgloadvar(n->sym, n->op)); } else return (NOREG); case A_ASPLUS: case A_ASMINUS: case A_ASSTAR: case A_ASSLASH: case A_ASMOD: case A_ASSIGN: // For the '+=' and friends operators, generate suitable code // and get the register with the result. Then take the left child, // make it the right child so that we can fall into the assignment code. switch (n->op) { case A_ASPLUS: leftreg = cgadd(leftreg, rightreg); n->right = n->left; break; case A_ASMINUS: leftreg = cgsub(leftreg, rightreg); n->right = n->left; break; case A_ASSTAR: leftreg = cgmul(leftreg, rightreg); n->right = n->left; break; case A_ASSLASH: leftreg = cgdivmod(leftreg, rightreg, A_DIVIDE); n->right = n->left; break; case A_ASMOD: leftreg = cgdivmod(leftreg, rightreg, A_MOD); n->right = n->left; break; } // Now into the assignment code // Are we assigning to an identifier or through a pointer? 
switch (n->right->op) { case A_IDENT: if (n->right->sym->class == C_GLOBAL || n->right->sym->class == C_EXTERN || n->right->sym->class == C_STATIC) return (cgstorglob(leftreg, n->right->sym)); else return (cgstorlocal(leftreg, n->right->sym)); case A_DEREF: return (cgstorderef(leftreg, rightreg, n->right->type)); default: fatald("Can't A_ASSIGN in genAST(), op", n->op); } case A_WIDEN: // Widen the child's type to the parent's type return (cgwiden(leftreg, n->left->type, n->type)); case A_RETURN: cgreturn(leftreg, Functionid); return (NOREG); case A_ADDR: return (cgaddress(n->sym)); case A_DEREF: // If we are an rvalue, dereference to get the value we point at, // otherwise leave it for A_ASSIGN to store through the pointer if (n->rvalue) return (cgderef(leftreg, n->left->type)); else return (leftreg); case A_SCALE: // Small optimisation: use shift if the // scale value is a known power of two switch (n->a_size) { case 2: return (cgshlconst(leftreg, 1)); case 4: return (cgshlconst(leftreg, 2)); case 8: return (cgshlconst(leftreg, 3)); default: // Load a register with the size and // multiply the leftreg by this size rightreg = cgloadint(n->a_size, P_INT); return (cgmul(leftreg, rightreg)); } case A_POSTINC: case A_POSTDEC: // Load and decrement the variable's value into a register // and post increment/decrement it return (cgloadvar(n->sym, n->op)); case A_PREINC: case A_PREDEC: // Load and decrement the variable's value into a register // and pre increment/decrement it return (cgloadvar(n->left->sym, n->op)); case A_NEGATE: return (cgnegate(leftreg)); case A_INVERT: return (cginvert(leftreg)); case A_LOGNOT: return (cglognot(leftreg)); case A_TOBOOL: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. 
Otherwise, set the register // to 0 or 1 based on it's zeroeness or non-zeroeness return (cgboolean(leftreg, parentASTop, iflabel)); case A_BREAK: cgjump(loopendlabel); return (NOREG); case A_CONTINUE: cgjump(looptoplabel); return (NOREG); case A_CAST: return (leftreg); // Not much to do default: fatald("Unknown AST operator", n->op); } return (NOREG); // Keep -Wall happy } void genpreamble(char *filename) { cgpreamble(filename); } void genpostamble() { cgpostamble(); } void genfreeregs(int keepreg) { freeall_registers(keepreg); } void genglobsym(struct symtable *node) { cgglobsym(node); } // Generate a global string. // If append is true, append to // previous genglobstr() call. int genglobstr(char *strvalue, int append) { int l = genlabel(); cgglobstr(l, strvalue, append); return (l); } void genglobstrend(void) { cgglobstrend(); } int genprimsize(int type) { return (cgprimsize(type)); } int genalign(int type, int offset, int direction) { return (cgalign(type, offset, direction)); } ================================================ FILE: 60_TripleTest/include/ctype.h ================================================ #ifndef _CTYPE_H_ # define _CTYPE_H_ int isalnum(int c); int isalpha(int c); int iscntrl(int c); int isdigit(int c); int isgraph(int c); int islower(int c); int isprint(int c); int ispunct(int c); int isspace(int c); int isupper(int c); int isxdigit(int c); int isascii(int c); int isblank(int c); int toupper(int c); int tolower(int c); #endif // _CTYPE_H_ ================================================ FILE: 60_TripleTest/include/errno.h ================================================ #ifndef _ERRNO_H_ # define _ERRNO_H_ int * __errno_location(void); #define errno (* __errno_location()) #endif // _ERRNO_H_ ================================================ FILE: 60_TripleTest/include/fcntl.h ================================================ #ifndef _FCNTL_H_ # define _FCNTL_H_ #define O_RDONLY 00 #define O_WRONLY 01 #define O_RDWR 02 int open(char 
*pathname, int flags); #endif // _FCNTL_H_ ================================================ FILE: 60_TripleTest/include/stddef.h ================================================ #ifndef _STDDEF_H_ # define _STDDEF_H_ #ifndef NULL # define NULL (void *)0 #endif typedef long size_t; #endif //_STDDEF_H_ ================================================ FILE: 60_TripleTest/include/stdio.h ================================================ #ifndef _STDIO_H_ # define _STDIO_H_ #include #ifndef NULL # define NULL (void *)0 #endif #ifndef EOF # define EOF (-1) #endif // This FILE definition will do for now typedef char * FILE; FILE *fopen(char *pathname, char *mode); size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream); size_t fwrite(void *ptr, size_t size, size_t nmemb, FILE *stream); int fclose(FILE *stream); int printf(char *format); int fprintf(FILE *stream, char *format); int sprintf(char *str, char *format); int snprintf(char *str, size_t size, char *format); int fgetc(FILE *stream); int fputc(int c, FILE *stream); int fputs(char *s, FILE *stream); int putc(int c, FILE *stream); int putchar(int c); int puts(char *s); FILE *popen(char *command, char *type); int pclose(FILE *stream); extern FILE *stdin; extern FILE *stdout; extern FILE *stderr; #endif // _STDIO_H_ ================================================ FILE: 60_TripleTest/include/stdlib.h ================================================ #ifndef _STDLIB_H_ # define _STDLIB_H_ void exit(int status); void _Exit(int status); void *malloc(int size); void free(void *ptr); void *calloc(int nmemb, int size); void *realloc(void *ptr, int size); int system(char *command); #endif // _STDLIB_H_ ================================================ FILE: 60_TripleTest/include/string.h ================================================ #ifndef _STRING_H_ # define _STRING_H_ char *strdup(char *s); char *strchr(char *s, int c); char *strrchr(char *s, int c); int strcmp(char *s1, char *s2); int strncmp(char *s1, char *s2, 
size_t n); char *strerror(int errnum); #endif // _STRING_H_ ================================================ FILE: 60_TripleTest/include/unistd.h ================================================ #ifndef _UNISTD_H_ # define _UNISTD_H_ void _exit(int status); int unlink(char *pathname); #endif // _UNISTD_H_ ================================================ FILE: 60_TripleTest/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Given a string with a '.' and at least a 1-character suffix // after the '.', change the suffix to be the given character. // Return the new string or NULL if the original string could // not be modified char *alter_suffix(char *str, char suffix) { char *posn; char *newstr; // Clone the string if ((newstr = strdup(str)) == NULL) return (NULL); // Find the '.' if ((posn = strrchr(newstr, '.')) == NULL) return (NULL); // Ensure there is a suffix posn++; if (*posn == '\0') return (NULL); // Change the suffix and NUL-terminate the string *posn = suffix; posn++; *posn = '\0'; return (newstr); } // Given an input filename, compile that file // down to assembly code. 
Return the new file's name static char *do_compile(char *filename) { char cmd[TEXTLEN]; // Change the input file's suffix to .s Outfilename = alter_suffix(filename, 's'); if (Outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .c on the end\n", filename); exit(1); } // Generate the pre-processor command snprintf(cmd, TEXTLEN, "%s %s %s", CPPCMD, INCDIR, filename); // Open up the pre-processor pipe if ((Infile = popen(cmd, "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", filename, strerror(errno)); exit(1); } Infilename = filename; // Create the output file if ((Outfile = fopen(Outfilename, "w")) == NULL) { fprintf(stderr, "Unable to create %s: %s\n", Outfilename, strerror(errno)); exit(1); } Line = 1; // Reset the scanner Linestart = 1; Putback = '\n'; clear_symtable(); // Clear the symbol table if (O_verbose) printf("compiling %s\n", filename); scan(&Token); // Get the first token from the input Peektoken.token = 0; // and set there is no lookahead token genpreamble(filename); // Output the preamble global_declarations(); // Parse the global declarations genpostamble(); // Output the postamble fclose(Outfile); // Close the output file // Dump the symbol table if requested if (O_dumpsym) { printf("Symbols for %s\n", filename); dumpsymtables(); fprintf(stdout, "\n\n"); } freestaticsyms(); // Free any static symbols in the file return (Outfilename); } // Given an input filename, assemble that file // down to object code. 
Return the object filename char *do_assemble(char *filename) { char cmd[TEXTLEN]; int err; char *outfilename = alter_suffix(filename, 'o'); if (outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .s on the end\n", filename); exit(1); } // Build the assembly command and run it #ifdef __NASM__ char *incfilename = alter_suffix(filename, 'n'); if (incfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .s on the end\n", filename); exit(1); } sprintf(cmd, "%s %s %s", ASCMD, outfilename, filename); #else snprintf(cmd, TEXTLEN, "%s %s %s", ASCMD, outfilename, filename); #endif if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Assembly of %s failed\n", filename); exit(1); } return (outfilename); } // Given a list of object files and an output filename, // link all of the object filenames together. void do_link(char *outfilename, char **objlist) { int cnt, size = TEXTLEN; char cmd[TEXTLEN], *cptr; int err; // Start with the linker command and the output file cptr = cmd; cnt = snprintf(cptr, size, "%s %s ", LDCMD, outfilename); cptr += cnt; size -= cnt; // Now append each object file while (*objlist != NULL) { cnt = snprintf(cptr, size, "%s ", *objlist); cptr += cnt; size -= cnt; objlist++; } if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Linking failed\n"); exit(1); } } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s [-vcSTM] [-o outfile] file [file ...]\n", prog); fprintf(stderr, " -v give verbose output of the compilation stages\n"); fprintf(stderr, " -c generate object files but don't link them\n"); fprintf(stderr, " -S generate assembly files but don't link them\n"); fprintf(stderr, " -T dump the AST trees for each input file\n"); fprintf(stderr, " -M dump the symbol table for each input file\n"); fprintf(stderr, " -o outfile, produce the outfile executable file\n"); exit(1); } // Main program: check 
arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. enum { MAXOBJ = 100 }; int main(int argc, char **argv) { char *outfilename = AOUT; char *asmfile, *objfile; char *objlist[MAXOBJ]; int i, j, objcnt = 0; // Initialise our variables O_dumpAST = 0; O_dumpsym = 0; O_keepasm = 0; O_assemble = 0; O_verbose = 0; O_dolink = 1; // Scan for command-line options for (i = 1; i < argc; i++) { // No leading '-', stop scanning for options if (*argv[i] != '-') break; // For each option in this argument for (j = 1; (*argv[i] == '-') && argv[i][j]; j++) { switch (argv[i][j]) { case 'o': outfilename = argv[++i]; // Save & skip to next argument break; case 'T': O_dumpAST = 1; break; case 'M': O_dumpsym = 1; break; case 'c': O_assemble = 1; O_keepasm = 0; O_dolink = 0; break; case 'S': O_keepasm = 1; O_assemble = 0; O_dolink = 0; break; case 'v': O_verbose = 1; break; default: usage(argv[0]); } } } // Ensure we have at lease one input file argument if (i >= argc) usage(argv[0]); // Work on each input file in turn while (i < argc) { asmfile = do_compile(argv[i]); // Compile the source file if (O_dolink || O_assemble) { objfile = do_assemble(asmfile); // Assemble it to object forma if (objcnt == (MAXOBJ - 2)) { fprintf(stderr, "Too many object files for the compiler to handle\n"); exit(1); } objlist[objcnt++] = objfile; // Add the object file's name objlist[objcnt] = NULL; // to the list of object files } if (!O_keepasm) // Remove the assembly file if unlink(asmfile); // we don't need to keep it i++; } // Now link all the object files together if (O_dolink) { do_link(outfilename, objlist); // If we don't need to keep the object // files, then remove them if (!O_assemble) { for (i = 0; objlist[i] != NULL; i++) unlink(objlist[i]); } } return (0); } ================================================ FILE: 60_TripleTest/misc.c ================================================ #include "defs.h" #include "data.h" 
#include "decl.h"
// NOTE(review): the two system header names below were missing in this
// copy of the file (bare "#include" lines). This file uses fprintf()
// and unlink(), so <stdio.h> and <unistd.h> are assumed -- please confirm.
#include <stdio.h>
#include <unistd.h>

// Miscellaneous functions
// Copyright (c) 2019 Warren Toomey, GPL3

// Ensure that the current token is t,
// and fetch the next token. Otherwise
// throw an error. The 'what' string names
// the expected token in the error message.
void match(int t, char *what) {
  if (Token.token != t)
    fatals("Expected", what);
  else
    scan(&Token);
}

// Match a semicolon and fetch the next token
void semi(void) {
  match(T_SEMI, ";");
}

// Match a left brace and fetch the next token
void lbrace(void) {
  match(T_LBRACE, "{");
}

// Match a right brace and fetch the next token
void rbrace(void) {
  match(T_RBRACE, "}");
}

// Match a left parenthesis and fetch the next token
void lparen(void) {
  match(T_LPAREN, "(");
}

// Match a right parenthesis and fetch the next token
void rparen(void) {
  match(T_RPAREN, ")");
}

// Match an identifier and fetch the next token
void ident(void) {
  match(T_IDENT, "identifier");
}

// Match a comma and fetch the next token
void comma(void) {
  match(T_COMMA, "comma");
}

// Shared tail of all the fatal*() functions: close the output
// file, remove it so that no partial output is left behind,
// and exit with status 1.
static void fatal_cleanup(void) {
  fclose(Outfile);
  unlink(Outfilename);
  exit(1);
}

// Print out fatal messages. Each variant reports the
// current line number and input filename, then cleans
// up the output file and exits.

// Message only
void fatal(char *s) {
  fprintf(stderr, "%s on line %d of %s\n", s, Line, Infilename);
  fatal_cleanup();
}

// Message followed by a string
void fatals(char *s1, char *s2) {
  fprintf(stderr, "%s:%s on line %d of %s\n", s1, s2, Line, Infilename);
  fatal_cleanup();
}

// Message followed by a decimal integer
void fatald(char *s, int d) {
  fprintf(stderr, "%s:%d on line %d of %s\n", s, d, Line, Infilename);
  fatal_cleanup();
}

// Message followed by a character
void fatalc(char *s, int c) {
  fprintf(stderr, "%s:%c on line %d of %s\n", s, c, Line, Infilename);
  fatal_cleanup();
}

================================================
FILE: 60_TripleTest/opt.c
================================================
#include "defs.h"
#include "data.h"
#include "decl.h"

// AST Tree Optimisation Code
// Copyright (c) 2019 Warren Toomey, GPL3

// Fold an AST tree with a binary operator
// and two A_INTLIT children. Return either
// the original tree or a new leaf node.
static struct ASTnode *fold2(struct ASTnode *n) { int val, leftval, rightval; // Get the values from each child leftval = n->left->a_intvalue; rightval = n->right->a_intvalue; // Perform some of the binary operations. // For any AST op we can't do, return // the original tree. switch (n->op) { case A_ADD: val = leftval + rightval; break; case A_SUBTRACT: val = leftval - rightval; break; case A_MULTIPLY: val = leftval * rightval; break; case A_DIVIDE: // Don't try to divide by zero. if (rightval == 0) return (n); val = leftval / rightval; break; default: return (n); } // Return a leaf node with the new value return (mkastleaf(A_INTLIT, n->type, NULL, NULL, val)); } // Fold an AST tree with a unary operator // and one INTLIT children. Return either // the original tree or a new leaf node. static struct ASTnode *fold1(struct ASTnode *n) { int val; // Get the child value. Do the // operation if recognised. // Return the new leaf node. val = n->left->a_intvalue; switch (n->op) { case A_WIDEN: break; case A_INVERT: val = ~val; break; case A_LOGNOT: val = !val; break; default: return (n); } // Return a leaf node with the new value return (mkastleaf(A_INTLIT, n->type, NULL, NULL, val)); } // Attempt to do constant folding on // the AST tree with the root node n static struct ASTnode *fold(struct ASTnode *n) { if (n == NULL) return (NULL); // Fold on the left child, then // do the same on the right child n->left = fold(n->left); n->right = fold(n->right); // If both children are A_INTLITs, do a fold2() if (n->left && n->left->op == A_INTLIT) { if (n->right && n->right->op == A_INTLIT) n = fold2(n); else // If only the left is A_INTLIT, do a fold1() n = fold1(n); } // Return the possibly modified tree return (n); } // Optimise an AST tree by // constant folding in all sub-trees struct ASTnode *optimise(struct ASTnode *n) { n = fold(n); return (n); } ================================================ FILE: 60_TripleTest/scan.c ================================================ 
#include "defs.h"
#include "data.h"
#include "decl.h"

// Lexical scanning
// Copyright (c) 2019 Warren Toomey, GPL3

// Return the position of character c
// in string s, or -1 if c not found
static int chrpos(char *s, int c) {
  int i;
  for (i = 0; s[i] != '\0'; i++)
    if (s[i] == (char) c)
      return (i);
  return (-1);
}

// Get the next character from the input file.
//
// A put-back character is returned first if there is one.
// Pre-processor lines (a '#' at the start of a line) are consumed
// here: the line number and filename on them update Line and
// Infilename, and the rest of the line is skipped. Line and
// Linestart are kept up to date as newlines are seen.
static int next(void) {
  int c, l;

  if (Putback) {		// Use the character put
    c = Putback;		// back if there is one
    Putback = 0;
    return (c);
  }

  c = fgetc(Infile);		// Read from input file

  while (Linestart && c == '#') {	// We've hit a pre-processor statement
    Linestart = 0;		// No longer at the start of the line
    scan(&Token);		// Get the line number into l
    if (Token.token != T_INTLIT)
      fatals("Expecting pre-processor line number, got:", Text);
    l = Token.intvalue;

    scan(&Token);		// Get the filename in Text
    if (Token.token != T_STRLIT)
      fatals("Expecting pre-processor file name, got:", Text);

    if (Text[0] != '<') {	// If this is a real filename
      if (strcmp(Text, Infilename))	// and not the one we have now
	Infilename = strdup(Text);	// save it. Then update the line num
      Line = l;
    }

    // Skip to the end of the line. Also stop at end of file:
    // otherwise a pre-processor line with no trailing newline
    // would make this loop spin forever.
    c = fgetc(Infile);
    while (c != '\n' && c != EOF)
      c = fgetc(Infile);

    c = fgetc(Infile);		// and get the next character
    Linestart = 1;		// Now back at the start of the line
  }

  Linestart = 0;		// No longer at the start of the line
  if ('\n' == c) {
    Line++;			// Increment line count
    Linestart = 1;		// Now back at the start of the line
  }
  return (c);
}

// Put back an unwanted character.
// Only one character can be held at a time.
static void putback(int c) {
  Putback = c;
}

// Skip past input that we don't need to deal with,
// i.e. whitespace, newlines. Return the first
// character we do need to deal with.
static int skip(void) {
  int c;

  // Keep reading until the character is not
  // a space, tab, newline, return or form feed
  while (1) {
    c = next();
    if (c != ' ' && c != '\t' && c != '\n' && c != '\r' && c != '\f')
      return (c);
  }
}

// Read in a hexadecimal constant from the input.
// The value must have at least one digit and
// must not be larger than 255.
static int hexchar(void) {
  int c, digit, value = 0, seen = 0;

  // Accumulate the value one hex digit at a time
  c = next();
  while (isxdigit(c)) {
    // Map the digit character to its numeric value
    digit = chrpos("0123456789abcdef", tolower(c));
    value = value * 16 + digit;
    seen = 1;
    c = next();
  }

  // The character that ended the loop is not ours
  putback(c);

  // No hex digits at all is an error
  if (!seen)
    fatal("missing digits after '\\x'");
  if (value > 255)
    fatal("value out of range after '\\x'");
  return (value);
}

// Return the next character from a character
// or string literal, with backslash escape
// sequences translated to their values
static int scanch(void) {
  int ndigits, c, value;

  // Anything that does not start with
  // a backslash is returned as it is
  c = next();
  if (c != '\\')
    return (c);

  // Interpret the character after the backslash
  c = next();
  switch (c) {
    case 'a':
      return ('\a');
    case 'b':
      return ('\b');
    case 'f':
      return ('\f');
    case 'n':
      return ('\n');
    case 'r':
      return ('\r');
    case 't':
      return ('\t');
    case 'v':
      return ('\v');
    case '\\':
      return ('\\');
    case '"':
      return ('"');
    case '\'':
      return ('\'');

      // Octal constant: collect octal digits into value,
      // counting them in ndigits, and stop at the first
      // non-octal character or after three digits.
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
      ndigits = 0;
      value = 0;
      while (isdigit(c) && c < '8') {
	ndigits++;
	if (ndigits > 3)
	  break;
	value = value * 8 + (c - '0');
	c = next();
      }

      putback(c);		// Put back the first non-octal char
      return (value);
    case 'x':
      return (hexchar());
    default:
      fatalc("unknown escape sequence", c);
  }
  return (c);
}

// Scan and return an integer literal
// value from the input file.
static int scanint(int c) { int k, val = 0, radix = 10; // Assume the radix is 10, but if it starts with 0 if (c == '0') { // and the next character is 'x', it's radix 16 if ((c = next()) == 'x') { radix = 16; c = next(); } else // Otherwise, it's radix 8 radix = 8; } // Convert each character into an int value while ((k = chrpos("0123456789abcdef", tolower(c))) >= 0) { if (k >= radix) fatalc("invalid digit in integer literal", c); val = val * radix + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan in a string literal from the input file, // and store it in buf[]. Return the length of // the string. static int scanstr(char *buf) { int i, c; // Loop while we have enough buffer space for (i = 0; i < TEXTLEN - 1; i++) { // Get the next char and append to buf // Return when we hit the ending double quote if ((c = scanch()) == '"') { buf[i] = 0; return (i); } buf[i] = (char)c; } // Ran out of buf[] space fatal("String literal too long"); return (0); } // Scan an identifier from the input file and // store it in buf[]. Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { fatal("Identifier too long"); } else if (i < lim - 1) { buf[i++] = (char)c; } c = next(); } // We hit a non-valid character, put it back. // NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. 
static int keyword(char *s) { switch (*s) { case 'b': if (!strcmp(s, "break")) return (T_BREAK); break; case 'c': if (!strcmp(s, "case")) return (T_CASE); if (!strcmp(s, "char")) return (T_CHAR); if (!strcmp(s, "continue")) return (T_CONTINUE); break; case 'd': if (!strcmp(s, "default")) return (T_DEFAULT); break; case 'e': if (!strcmp(s, "else")) return (T_ELSE); if (!strcmp(s, "enum")) return (T_ENUM); if (!strcmp(s, "extern")) return (T_EXTERN); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'l': if (!strcmp(s, "long")) return (T_LONG); break; case 'r': if (!strcmp(s, "return")) return (T_RETURN); break; case 's': if (!strcmp(s, "sizeof")) return (T_SIZEOF); if (!strcmp(s, "static")) return (T_STATIC); if (!strcmp(s, "struct")) return (T_STRUCT); if (!strcmp(s, "switch")) return (T_SWITCH); break; case 't': if (!strcmp(s, "typedef")) return (T_TYPEDEF); break; case 'u': if (!strcmp(s, "union")) return (T_UNION); break; case 'v': if (!strcmp(s, "void")) return (T_VOID); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; } return (0); } // List of token strings, for debugging purposes char *Tstring[] = { "EOF", "=", "+=", "-=", "*=", "/=", "%=", "?", "||", "&&", "|", "^", "&", "==", "!=", ",", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "%", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", "default", "sizeof", "static", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":" }; // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. 
int scan(struct token *t) { int c, tokentype; // If we have a lookahead token, return this token if (Peektoken.token != 0) { t->token = Peektoken.token; t->tokstr = Peektoken.tokstr; t->intvalue = Peektoken.intvalue; Peektoken.token = 0; return (1); } // Skip whitespace c = skip(); // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': if ((c = next()) == '+') { t->token = T_INC; } else if (c == '=') { t->token = T_ASPLUS; } else { putback(c); t->token = T_PLUS; } break; case '-': if ((c = next()) == '-') { t->token = T_DEC; } else if (c == '>') { t->token = T_ARROW; } else if (c == '=') { t->token = T_ASMINUS; } else if (isdigit(c)) { // Negative int literal t->intvalue = -scanint(c); t->token = T_INTLIT; } else { putback(c); t->token = T_MINUS; } break; case '*': if ((c = next()) == '=') { t->token = T_ASSTAR; } else { putback(c); t->token = T_STAR; } break; case '/': if ((c = next()) == '=') { t->token = T_ASSLASH; } else { putback(c); t->token = T_SLASH; } break; case '%': if ((c = next()) == '=') { t->token = T_ASMOD; } else { putback(c); t->token = T_MOD; } break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case '[': t->token = T_LBRACKET; break; case ']': t->token = T_RBRACKET; break; case '~': t->token = T_INVERT; break; case '^': t->token = T_XOR; break; case ',': t->token = T_COMMA; break; case '.': t->token = T_DOT; break; case ':': t->token = T_COLON; break; case '?': t->token = T_QUESTION; break; case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { putback(c); t->token = T_LOGNOT; } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else if (c == '<') { t->token = T_LSHIFT; } else { putback(c); t->token = T_LT; } break; case 
'>': if ((c = next()) == '=') { t->token = T_GE; } else if (c == '>') { t->token = T_RSHIFT; } else { putback(c); t->token = T_GT; } break; case '&': if ((c = next()) == '&') { t->token = T_LOGAND; } else { putback(c); t->token = T_AMPER; } break; case '|': if ((c = next()) == '|') { t->token = T_LOGOR; } else { putback(c); t->token = T_OR; } break; case '\'': // If it's a quote, scan in the // literal character value and // the trailing quote t->intvalue = scanch(); t->token = T_INTLIT; if (next() != '\'') fatal("Expected '\\'' at end of char literal"); break; case '"': // Scan in a literal string scanstr(Text); t->token = T_STRLIT; break; default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if ((tokentype = keyword(Text)) != 0) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token t->tokstr = Tstring[t->token]; return (1); } ================================================ FILE: 60_TripleTest/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // Prototypes static struct ASTnode *single_statement(void); // compound_statement: // empty, i.e. 
no statement // | statement // | statement statements // ; // // statement: declaration // | expression_statement // | function_call // | if_statement // | while_statement // | for_statement // | return_statement // ; // if_statement: if_head // | if_head 'else' statement // ; // // if_head: 'if' '(' true_false_expression ')' statement ; // // Parse an IF statement including any // optional ELSE clause and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST->ctype, condAST, NULL, 0); rparen(); // Get the AST for the statement trueAST = single_statement(); // If we have an 'else', skip it // and get the AST for the statement if (Token.token == T_ELSE) { scan(&Token); falseAST = single_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, P_NONE, NULL, condAST, trueAST, falseAST, NULL, 0)); } // while_statement: 'while' '(' true_false_expression ')' statement ; // // Parse a WHILE statement and return its AST static struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST->ctype, condAST, NULL, 0); rparen(); // Get the AST for the statement. 
// Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Build and return the AST for this statement return (mkastnode(A_WHILE, P_NONE, NULL, condAST, NULL, bodyAST, NULL, 0)); } // for_statement: 'for' '(' expression_list ';' // true_false_expression ';' // expression_list ')' statement ; // // Parse a FOR statement and return its AST static struct ASTnode *for_statement(void) { struct ASTnode *condAST, *bodyAST; struct ASTnode *preopAST, *postopAST; struct ASTnode *tree; // Ensure we have 'for' '(' match(T_FOR, "for"); lparen(); // Get the pre_op expression and the ';' preopAST = expression_list(T_SEMI); semi(); // Get the condition and the ';'. // Force a non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST->ctype, condAST, NULL, 0); semi(); // Get the post_op expression and the ')' postopAST = expression_list(T_RPAREN); rparen(); // Get the statement which is the body // Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Glue the statement and the postop tree tree = mkastnode(A_GLUE, P_NONE, NULL, bodyAST, NULL, postopAST, NULL, 0); // Make a WHILE loop with the condition and this new body tree = mkastnode(A_WHILE, P_NONE, NULL, condAST, NULL, tree, NULL, 0); // And glue the preop tree to the A_WHILE tree return (mkastnode(A_GLUE, P_NONE, NULL, preopAST, NULL, tree, NULL, 0)); } // return_statement: 'return' '(' expression ')' ; // // Parse a return statement and return its AST static struct ASTnode *return_statement(void) { struct ASTnode *tree= NULL; // Ensure we have 'return' match(T_RETURN, "return"); // See if we have a return value if (Token.token == T_LPAREN) { // Can't return a value if function returns P_VOID if (Functionid->type == P_VOID) fatal("Can't return from a void function"); // Skip the left parenthesis lparen(); // Parse 
the following expression tree = binexpr(0); // Ensure this is compatible with the function's type tree = modify_type(tree, Functionid->type, Functionid->ctype, 0); if (tree == NULL) fatal("Incompatible type to return"); // Get the ')' rparen(); } // Add on the A_RETURN node tree = mkastunary(A_RETURN, P_NONE, NULL, tree, NULL, 0); // Get the ';' semi(); return (tree); } // break_statement: 'break' ; // // Parse a break statement and return its AST static struct ASTnode *break_statement(void) { if (Looplevel == 0 && Switchlevel == 0) fatal("no loop or switch to break out from"); scan(&Token); semi(); return (mkastleaf(A_BREAK, P_NONE, NULL, NULL, 0)); } // continue_statement: 'continue' ; // // Parse a continue statement and return its AST static struct ASTnode *continue_statement(void) { if (Looplevel == 0) fatal("no loop to continue to"); scan(&Token); semi(); return (mkastleaf(A_CONTINUE, P_NONE, NULL, NULL, 0)); } // Parse a switch statement and return its AST static struct ASTnode *switch_statement(void) { struct ASTnode *left, *body, *n, *c; struct ASTnode *casetree = NULL, *casetail; int inloop = 1, casecount = 0; int seendefault = 0; int ASTop, casevalue; // Skip the 'switch' and '(' scan(&Token); lparen(); // Get the switch expression, the ')' and the '{' left = binexpr(0); rparen(); lbrace(); // Ensure that this is of int type if (!inttype(left->type)) fatal("Switch expression is not of integer type"); // Build an A_SWITCH subtree with the expression as // the child n = mkastunary(A_SWITCH, P_NONE, NULL, left, NULL, 0); // Now parse the cases Switchlevel++; while (inloop) { switch (Token.token) { // Leave the loop when we hit a '}' case T_RBRACE: if (casecount == 0) fatal("No cases in switch"); inloop = 0; break; case T_CASE: case T_DEFAULT: // Ensure this isn't after a previous 'default' if (seendefault) fatal("case or default after existing default"); // Set the AST operation. 
Scan the case value if required if (Token.token == T_DEFAULT) { ASTop = A_DEFAULT; seendefault = 1; scan(&Token); } else { ASTop = A_CASE; scan(&Token); left = binexpr(0); // Ensure the case value is an integer literal if (left->op != A_INTLIT) fatal("Expecting integer literal for case value"); casevalue = left->a_intvalue; // Walk the list of existing case values to ensure // that there isn't a duplicate case value for (c = casetree; c != NULL; c = c->right) if (casevalue == c->a_intvalue) fatal("Duplicate case value"); } // Scan the ':' and increment the casecount match(T_COLON, ":"); casecount++; // If the next token is a T_CASE, the existing case will fall // into the next case. Otherwise, parse the case body. if (Token.token == T_CASE) body = NULL; else body = compound_statement(1); // Build a sub-tree with any compound statement as the left child // and link it in to the growing A_CASE tree if (casetree == NULL) { casetree = casetail = mkastunary(ASTop, P_NONE, NULL, body, NULL, casevalue); } else { casetail->right = mkastunary(ASTop, P_NONE, NULL, body, NULL, casevalue); casetail = casetail->right; } break; default: fatals("Unexpected token in switch", Token.tokstr); } } Switchlevel--; // We have a sub-tree with the cases and any default. Put the // case count into the A_SWITCH node and attach the case tree. n->a_intvalue = casecount; n->right = casetree; rbrace(); return (n); } // Parse a single statement and return its AST. static struct ASTnode *single_statement(void) { struct ASTnode *stmt; struct symtable *ctype; int linenum= Line; switch (Token.token) { case T_SEMI: // An empty statement semi(); break; case T_LBRACE: // We have a '{', so this is a compound statement lbrace(); stmt = compound_statement(0); stmt->linenum= linenum; rbrace(); return (stmt); case T_IDENT: // We have to see if the identifier matches a typedef. // If not, treat it as an expression. // Otherwise, fall down to the parse_type() call. 
if (findtypedef(Text) == NULL) { stmt = binexpr(0); stmt->linenum= linenum; semi(); return (stmt); } case T_CHAR: case T_INT: case T_LONG: case T_STRUCT: case T_UNION: case T_ENUM: case T_TYPEDEF: // The beginning of a variable declaration list. declaration_list(&ctype, C_LOCAL, T_SEMI, T_EOF, &stmt); semi(); return (stmt); // Any assignments from the declarations case T_IF: stmt= if_statement(); stmt->linenum= linenum; return(stmt); case T_WHILE: stmt= while_statement(); stmt->linenum= linenum; return(stmt); case T_FOR: stmt= for_statement(); stmt->linenum= linenum; return(stmt); case T_RETURN: stmt= return_statement(); stmt->linenum= linenum; return(stmt); case T_BREAK: stmt= break_statement(); stmt->linenum= linenum; return(stmt); case T_CONTINUE: stmt= continue_statement(); stmt->linenum= linenum; return(stmt); case T_SWITCH: stmt= switch_statement(); stmt->linenum= linenum; return(stmt); default: // For now, see if this is an expression. // This catches assignment statements. stmt = binexpr(0); stmt->linenum= linenum; semi(); return (stmt); } return (NULL); // Keep -Wall happy } // Parse a compound statement // and return its AST. If inswitch is true, // we look for a '}', 'case' or 'default' token // to end the parsing. Otherwise, look for // just a '}' to end the parsing. struct ASTnode *compound_statement(int inswitch) { struct ASTnode *left = NULL; struct ASTnode *tree; while (1) { // Leave if we've hit the end token. 
We do this first to allow // an empty compound statement if (Token.token == T_RBRACE) return (left); if (inswitch && (Token.token == T_CASE || Token.token == T_DEFAULT)) return (left); // Parse a single statement tree = single_statement(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, P_NONE, NULL, left, NULL, tree, NULL, 0); } } return (NULL); // Keep -Wall happy } ================================================ FILE: 60_TripleTest/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 // Append a node to the singly-linked list pointed to by head or tail void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node) { // Check for valid pointers if (head == NULL || tail == NULL || node == NULL) fatal("Either head, tail or node is NULL in appendsym"); // Append to the list if (*tail) { (*tail)->next = node; *tail = node; } else *head = *tail = node; node->next = NULL; } // Create a symbol node to be added to a symbol table list. // Set up the node's: // + type: char, int etc. // + ctype: composite type pointer for struct/union // + structural type: var, function, array etc. 
// + size: number of elements, or endlabel: end label for a function // + posn: Position information for local symbols // Return a pointer to the new node struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn) { // Get a new node struct symtable *node = (struct symtable *) malloc(sizeof(struct symtable)); if (node == NULL) fatal("Unable to malloc a symbol table node in newsym"); // Fill in the values if (name == NULL) node->name = NULL; else node->name = strdup(name); node->type = type; node->ctype = ctype; node->stype = stype; node->class = class; node->nelems = nelems; // For pointers and integer types, set the size // of the symbol. structs and union declarations // manually set this up themselves. if (ptrtype(type) || inttype(type)) node->size = nelems * typesize(type, ctype); node->st_posn = posn; node->next = NULL; node->member = NULL; node->initlist = NULL; #ifdef __NASM__ node->extinit = 0; #endif return (node); } // Add a symbol to the global symbol list struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn) { struct symtable *sym = newsym(name, type, ctype, stype, class, nelems, posn); // For structs and unions, copy the size from the type node if (type == P_STRUCT || type == P_UNION) sym->size = ctype->size; appendsym(&Globhead, &Globtail, sym); return (sym); } // Add a symbol to the local symbol list struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int nelems) { struct symtable *sym = newsym(name, type, ctype, stype, C_LOCAL, nelems, 0); // For structs and unions, copy the size from the type node if (type == P_STRUCT || type == P_UNION) sym->size = ctype->size; appendsym(&Loclhead, &Locltail, sym); return (sym); } // Add a symbol to the parameter list struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype) { struct symtable *sym = newsym(name, type, ctype, stype, C_PARAM, 1, 0); 
appendsym(&Parmhead, &Parmtail, sym); return (sym); } // Add a symbol to the temporary member list struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int nelems) { struct symtable *sym = newsym(name, type, ctype, stype, C_MEMBER, nelems, 0); // For structs and unions, copy the size from the type node if (type == P_STRUCT || type == P_UNION) sym->size = ctype->size; appendsym(&Membhead, &Membtail, sym); return (sym); } // Add a struct to the struct list struct symtable *addstruct(char *name) { struct symtable *sym = newsym(name, P_STRUCT, NULL, 0, C_STRUCT, 0, 0); appendsym(&Structhead, &Structtail, sym); return (sym); } // Add a struct to the union list struct symtable *addunion(char *name) { struct symtable *sym = newsym(name, P_UNION, NULL, 0, C_UNION, 0, 0); appendsym(&Unionhead, &Uniontail, sym); return (sym); } // Add an enum type or value to the enum list. // Class is C_ENUMTYPE or C_ENUMVAL. // Use posn to store the int value. struct symtable *addenum(char *name, int class, int value) { struct symtable *sym = newsym(name, P_INT, NULL, 0, class, 0, value); appendsym(&Enumhead, &Enumtail, sym); return (sym); } // Add a typedef to the typedef list struct symtable *addtypedef(char *name, int type, struct symtable *ctype) { struct symtable *sym = newsym(name, type, ctype, 0, C_TYPEDEF, 0, 0); appendsym(&Typehead, &Typetail, sym); return (sym); } // Search for a symbol in a specific list. // Return a pointer to the found node or NULL if not found. // If class is not zero, also match on the given class static struct symtable *findsyminlist(char *s, struct symtable *list, int class) { for (; list != NULL; list = list->next) if ((list->name != NULL) && !strcmp(s, list->name)) if (class == 0 || class == list->class) return (list); return (NULL); } // Determine if the symbol s is in the global symbol table. // Return a pointer to the found node or NULL if not found. 
struct symtable *findglob(char *s) { return (findsyminlist(s, Globhead, 0)); } // Determine if the symbol s is in the local symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findlocl(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } return (findsyminlist(s, Loclhead, 0)); } // Determine if the symbol s is in the symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findsymbol(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } // Otherwise, try the local and global symbol lists node = findsyminlist(s, Loclhead, 0); if (node) return (node); return (findsyminlist(s, Globhead, 0)); } // Find a member in the member list // Return a pointer to the found node or NULL if not found. struct symtable *findmember(char *s) { return (findsyminlist(s, Membhead, 0)); } // Find a struct in the struct list // Return a pointer to the found node or NULL if not found. struct symtable *findstruct(char *s) { return (findsyminlist(s, Structhead, 0)); } // Find a struct in the union list // Return a pointer to the found node or NULL if not found. struct symtable *findunion(char *s) { return (findsyminlist(s, Unionhead, 0)); } // Find an enum type in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumtype(char *s) { return (findsyminlist(s, Enumhead, C_ENUMTYPE)); } // Find an enum value in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumval(char *s) { return (findsyminlist(s, Enumhead, C_ENUMVAL)); } // Find a type in the tyedef list // Return a pointer to the found node or NULL if not found. 
struct symtable *findtypedef(char *s) { return (findsyminlist(s, Typehead, 0)); } // Reset the contents of the symbol table void clear_symtable(void) { Globhead = Globtail = NULL; Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Membhead = Membtail = NULL; Structhead = Structtail = NULL; Unionhead = Uniontail = NULL; Enumhead = Enumtail = NULL; Typehead = Typetail = NULL; } // Clear all the entries in the local symbol table void freeloclsyms(void) { Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Functionid = NULL; } // Remove all static symbols from the global symbol table void freestaticsyms(void) { // g points at current node, prev at the previous one struct symtable *g, *prev = NULL; // Walk the global table looking for static entries for (g = Globhead; g != NULL; g = g->next) { if (g->class == C_STATIC) { // If there's a previous node, rearrange the prev pointer // to skip over the current node. If not, g is the head, // so do the same to Globhead if (prev != NULL) prev->next = g->next; else Globhead->next = g->next; // If g is the tail, point Globtail at the previous node // (if there is one), or Globhead if (g == Globtail) { if (prev != NULL) Globtail = prev; else Globtail = Globhead; } } } // Point prev at g before we move up to the next node prev = g; } // Dump a single symbol static void dumpsym(struct symtable *sym, int indent) { int i; for (i = 0; i < indent; i++) printf(" "); switch (sym->type & (~0xf)) { case P_VOID: printf("void "); break; case P_CHAR: printf("char "); break; case P_INT: printf("int "); break; case P_LONG: printf("long "); break; case P_STRUCT: if (sym->ctype != NULL) printf("struct %s ", sym->ctype->name); else printf("struct %s ", sym->name); break; case P_UNION: if (sym->ctype != NULL) printf("union %s ", sym->ctype->name); else printf("union %s ", sym->name); break; default: printf("unknown type "); } for (i = 0; i < (sym->type & 0xf); i++) printf("*"); printf("%s", sym->name); switch (sym->stype) { case 
S_VARIABLE: break; case S_FUNCTION: printf("()"); break; case S_ARRAY: printf("[]"); break; default: printf(" unknown stype"); } switch (sym->class) { case C_GLOBAL: printf(": global"); break; case C_LOCAL: printf(": local"); break; case C_PARAM: printf(": param"); break; case C_EXTERN: printf(": extern"); break; case C_STATIC: printf(": static"); break; case C_STRUCT: printf(": struct"); break; case C_UNION: printf(": union"); break; case C_MEMBER: printf(": member"); break; case C_ENUMTYPE: printf(": enumtype"); break; case C_ENUMVAL: printf(": enumval"); break; case C_TYPEDEF: printf(": typedef"); break; default: printf(": unknown class"); } switch (sym->stype) { case S_VARIABLE: if (sym->class == C_ENUMVAL) printf(", value %d\n", sym->st_posn); else printf(", size %d\n", sym->size); break; case S_FUNCTION: printf(", %d params\n", sym->nelems); break; case S_ARRAY: printf(", %d elems, size %d\n", sym->nelems, sym->size); break; } switch (sym->type & (~0xf)) { case P_STRUCT: case P_UNION: dumptable(sym->member, NULL, 4); } switch (sym->stype) { case S_FUNCTION: dumptable(sym->member, NULL, 4); } } // Dump one symbol table void dumptable(struct symtable *head, char *name, int indent) { struct symtable *sym; if (head != NULL && name != NULL) printf("%s\n--------\n", name); for (sym = head; sym != NULL; sym = sym->next) dumpsym(sym, indent); } void dumpsymtables(void) { dumptable(Globhead, "Global", 0); printf("\n"); dumptable(Enumhead, "Enums", 0); printf("\n"); dumptable(Typehead, "Typedefs", 0); } ================================================ FILE: 60_TripleTest/tests/err.input031.c ================================================ Expecting a primary expression, got token:+ on line 5 of input031.c ================================================ FILE: 60_TripleTest/tests/err.input032.c ================================================ Unknown variable or function:pizza on line 4 of input032.c ================================================ FILE: 
60_TripleTest/tests/err.input033.c ================================================ Incompatible type to return on line 4 of input033.c ================================================ FILE: 60_TripleTest/tests/err.input034.c ================================================ For now, declaration of non-global arrays is not implemented on line 4 of input034.c ================================================ FILE: 60_TripleTest/tests/err.input035.c ================================================ Duplicate local variable declaration:a on line 4 of input035.c ================================================ FILE: 60_TripleTest/tests/err.input036.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input036.c ================================================ FILE: 60_TripleTest/tests/err.input037.c ================================================ Expected:comma on line 3 of input037.c ================================================ FILE: 60_TripleTest/tests/err.input038.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input038.c ================================================ FILE: 60_TripleTest/tests/err.input039.c ================================================ No statements in function with non-void type on line 4 of input039.c ================================================ FILE: 60_TripleTest/tests/err.input040.c ================================================ No return for function with non-void type on line 4 of input040.c ================================================ FILE: 60_TripleTest/tests/err.input041.c ================================================ Can't return from a void function on line 3 of input041.c ================================================ FILE: 60_TripleTest/tests/err.input042.c ================================================ Unknown variable or function:fred on line 3 of input042.c 
================================================ FILE: 60_TripleTest/tests/err.input043.c ================================================ Unknown variable or function:b on line 3 of input043.c ================================================ FILE: 60_TripleTest/tests/err.input044.c ================================================ Unknown variable or function:z on line 3 of input044.c ================================================ FILE: 60_TripleTest/tests/err.input045.c ================================================ & operator must be followed by an identifier on line 3 of input045.c ================================================ FILE: 60_TripleTest/tests/err.input046.c ================================================ * operator must be followed by an expression of pointer type on line 3 of input046.c ================================================ FILE: 60_TripleTest/tests/err.input047.c ================================================ ++ operator must be followed by an identifier on line 3 of input047.c ================================================ FILE: 60_TripleTest/tests/err.input048.c ================================================ -- operator must be followed by an identifier on line 3 of input048.c ================================================ FILE: 60_TripleTest/tests/err.input049.c ================================================ Incompatible expression in assignment on line 6 of input049.c ================================================ FILE: 60_TripleTest/tests/err.input050.c ================================================ Incompatible types in binary expression on line 6 of input050.c ================================================ FILE: 60_TripleTest/tests/err.input051.c ================================================ Expected '\'' at end of char literal on line 4 of input051.c ================================================ FILE: 60_TripleTest/tests/err.input052.c ================================================ Unrecognised 
character:$ on line 5 of input052.c ================================================ FILE: 60_TripleTest/tests/err.input056.c ================================================ unknown struct/union type:var1 on line 2 of input056.c ================================================ FILE: 60_TripleTest/tests/err.input057.c ================================================ previously defined struct/union:fred on line 2 of input057.c ================================================ FILE: 60_TripleTest/tests/err.input059.c ================================================ Unknown variable or function:y on line 3 of input059.c ================================================ FILE: 60_TripleTest/tests/err.input060.c ================================================ Expression is not a struct/union on line 3 of input060.c ================================================ FILE: 60_TripleTest/tests/err.input061.c ================================================ Expression is not a pointer to a struct/union on line 3 of input061.c ================================================ FILE: 60_TripleTest/tests/err.input064.c ================================================ undeclared enum type::fred on line 1 of input064.c ================================================ FILE: 60_TripleTest/tests/err.input065.c ================================================ enum type redeclared::fred on line 2 of input065.c ================================================ FILE: 60_TripleTest/tests/err.input066.c ================================================ enum value redeclared::z on line 2 of input066.c ================================================ FILE: 60_TripleTest/tests/err.input068.c ================================================ redefinition of typedef:FOO on line 2 of input068.c ================================================ FILE: 60_TripleTest/tests/err.input069.c ================================================ unknown type:FLOO on line 2 of input069.c 
================================================ FILE: 60_TripleTest/tests/err.input072.c ================================================ no loop or switch to break out from on line 1 of input072.c ================================================ FILE: 60_TripleTest/tests/err.input073.c ================================================ no loop to continue to on line 1 of input073.c ================================================ FILE: 60_TripleTest/tests/err.input075.c ================================================ Unexpected token in switch:if on line 4 of input075.c ================================================ FILE: 60_TripleTest/tests/err.input076.c ================================================ No cases in switch on line 3 of input076.c ================================================ FILE: 60_TripleTest/tests/err.input077.c ================================================ case or default after existing default on line 6 of input077.c ================================================ FILE: 60_TripleTest/tests/err.input078.c ================================================ case or default after existing default on line 6 of input078.c ================================================ FILE: 60_TripleTest/tests/err.input079.c ================================================ Duplicate case value on line 6 of input079.c ================================================ FILE: 60_TripleTest/tests/err.input085.c ================================================ Bad type in parameter list on line 1 of input085.c ================================================ FILE: 60_TripleTest/tests/err.input086.c ================================================ Function definition not at global level on line 2 of input086.c ================================================ FILE: 60_TripleTest/tests/err.input087.c ================================================ Bad type in member list on line 4 of input087.c ================================================ FILE: 
60_TripleTest/tests/err.input092.c ================================================ Type mismatch: literal vs. variable on line 1 of input092.c ================================================ FILE: 60_TripleTest/tests/err.input093.c ================================================ Unknown variable or function:fred on line 1 of input093.c ================================================ FILE: 60_TripleTest/tests/err.input094.c ================================================ Type mismatch: literal vs. variable on line 1 of input094.c ================================================ FILE: 60_TripleTest/tests/err.input095.c ================================================ Variable can not be initialised:x on line 1 of input095.c ================================================ FILE: 60_TripleTest/tests/err.input096.c ================================================ Array size is illegal:0 on line 1 of input096.c ================================================ FILE: 60_TripleTest/tests/err.input097.c ================================================ For now, declaration of non-global arrays is not implemented on line 2 of input097.c ================================================ FILE: 60_TripleTest/tests/err.input098.c ================================================ Too many values in initialisation list on line 1 of input098.c ================================================ FILE: 60_TripleTest/tests/err.input102.c ================================================ Cannot cast to a struct, union or void type on line 3 of input102.c ================================================ FILE: 60_TripleTest/tests/err.input103.c ================================================ Cannot cast to a struct, union or void type on line 3 of input103.c ================================================ FILE: 60_TripleTest/tests/err.input104.c ================================================ Cannot cast to a struct, union or void type on line 2 of input104.c 
================================================ FILE: 60_TripleTest/tests/err.input105.c ================================================ Incompatible expression in assignment on line 4 of input105.c ================================================ FILE: 60_TripleTest/tests/err.input118.c ================================================ Compiler doesn't support static or extern local declarations on line 2 of input118.c ================================================ FILE: 60_TripleTest/tests/err.input124.c ================================================ Cannot ++ on rvalue on line 6 of input124.c ================================================ FILE: 60_TripleTest/tests/err.input126.c ================================================ Unknown variable or function:ptr on line 7 of input126.c ================================================ FILE: 60_TripleTest/tests/err.input129.c ================================================ Cannot ++ and/or -- more than once on line 6 of input129.c ================================================ FILE: 60_TripleTest/tests/err.input141.c ================================================ Declaration of array parameters is not implemented on line 4 of input141.c ================================================ FILE: 60_TripleTest/tests/err.input142.c ================================================ Array must have non-zero elements:fred on line 1 of input142.c ================================================ FILE: 60_TripleTest/tests/input001.c ================================================ int printf(char *fmt); void main() { printf("%d\n", 12 * 3); printf("%d\n", 18 - 2 * 4); printf("%d\n", 1 + 2 + 9 - 5/2 + 3*5); } ================================================ FILE: 60_TripleTest/tests/input002.c ================================================ int printf(char *fmt); void main() { int fred; int jim; fred= 5; jim= 12; printf("%d\n", fred + jim); } ================================================ FILE: 
60_TripleTest/tests/input003.c ================================================ int printf(char *fmt); void main() { int x; x= 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); } ================================================ FILE: 60_TripleTest/tests/input004.c ================================================ int printf(char *fmt); void main() { int x; x= 7 < 9; printf("%d\n", x); x= 7 <= 9; printf("%d\n", x); x= 7 != 9; printf("%d\n", x); x= 7 == 7; printf("%d\n", x); x= 7 >= 7; printf("%d\n", x); x= 7 <= 7; printf("%d\n", x); x= 9 > 7; printf("%d\n", x); x= 9 >= 7; printf("%d\n", x); x= 9 != 7; printf("%d\n", x); } ================================================ FILE: 60_TripleTest/tests/input005.c ================================================ int printf(char *fmt); void main() { int i; int j; i=6; j=12; if (i < j) { printf("%d\n", i); } else { printf("%d\n", j); } } ================================================ FILE: 60_TripleTest/tests/input006.c ================================================ int printf(char *fmt); void main() { int i; i=1; while (i <= 10) { printf("%d\n", i); i= i + 1; } } ================================================ FILE: 60_TripleTest/tests/input007.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 60_TripleTest/tests/input008.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 60_TripleTest/tests/input009.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { printf("%d\n", 2 * b - a); 
} } ================================================ FILE: 60_TripleTest/tests/input010.c ================================================ int printf(char *fmt); void main() { int i; char j; j= 20; printf("%d\n", j); i= 10; printf("%d\n", i); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 2; j= j + 1) { printf("%d\n", j); } } ================================================ FILE: 60_TripleTest/tests/input011.c ================================================ int printf(char *fmt); int main() { int i; char j; long k; i= 10; printf("%d\n", i); j= 20; printf("%d\n", j); k= 30; printf("%d\n", k); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 4; j= j + 1) { printf("%d\n", j); } for (k= 1; k <= 5; k= k + 1) { printf("%d\n", k); } return(i); printf("%d\n", 12345); return(3); } ================================================ FILE: 60_TripleTest/tests/input012.c ================================================ int printf(char *fmt); int fred() { return(5); } void main() { int x; x= fred(2); printf("%d\n", x); } ================================================ FILE: 60_TripleTest/tests/input013.c ================================================ int printf(char *fmt); int fred() { return(56); } void main() { int dummy; int result; dummy= printf("%d\n", 23); result= fred(10); dummy= printf("%d\n", result); } ================================================ FILE: 60_TripleTest/tests/input014.c ================================================ int printf(char *fmt); int fred() { return(20); } int main() { int result; printf("%d\n", 10); result= fred(15); printf("%d\n", result); printf("%d\n", fred(15)+10); return(0); } ================================================ FILE: 60_TripleTest/tests/input015.c ================================================ int printf(char *fmt); int main() { char a; char *b; char c; int d; int *e; int f; a= 18; printf("%d\n", a); b= &a; c= *b; printf("%d\n", c); d= 12; printf("%d\n", d); e= &d; f= *e; 
printf("%d\n", f); return(0); } ================================================ FILE: 60_TripleTest/tests/input016.c ================================================ int printf(char *fmt); int c; int d; int *e; int f; int main() { c= 12; d=18; printf("%d\n", c); e= &c + 1; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 60_TripleTest/tests/input017.c ================================================ int printf(char *fmt); int main() { char a; char *b; int d; int *e; b= &a; *b= 19; printf("%d\n", a); e= &d; *e= 12; printf("%d\n", d); return(0); } ================================================ FILE: 60_TripleTest/tests/input018.c ================================================ int printf(char *fmt); int main() { int a; int b; a= b= 34; printf("%d\n", a); printf("%d\n", b); return(0); } ================================================ FILE: 60_TripleTest/tests/input018a.c ================================================ int printf(char *fmt); int a; int *b; char c; char *d; int main() { b= &a; *b= 15; printf("%d\n", a); d= &c; *d= 16; printf("%d\n", c); return(0); } ================================================ FILE: 60_TripleTest/tests/input019.c ================================================ int printf(char *fmt); int a; int b; int c; int d; int e; int main() { a= 2; b= 4; c= 3; d= 2; e= (a+b) * (c+d); printf("%d\n", e); return(0); } ================================================ FILE: 60_TripleTest/tests/input020.c ================================================ int printf(char *fmt); int a; int b[25]; int main() { b[3]= 12; a= b[3]; printf("%d\n", a); return(0); } ================================================ FILE: 60_TripleTest/tests/input021.c ================================================ int printf(char *fmt); char c; char *str; int main() { c= '\n'; printf("%d\n", c); for (str= "Hello world\n"; *str != 0; str= str + 1) { printf("%c", *str); } return(0); } 
================================================ FILE: 60_TripleTest/tests/input022.c ================================================ int printf(char *fmt); char a; char b; char c; int d; int e; int f; long g; long h; long i; int main() { b= 5; c= 7; a= b + c++; printf("%d\n", a); e= 5; f= 7; d= e + f++; printf("%d\n", d); h= 5; i= 7; g= h + i++; printf("%d\n", g); a= b-- + c; printf("%d\n", a); d= e-- + f; printf("%d\n", d); g= h-- + i; printf("%d\n", g); a= ++b + c; printf("%d\n", a); d= ++e + f; printf("%d\n", d); g= ++h + i; printf("%d\n", g); a= b * --c; printf("%d\n", a); d= e * --f; printf("%d\n", d); g= h * --i; printf("%d\n", g); return(0); } ================================================ FILE: 60_TripleTest/tests/input023.c ================================================ int printf(char *fmt); char *str; int x; int main() { x= -23; printf("%d\n", x); printf("%d\n", -10 * -10); x= 1; x= ~x; printf("%d\n", x); x= 2 > 5; printf("%d\n", x); x= !x; printf("%d\n", x); x= !x; printf("%d\n", x); x= 13; if (x) { printf("%d\n", 13); } x= 0; if (!x) { printf("%d\n", 14); } for (str= "Hello world\n"; *str; str++) { printf("%c", *str); } return(0); } ================================================ FILE: 60_TripleTest/tests/input024.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { a= 42; b= 19; printf("%d\n", a & b); printf("%d\n", a | b); printf("%d\n", a ^ b); printf("%d\n", 1 << 3); printf("%d\n", 63 >> 3); return(0); } ================================================ FILE: 60_TripleTest/tests/input025.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { char z; int y; int x; x= 10; y= 20; z= 30; printf("%d\n", x); printf("%d\n", y); printf("%d\n", z); a= 5; b= 15; c= 25; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); return(0); } ================================================ FILE: 60_TripleTest/tests/input026.c 
================================================ int printf(char *fmt); int main(int a, char b, long c, int d, int e, int f, int g, int h) { int i; int j; int k; a= 13; printf("%d\n", a); b= 23; printf("%d\n", b); c= 34; printf("%d\n", c); d= 44; printf("%d\n", d); e= 54; printf("%d\n", e); f= 64; printf("%d\n", f); g= 74; printf("%d\n", g); h= 84; printf("%d\n", h); i= 94; printf("%d\n", i); j= 95; printf("%d\n", j); k= 96; printf("%d\n", k); return(0); } ================================================ FILE: 60_TripleTest/tests/input027.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int param5(int a, int b, int c, int d, int e) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param2(int a, int b) { int c; int d; int e; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param0() { int a; int b; int c; int d; int e; a= 1; b= 2; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int main() { param8(1,2,3,4,5,6,7,8); param5(1,2,3,4,5); param2(1,2); param0(); return(0); } ================================================ FILE: 60_TripleTest/tests/input028.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); 
return(0); } ================================================ FILE: 60_TripleTest/tests/input029.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h); int fred(int a, int b, int c); int main(); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 60_TripleTest/tests/input030.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input030.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 60_TripleTest/tests/input031.c ================================================ int printf(char *fmt); int main() { int x; x= 2 + + 3 - * / ; } ================================================ FILE: 60_TripleTest/tests/input032.c ================================================ int printf(char *fmt); int main() { pizza cow llama sausage; } ================================================ FILE: 60_TripleTest/tests/input033.c ================================================ int printf(char *fmt); int main() { char *z; return(z); } ================================================ FILE: 60_TripleTest/tests/input035.c ================================================ int printf(char *fmt); int fred(int a, int b) { int a; return(a); } 
================================================ FILE: 60_TripleTest/tests/input036.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c); ================================================ FILE: 60_TripleTest/tests/input037.c ================================================ int printf(char *fmt); int fred(int a, char b +, int z); ================================================ FILE: 60_TripleTest/tests/input038.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c, int g); ================================================ FILE: 60_TripleTest/tests/input039.c ================================================ int printf(char *fmt); int main() { int a; } ================================================ FILE: 60_TripleTest/tests/input040.c ================================================ int printf(char *fmt); int main() { int a; a= 5; } ================================================ FILE: 60_TripleTest/tests/input041.c ================================================ int printf(char *fmt); void fred() { return(5); } ================================================ FILE: 60_TripleTest/tests/input042.c ================================================ int printf(char *fmt); int main() { fred(5); } ================================================ FILE: 60_TripleTest/tests/input043.c ================================================ int printf(char *fmt); int main() { int a; a= b[4]; } ================================================ FILE: 60_TripleTest/tests/input044.c ================================================ int printf(char *fmt); int main() { int a; a= z; } ================================================ FILE: 60_TripleTest/tests/input045.c ================================================ int printf(char *fmt); int main() { int a; a= &5; } ================================================ FILE: 
60_TripleTest/tests/input046.c ================================================ int printf(char *fmt); int main() { int a; a= *5; } ================================================ FILE: 60_TripleTest/tests/input047.c ================================================ int printf(char *fmt); int main() { int a; a= ++5; } ================================================ FILE: 60_TripleTest/tests/input048.c ================================================ int printf(char *fmt); int main() { int a; a= --5; } ================================================ FILE: 60_TripleTest/tests/input049.c ================================================ int printf(char *fmt); int main() { int x; char y; y= x; } ================================================ FILE: 60_TripleTest/tests/input050.c ================================================ int printf(char *fmt); int main() { char *a; char *b; a= a + b; } ================================================ FILE: 60_TripleTest/tests/input051.c ================================================ int printf(char *fmt); int main() { char a; a= 'fred'; } ================================================ FILE: 60_TripleTest/tests/input052.c ================================================ int printf(char *fmt); int main() { int a; a= $5.00; } ================================================ FILE: 60_TripleTest/tests/input053.c ================================================ int printf(char *fmt); int main() { printf("Hello world, %d\n", 23); return(0); } ================================================ FILE: 60_TripleTest/tests/input054.c ================================================ int printf(char *fmt); int main() { int i; for (i=0; i < 20; i++) { printf("Hello world, %d\n", i); } return(0); } ================================================ FILE: 60_TripleTest/tests/input055.c ================================================ int printf(char *fmt); int main(int argc, char **argv) { int i; char *argument; printf("Hello world\n"); for 
(i=0; i < argc; i++) { argument= *argv; argv= argv + 1; printf("Argument %d is %s\n", i, argument); } return(0); } ================================================ FILE: 60_TripleTest/tests/input056.c ================================================ struct foo { int x; }; struct mary var1; ================================================ FILE: 60_TripleTest/tests/input057.c ================================================ struct fred { int x; } ; struct fred { char y; } ; ================================================ FILE: 60_TripleTest/tests/input058.c ================================================ int printf(char *fmt); struct fred { int x; char y; long z; }; struct fred var2; struct fred *varptr; int main() { long result; var2.x= 12; printf("%d\n", var2.x); var2.y= 'c'; printf("%d\n", var2.y); var2.z= 4005; printf("%d\n", var2.z); result= var2.x + var2.y + var2.z; printf("%d\n", result); varptr= &var2; result= varptr->x + varptr->y + varptr->z; printf("%d\n", result); return(0); } ================================================ FILE: 60_TripleTest/tests/input059.c ================================================ int main() { int x; x= y.foo; } ================================================ FILE: 60_TripleTest/tests/input060.c ================================================ int main() { int x; x= x.foo; } ================================================ FILE: 60_TripleTest/tests/input061.c ================================================ int main() { int x; x= x->foo; } ================================================ FILE: 60_TripleTest/tests/input062.c ================================================ int printf(char *fmt); union fred { char w; int x; int y; long z; }; union fred var1; union fred *varptr; int main() { var1.x= 65; printf("%d\n", var1.x); var1.x= 66; printf("%d\n", var1.x); printf("%d\n", var1.y); printf("The next two depend on the endian of the platform\n"); printf("%d\n", var1.w); printf("%d\n", var1.z); varptr= &var1; varptr->x= 67; 
printf("%d\n", varptr->x); printf("%d\n", varptr->y); return(0); } ================================================ FILE: 60_TripleTest/tests/input063.c ================================================ int printf(char *fmt); enum fred { apple=1, banana, carrot, pear=10, peach, mango, papaya }; enum jane { aple=1, bnana, crrot, par=10, pech, mago, paaya }; enum fred var1; enum jane var2; enum fred var3; int main() { var1= carrot + pear + mango; printf("%d\n", var1); return(0); } ================================================ FILE: 60_TripleTest/tests/input064.c ================================================ enum fred var3; ================================================ FILE: 60_TripleTest/tests/input065.c ================================================ enum fred { x, y, z }; enum fred { a, b }; ================================================ FILE: 60_TripleTest/tests/input066.c ================================================ enum fred { x, y, z }; enum mary { a, b, z }; ================================================ FILE: 60_TripleTest/tests/input067.c ================================================ int printf(char *fmt); typedef int FOO; FOO var1; struct bar { int x; int y} ; typedef struct bar BAR; BAR var2; int main() { var1= 5; printf("%d\n", var1); var2.x= 7; var2.y= 10; printf("%d\n", var2.x + var2.y); return(0); } ================================================ FILE: 60_TripleTest/tests/input068.c ================================================ typedef int FOO; typedef char FOO; ================================================ FILE: 60_TripleTest/tests/input069.c ================================================ typedef int FOO; FLOO y; ================================================ FILE: 60_TripleTest/tests/input070.c ================================================ #include typedef int FOO; int main() { FOO x; x= 56; printf("%d\n", x); return(0); } ================================================ FILE: 60_TripleTest/tests/input071.c 
================================================ #include int main() { int x; x = 0; while (x < 100) { if (x == 5) { x = x + 2; continue; } printf("%d\n", x); if (x == 14) { break; } x = x + 1; } printf("Done\n"); return (0); } ================================================ FILE: 60_TripleTest/tests/input072.c ================================================ int main() { break; } ================================================ FILE: 60_TripleTest/tests/input073.c ================================================ int main() { continue; } ================================================ FILE: 60_TripleTest/tests/input074.c ================================================ #include int main() { int x; int y; y= 0; for (x=0; x < 5; x++) { switch(x) { case 1: { y= 5; break; } case 2: { y= 7; break; } case 3: { y= 9; } default: { y= 100; } } printf("%d\n", y); } return(0); } ================================================ FILE: 60_TripleTest/tests/input075.c ================================================ int main() { int x; switch(x) { if (x<5); } } ================================================ FILE: 60_TripleTest/tests/input076.c ================================================ int main() { int x; switch(x) { } } ================================================ FILE: 60_TripleTest/tests/input077.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } case 4: { x= 2; } } } ================================================ FILE: 60_TripleTest/tests/input078.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } default: { x= 2; } } } ================================================ FILE: 60_TripleTest/tests/input079.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } case 2: { x= 2; } case 1: { x= 2; } default: { x= 2; } } } ================================================ FILE: 
60_TripleTest/tests/input080.c ================================================ #include int x; int y; int main() { for (x=0, y=1; x < 6; x++, y=y+2) { printf("%d %d\n", x, y); } return(0); } ================================================ FILE: 60_TripleTest/tests/input081.c ================================================ #include int x; int y; int main() { x= 0; y=1; for (;x<5;) { printf("%d %d\n", x, y); x=x+1; y=y+2; } return(0); } ================================================ FILE: 60_TripleTest/tests/input082.c ================================================ #include int main() { int x; x= 10; // Dangling else test if (x > 5) if (x > 15) printf("x > 15\n"); else printf("15 >= x > 5\n"); else printf("5 >= x\n"); return(0); } ================================================ FILE: 60_TripleTest/tests/input083.c ================================================ #include int main() { int x; // Dangling else test. // We should not print anything for x<= 5 for (x=0; x < 12; x++) if (x > 5) if (x > 10) printf("10 < %2d\n", x); else printf(" 5 < %2d <= 10\n", x); return(0); } ================================================ FILE: 60_TripleTest/tests/input084.c ================================================ #include int main() { int x, y; x=2; y=3; printf("%d %d\n", x, y); char a, *b; a= 'f'; b= &a; printf("%c %c\n", a, *b); return(0); } ================================================ FILE: 60_TripleTest/tests/input085.c ================================================ int main(struct foo { int x; }; , int a) { return(0); } ================================================ FILE: 60_TripleTest/tests/input086.c ================================================ int main() { int fred() { return(5); } int x; x=2; return(x); } ================================================ FILE: 60_TripleTest/tests/input087.c ================================================ struct foo { int x; int y; struct blah { int g; }; }; ================================================ FILE: 
60_TripleTest/tests/input088.c ================================================ #include struct foo { int x; int y; } fred, mary; struct foo james; int main() { fred.x= 5; mary.y= 6; printf("%d %d\n", fred.x, mary.y); return(0); } ================================================ FILE: 60_TripleTest/tests/input089.c ================================================ #include int x= 23; char y= 'H'; char *z= "Hello world"; int main() { printf("%d %c %s\n", x, y, z); return(0); } ================================================ FILE: 60_TripleTest/tests/input090.c ================================================ #include int a = 23, b = 100; char y = 'H', *z = "Hello world"; int main() { printf("%d %d %c %s\n", a, b, y, z); return (0); } ================================================ FILE: 60_TripleTest/tests/input091.c ================================================ #include int fred[] = { 1, 2, 3, 4, 5 }; int jim[10] = { 1, 2, 3, 4, 5 }; int main() { int i; for (i=0; i < 5; i++) printf("%d\n", fred[i]); for (i=0; i < 10; i++) printf("%d\n", jim[i]); return(0); } ================================================ FILE: 60_TripleTest/tests/input092.c ================================================ char x= 3000; ================================================ FILE: 60_TripleTest/tests/input093.c ================================================ char x= fred; ================================================ FILE: 60_TripleTest/tests/input094.c ================================================ char *s= 54; ================================================ FILE: 60_TripleTest/tests/input095.c ================================================ int fred(int x=2) { return(x); } ================================================ FILE: 60_TripleTest/tests/input096.c ================================================ int fred[0]; ================================================ FILE: 60_TripleTest/tests/input098.c ================================================ int fred[3]= { 1, 2, 3, 
4, 5 }; ================================================ FILE: 60_TripleTest/tests/input099.c ================================================ #include // List of token strings, for debugging purposes. // As yet, we can't store a NULL into the list char *Tstring[] = { "EOF", "=", "||", "&&", "|", "^", "&", "==", "!=", ",", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", "default", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":", "" }; int main() { int i; char *str; i=0; while (1) { str= Tstring[i]; if (*str == 0) break; printf("%s\n", str); i++; } return(0); } ================================================ FILE: 60_TripleTest/tests/input100.c ================================================ #include int main() { int x= 3, y=14; int z= 2 * x + y; char *str= "Hello world"; printf("%s %d %d\n", str, x+y, z); return(0); } ================================================ FILE: 60_TripleTest/tests/input101.c ================================================ #include int main() { int x= 65535; char y; char *str; y= (char )x; printf("0x%x\n", y); str= (char *)0; printf("0x%lx\n", (long)str); return(0); } ================================================ FILE: 60_TripleTest/tests/input102.c ================================================ int main() { struct foo { int p; }; int y= (struct foo) x; } ================================================ FILE: 60_TripleTest/tests/input103.c ================================================ int main() { union foo { int p; }; int y= (union foo) x; } ================================================ FILE: 60_TripleTest/tests/input104.c ================================================ int main() { int y= (void) x; } ================================================ FILE: 60_TripleTest/tests/input105.c 
================================================ int main() { int x; char *y; y= (char) x; } ================================================ FILE: 60_TripleTest/tests/input106.c ================================================ #include char *y= (char *)0; int main() { printf("0x%lx\n", (long)y); return(0); } ================================================ FILE: 60_TripleTest/tests/input107.c ================================================ #include char *y[] = { "fish", "cow", NULL }; char *z= NULL; int main() { int i; char *ptr; for (i=0; i < 3; i++) { ptr= y[i]; if (ptr != (char *)0) printf("%s\n", y[i]); else printf("NULL\n"); } return(0); } ================================================ FILE: 60_TripleTest/tests/input108.c ================================================ int main() { char *str= (void *)0; return(0); } ================================================ FILE: 60_TripleTest/tests/input109.c ================================================ #include int main() { int x= 17 - 1; printf("%d\n", x); return(0); } ================================================ FILE: 60_TripleTest/tests/input110.c ================================================ #include int x; int y; int main() { x= 3; y= 15; y += x; printf("%d\n", y); x= 3; y= 15; y -= x; printf("%d\n", y); x= 3; y= 15; y *= x; printf("%d\n", y); x= 3; y= 15; y /= x; printf("%d\n", y); return(0); } ================================================ FILE: 60_TripleTest/tests/input111.c ================================================ #include int main() { int x= 2000 + 3 + 4 * 5 + 6; printf("%d\n", x); return(0); } ================================================ FILE: 60_TripleTest/tests/input112.c ================================================ #include char* y = NULL; int x= 10 + 6; int fred [ 2 + 3 ]; int main() { fred[2]= x; printf("%d\n", fred[2]); return(0); } ================================================ FILE: 60_TripleTest/tests/input113.c ================================================ 
#include void fred(void); void fred(void) { printf("fred says hello\n"); } int main(void) { fred(); return(0); } ================================================ FILE: 60_TripleTest/tests/input114.c ================================================ #include int main() { int x= 0x4a; printf("%c\n", x); return(0); } ================================================ FILE: 60_TripleTest/tests/input115.c ================================================ #include struct foo { int x; char y; long z; }; typedef struct foo blah; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol struct symtable *ctype; // If struct/union, ptr to that type int stype; // Structural type for the symbol int class; // Storage class for the symbol int size; // Total size in bytes of this symbol int nelems; // Functions: # params. Arrays: # elements #define st_endlabel st_posn // For functions, the end label int st_posn; // For locals, the negative offset // from the stack base pointer int *initlist; // List of initial values struct symtable *next; // Next symbol in one list struct symtable *member; // First member of a function, struct, }; // union or enum // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates int rvalue; // True if the node is an rvalue struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; struct symtable *sym; // For many AST nodes, the pointer to // the symbol in the symbol table #define a_intvalue a_size // For A_INTLIT, the integer value int a_size; // For A_SCALE, the size to scale by }; int main() { printf("%ld\n", sizeof(char)); printf("%ld\n", sizeof(int)); printf("%ld\n", sizeof(long)); printf("%ld\n", sizeof(char *)); printf("%ld\n", sizeof(blah)); printf("%ld\n", sizeof(struct symtable)); printf("%ld\n", sizeof(struct ASTnode)); return(0); } 
================================================ FILE: 60_TripleTest/tests/input116.c ================================================ #include static int counter=0; static int fred(void) { return(counter++); } int main(void) { int i; for (i=0; i < 5; i++) printf("%d\n", fred()); return(0); } ================================================ FILE: 60_TripleTest/tests/input117.c ================================================ #include static char *fred(void) { return("Hello"); } int main(void) { printf("%s\n", fred()); return(0); } ================================================ FILE: 60_TripleTest/tests/input118.c ================================================ int main(void) { static int x; } ================================================ FILE: 60_TripleTest/tests/input119.c ================================================ #include int x; int y= 3; int main() { x= y != 3 ? 6 : 8; printf("%d\n", x); x= (y == 3) ? 6 : 8; printf("%d\n", x); return(0); } ================================================ FILE: 60_TripleTest/tests/input120.c ================================================ #include int x; int y= 3; int main() { for (y= 0; y < 10; y++) { x= y > 4 ? y + 2 : y + 9; printf("%d\n", x); } return(0); } ================================================ FILE: 60_TripleTest/tests/input121.c ================================================ #include int x; int y= 3; int main() { for (y= 0; y < 10; y++) { x= (y < 4) ? y + 2 : (y > 7) ? 
1000 : y + 9; printf("%d\n", x); } return(0); } ================================================ FILE: 60_TripleTest/tests/input122.c ================================================ #include int x, y, z1, z2; int main() { for (x= 0; x <= 1; x++) { for (y= 0; y <= 1; y++) { z1= x || y; z2= x && y; printf("x %d, y %d, x || y %d, x && y %d\n", x, y, z1, z2); } } //z= x || y; return(0); } ================================================ FILE: 60_TripleTest/tests/input123.c ================================================ #include int main() { int x; for (x=0; x < 20; x++) switch(x) { case 2: case 3: case 5: case 7: case 11: printf("%2d infant prime\n", x); break; case 13: case 17: case 19: printf("%2d teen prime\n", x); break; case 0: case 1: case 4: case 6: case 8: case 9: case 10: case 12: printf("%2d infant composite\n", x); break; default: printf("%2d teen composite\n", x); break; } return(0); } ================================================ FILE: 60_TripleTest/tests/input124.c ================================================ #include int ary[5]; int main() { ary++; return(0); } ================================================ FILE: 60_TripleTest/tests/input125.c ================================================ #include int ary[5]; int *ptr; int x; int main() { ary[3]= 2008; ptr= ary; // Load ary's address into ptr x= ary[3]; printf("%d\n", x); x= ptr[3]; printf("%d\n", x); // Treat ptr as an array return(0); } ================================================ FILE: 60_TripleTest/tests/input126.c ================================================ #include int ary[5]; int main() { ary[3]= 2008; ptr= &ary; return(0); } ================================================ FILE: 60_TripleTest/tests/input127.c ================================================ #include int ary[5]; void fred(int *ptr) { // Receive a pointer printf("%d\n", ptr[3]); } int main() { ary[3]= 2008; printf("%d\n", ary[3]); fred(ary); // Pass ary as a pointer return(0); } 
================================================ FILE: 60_TripleTest/tests/input128.c ================================================ #include struct foo { int val; struct foo *next; }; struct foo head, mid, tail; int main() { struct foo *ptr; tail.val= 20; tail.next= NULL; mid.val= 15; mid.next= &tail; head.val= 10; head.next= ∣ ptr= &head; printf("%d %d\n", head.val, ptr->val); printf("%d %d\n", mid.val, ptr->next->val); printf("%d %d\n", tail.val, ptr->next->next->val); return(0); } ================================================ FILE: 60_TripleTest/tests/input129.c ================================================ #include int x= 6; int main() { printf("%d\n", x++ ++); return(0); } ================================================ FILE: 60_TripleTest/tests/input130.c ================================================ #include char *x= "foo"; int main() { printf("Hello " "world" "\n"); return(0); } ================================================ FILE: 60_TripleTest/tests/input131.c ================================================ #include void donothing() { } int main() { int x=0; printf("Doing nothing... 
"); donothing(); printf("nothing done\n"); while (++x < 100) ; printf("x is now %d\n", x); return(0); } ================================================ FILE: 60_TripleTest/tests/input132.c ================================================ extern int fred; int fred; int mary; extern int mary; int main() { return(0); } ================================================ FILE: 60_TripleTest/tests/input133.c ================================================ #include extern int fred[]; int fred[23]; char mary[100]; extern char mary[]; void main() { printf("OK\n"); } ================================================ FILE: 60_TripleTest/tests/input134.c ================================================ #include char y = 'a'; char *x; int main() { x= &y; if (x && y == 'a') printf("1st match\n"); x= NULL; if (x && y == 'a') printf("2nd match\n"); x= &y; y='b'; if (x && y == 'a') printf("3rd match\n"); return(0); } ================================================ FILE: 60_TripleTest/tests/input135.c ================================================ #include void fred() { int x= 5; printf("testing x\n"); if (x > 4) return; printf("x below 5\n"); } int main() { fred(); return(0); } ================================================ FILE: 60_TripleTest/tests/input136.c ================================================ #include int add(int x, int y) { return(x+y); } int main() { int result; result= 3 * add(2,3) - 5 * add(4,6); printf("%d\n", result); return(0); } ================================================ FILE: 60_TripleTest/tests/input137.c ================================================ #include int a=1, b=2, c=3, d=4, e=5, f=6, g=7, h=8; int main() { int x; x= ((((((a + b) + c) + d) + e) + f) + g) + h; x= a + (b + (c + (d + (e + (f + (g + h)))))); printf("x is %d\n", x); return(0); } ================================================ FILE: 60_TripleTest/tests/input138.c ================================================ #include int x, y, z; int a=1; int *aptr; int main() { // See 
if generic AND works for (x=0; x <= 1; x++) for (y=0; y <= 1; y++) { z= x && y; printf("%d %d | %d\n", x, y, z); } // See if generic AND works for (x=0; x <= 1; x++) for (y=0; y <= 1; y++) { z= x || y; printf("%d %d | %d\n", x, y, z); } // Now some lazy evaluation aptr= NULL; if (aptr && *aptr == 1) printf("aptr points at 1\n"); else printf("aptr is NULL or doesn't point at 1\n"); aptr= &a; if (aptr && *aptr == 1) printf("aptr points at 1\n"); else printf("aptr is NULL or doesn't point at 1\n"); return(0); } ================================================ FILE: 60_TripleTest/tests/input139.c ================================================ #include int same(int x) { return(x); } int main() { int a= 3; if (same(a) && same(a) >= same(a)) printf("same apparently\n"); return(0); } ================================================ FILE: 60_TripleTest/tests/input140.c ================================================ #include int main() { int i; int ary[5]; char z; // Write below the array z= 'H'; // Fill the array for (i=0; i < 5; i++) ary[i]= i * i; // Write above the array i=14; // Print out the array for (i=0; i < 5; i++) printf("%d\n", ary[i]); // See if either side is OK printf("%d %c\n", i, z); return(0); } ================================================ FILE: 60_TripleTest/tests/input141.c ================================================ static int fred[5]; int jim; int foo(int mary[6]) { return(5); } ================================================ FILE: 60_TripleTest/tests/input142.c ================================================ static int fred[]; int jim; ================================================ FILE: 60_TripleTest/tests/input143.c ================================================ #include char foo; char *a, *b, *c; int main() { a= b= c= NULL; if (a==NULL || b==NULL || c==NULL) printf("One of the three is NULL\n"); a= &foo; if (a==NULL || b==NULL || c==NULL) printf("One of the three is NULL\n"); b= &foo; if (a==NULL || b==NULL || c==NULL) printf("One 
of the three is NULL\n"); c= &foo; if (a==NULL || b==NULL || c==NULL) printf("One of the three is NULL\n"); else printf("All three are non-NULL\n"); return(0); } ================================================ FILE: 60_TripleTest/tests/input144.c ================================================ #include #include #include char *filename= "fred"; int main() { fprintf(stdout, "Unable to open %s: %s\n", filename, strerror(errno)); return(0); } ================================================ FILE: 60_TripleTest/tests/input145.c ================================================ #include char *str= "qwertyuiop"; int list[]= {3, 5, 7, 9, 11, 13, 15}; int *lptr; int main() { printf("%c\n", *str); str= str + 1; printf("%c\n", *str); str += 1; printf("%c\n", *str); str += 1; printf("%c\n", *str); str -= 1; printf("%c\n", *str); lptr= list; printf("%d\n", *lptr); lptr= lptr + 1; printf("%d\n", *lptr); lptr += 1; printf("%d\n", *lptr); lptr += 1; printf("%d\n", *lptr); lptr -= 1; printf("%d\n", *lptr); return(0); } ================================================ FILE: 60_TripleTest/tests/input146.c ================================================ #include char *str= "qwertyuiop"; int list[]= {3, 5, 7, 9, 11, 13, 15}; int *lptr; int main() { printf("%c\n", *str); str= str + 1; printf("%c\n", *str); str += 1; printf("%c\n", *str); str += 1; printf("%c\n", *str); str -= 1; printf("%c\n", *str); str++; printf("%c\n", *str); str--; printf("%c\n", *str); ++str; printf("%c\n", *str); --str; printf("%c\n\n", *str); lptr= list; printf("%d\n", *lptr); lptr= lptr + 1; printf("%d\n", *lptr); lptr += 1; printf("%d\n", *lptr); lptr += 1; printf("%d\n", *lptr); lptr -= 1; printf("%d\n", *lptr); lptr++ ; printf("%d\n", *lptr); lptr-- ; printf("%d\n", *lptr); ++lptr ; printf("%d\n", *lptr); --lptr ; printf("%d\n", *lptr); return(0); } ================================================ FILE: 60_TripleTest/tests/input147.c ================================================ #include int a; int 
main() { printf("%d\n", 24 % 9); printf("%d\n", 31 % 11); a= 24; a %= 9; printf("%d\n",a); a= 31; a %= 11; printf("%d\n",a); return(0); }
================================================ FILE: 60_TripleTest/tests/input148.c ================================================
#include <stdio.h>

char *argv[]= { "unused", "-fish", "-cat", "owl" };
int argc= 4;

int main() {
  int i;

  // Stop at the first argument that does not start with '-'
  for (i = 1; i < argc; i++) {
    printf("i is %d\n", i);
    if (*argv[i] != '-') break;
  }

  // Print whatever the first loop did not consume
  while (i < argc) {
    printf("leftover %s\n", argv[i]);
    i++;
  }

  return (0);
}
================================================ FILE: 60_TripleTest/tests/input149.c ================================================
#include <stdio.h>

static int localOffset=0;

// Grow localOffset by size, but never by less than 4,
// and return the new offset negated
static int newlocaloffset(int size) {
  localOffset += (size > 4) ? size : 4;
  return (-localOffset);
}

int main() {
  int i, r;
  for (i=1; i <= 12; i++) {
    r= newlocaloffset(i);
    printf("%d %d\n", i, r);
  }
  return(0);
}
================================================ FILE: 60_TripleTest/tests/mktests ================================================
#!/bin/sh
# Make the output files for each test.
#
# For every input*c file that has neither an out.<file> nor an err.<file>,
# compile it with ../cwj. If ../cwj wrote anything to stderr, that text is
# kept as err.<file>. Otherwise the file is rebuilt with cc and the
# program's stdout is saved as out.<file>.

# Build our compiler if needed
if [ ! -f ../cwj ]
then (cd .. && make install)
     # Without a compiler, each ../cwj call below would fail and its
     # failure message would typically be saved as the expected err.<file>,
     # so stop here instead of generating bogus expected results.
     if [ ! -f ../cwj ]
     then echo "mktests: ../cwj is missing and could not be built" >&2
          exit 1
     fi
fi

for i in input*c
do # Two separate tests joined by && replace the obsolescent "test -a"
   if [ ! -f "out.$i" ] && [ ! -f "err.$i" ]
   then
     ../cwj -o out "$i" 2> "err.$i"
     # If the err file is empty
     if [ ! -s "err.$i" ]
     then
        rm -f "err.$i"
        cc -o out "$i"
        ./out > "out.$i"
     fi
   fi
   rm -f out out.s
done
================================================ FILE: 60_TripleTest/tests/out.input001.c ================================================
36 10 25
================================================ FILE: 60_TripleTest/tests/out.input002.c ================================================
17
================================================ FILE: 60_TripleTest/tests/out.input003.c ================================================
1 2 3 4 5
================================================ FILE: 60_TripleTest/tests/out.input004.c ================================================
1 1 1 1 1 1 1 1 1
================================================ FILE: 60_TripleTest/tests/out.input005.c ================================================
6
================================================ FILE: 60_TripleTest/tests/out.input006.c ================================================
1 2 3 4 5 6 7 8 9 10
================================================ FILE: 60_TripleTest/tests/out.input007.c ================================================
1 2 3 4 5 6 7 8 9 10
================================================ FILE: 60_TripleTest/tests/out.input008.c ================================================
1 2 3 4 5 6 7 8 9 10
================================================ FILE: 60_TripleTest/tests/out.input009.c ================================================
1 2 3 4 5 6 7 8 9 10
================================================ FILE: 60_TripleTest/tests/out.input010.c ================================================
20 10 1 2 3 4 5 253 254 255 0 1
================================================ FILE: 60_TripleTest/tests/out.input011.c ================================================
10 20 30 1 2 3 4 5 253 254 255 0 1 2 3 1 2 3 4 5
================================================ FILE: 60_TripleTest/tests/out.input012.c ================================================
5
================================================ FILE: 60_TripleTest/tests/out.input013.c ================================================ 23 56 ================================================ FILE: 60_TripleTest/tests/out.input014.c ================================================ 10 20 30 ================================================ FILE: 60_TripleTest/tests/out.input015.c ================================================ 18 18 12 12 ================================================ FILE: 60_TripleTest/tests/out.input016.c ================================================ 12 18 ================================================ FILE: 60_TripleTest/tests/out.input017.c ================================================ 19 12 ================================================ FILE: 60_TripleTest/tests/out.input018.c ================================================ 34 34 ================================================ FILE: 60_TripleTest/tests/out.input018a.c ================================================ 15 16 ================================================ FILE: 60_TripleTest/tests/out.input019.c ================================================ 30 ================================================ FILE: 60_TripleTest/tests/out.input020.c ================================================ 12 ================================================ FILE: 60_TripleTest/tests/out.input021.c ================================================ 10 Hello world ================================================ FILE: 60_TripleTest/tests/out.input022.c ================================================ 12 12 12 13 13 13 13 13 13 35 35 35 ================================================ FILE: 60_TripleTest/tests/out.input023.c ================================================ -23 100 -2 0 1 0 13 14 Hello world ================================================ FILE: 60_TripleTest/tests/out.input024.c ================================================ 2 59 57 8 7 
================================================ FILE: 60_TripleTest/tests/out.input025.c ================================================ 10 20 30 5 15 25 ================================================ FILE: 60_TripleTest/tests/out.input026.c ================================================ 13 23 34 44 54 64 74 84 94 95 96 ================================================ FILE: 60_TripleTest/tests/out.input027.c ================================================ 1 2 3 4 5 6 7 8 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 ================================================ FILE: 60_TripleTest/tests/out.input028.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 60_TripleTest/tests/out.input029.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 60_TripleTest/tests/out.input030.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input030.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 60_TripleTest/tests/out.input053.c ================================================ Hello world, 23 ================================================ FILE: 60_TripleTest/tests/out.input054.c ================================================ Hello world, 0 Hello world, 1 Hello world, 2 Hello world, 3 Hello world, 4 Hello world, 5 Hello world, 6 Hello world, 7 Hello world, 8 Hello world, 9 Hello world, 10 Hello world, 11 Hello world, 12 Hello world, 13 Hello world, 14 Hello world, 15 Hello world, 16 Hello world, 17 Hello world, 18 Hello world, 19 ================================================ FILE: 
60_TripleTest/tests/out.input055.c ================================================ Hello world Argument 0 is ./out ================================================ FILE: 60_TripleTest/tests/out.input058.c ================================================ 12 99 4005 4116 4116 ================================================ FILE: 60_TripleTest/tests/out.input062.c ================================================ 65 66 66 The next two depend on the endian of the platform 66 66 67 67 ================================================ FILE: 60_TripleTest/tests/out.input063.c ================================================ 25 ================================================ FILE: 60_TripleTest/tests/out.input067.c ================================================ 5 17 ================================================ FILE: 60_TripleTest/tests/out.input070.c ================================================ 56 ================================================ FILE: 60_TripleTest/tests/out.input071.c ================================================ 0 1 2 3 4 7 8 9 10 11 12 13 14 Done ================================================ FILE: 60_TripleTest/tests/out.input074.c ================================================ 100 5 7 100 100 ================================================ FILE: 60_TripleTest/tests/out.input080.c ================================================ 0 1 1 3 2 5 3 7 4 9 5 11 ================================================ FILE: 60_TripleTest/tests/out.input081.c ================================================ 0 1 1 3 2 5 3 7 4 9 ================================================ FILE: 60_TripleTest/tests/out.input082.c ================================================ 15 >= x > 5 ================================================ FILE: 60_TripleTest/tests/out.input083.c ================================================ 5 < 6 <= 10 5 < 7 <= 10 5 < 8 <= 10 5 < 9 <= 10 5 < 10 <= 10 10 < 11 ================================================ FILE: 
60_TripleTest/tests/out.input084.c ================================================ 2 3 f f ================================================ FILE: 60_TripleTest/tests/out.input088.c ================================================ 5 6 ================================================ FILE: 60_TripleTest/tests/out.input089.c ================================================ 23 H Hello world ================================================ FILE: 60_TripleTest/tests/out.input090.c ================================================ 23 100 H Hello world ================================================ FILE: 60_TripleTest/tests/out.input091.c ================================================ 1 2 3 4 5 1 2 3 4 5 0 0 0 0 0 ================================================ FILE: 60_TripleTest/tests/out.input099.c ================================================ EOF = || && | ^ & == != , > <= >= << >> + - * / ++ -- ~ ! void char int long if else while for return struct union enum typedef extern break continue switch case default intlit strlit ; identifier { } ( ) [ ] , . 
-> : ================================================ FILE: 60_TripleTest/tests/out.input100.c ================================================ Hello world 17 20 ================================================ FILE: 60_TripleTest/tests/out.input101.c ================================================ 0xff 0x0 ================================================ FILE: 60_TripleTest/tests/out.input106.c ================================================ 0x0 ================================================ FILE: 60_TripleTest/tests/out.input107.c ================================================ fish cow NULL ================================================ FILE: 60_TripleTest/tests/out.input108.c ================================================ ================================================ FILE: 60_TripleTest/tests/out.input109.c ================================================ 16 ================================================ FILE: 60_TripleTest/tests/out.input110.c ================================================ 18 12 45 5 ================================================ FILE: 60_TripleTest/tests/out.input111.c ================================================ 2029 ================================================ FILE: 60_TripleTest/tests/out.input112.c ================================================ 16 ================================================ FILE: 60_TripleTest/tests/out.input113.c ================================================ fred says hello ================================================ FILE: 60_TripleTest/tests/out.input114.c ================================================ J ================================================ FILE: 60_TripleTest/tests/out.input115.c ================================================ 1 4 8 8 13 64 48 ================================================ FILE: 60_TripleTest/tests/out.input116.c ================================================ 0 1 2 3 4 ================================================ FILE: 
60_TripleTest/tests/out.input117.c ================================================ Hello ================================================ FILE: 60_TripleTest/tests/out.input119.c ================================================ 8 6 ================================================ FILE: 60_TripleTest/tests/out.input120.c ================================================ 9 10 11 12 13 7 8 9 10 11 ================================================ FILE: 60_TripleTest/tests/out.input121.c ================================================ 2 3 4 5 13 14 15 16 1000 1000 ================================================ FILE: 60_TripleTest/tests/out.input122.c ================================================ x 0, y 0, x || y 0, x && y 0 x 0, y 1, x || y 1, x && y 0 x 1, y 0, x || y 1, x && y 0 x 1, y 1, x || y 1, x && y 1 ================================================ FILE: 60_TripleTest/tests/out.input123.c ================================================ 0 infant composite 1 infant composite 2 infant prime 3 infant prime 4 infant composite 5 infant prime 6 infant composite 7 infant prime 8 infant composite 9 infant composite 10 infant composite 11 infant prime 12 infant composite 13 teen prime 14 teen composite 15 teen composite 16 teen composite 17 teen prime 18 teen composite 19 teen prime ================================================ FILE: 60_TripleTest/tests/out.input125.c ================================================ 2008 2008 ================================================ FILE: 60_TripleTest/tests/out.input127.c ================================================ 2008 2008 ================================================ FILE: 60_TripleTest/tests/out.input128.c ================================================ 10 10 15 15 20 20 ================================================ FILE: 60_TripleTest/tests/out.input130.c ================================================ Hello world ================================================ FILE: 
60_TripleTest/tests/out.input131.c ================================================ Doing nothing... nothing done x is now 100 ================================================ FILE: 60_TripleTest/tests/out.input132.c ================================================ ================================================ FILE: 60_TripleTest/tests/out.input133.c ================================================ OK ================================================ FILE: 60_TripleTest/tests/out.input134.c ================================================ 1st match ================================================ FILE: 60_TripleTest/tests/out.input135.c ================================================ testing x ================================================ FILE: 60_TripleTest/tests/out.input136.c ================================================ -35 ================================================ FILE: 60_TripleTest/tests/out.input137.c ================================================ x is 36 ================================================ FILE: 60_TripleTest/tests/out.input138.c ================================================ 0 0 | 0 0 1 | 0 1 0 | 0 1 1 | 1 0 0 | 0 0 1 | 1 1 0 | 1 1 1 | 1 aptr is NULL or doesn't point at 1 aptr points at 1 ================================================ FILE: 60_TripleTest/tests/out.input139.c ================================================ same apparently ================================================ FILE: 60_TripleTest/tests/out.input140.c ================================================ 0 1 4 9 16 5 H ================================================ FILE: 60_TripleTest/tests/out.input143.c ================================================ One of the three is NULL One of the three is NULL One of the three is NULL All three are non-NULL ================================================ FILE: 60_TripleTest/tests/out.input144.c ================================================ Unable to open fred: Success 
================================================ FILE: 60_TripleTest/tests/out.input145.c ================================================ q w e r e 3 5 7 9 7 ================================================ FILE: 60_TripleTest/tests/out.input146.c ================================================ q w e r e r e r e 3 5 7 9 7 9 7 9 7 ================================================ FILE: 60_TripleTest/tests/out.input147.c ================================================ 6 9 6 9 ================================================ FILE: 60_TripleTest/tests/out.input148.c ================================================ i is 1 i is 2 i is 3 leftover owl ================================================ FILE: 60_TripleTest/tests/out.input149.c ================================================ 1 -4 2 -8 3 -12 4 -16 5 -21 6 -27 7 -34 8 -42 9 -51 10 -61 11 -72 12 -84 ================================================ FILE: 60_TripleTest/tests/runtests ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! -f ../cwj ] then (cd ..; make install) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" # Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../cwj -o out $i ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. 
#!/bin/sh
# Run each test with the compn0 compiler and compare
# against known good output

# Build our compiler if needed. Every test below runs ../compn0,
# so that is the binary whose presence must be checked.
if [ ! -f ../compn0 ]
then (cd ..; make install; make compn0)
fi

# Try to use each input source file
for i in input*c
do
  # Text of the input name before the first '.'; used at the end of
  # the loop to remove the ${bn}.s file
  bn=$(echo $i | cut -d. -f1)

  # We can't do anything if there's no file to test against
  if [ ! -f "out.$i" -a ! -f "err.$i" ]
  then
    echo "Can't run test on $i, no output file!"

  # Output file: compile the source, run it and
  # capture the output, and compare it against
  # the known-good output
  elif [ -f "out.$i" ]
  then
    # Print the test name, compile it
    # with our compiler
    echo -n $i
    ../compn0 -o out $i
    ./out > trial.$i

    # Compare this against the correct output. Any non-zero
    # status from cmp (a difference, or an error such as an
    # unreadable file) is announced as a failure.
    if cmp -s "out.$i" "trial.$i"
    then
      echo ": OK"
    else
      echo ": failed"
      diff -c "out.$i" "trial.$i"
      echo
    fi

  # Error file: compile the source and
  # capture the error messages. Compare
  # against the known-bad output. Same
  # mechanism as before
  elif [ -f "err.$i" ]
  then
    echo -n $i
    ../compn0 $i 2> "trial.$i"
    if cmp -s "err.$i" "trial.$i"
    then
      echo ": OK"
    else
      echo ": failed"
      diff -c "err.$i" "trial.$i"
      echo
    fi
  fi

  # Remove everything this test may have produced
  rm -f out out.o out.s ${bn}.s "trial.$i"
done
# Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../compn -o out $i ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../compn $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.o out.s ${bn}.s "trial.$i" done ================================================ FILE: 60_TripleTest/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct symtable *ctype, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->type = type; n->ctype = ctype; n->left = left; n->mid = mid; n->right = right; n->sym = sym; n->a_intvalue = intvalue; n->linenum= 0; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, struct symtable *ctype, struct symtable *sym, int intvalue) { return (mkastnode(op, type, ctype, NULL, NULL, NULL, sym, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, int type, struct symtable 
*ctype, struct ASTnode *left, struct symtable *sym, int intvalue) { return (mkastnode(op, type, ctype, left, NULL, NULL, sym, intvalue)); } // Generate and return a new label number // just for AST dumping purposes static int dumpid = 1; static int gendumplabel(void) { return (dumpid++); } // Given an AST tree, print it out and follow the // traversal of the tree that genAST() follows void dumpAST(struct ASTnode *n, int label, int level) { int Lfalse, Lstart, Lend; int i; switch (n->op) { case A_IF: Lfalse = gendumplabel(); for (i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_IF"); if (n->right) { Lend = gendumplabel(); fprintf(stdout, ", end L%d", Lend); } fprintf(stdout, "\n"); dumpAST(n->left, Lfalse, level + 2); dumpAST(n->mid, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); return; case A_WHILE: Lstart = gendumplabel(); for (i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_WHILE, start L%d\n", Lstart); Lend = gendumplabel(); dumpAST(n->left, Lend, level + 2); dumpAST(n->right, NOLABEL, level + 2); return; } // Reset level to -2 for A_GLUE if (n->op == A_GLUE) level = -2; // General AST node handling if (n->left) dumpAST(n->left, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); for (i = 0; i < level; i++) fprintf(stdout, " "); switch (n->op) { case A_GLUE: fprintf(stdout, "\n\n"); return; case A_FUNCTION: fprintf(stdout, "A_FUNCTION %s\n", n->sym->name); return; case A_ADD: fprintf(stdout, "A_ADD\n"); return; case A_SUBTRACT: fprintf(stdout, "A_SUBTRACT\n"); return; case A_MULTIPLY: fprintf(stdout, "A_MULTIPLY\n"); return; case A_DIVIDE: fprintf(stdout, "A_DIVIDE\n"); return; case A_EQ: fprintf(stdout, "A_EQ\n"); return; case A_NE: fprintf(stdout, "A_NE\n"); return; case A_LT: fprintf(stdout, "A_LE\n"); return; case A_GT: fprintf(stdout, "A_GT\n"); return; case A_LE: fprintf(stdout, "A_LE\n"); return; case A_GE: fprintf(stdout, "A_GE\n"); return; case A_INTLIT: fprintf(stdout, 
"A_INTLIT %d\n", n->a_intvalue); return; case A_STRLIT: fprintf(stdout, "A_STRLIT rval label L%d\n", n->a_intvalue); return; case A_IDENT: if (n->rvalue) fprintf(stdout, "A_IDENT rval %s\n", n->sym->name); else fprintf(stdout, "A_IDENT %s\n", n->sym->name); return; case A_ASSIGN: fprintf(stdout, "A_ASSIGN\n"); return; case A_WIDEN: fprintf(stdout, "A_WIDEN\n"); return; case A_RETURN: fprintf(stdout, "A_RETURN\n"); return; case A_FUNCCALL: fprintf(stdout, "A_FUNCCALL %s\n", n->sym->name); return; case A_ADDR: fprintf(stdout, "A_ADDR %s\n", n->sym->name); return; case A_DEREF: if (n->rvalue) fprintf(stdout, "A_DEREF rval\n"); else fprintf(stdout, "A_DEREF\n"); return; case A_SCALE: fprintf(stdout, "A_SCALE %d\n", n->a_size); return; case A_PREINC: fprintf(stdout, "A_PREINC %s\n", n->sym->name); return; case A_PREDEC: fprintf(stdout, "A_PREDEC %s\n", n->sym->name); return; case A_POSTINC: fprintf(stdout, "A_POSTINC\n"); return; case A_POSTDEC: fprintf(stdout, "A_POSTDEC\n"); return; case A_NEGATE: fprintf(stdout, "A_NEGATE\n"); return; case A_BREAK: fprintf(stdout, "A_BREAK\n"); return; case A_CONTINUE: fprintf(stdout, "A_CONTINUE\n"); return; case A_CASE: fprintf(stdout, "A_CASE %d\n", n->a_intvalue); return; case A_DEFAULT: fprintf(stdout, "A_DEFAULT\n"); return; case A_SWITCH: fprintf(stdout, "A_SWITCH\n"); return; case A_CAST: fprintf(stdout, "A_CAST %d\n", n->type); return; case A_ASPLUS: fprintf(stdout, "A_ASPLUS\n"); return; case A_ASMINUS: fprintf(stdout, "A_ASMINUS\n"); return; case A_ASSTAR: fprintf(stdout, "A_ASSTAR\n"); return; case A_ASSLASH: fprintf(stdout, "A_ASSLASH\n"); return; case A_TOBOOL: fprintf(stdout, "A_TOBOOL\n"); return; case A_LOGOR: fprintf(stdout, "A_LOGOR\n"); return; case A_LOGAND: fprintf(stdout, "A_LOGAND\n"); return; case A_AND: fprintf(stdout, "A_AND\n"); return; case A_ASMOD: fprintf(stdout, "A_ASMOD\n"); return; case A_INVERT: fprintf(stdout, "A_INVERT\n"); return; case A_LOGNOT: fprintf(stdout, "A_LOGNOT\n"); return; case 
A_LSHIFT: fprintf(stdout, "A_LSHIFT\n"); return; case A_MOD: fprintf(stdout, "A_MOD\n"); return; case A_OR: fprintf(stdout, "A_OR\n"); return; case A_RSHIFT: fprintf(stdout, "A_RSHIFT\n"); return; case A_TERNARY: fprintf(stdout, "A_TERNARY\n"); return; case A_XOR: fprintf(stdout, "A_XOR\n"); return; default: fatald("Unknown dumpAST operator", n->op); } } ================================================ FILE: 60_TripleTest/types.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Types and type handling // Copyright (c) 2019 Warren Toomey, GPL3 // Return true if a type is an int type // of any size, false otherwise int inttype(int type) { return (((type & 0xf) == 0) && (type >= P_CHAR && type <= P_LONG)); } // Return true if a type is of pointer type int ptrtype(int type) { return ((type & 0xf) != 0); } // Given a primitive type, return // the type which is a pointer to it int pointer_to(int type) { if ((type & 0xf) == 0xf) fatald("Unrecognised in pointer_to: type", type); return (type + 1); } // Given a primitive pointer type, return // the type which it points to int value_at(int type) { if ((type & 0xf) == 0x0) fatald("Unrecognised in value_at: type", type); return (type - 1); } // Given a type and a composite type pointer, return // the size of this type in bytes int typesize(int type, struct symtable *ctype) { if (type == P_STRUCT || type == P_UNION) return (ctype->size); return (genprimsize(type)); } // Given an AST tree and a type which we want it to become, // possibly modify the tree by widening or scaling so that // it is compatible with this type. Return the original tree // if no changes occurred, a modified tree, or NULL if the // tree is not compatible with the given type. // If this will be part of a binary operation, the AST op is not zero. 
struct ASTnode *modify_type(struct ASTnode *tree, int rtype, struct symtable *rctype, int op) { int ltype; int lsize, rsize; ltype = tree->type; // For A_LOGOR and A_LOGAND, both types have to be int or pointer types if (op==A_LOGOR || op==A_LOGAND) { if (!inttype(ltype) && !ptrtype(ltype)) return(NULL); if (!inttype(ltype) && !ptrtype(rtype)) return(NULL); return (tree); } // XXX No idea on these yet if (ltype == P_STRUCT || ltype == P_UNION) fatal("Don't know how to do this yet"); if (rtype == P_STRUCT || rtype == P_UNION) fatal("Don't know how to do this yet"); // Compare scalar int types if (inttype(ltype) && inttype(rtype)) { // Both types same, nothing to do if (ltype == rtype) return (tree); // Get the sizes for each type lsize = typesize(ltype, NULL); rsize = typesize(rtype, NULL); // Tree's size is too big if (lsize > rsize) return (NULL); // Widen to the right if (rsize > lsize) return (mkastunary(A_WIDEN, rtype, NULL, tree, NULL, 0)); } // For pointers if (ptrtype(ltype) && ptrtype(rtype)) { // We can compare them if (op >= A_EQ && op <= A_GE) return (tree); // A comparison of the same type for a non-binary operation is OK, // or when the left tree is of `void *` type. if (op == 0 && (ltype == rtype || ltype == pointer_to(P_VOID))) return (tree); } // We can scale only on add and subtract operations if (op == A_ADD || op == A_SUBTRACT || op == A_ASPLUS || op == A_ASMINUS) { // Left is int type, right is pointer type and the size // of the original type is >1: scale the left if (inttype(ltype) && ptrtype(rtype)) { rsize = genprimsize(value_at(rtype)); if (rsize > 1) return (mkastunary(A_SCALE, rtype, rctype, tree, NULL, rsize)); else return (tree); // Size 1, no need to scale } } // If we get here, the types are not compatible return (NULL); } ================================================ FILE: 61_What_Next/Readme.md ================================================ # Part 61: What's Next? We've achieved the goal of writing a self-compiling compiler. 
Now that this goal has been reached, what else could we do with the codebase? From the start, I'm going to say that there are already a number of working, production-ready C compilers: [GCC](https://gcc.gnu.org), [LLVM](https://llvm.org) etc. We don't need another production-ready C compiler. But the point of writing this compiler was pedagogical: to explain the basics of how compilers work, and to put this knowledge into practice. So, I see the future work on the compiler to continue to explain how compilers work and to put this into practice. With this direction set, let's look at the possibilities.

## Code Cleanup

I wrote the compiler fairly quickly, with only a little thought about the overarching design of the code. I think the design is reasonable, but the whole codebase needs a clean up. There's a fair bit of repeated code (code that isn't [DRY](https://en.wikipedia.org/wiki/Don%27t_repeat_yourself)) in places which could be refactored. Some of the code is ugly and could be improved. Also, some of the comments no longer reflect the code. This wouldn't change the compiler's functionality, but it would make it easier to understand.

## Fix the Bugs

The compiler, as it stands, purports to implement a specific subset of the C language. But I'm sure there are plenty of bugs in this implementation. We could spend some time identifying these bugs and fixing them, while keeping the compiler's functionality constant.

## Write Out the Final BNF Grammar

This suggestion goes along with the previous one. We should document the exact subset of the C language that the compiler supports, as a BNF grammar. I did write snippets of BNF grammar throughout the journey, but near the end I stopped doing it. It would be good to write out the full, final, BNF grammar.

## Support Variadic Functions

The compiler still doesn't check that the number of arguments to a function matches the number of function parameters.
We need this because the compiler also doesn't support variadic functions like `printf()` and friends. So, we need to add in the `...` token, somehow mark a function as having either "exactly N" or "N or more" parameters, and then write the code to use this information.

## Add `short`s

It shouldn't be too hard to add a 16-bit signed `short` type. But Nils mentions, in his SubC book, that adding `unsigned` integers to a C compiler is tricky.

## Rewrite the Register Allocation and Spilling

Right now, the mechanism for register allocation and register spilling is really awful, especially the spilling of registers before and after a function call. The assembly code is terribly inefficient. I'd like to see this rewritten using some of the theory on register allocation, e.g. [graph colouring](https://en.wikipedia.org/wiki/Register_allocation#Graph-coloring_allocation). Even better, if this was written up like the past journey steps, it would help newcomers (like me) understand it better.

## AST Optimisations

I did mention the idea of optimising the generated code by restructuring the AST trees. An example of this is [strength reduction](https://en.wikipedia.org/wiki/Strength_reduction). The [SubC](http://www.t3x.org/subc/) compiler does this, and it would be easy to add to our compiler, along with a writeup. There might be other AST tree optimisations that could be done.

## Code Generation Optimisation

Another place to do output optimisation is in the code generator. A good example is [peephole optimisation](https://en.wikipedia.org/wiki/Peephole_optimization). To do this, however, the way the assembly code is generated would have to change. Instead of `fprintf()`ing the output, it should be stored in a data structure to make it easier for the peephole optimiser to traverse the assembly code. That's as far as I've thought, but it would be interesting to do.

## Add Debugging Output

I started to do this in step 59.
We should be able to output `gdb` directives into the assembly output to allow `gdb` to see the original C source lines, and step through a program line by line. Right now, the compiler is outputting this information but the `gdb` directives are not placed correctly in the assembly output. There's another step in here with a writeup on how to do this properly.

## Complete the ARM Backend, plus Others

I did start the ARM back-end, and at the time I promised that I would keep it in sync with the x86-64 back-end. Well, I broke that promise as I got too interested in extending the compiler's functionality. Now that the compiler's functionality is relatively stable, I should go back and complete the ARM back-end. Even better would be a third back-end to prove that the compiler is fairly portable.

## Extending the Recognised Grammar

I've left this suggestion to near the end as it doesn't continue the theme of explaining how compilers work. There is always scope to add more elements of the C language to the compiler. We don't need to do this to make the compiler self-compiling, but it would make the compiler more useful as a general-purpose compiler.

## Work Out How to Call `ld` Directly

A long time ago, when I was playing around with BSD and Linux systems, I used to be able to link executables by hand with the `ld` command. I've been unable to work out how to do this on current Linux systems, and I'm relying on `cc` to do the linking for me. I'd love to learn how to link by hand with `ld` on Linux.

## Port the Compiler to non-Linux Systems

Following on from the last point, it would be good to "port" the compiler to non-Linux systems like some of the BSD platforms.

## Conclusion

These are all the possible things that I can think of (at the moment) to continue the work on our compiler. I will get on to some of them, but at this point I'd be very happy to have other people help out with the project, and/or fork the compiler's code and do their own thing with it!
[Next step](../62_Cleanup/Readme.md)

================================================
FILE: 62_Cleanup/Makefile
================================================
# Define the location of the include directory
# and the location to install the compiler binary
INCDIR=/tmp/include
BINDIR=/tmp

# Header files, then the source lists for the three builds:
# SRCS uses cg.c, SRCN uses cgn.c, ARMSRCS uses cg_arm.c
HSRCS= data.h decl.h defs.h incdir.h
SRCS= cg.c decl.c expr.c gen.c main.c misc.c \
	opt.c scan.c stmt.c sym.c tree.c types.c

SRCN= cgn.c decl.c expr.c gen.c main.c misc.c \
	opt.c scan.c stmt.c sym.c tree.c types.c

ARMSRCS= cg_arm.c decl.c expr.c gen.c main.c misc.c \
	opt.c scan.c stmt.c sym.c tree.c types.c

# Build the compiler with the system cc. incdir.h is
# regenerated each time so that it matches $(INCDIR)
cwj: $(SRCS) $(HSRCS)
	echo "#define INCDIR \"$(INCDIR)\"" > incdir.h
	cc -o cwj -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCS)

# Build the cgn.c variant. Note that this appends (>>) the
# __NASM__ define to whatever incdir.h already contains
compn: $(SRCN) $(HSRCS)
	echo "#define __NASM__ 1" >> incdir.h
	cc -D__NASM__ -o compn -g -Wall -DINCDIR=\"$(INCDIR)\" $(SRCN)

# Build the cg_arm.c variant, then copy it over cwj
cwjarm: $(ARMSRCS) $(HSRCS)
	echo "#define INCDIR \"$(INCDIR)\"" > incdir.h
	cc -o cwjarm -g -Wall $(ARMSRCS)
	cp cwjarm cwj

# Create incdir.h when it does not exist yet
incdir.h:
	echo "#define INCDIR \"$(INCDIR)\"" > incdir.h

# Install the header files and the compiler binary
install: cwj
	mkdir -p $(INCDIR)
	rsync -a include/. $(INCDIR)
	cp cwj $(BINDIR)
	chmod +x $(BINDIR)/cwj

installn: compn
	mkdir -p $(INCDIR)
	rsync -a include/. $(INCDIR)
	cp compn $(BINDIR)
	chmod +x $(BINDIR)/compn

# Remove the built compilers, generated files and incdir.h
clean:
	rm -f cwj cwj[0-9] cwjarm compn compn[0-9] *.o *.s out a.out incdir.h

# Run the tests with the installed compiler
test: install tests/runtests
	(cd tests; chmod +x runtests; ./runtests)

# Run the tests with the
# compiler that compiled itself
test0: install tests/runtests0 cwj0
	(cd tests; chmod +x runtests0; ./runtests0)

# Run the tests with the
# compiler that compiled itself
test0n: install tests/runtests0n compn0
	(cd tests; chmod +x runtests0n; ./runtests0n)

armtest: cwjarm tests/runtests
	(cd tests; chmod +x runtests; ./runtests)

testn: installn tests/runtestsn
	(cd tests; chmod +x runtestsn; ./runtestsn)

# Try to do the triple test
triple: cwj1
	size cwj[01]

# Paranoid: quadruple test
quad: cwj2
	size cwj[012]

# Each stage is built by the previous stage's binary:
# cwj builds cwj0, cwj0 builds cwj1, cwj1 builds cwj2
cwj2: cwj1 $(SRCS) $(HSRCS)
	./cwj1 -o cwj2 $(SRCS)

cwj1: cwj0 $(SRCS) $(HSRCS)
	./cwj0 -o cwj1 $(SRCS)

cwj0: install $(SRCS) $(HSRCS)
	./cwj -o cwj0 $(SRCS)

# Try to do the triple test with nasm
triplen: compn1
	size compn[01]

quadn: compn2
	size compn[012]

compn2: compn1 $(SRCN) $(HSRCS)
	./compn1 -o compn2 $(SRCN)

compn1: compn0 $(SRCN) $(HSRCS)
	./compn0 -o compn1 $(SRCN)

compn0: installn $(SRCN) $(HSRCS)
	echo "#define __NASM__ 1" >> incdir.h
	./compn -o compn0 $(SRCN)

================================================
FILE: 62_Cleanup/Readme.md
================================================
# Part 62: Code Cleanup

This version of the compiler is essentially the same as in part 60. I am using this part to fix up comments, fix up bugs, do a bit of code cleanup, rename some functions and variables etc.

## Some Small Bugfixes

For the changes to the compiler that I'm planning, I need to be able to put structs into structs. Therefore, I should be able to do:

```c
printf("%d\n", thing.member1.age_in_years);
```

where `thing` is a struct, but it has a `member1` which is of type struct. To do this, we need to find the offset of `member1` from the base of `thing`, then find the offset of `age_in_years` from the previous offset.
However, the code to do this expects the thing on the left-hand side of the '.' token to be a variable which has a symbol table entry and thus a fixed location in memory. We need to fix this to deal with the situation where the left-hand side of the '.' token is an offset that has already been calculated. Fortunately, this was quite easy to do. We don't have to change the parser code, but let's look at what is already there. In `member_access()` in `expr.c`:

```c
  // Check that the left AST tree is a struct or union.
  // If so, change it from an A_IDENT to an A_ADDR so that
  // we get the base address, not the value at this address.
  if (!withpointer) {
    if (left->type == P_STRUCT || left->type == P_UNION)
      left->op = A_ADDR;
```

We mark the left-hand AST tree as A_ADDR (instead of A_IDENT) to say that we need the base address of it, not the value at this address. Now we need to fix the code generation. When we get an A_ADDR AST node, we either have a variable whose address we need (e.g. `thing` in `thing.member1`), or our child tree has the pre-calculated offset (e.g. the offset of `member1` in `member1.age_in_years`). So in `genAST()` in `gen.c`, we do:

```c
  case A_ADDR:
    // If we have a symbol, get its address. Otherwise,
    // the left register already has the address because
    // it's a member access
    if (n->sym != NULL)
      return (cgaddress(n->sym));
    else
      return (leftreg);
```

That should be all, but we have one more fix. The code to work out the alignment of types doesn't deal with structs inside structs, only scalar types inside structs. So, I've modified `cgalign()` in `cg.c` as follows:

```c
// Given a scalar type, an existing memory offset
// (which hasn't been allocated to anything yet)
// and a direction (1 is up, -1 is down), calculate
// and return a suitably aligned memory offset
// for this scalar type.
// This could be the original
// offset, or it could be above/below the original
int cgalign(int type, int offset, int direction) {
  int alignment;

  // We don't need to do this on x86-64, but let's
  // align chars on any offset and align ints/pointers
  // on a 4-byte alignment
  switch (type) {
    case P_CHAR:
      break;
    default:
      // Align whatever we have now on a 4-byte alignment.
      // I put the generic code here so it can be reused elsewhere.
      alignment = 4;
      offset = (offset + direction * (alignment - 1)) & ~(alignment - 1);
  }
  return (offset);
}
```

Everything but P_CHAR gets aligned on a 4-byte alignment, including structs and unions.

## Known but Unfixed Bugs

Now that this GitHub repository is up and has gained some attention, several people have reported bugs and misfeatures. The list of open and closed issues is here: [https://github.com/DoctorWkt/acwj/issues](https://github.com/DoctorWkt/acwj/issues). If you spot any bugs or misfeatures, feel free to report them. However, I can't promise I'll get time to fix them all!

## What's Next

I've been reading up on register allocation, and I think I'll add a linear scan register allocation mechanism to the compiler. To do this, though, I need to add an intermediate representation stage. This will be the goal for the next few stages, but so far I haven't done anything concrete.
[Next step](../63_QBE/Readme.md) ================================================ FILE: 62_Cleanup/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { no_seg, text_seg, data_seg } currSeg = no_seg; // Switch to the text segment void cgtextseg() { if (currSeg != text_seg) { fputs("\t.text\n", Outfile); currSeg = text_seg; } } // Switch to the data segment void cgdataseg() { if (currSeg != data_seg) { fputs("\t.data\n", Outfile); currSeg = data_seg; } } // Given a scalar type value, return the // size of the type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch (type) { case P_CHAR: break; default: // Align whatever we have now on a 4-byte alignment. // I put the generic code here so it can be reused elsewhere. alignment = 4; offset = (offset + direction * (alignment - 1)) & ~(alignment - 1); } return (offset); } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; // Position of stack pointer offset relative to stack base pointer. 
// We need this to ensure it is aligned on a 16-byte boundary. static int stackOffset; // Create the position of a new local variable. static int newlocaloffset(int size) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (size > 4) ? size : 4; return (-localOffset); } // List of available registers and their names. // We need a list of byte and doubleword registers, too. // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "%r10", "%r11", "%r12", "%r13", "%r9", "%r8", "%rcx", "%rdx", "%rsi", "%rdi" }; // We also need the 8-bit and 32-bit register names static char *breglist[] = { "%r10b", "%r11b", "%r12b", "%r13b", "%r9b", "%r8b", "%cl", "%dl", "%sil", "%dil" }; static char *dreglist[] = { "%r10d", "%r11d", "%r12d", "%r13d", "%r9d", "%r8d", "%ecx", "%edx", "%esi", "%edi" }; // Push and pop a register on/off the stack static void pushreg(int r) { fprintf(Outfile, "\tpushq\t%s\n", reglist[r]); } static void popreg(int r) { fprintf(Outfile, "\tpopq\t%s\n", reglist[r]); } // Set all registers as available. // But if reg is positive, don't free that one. void cgfreeallregs(int keepreg) { int i; // fprintf(Outfile, "# freeing all registers\n"); for (i = 0; i < NUMFREEREGS; i++) if (i != keepreg) freereg[i] = 1; } // When we need to spill a register, we choose // the following register and then cycle through // the remaining registers. The spillreg increments // continually, so we need to take a modulo NUMFREEREGS // on it. static int spillreg = 0; // Allocate a free register. Return the number of // the register. Die if no available registers. 
int cgallocreg(void) { int reg; for (reg = 0; reg < NUMFREEREGS; reg++) { if (freereg[reg]) { freereg[reg] = 0; // fprintf(Outfile, "# allocated register %s\n", reglist[reg]); return (reg); } } // We have no registers, so we must spill one reg = (spillreg % NUMFREEREGS); spillreg++; // fprintf(Outfile, "# spilling reg %s\n", reglist[reg]); pushreg(reg); return (reg); } // Return a register to the list of available registers. // Check to see if it's not already there. void cgfreereg(int reg) { if (freereg[reg] != 0) { // fprintf(Outfile, "# error trying to free register %s\n", reglist[reg]); fatald("Error trying to free register", reg); } // If this was a spilled register, get it back if (spillreg > 0) { spillreg--; reg = (spillreg % NUMFREEREGS); // fprintf(Outfile, "# unspilling reg %s\n", reglist[reg]); popreg(reg); } else { // fprintf(Outfile, "# freeing reg %s\n", reglist[reg]); freereg[reg] = 1; } } // Spill all registers on the stack void cgspillregs(void) { int i; for (i = 0; i < NUMFREEREGS; i++) pushreg(i); } // Unspill all registers from the stack static void cgunspillregs(void) { int i; for (i = NUMFREEREGS - 1; i >= 0; i--) popreg(i); } // Print out the assembly preamble // for one output file void cgpreamble(char *filename) { cgfreeallregs(NOREG); cgtextseg(); fprintf(Outfile, "\t.file 1 "); fputc('"', Outfile); fprintf(Outfile, "%s", filename); fputc('"', Outfile); fputc('\n', Outfile); fprintf(Outfile, "# internal switch(expr) routine\n" "# %%rsi = switch table, %%rax = expr\n" "# from SubC: http://www.t3x.org/subc/\n" "\n" "__switch:\n" " pushq %%rsi\n" " movq %%rdx, %%rsi\n" " movq %%rax, %%rbx\n" " cld\n" " lodsq\n" " movq %%rax, %%rcx\n" "__next:\n" " lodsq\n" " movq %%rax, %%rdx\n" " lodsq\n" " cmpq %%rdx, %%rbx\n" " jnz __no\n" " popq %%rsi\n" " jmp *%%rax\n" "__no:\n" " loop __next\n" " lodsq\n" " popq %%rsi\n" " jmp *%%rax\n\n"); } // Nothing to do for the end of a file void cgpostamble() { } // Print out a function preamble void 
cgfuncpreamble(struct symtable *sym) { char *name = sym->name; struct symtable *parm, *locvar; int cnt; int paramOffset = 16; // Any pushed params start at this stack offset int paramReg = FIRSTPARAMREG; // Index to the first param register in above reg lists // Output in the text segment, reset local offset cgtextseg(); localOffset = 0; // Output the function start, save the %rsp and %rsp if (sym->class == C_GLOBAL) fprintf(Outfile, "\t.globl\t%s\n" "\t.type\t%s, @function\n", name, name); fprintf(Outfile, "%s:\n" "\tpushq\t%%rbp\n" "\tmovq\t%%rsp, %%rbp\n", name); // Copy any in-register parameters to the stack, up to six of them // The remaining parameters are already on the stack for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) { if (cnt > 6) { parm->st_posn = paramOffset; paramOffset += 8; } else { parm->st_posn = newlocaloffset(parm->size); cgstorlocal(paramReg--, parm); } } // For the remainder, if they are a parameter then they are // already on the stack. If only a local, make a stack position. for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) { locvar->st_posn = newlocaloffset(locvar->size); } // Align the stack pointer to be a multiple of 16 // less than its previous value stackOffset = (localOffset + 15) & ~15; fprintf(Outfile, "\taddq\t$%d,%%rsp\n", -stackOffset); } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->st_endlabel); fprintf(Outfile, "\taddq\t$%d,%%rsp\n", stackOffset); fputs("\tpopq %rbp\n" "\tret\n", Outfile); cgfreeallregs(NOREG); } // Load an integer literal value into a register. // Return the number of the register. // For x86-64, we don't need to worry about the type. int cgloadint(int value, int type) { // Get a new register int r = cgallocreg(); fprintf(Outfile, "\tmovq\t$%d, %s\n", value, reglist[r]); return (r); } // Load a value from a variable into a register. // Return the number of the register. 
If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadvar(struct symtable *sym, int op) { int r, postreg, offset = 1; // Get a new register r = cgallocreg(); // If the symbol is a pointer, use the size // of the type that it points to as any // increment or decrement. If not, it's one. if (ptrtype(sym->type)) offset = typesize(value_at(sym->type), sym->ctype); // Negate the offset for decrements if (op == A_PREDEC || op == A_POSTDEC) offset = -offset; // If we have a pre-operation if (op == A_PREINC || op == A_PREDEC) { // Load the symbol's address if (sym->class == C_LOCAL || sym->class == C_PARAM) fprintf(Outfile, "\tleaq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); else fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", sym->name, reglist[r]); // and change the value at that address switch (sym->size) { case 1: fprintf(Outfile, "\taddb\t$%d,(%s)\n", offset, reglist[r]); break; case 4: fprintf(Outfile, "\taddl\t$%d,(%s)\n", offset, reglist[r]); break; case 8: fprintf(Outfile, "\taddq\t$%d,(%s)\n", offset, reglist[r]); break; } } // Now load the output register with the value if (sym->class == C_LOCAL || sym->class == C_PARAM) { switch (sym->size) { case 1: fprintf(Outfile, "\tmovzbq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); break; case 4: fprintf(Outfile, "\tmovslq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); break; case 8: fprintf(Outfile, "\tmovq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); } } else { switch (sym->size) { case 1: fprintf(Outfile, "\tmovzbq\t%s(%%rip), %s\n", sym->name, reglist[r]); break; case 4: fprintf(Outfile, "\tmovslq\t%s(%%rip), %s\n", sym->name, reglist[r]); break; case 8: fprintf(Outfile, "\tmovq\t%s(%%rip), %s\n", sym->name, reglist[r]); } } // If we have a post-operation, get a new register if (op == A_POSTINC || op == A_POSTDEC) { postreg = cgallocreg(); // Load the symbol's address if (sym->class == C_LOCAL || sym->class == C_PARAM) fprintf(Outfile, "\tleaq\t%d(%%rbp), %s\n", sym->st_posn, 
reglist[postreg]); else fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", sym->name, reglist[postreg]); // and change the value at that address switch (sym->size) { case 1: fprintf(Outfile, "\taddb\t$%d,(%s)\n", offset, reglist[postreg]); break; case 4: fprintf(Outfile, "\taddl\t$%d,(%s)\n", offset, reglist[postreg]); break; case 8: fprintf(Outfile, "\taddq\t$%d,(%s)\n", offset, reglist[postreg]); break; } // Finally, free the register cgfreereg(postreg); } // Return the register with the value return (r); } // Given the label number of a global string, // load its address into a new register int cgloadglobstr(int label) { // Get a new register int r = cgallocreg(); fprintf(Outfile, "\tleaq\tL%d(%%rip), %s\n", label, reglist[r]); return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r2], reglist[r1]); cgfreereg(r2); return (r1); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); cgfreereg(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r2], reglist[r1]); cgfreereg(r2); return (r1); } // Divide or modulo the first register by the second and // return the number of the register with the result int cgdivmod(int r1, int r2, int op) { fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); fprintf(Outfile, "\tcqo\n"); fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); if (op == A_DIVIDE) fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); else fprintf(Outfile, "\tmovq\t%%rdx,%s\n", reglist[r1]); cgfreereg(r2); return (r1); } // Bitwise AND two registers int cgand(int r1, int r2) { fprintf(Outfile, "\tandq\t%s, %s\n", reglist[r2], reglist[r1]); cgfreereg(r2); return (r1); } // Bitwise 
OR two registers int cgor(int r1, int r2) { fprintf(Outfile, "\torq\t%s, %s\n", reglist[r2], reglist[r1]); cgfreereg(r2); return (r1); } // Bitwise XOR two registers int cgxor(int r1, int r2) { fprintf(Outfile, "\txorq\t%s, %s\n", reglist[r2], reglist[r1]); cgfreereg(r2); return (r1); } // Shift left r1 by r2 bits int cgshl(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshlq\t%%cl, %s\n", reglist[r1]); cgfreereg(r2); return (r1); } // Shift right r1 by r2 bits int cgshr(int r1, int r2) { fprintf(Outfile, "\tmovb\t%s, %%cl\n", breglist[r2]); fprintf(Outfile, "\tshrq\t%%cl, %s\n", reglist[r1]); cgfreereg(r2); return (r1); } // Negate a register's value int cgnegate(int r) { fprintf(Outfile, "\tnegq\t%s\n", reglist[r]); return (r); } // Invert a register's value int cginvert(int r) { fprintf(Outfile, "\tnotq\t%s\n", reglist[r]); return (r); } // Logically negate a register's value int cglognot(int r) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); fprintf(Outfile, "\tsete\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); return (r); } // Load a boolean value (only 0 or 1) // into the given register void cgloadboolean(int r, int val) { fprintf(Outfile, "\tmovq\t$%d, %s\n", val, reglist[r]); } // Convert an integer value to a boolean value. Jump if // it's an IF, WHILE, LOGAND or LOGOR operation int cgboolean(int r, int op, int label) { fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]); switch (op) { case A_IF: case A_WHILE: case A_LOGAND: fprintf(Outfile, "\tje\tL%d\n", label); break; case A_LOGOR: fprintf(Outfile, "\tjne\tL%d\n", label); break; default: fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r], reglist[r]); } return (r); } // Call a function with the given symbol id. // Pop off any arguments pushed on the stack. 
// Return the register with the result int cgcall(struct symtable *sym, int numargs) { int outr; // Call the function fprintf(Outfile, "\tcall\t%s@PLT\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\taddq\t$%d, %%rsp\n", 8 * (numargs - 6)); // Unspill all the registers cgunspillregs(); // Get a new register and copy the return value into it outr = cgallocreg(); fprintf(Outfile, "\tmovq\t%%rax, %s\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function call. // Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpushq\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmovq\t%s, %s\n", reglist[r], reglist[FIRSTPARAMREG - argposn + 1]); } cgfreereg(r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsalq\t$%d, %s\n", val, reglist[r]); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %s(%%rip)\n", reglist[r], sym->name); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %s(%%rip)\n", breglist[r], sym->name); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %s(%%rip)\n", dreglist[r], sym->name); break; default: fatald("Bad type in cgstorglob:", sym->type); } return (r); } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmovq\t%s, %d(%%rbp)\n", reglist[r], sym->st_posn); } else switch (sym->type) { 
case P_CHAR: fprintf(Outfile, "\tmovb\t%s, %d(%%rbp)\n", breglist[r], sym->st_posn); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %d(%%rbp)\n", dreglist[r], sym->st_posn); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size, type; int initvalue; int i; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the variable (or its elements if an array) // and the type of the variable if (node->stype == S_ARRAY) { size = typesize(value_at(node->type), node->ctype); type = value_at(node->type); } else { size = node->size; type = node->type; } // Generate the global identity and the label cgdataseg(); if (node->class == C_GLOBAL) fprintf(Outfile, "\t.globl\t%s\n", node->name); fprintf(Outfile, "%s:\n", node->name); // Output space for one or more elements for (i = 0; i < node->nelems; i++) { // Get any initial value initvalue = 0; if (node->initlist != NULL) initvalue = node->initlist[i]; // Generate the space for this type switch (size) { case 1: fprintf(Outfile, "\t.byte\t%d\n", initvalue); break; case 4: fprintf(Outfile, "\t.long\t%d\n", initvalue); break; case 8: // Generate the pointer to a string literal. Treat a zero value // as actually zero, not the label L0 if (node->initlist != NULL && type == pointer_to(P_CHAR) && initvalue != 0) fprintf(Outfile, "\t.quad\tL%d\n", initvalue); else fprintf(Outfile, "\t.quad\t%d\n", initvalue); break; default: for (i = 0; i < size; i++) fprintf(Outfile, "\t.byte\t0\n"); } } } // Generate a global string and its start label. // Don't output the label if append is true. 
void cgglobstr(int l, char *strvalue, int append) { char *cptr; if (!append) cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\t.byte\t%d\n", *cptr); } } // NUL terminate a global string void cgglobstrend(void) { fprintf(Outfile, "\t.byte\t0\n"); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2, int type) { int size = cgprimsize(type); // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); switch (size) { case 1: fprintf(Outfile, "\tcmpb\t%s, %s\n", breglist[r2], breglist[r1]); break; case 4: fprintf(Outfile, "\tcmpl\t%s, %s\n", dreglist[r2], dreglist[r1]); break; default: fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); } fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzbq\t%s, %s\n", breglist[r2], reglist[r2]); cgfreereg(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label, int type) { int size = cgprimsize(type); // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); switch (size) { case 1: fprintf(Outfile, "\tcmpb\t%s, %s\n", breglist[r2], breglist[r1]); break; case 4: fprintf(Outfile, "\tcmpl\t%s, %s\n", dreglist[r2], dreglist[r1]); break; default: fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); } fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); cgfreereg(r1); cgfreereg(r2); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Only return a value if we have a value to return if (reg != NOREG) { // Deal with pointers here as we can't put them in // the switch statement if (ptrtype(sym->type)) fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); else { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzbl\t%s, %%eax\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmovl\t%s, %%eax\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } } } cgjump(sym->st_endlabel); } // Generate code to load the address of an // identifier into a variable. 
Return a new register int cgaddress(struct symtable *sym) { int r = cgallocreg(); if (sym->class == C_GLOBAL || sym->class == C_EXTERN || sym->class == C_STATIC) fprintf(Outfile, "\tleaq\t%s(%%rip), %s\n", sym->name, reglist[r]); else fprintf(Outfile, "\tleaq\t%d(%%rbp), %s\n", sym->st_posn, reglist[r]); return (r); } // Dereference a pointer to get the value // it points at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzbq\t(%s), %s\n", reglist[r], reglist[r]); break; case 4: fprintf(Outfile, "\tmovslq\t(%s), %s\n", reglist[r], reglist[r]); break; case 8: fprintf(Outfile, "\tmovq\t(%s), %s\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { // Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmovb\t%s, (%s)\n", breglist[r1], reglist[r2]); break; case 4: fprintf(Outfile, "\tmovl\t%s, (%s)\n", dreglist[r1], reglist[r2]); break; case 8: fprintf(Outfile, "\tmovq\t%s, (%s)\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } // Generate a switch jump table and the code to // load the registers and call the switch() code void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; // Get a label for the switch table label = genlabel(); cglabel(label); // Heuristic. If we have no cases, create one case // which points to the default case if (casecount == 0) { caseval[0] = 0; caselabel[0] = defaultlabel; casecount = 1; } // Generate the switch jump table. 
fprintf(Outfile, "\t.quad\t%d\n", casecount); for (i = 0; i < casecount; i++) fprintf(Outfile, "\t.quad\t%d, L%d\n", caseval[i], caselabel[i]); fprintf(Outfile, "\t.quad\tL%d\n", defaultlabel); // Load the specific registers cglabel(toplabel); fprintf(Outfile, "\tmovq\t%s, %%rax\n", reglist[reg]); fprintf(Outfile, "\tleaq\tL%d(%%rip), %%rdx\n", label); fprintf(Outfile, "\tjmp\t__switch\n"); } // Move value between registers void cgmove(int r1, int r2) { fprintf(Outfile, "\tmovq\t%s, %s\n", reglist[r1], reglist[r2]); } // Output a gdb directive to say on which // source code line number the following // assembly code came from void cglinenum(int line) { // fprintf(Outfile, "\t.loc 1 %d 0\n", line); } ================================================ FILE: 62_Cleanup/cg_arm.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for ARMv6 on Raspberry Pi // Copyright (c) 2019 Warren Toomey, GPL3 // List of available registers and their names. static int freereg[4]; static char *reglist[4] = { "r4", "r5", "r6", "r7" }; // Set all registers as available void freeall_registers(void) { freereg[0] = freereg[1] = freereg[2] = freereg[3] = 1; } // Allocate a free register. Return the number of // the register. Die if no available registers. static int alloc_register(void) { for (int i = 0; i < 4; i++) { if (freereg[i]) { freereg[i] = 0; return (i); } } fatal("Out of registers"); return (NOREG); // Keep -Wall happy } // Return a register to the list of available registers. // Check to see if it's not already there. static void free_register(int reg) { if (freereg[reg] != 0) fatald("Error trying to free register", reg); freereg[reg] = 1; } // We have to store large integer literal values in memory. // Keep a list of them which will be output in the postamble #define MAXINTS 1024 int Intlist[MAXINTS]; static int Intslot = 0; // Determine the offset of a large integer // literal from the .L3 label. 
If the integer // isn't in the list, add it. static void set_int_offset(int val) { int offset = -1; // See if it is already there for (int i = 0; i < Intslot; i++) { if (Intlist[i] == val) { offset = 4 * i; break; } } // Not in the list, so add it if (offset == -1) { offset = 4 * Intslot; if (Intslot == MAXINTS) fatal("Out of int slots in set_int_offset()"); Intlist[Intslot++] = val; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L3+%d\n", offset); } // Print out the assembly preamble void cgpreamble() { freeall_registers(); fputs("\t.text\n", Outfile); } // Print out the assembly postamble void cgpostamble() { // Print out the global variables fprintf(Outfile, ".L2:\n"); for (int i = 0; i < Globs; i++) { if (Symtable[i].stype == S_VARIABLE) fprintf(Outfile, "\t.word %s\n", Symtable[i].name); } // Print out the integer literals fprintf(Outfile, ".L3:\n"); for (int i = 0; i < Intslot; i++) { fprintf(Outfile, "\t.word %d\n", Intlist[i]); } } // Print out a function preamble void cgfuncpreamble(int id) { char *name = Symtable[id].name; fprintf(Outfile, "\t.text\n" "\t.globl\t%s\n" "\t.type\t%s, \%%function\n" "%s:\n" "\tpush\t{fp, lr}\n" "\tadd\tfp, sp, #4\n" "\tsub\tsp, sp, #8\n" "\tstr\tr0, [fp, #-8]\n", name, name, name); } // Print out a function postamble void cgfuncpostamble(int id) { cglabel(Symtable[id].endlabel); fputs("\tsub\tsp, fp, #4\n" "\tpop\t{fp, pc}\n" "\t.align\t2\n", Outfile); } // Load an integer literal value into a register. // Return the number of the register. int cgloadint(int value, int type) { // Get a new register int r = alloc_register(); // If the literal value is small, do it with one instruction if (value <= 1000) fprintf(Outfile, "\tmov\t%s, #%d\n", reglist[r], value); else { set_int_offset(value); fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); } return (r); } // Determine the offset of a variable from the .L2 // label. Yes, this is inefficient code. 
static void set_var_offset(int id) { int offset = 0; // Walk the symbol table up to id. // Find S_VARIABLEs and add on 4 until // we get to our variable for (int i = 0; i < id; i++) { if (Symtable[i].stype == S_VARIABLE) offset += 4; } // Load r3 with this offset fprintf(Outfile, "\tldr\tr3, .L2+%d\n", offset); } // Load a value from a variable into a register. // Return the number of the register int cgloadglob(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tldrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgloadglob:", Symtable[id].type); } return (r); } // Add two registers together and return // the number of the register with the result int cgadd(int r1, int r2) { fprintf(Outfile, "\tadd\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Subtract the second register from the first and // return the number of the register with the result int cgsub(int r1, int r2) { fprintf(Outfile, "\tsub\t%s, %s, %s\n", reglist[r1], reglist[r1], reglist[r2]); free_register(r2); return (r1); } // Multiply two registers together and return // the number of the register with the result int cgmul(int r1, int r2) { fprintf(Outfile, "\tmul\t%s, %s, %s\n", reglist[r2], reglist[r1], reglist[r2]); free_register(r1); return (r2); } // Divide the first register by the second and // return the number of the register with the result int cgdiv(int r1, int r2) { // To do a divide: r0 holds the dividend, r1 holds the divisor. // The quotient is in r0. 
fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r1]); fprintf(Outfile, "\tmov\tr1, %s\n", reglist[r2]); fprintf(Outfile, "\tbl\t__aeabi_idiv\n"); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r1]); free_register(r2); return (r1); } // Call a function with one argument from the given register // Return the register with the result int cgcall(int r, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[r]); fprintf(Outfile, "\tbl\t%s\n", Symtable[id].name); fprintf(Outfile, "\tmov\t%s, r0\n", reglist[r]); return (r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tlsl\t%s, %s, #%d\n", reglist[r], reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, int id) { // Get the offset to the variable set_var_offset(id); switch (Symtable[id].type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [r3]\n", reglist[r]); break; case P_INT: case P_LONG: case P_CHARPTR: case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tstr\t%s, [r3]\n", reglist[r]); break; default: fatald("Bad type in cgstorglob:", Symtable[id].type); } return (r); } // Given a P_XXX type value, return the // size of a primitive type in bytes. 
int cgprimsize(int type) { if (ptrtype(type)) return (4); switch (type) { case P_CHAR: return (1); case P_INT: case P_LONG: return (4); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Generate a global symbol void cgglobsym(int id) { int typesize; // Get the size of the type typesize = cgprimsize(Symtable[id].type); fprintf(Outfile, "\t.data\n" "\t.globl\t%s\n", Symtable[id].name); switch (typesize) { case 1: fprintf(Outfile, "%s:\t.byte\t0\n", Symtable[id].name); break; case 4: fprintf(Outfile, "%s:\t.long\t0\n", Symtable[id].name); break; default: fatald("Unknown typesize in cgglobsym: ", typesize); } } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "moveq", "movne", "movlt", "movgt", "movle", "movge" }; // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "movne", "moveq", "movge", "movle", "movgt", "movlt" }; // Compare two registers and set if true. int cgcompare_and_set(int ASTop, int r1, int r2) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #1\n", cmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\t%s\t%s, #0\n", invcmplist[ASTop - A_EQ], reglist[r2]); fprintf(Outfile, "\tuxtb\t%s, %s\n", reglist[r2], reglist[r2]); free_register(r1); return (r2); } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tb\tL%d\n", l); } // List of inverted branch instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *brlist[] = { "bne", "beq", "bge", "ble", "bgt", "blt" }; // Compare two registers and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label) { // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); fprintf(Outfile, "\t%s\tL%d\n", brlist[ASTop - A_EQ], label); freeall_registers(); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, int id) { fprintf(Outfile, "\tmov\tr0, %s\n", reglist[reg]); cgjump(Symtable[id].endlabel); } // Generate code to load the address of a global // identifier into a variable. Return a new register int cgaddress(int id) { // Get a new register int r = alloc_register(); // Get the offset to the variable set_var_offset(id); fprintf(Outfile, "\tmov\t%s, r3\n", reglist[r]); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { switch (type) { case P_CHARPTR: fprintf(Outfile, "\tldrb\t%s, [%s]\n", reglist[r], reglist[r]); break; case P_INTPTR: case P_LONGPTR: fprintf(Outfile, "\tldr\t%s, [%s]\n", reglist[r], reglist[r]); break; } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { switch (type) { case P_CHAR: fprintf(Outfile, "\tstrb\t%s, [%s]\n", reglist[r1], reglist[r2]); break; case P_INT: case P_LONG: fprintf(Outfile, "\tstr\t%s, [%s]\n", reglist[r1], reglist[r2]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } ================================================ FILE: 62_Cleanup/cgn.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 // Copyright (c) 2019 Warren Toomey, GPL3 // Flag to say which section were are outputting in to enum { no_seg, 
text_seg, data_seg } currSeg = no_seg; // List of available registers and their names. // We need a list of byte and doubleword registers, too. // The list also includes the registers used to // hold function parameters #define NUMFREEREGS 4 #define FIRSTPARAMREG 9 // Position of first parameter register static int freereg[NUMFREEREGS]; static char *reglist[] = { "r10", "r11", "r12", "r13", "r9", "r8", "rcx", "rdx", "rsi", "rdi" }; // We also need the 8-bit and 32-bit register names static char *breglist[] = { "r10b", "r11b", "r12b", "r13b", "r9b", "r8b", "cl", "dl", "sil", "dil" }; static char *dreglist[] = { "r10d", "r11d", "r12d", "r13d", "r9d", "r8d", "ecx", "edx", "esi", "edi" }; // Switch to the text segment void cgtextseg() { if (currSeg != text_seg) { fputs("\tsection .text\n", Outfile); currSeg = text_seg; } } // Switch to the data segment void cgdataseg() { if (currSeg != data_seg) { fputs("\tsection .data\n", Outfile); currSeg = data_seg; } } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Given a scalar type value, return the // size of the type in bytes. 
int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Store a register's value into a local variable int cgstorlocal(int r, struct symtable *sym) { if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\tqword\t[rbp+%d], %s\n", sym->st_posn, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\tbyte\t[rbp+%d], %s\n", sym->st_posn, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\tdword\t[rbp+%d], %s\n", sym->st_posn, dreglist[r]); break; default: fatald("Bad type in cgstorlocal:", sym->type); } return (r); } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch (type) { case P_CHAR: break; default: // Align whatever we have now on a 4-byte alignment // I put the generic code here so it can be reused elsewhere alignment = 4; offset = (offset + direction * (alignment - 1)) & ~(alignment - 1); } return (offset); } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; // Position of stack pointer offset relative to stack base pointer. // We need this to ensure it is aligned on a 16-byte boundary. static int stackOffset; // Create the position of a new local variable. 
static int newlocaloffset(int size) { // Decrement the offset by a minimum of 4 bytes // and allocate on the stack localOffset += (size > 4) ? size : 4; return (-localOffset); } // Push and pop a register on/off the stack static void pushreg(int r) { fprintf(Outfile, "\tpush\t%s\n", reglist[r]); } static void popreg(int r) { fprintf(Outfile, "\tpop\t%s\n", reglist[r]); } // Set all registers as available. // But if reg is positive, don't free that one. void cgfreeallregs(int keepreg) { int i; // fprintf(Outfile, "; freeing all registers\n"); for (i = 0; i < NUMFREEREGS; i++) if (i != keepreg) freereg[i] = 1; } // When we need to spill a register, we choose // the following register and then cycle through // the remaining registers. The spillreg increments // continually, so we need to take a modulo NUMFREEREGS // on it. static int spillreg = 0; // Allocate a free register. Return the number of // the register. Die if no available registers. int cgallocreg(void) { int reg; for (reg = 0; reg < NUMFREEREGS; reg++) { if (freereg[reg]) { freereg[reg] = 0; // fprintf(Outfile, "; allocated register %s\n", reglist[reg]); return (reg); } } // We have no registers, so we must spill one reg = (spillreg % NUMFREEREGS); spillreg++; // fprintf(Outfile, "; spilling reg %s\n", reglist[reg]); pushreg(reg); return (reg); } // Return a register to the list of available registers. // Check to see if it's not already there. 
void cgfreereg(int reg) {
  // Freeing a register that is already marked free is an error
  if (freereg[reg] != 0) {
    //fprintf(Outfile, "# error trying to free register %s\n", reglist[reg]);
    fatald("Error trying to free register", reg);
  }
  // If this was a spilled register, get it back.
  // Note that while any spill is outstanding the register popped
  // is chosen from spillreg, not from the reg argument, and
  // nothing is marked free.
  if (spillreg > 0) {
    spillreg--;
    reg = (spillreg % NUMFREEREGS);
    // fprintf(Outfile, "; unspilling reg %s\n", reglist[reg]);
    popreg(reg);
  } else {
    // fprintf(Outfile, "; freeing reg %s\n", reglist[reg]);
    freereg[reg] = 1;
  }
}

// Spill all registers on the stack,
// pushing them in ascending register order
void cgspillregs(void) {
  int i;

  for (i = 0; i < NUMFREEREGS; i++)
    pushreg(i);
}

// Unspill all registers from the stack, popping them
// in descending order to mirror cgspillregs()
static void cgunspillregs(void) {
  int i;

  for (i = NUMFREEREGS - 1; i >= 0; i--)
    popreg(i);
}

// Print out the assembly preamble
// for one output file: free all registers, switch to the text
// segment, emit the filename as an assembly comment and then
// the __switch helper that cgswitch-generated code jumps to.
// NOTE(review): the emitted comment says "rsi = switch table" but
// the routine copies the table address from rdx -- confirm wording.
void cgpreamble(char *filename) {
  cgfreeallregs(NOREG);
  cgtextseg();
  fprintf(Outfile, ";\t%s\n", filename);
  fprintf(Outfile,
          "; internal switch(expr) routine\n"
          "; rsi = switch table, rax = expr\n"
          "; from SubC: http://www.t3x.org/subc/\n"
          "\n"
          "__switch:\n"
          " push rsi\n"
          " mov rsi, rdx\n"
          " mov rbx, rax\n"
          " cld\n"
          " lodsq\n"
          " mov rcx, rax\n"
          "__next:\n"
          " lodsq\n"
          " mov rdx, rax\n"
          " lodsq\n"
          " cmp rbx, rdx\n"
          " jnz __no\n"
          " pop rsi\n"
          " jmp rax\n"
          "__no:\n"
          " loop __next\n"
          " lodsq\n"
          " pop rsi\n"
          " jmp rax\n\n");
}

// Nothing to do for the end of a file
void cgpostamble() {
}

// Print out a function preamble: the function's label and frame
// setup, the copying of register parameters to the stack, the
// assignment of stack positions to parameters and locals, and
// the stack pointer adjustment. Sets localOffset and stackOffset.
void cgfuncpreamble(struct symtable *sym) {
  char *name = sym->name;
  struct symtable *parm, *locvar;
  int cnt;
  int paramOffset = 16;		// Any pushed params start at this stack offset
  int paramReg = FIRSTPARAMREG;	// Index to the first param register in above reg lists

  // Output in the text segment, reset local offset
  cgtextseg();
  localOffset = 0;

  // Output the function start, save the rsp and rbp.
  // The global directive is emitted once per symbol,
  // tracked through sym->extinit.
  // if (sym->class == C_GLOBAL)
  if(!sym->extinit) {
    fprintf(Outfile, "\tglobal\t%s\n", name);
    sym->extinit = 1;
  }
  fprintf(Outfile,
          "%s:\n" "\tpush\trbp\n"
          "\tmov\trbp, rsp\n", name);

  // Copy any in-register parameters to the stack, up to six of them
  // The remaining parameters are already on the stack
  for (parm = sym->member, cnt = 1; parm != NULL; parm = parm->next, cnt++) {
    if (cnt > 6) {
      // Seventh and later: positive offsets above the saved rbp
      parm->st_posn = paramOffset;
      paramOffset += 8;
    } else {
      // First six: give each a local slot and store the register
      // there. paramReg walks down the register lists.
      parm->st_posn = newlocaloffset(parm->size);
      cgstorlocal(paramReg--, parm);
    }
  }

  // For the remainder, if they are a parameter then they are
  // already on the stack. If only a local, make a stack position.
  for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) {
    locvar->st_posn = newlocaloffset(locvar->size);
  }

  // Align the stack pointer to be a multiple of 16
  // less than its previous value
  stackOffset = (localOffset + 15) & ~15;
  fprintf(Outfile, "\tadd\trsp, %d\n", -stackOffset);
}

// Print out a function postamble: the end label, the stack
// pointer restore using stackOffset, and the return. All
// registers are marked free afterwards.
void cgfuncpostamble(struct symtable *sym) {
  cglabel(sym->st_endlabel);
  fprintf(Outfile, "\tadd\trsp, %d\n", stackOffset);
  fputs("\tpop rbp\n" "\tret\n", Outfile);
  cgfreeallregs(NOREG);
}

// Load an integer literal value into a register.
// Return the number of the register.
// For x86-64, we don't need to worry about the type.
int cgloadint(int value, int type) {
  // Get a new register
  int r = cgallocreg();

  fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], value);
  return (r);
}

// Load a value from a variable into a register.
// Return the number of the register. If the
// operation is pre- or post-increment/decrement,
// also perform this action.
int cgloadvar(struct symtable *sym, int op) {
  int r, postreg, offset = 1;

  // Emit an extern directive the first time the symbol is seen.
  // This test does not look at the symbol's class.
  if(!sym->extinit) {
    fprintf(Outfile, "extern\t%s\n", sym->name);
    sym->extinit = 1;
  }

  // Get a new register
  r = cgallocreg();

  // If the symbol is a pointer, use the size
  // of the type that it points to as any
  // increment or decrement. If not, it's one.
  if (ptrtype(sym->type))
    offset = typesize(value_at(sym->type), sym->ctype);

  // Negate the offset for decrements
  if (op == A_PREDEC || op == A_POSTDEC)
    offset = -offset;

  // If we have a pre-operation
  if (op == A_PREINC || op == A_PREDEC) {
    // Load the symbol's address
    if (sym->class == C_LOCAL || sym->class == C_PARAM)
      fprintf(Outfile, "\tlea\t%s, [rbp+%d]\n", reglist[r], sym->st_posn);
    else
      fprintf(Outfile, "\tlea\t%s, [%s]\n", reglist[r], sym->name);

    // and change the value at that address.
    // Sizes other than 1, 4 and 8 emit nothing here.
    switch (sym->size) {
      case 1:
        fprintf(Outfile, "\tadd\tbyte [%s], %d\n", reglist[r], offset);
        break;
      case 4:
        fprintf(Outfile, "\tadd\tdword [%s], %d\n", reglist[r], offset);
        break;
      case 8:
        fprintf(Outfile, "\tadd\tqword [%s], %d\n", reglist[r], offset);
        break;
    }
  }

  // Now load the output register with the value.
  // r is overwritten, so the address loaded above is not kept.
  if (sym->class == C_LOCAL || sym->class == C_PARAM) {
    switch (sym->size) {
      case 1:
        fprintf(Outfile, "\tmovzx\t%s, byte [rbp+%d]\n", reglist[r],
                sym->st_posn);
        break;
      case 4:
        fprintf(Outfile, "\tmovsxd\t%s, dword [rbp+%d]\n", reglist[r],
                sym->st_posn);
        break;
      case 8:
        fprintf(Outfile, "\tmov\t%s, [rbp+%d]\n", reglist[r],
                sym->st_posn);
    }
  } else {
    switch (sym->size) {
      case 1:
        fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], sym->name);
        break;
      case 4:
        fprintf(Outfile, "\tmovsxd\t%s, dword [%s]\n", reglist[r], sym->name);
        break;
      case 8:
        fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], sym->name);
    }
  }

  // If we have a post-operation, get a new register
  // so that the value already loaded into r is preserved
  if (op == A_POSTINC || op == A_POSTDEC) {
    postreg = cgallocreg();

    // Load the symbol's address
    if (sym->class == C_LOCAL || sym->class == C_PARAM)
      fprintf(Outfile, "\tlea\t%s, [rbp+%d]\n", reglist[postreg],
              sym->st_posn);
    else
      fprintf(Outfile, "\tlea\t%s, [%s]\n", reglist[postreg], sym->name);

    // and change the value at that address
    switch (sym->size) {
      case 1:
        fprintf(Outfile, "\tadd\tbyte [%s], %d\n", reglist[postreg], offset);
        break;
      case 4:
        fprintf(Outfile, "\tadd\tdword [%s], %d\n", reglist[postreg], offset);
        break;
      case 8:
        fprintf(Outfile, "\tadd\tqword [%s], %d\n", reglist[postreg], offset);
        break;
    }

    // Finally, free the register
    cgfreereg(postreg);
  }

  // Return the register with the value
  return (r);
}

// Given the label number of a global string,
// load its address into a new register
int cgloadglobstr(int label) {
  // Get a new register
  int r = cgallocreg();
  fprintf(Outfile, "\tmov\t%s, L%d\n", reglist[r], label);
  return (r);
}

// Add two registers together and return
// the number of the register with the result.
// The result is left in r1; r2 is freed.
int cgadd(int r1, int r2) {
  fprintf(Outfile, "\tadd\t%s, %s\n", reglist[r1], reglist[r2]);
  cgfreereg(r2);
  return (r1);
}

// Subtract the second register from the first and
// return the number of the register with the result.
// The result is left in r1; r2 is freed.
int cgsub(int r1, int r2) {
  fprintf(Outfile, "\tsub\t%s, %s\n", reglist[r1], reglist[r2]);
  cgfreereg(r2);
  return (r1);
}

// Multiply two registers together and return
// the number of the register with the result.
// The result is left in r1; r2 is freed.
int cgmul(int r1, int r2) {
  fprintf(Outfile, "\timul\t%s, %s\n", reglist[r1], reglist[r2]);
  cgfreereg(r2);
  return (r1);
}

// Divide or modulo the first register by the second and
// return the number of the register with the result.
// op == A_DIVIDE copies rax into r1; any other op copies rdx.
// r2 is freed.
int cgdivmod(int r1, int r2, int op) {
  fprintf(Outfile, "\tmov\trax, %s\n", reglist[r1]);
  fprintf(Outfile, "\tcqo\n");
  fprintf(Outfile, "\tidiv\t%s\n", reglist[r2]);
  if (op == A_DIVIDE)
    fprintf(Outfile, "\tmov\t%s, rax\n", reglist[r1]);
  else
    fprintf(Outfile, "\tmov\t%s, rdx\n", reglist[r1]);
  cgfreereg(r2);
  return (r1);
}

// Bitwise AND two registers.
// The result is left in r1; r2 is freed.
int cgand(int r1, int r2) {
  fprintf(Outfile, "\tand\t%s, %s\n", reglist[r1], reglist[r2]);
  cgfreereg(r2);
  return (r1);
}

// Bitwise OR two registers.
// The result is left in r1; r2 is freed.
int cgor(int r1, int r2) {
  fprintf(Outfile, "\tor\t%s, %s\n", reglist[r1], reglist[r2]);
  cgfreereg(r2);
  return (r1);
}

// Bitwise XOR two registers.
// The result is left in r1; r2 is freed.
int cgxor(int r1, int r2) {
  fprintf(Outfile, "\txor\t%s, %s\n", reglist[r1], reglist[r2]);
  cgfreereg(r2);
  return (r1);
}

// Shift left r1 by r2 bits.
// The shift count is first copied into cl. r2 is freed.
int cgshl(int r1, int r2) {
  fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]);
  fprintf(Outfile, "\tshl\t%s, cl\n", reglist[r1]);
  cgfreereg(r2);
  return (r1);
}

// Shift right r1 by r2 bits.
// The shift count is first copied into cl. r2 is freed.
int cgshr(int r1, int r2) {
  fprintf(Outfile, "\tmov\tcl, %s\n", breglist[r2]);
  fprintf(Outfile, "\tshr\t%s, cl\n", reglist[r1]);
  cgfreereg(r2);
  return (r1);
}

// Negate a register's value
int cgnegate(int r) {
  fprintf(Outfile, "\tneg\t%s\n", reglist[r]);
  return (r);
}

// Invert a register's value
int cginvert(int r) {
  fprintf(Outfile, "\tnot\t%s\n", reglist[r]);
  return (r);
}

// Logically negate a register's value:
// the register ends up holding 1 if it was zero, else 0
int cglognot(int r) {
  fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]);
  fprintf(Outfile, "\tsete\t%s\n", breglist[r]);
  fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r], breglist[r]);
  return (r);
}

// Load a boolean value (only 0 or 1)
// into the given register
void cgloadboolean(int r, int val) {
  fprintf(Outfile, "\tmov\t%s, %d\n", reglist[r], val);
}

// Convert an integer value to a boolean value. Jump if
// it's an IF, WHILE, LOGAND or LOGOR operation:
// IF, WHILE and LOGAND jump to the label when the value is zero,
// LOGOR jumps when it is non-zero. Any other op instead sets
// the register to 0 or 1.
int cgboolean(int r, int op, int label) {
  fprintf(Outfile, "\ttest\t%s, %s\n", reglist[r], reglist[r]);
  switch (op) {
    case A_IF:
    case A_WHILE:
    case A_LOGAND:
      fprintf(Outfile, "\tje\tL%d\n", label);
      break;
    case A_LOGOR:
      fprintf(Outfile, "\tjne\tL%d\n", label);
      break;
    default:
      fprintf(Outfile, "\tsetnz\t%s\n", breglist[r]);
      fprintf(Outfile, "\tmovzx\t%s, byte %s\n", reglist[r], breglist[r]);
  }
  return (r);
}

// Call a function with the given symbol id.
// Pop off any arguments pushed on the stack.
// Return the register with the result int cgcall(struct symtable *sym, int numargs) { int outr; // Call the function if(!sym->extinit) { fprintf(Outfile, "extern\t%s\n", sym->name); sym->extinit = 1; } fprintf(Outfile, "\tcall\t%s\n", sym->name); // Remove any arguments pushed on the stack if (numargs > 6) fprintf(Outfile, "\tadd\trsp, %d\n", 8 * (numargs - 6)); // Unspill all the registers cgunspillregs(); // Get a new register and copy the return value into it outr = cgallocreg(); fprintf(Outfile, "\tmov\t%s, rax\n", reglist[outr]); return (outr); } // Given a register with an argument value, // copy this argument into the argposn'th // parameter in preparation for a future function call. // Note that argposn is 1, 2, 3, 4, ..., never zero. void cgcopyarg(int r, int argposn) { // If this is above the sixth argument, simply push the // register on the stack. We rely on being called with // successive arguments in the correct order for x86-64 if (argposn > 6) { fprintf(Outfile, "\tpush\t%s\n", reglist[r]); } else { // Otherwise, copy the value into one of the six registers // used to hold parameter values fprintf(Outfile, "\tmov\t%s, %s\n", reglist[FIRSTPARAMREG - argposn + 1], reglist[r]); } cgfreereg(r); } // Shift a register left by a constant int cgshlconst(int r, int val) { fprintf(Outfile, "\tsal\t%s, %d\n", reglist[r], val); return (r); } // Store a register's value into a variable int cgstorglob(int r, struct symtable *sym) { if(!sym->extinit) { fprintf(Outfile, "extern\t%s\n", sym->name); sym->extinit = 1; } if (cgprimsize(sym->type) == 8) { fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, reglist[r]); } else switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, breglist[r]); break; case P_INT: fprintf(Outfile, "\tmov\t[%s], %s\n", sym->name, dreglist[r]); break; default: fatald("Bad type in cgloadglob:", sym->type); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size, 
type; int initvalue; int i; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the variable (or its elements if an array) // and the type of the variable if (node->stype == S_ARRAY) { size = typesize(value_at(node->type), node->ctype); type = value_at(node->type); } else { size = node->size; type = node->type; } // Generate the global identity and the label cgdataseg(); if (node->class == C_GLOBAL) fprintf(Outfile, "\tglobal\t%s\n", node->name); if(!node->extinit) { node->extinit = 1; } fprintf(Outfile, "%s:\n", node->name); // Output space for one or more elements for (i = 0; i < node->nelems; i++) { // Get any initial value initvalue = 0; if (node->initlist != NULL) initvalue = node->initlist[i]; // Generate the space for this type // original version switch(size) { case 1: fprintf(Outfile, "\tdb\t%d\n", initvalue); break; case 4: fprintf(Outfile, "\tdd\t%d\n", initvalue); break; case 8: // Generate the pointer to a string literal. Treat a zero value // as actually zero, not the label L0 if (node->initlist != NULL && type == pointer_to(P_CHAR) && initvalue != 0) fprintf(Outfile, "\tdq\tL%d\n", initvalue); else fprintf(Outfile, "\tdq\t%d\n", initvalue); break; default: for (i = 0; i < size; i++) fprintf(Outfile, "\tdb\t0\n"); } } } // Generate a global string and its start label. // Don't output the label if append is true. void cgglobstr(int l, char *strvalue, int append) { char *cptr; if (!append) cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\tdb\t%d\n", *cptr); } } // NULL terminate a global string void cgglobstrend(void) { fprintf(Outfile, "\tdb\t0\n"); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "sete", "setne", "setl", "setg", "setle", "setge" }; // Compare two registers and set if true. 
int cgcompare_and_set(int ASTop, int r1, int r2, int type) { int size = cgprimsize(type); // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); switch (size) { case 1: fprintf(Outfile, "\tcmp\t%s, %s\n", breglist[r1], breglist[r2]); break; case 4: fprintf(Outfile, "\tcmp\t%s, %s\n", dreglist[r1], dreglist[r2]); break; default: fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); } fprintf(Outfile, "\t%s\t%s\n", cmplist[ASTop - A_EQ], breglist[r2]); fprintf(Outfile, "\tmovzx\t%s, %s\n", reglist[r2], breglist[r2]); cgfreereg(r1); return (r2); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tjmp\tL%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "jne", "je", "jge", "jle", "jg", "jl" }; // Compare two registers and jump if false. int cgcompare_and_jump(int ASTop, int r1, int r2, int label, int type) { int size = cgprimsize(type); // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); switch (size) { case 1: fprintf(Outfile, "\tcmp\t%s, %s\n", breglist[r1], breglist[r2]); break; case 4: fprintf(Outfile, "\tcmp\t%s, %s\n", dreglist[r1], dreglist[r2]); break; default: fprintf(Outfile, "\tcmp\t%s, %s\n", reglist[r1], reglist[r2]); } fprintf(Outfile, "\t%s\tL%d\n", invcmplist[ASTop - A_EQ], label); cgfreereg(r1); cgfreereg(r2); return (NOREG); } // Widen the value in the register from the old // to the new type, and return a register with // this new value int cgwiden(int r, int oldtype, int newtype) { // Nothing to do return (r); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Only return a value if we have a value to return if (reg != NOREG) { // Deal with pointers here as we can't put them in // the switch statement if (ptrtype(sym->type)) fprintf(Outfile, "\tmov\trax, %s\n", 
reglist[reg]); else { // Generate code depending on the function's type switch (sym->type) { case P_CHAR: fprintf(Outfile, "\tmovzx\teax, %s\n", breglist[reg]); break; case P_INT: fprintf(Outfile, "\tmov\teax, %s\n", dreglist[reg]); break; case P_LONG: fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); break; default: fatald("Bad function type in cgreturn:", sym->type); } } } cgjump(sym->st_endlabel); } // Generate code to load the address of an // identifier into a variable. Return a new register int cgaddress(struct symtable *sym) { int r = cgallocreg(); if (!sym->extinit) { fprintf(Outfile, "extern\t%s\n", sym->name); sym->extinit = 1; } if (sym->class == C_GLOBAL || sym->class == C_EXTERN || sym->class == C_STATIC) fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r], sym->name); else fprintf(Outfile, "\tlea\t%s, [rbp+%d]\n", reglist[r], sym->st_posn); return (r); } // Dereference a pointer to get the value it // pointing at into the same register int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); switch (size) { case 1: fprintf(Outfile, "\tmovzx\t%s, byte [%s]\n", reglist[r], reglist[r]); break; case 4: fprintf(Outfile, "\tmovsx\t%s, dword [%s]\n", reglist[r], reglist[r]); break; case 8: fprintf(Outfile, "\tmov\t%s, [%s]\n", reglist[r], reglist[r]); break; default: fatald("Can't cgderef on type:", type); } return (r); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { //Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, "\tmov\t[%s], byte %s\n", reglist[r2], breglist[r1]); break; case 4: fprintf(Outfile, "\tmov\t[%s], dword %s\n", reglist[r2], dreglist[r1]); break; case 8: fprintf(Outfile, "\tmov\t[%s], %s\n", reglist[r2], reglist[r1]); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } // Generate a switch jump table and the code to // load the 
registers and call the switch() code void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; // Get a label for the switch table label = genlabel(); cglabel(label); // Heuristic. If we have no cases, create one case // which points to the default case if (casecount == 0) { caseval[0] = 0; caselabel[0] = defaultlabel; casecount = 1; } // Generate the switch jump table. fprintf(Outfile, "\tdq\t%d\n", casecount); for (i = 0; i < casecount; i++) fprintf(Outfile, "\tdq\t%d, L%d\n", caseval[i], caselabel[i]); fprintf(Outfile, "\tdq\tL%d\n", defaultlabel); // Load the specific registers cglabel(toplabel); fprintf(Outfile, "\tmov\trax, %s\n", reglist[reg]); fprintf(Outfile, "\tmov\trdx, L%d\n", label); fprintf(Outfile, "\tjmp\t__switch\n"); } // Move value between registers void cgmove(int r1, int r2) { fprintf(Outfile, "\tmov\t%s, %s\n", reglist[r2], reglist[r1]); } // Output a gdb directive to say on which // source code line number the following // assembly code came from (not with nasm) void cglinenum(int line) { //fprintf(Outfile, ";\t.loc 1 %d 0\n", line); } ================================================ FILE: 62_Cleanup/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Linestart; // True if at start of a line extern_ int Putback; // Character put back by scanner extern_ struct symtable *Functionid; // Symbol ptr of the current function extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ char *Infilename; // Name of file we are parsing extern_ char *Outfilename; // Name of file we opened as Outfile extern_ struct token Token; // Last token scanned extern_ struct token Peektoken; // A look-ahead token extern_ char Text[TEXTLEN + 1]; // Last identifier scanned extern_ int Looplevel; // Depth of nested loops extern_ 
int Switchlevel; // Depth of nested switches extern char *Tstring[]; // List of token strings // Symbol table lists extern_ struct symtable *Globhead, *Globtail; // Global variables and functions extern_ struct symtable *Loclhead, *Locltail; // Local variables extern_ struct symtable *Parmhead, *Parmtail; // Local parameters extern_ struct symtable *Membhead, *Membtail; // Temp list of struct/union members extern_ struct symtable *Structhead, *Structtail; // List of struct types extern_ struct symtable *Unionhead, *Uniontail; // List of union types extern_ struct symtable *Enumhead, *Enumtail; // List of enum types and values extern_ struct symtable *Typehead, *Typetail; // List of typedefs // Command-line flags extern_ int O_dumpAST; // If true, dump the AST trees extern_ int O_dumpsym; // If true, dump the symbol table extern_ int O_keepasm; // If true, keep any assembly files extern_ int O_assemble; // If true, assemble the assembly files extern_ int O_dolink; // If true, link the object files extern_ int O_verbose; // If true, print info on compilation stages ================================================ FILE: 62_Cleanup/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 static struct symtable *composite_declaration(int type); static int typedef_declaration(struct symtable **ctype); static int type_of_typedef(char *name, struct symtable **ctype); static void enum_declaration(void); // Parse the current token and return a primitive type enum value, // a pointer to any composite type and possibly modify // the class of the type. 
int parse_type(struct symtable **ctype, int *class) { int type, exstatic = 1; // See if the class has been changed to extern or static while (exstatic) { switch (Token.token) { case T_EXTERN: if (*class == C_STATIC) fatal("Illegal to have extern and static at the same time"); *class = C_EXTERN; scan(&Token); break; case T_STATIC: if (*class == C_LOCAL) fatal("Compiler doesn't support static local declarations"); if (*class == C_EXTERN) fatal("Illegal to have extern and static at the same time"); *class = C_STATIC; scan(&Token); break; default: exstatic = 0; } } // Now work on the actual type keyword switch (Token.token) { case T_VOID: type = P_VOID; scan(&Token); break; case T_CHAR: type = P_CHAR; scan(&Token); break; case T_INT: type = P_INT; scan(&Token); break; case T_LONG: type = P_LONG; scan(&Token); break; // For the following, if we have a ';' after the // parsing then there is no type, so return -1. // Example: struct x {int y; int z}; case T_STRUCT: type = P_STRUCT; *ctype = composite_declaration(P_STRUCT); if (Token.token == T_SEMI) type = -1; break; case T_UNION: type = P_UNION; *ctype = composite_declaration(P_UNION); if (Token.token == T_SEMI) type = -1; break; case T_ENUM: type = P_INT; // Enums are really ints enum_declaration(); if (Token.token == T_SEMI) type = -1; break; case T_TYPEDEF: type = typedef_declaration(ctype); if (Token.token == T_SEMI) type = -1; break; case T_IDENT: type = type_of_typedef(Text, ctype); break; default: fatals("Illegal type, token", Token.tokstr); } return (type); } // Given a type parsed by parse_type(), scan in any following // '*' tokens and return the new type int parse_stars(int type) { while (1) { if (Token.token != T_STAR) break; type = pointer_to(type); scan(&Token); } return (type); } // Parse a type which appears inside a cast int parse_cast(struct symtable **ctype) { int type, class = 0; // Get the type inside the parentheses type = parse_stars(parse_type(ctype, &class)); // Do some error checking. 
I'm sure more can be done if (type == P_STRUCT || type == P_UNION || type == P_VOID) fatal("Cannot cast to a struct, union or void type"); return (type); } // Given a type, parse an expression of literals and ensure // that the type of this expression matches the given type. // Parse any type cast that precedes the expression. // If an integer literal, return this value. // If a string literal, return the label number of the string. int parse_literal(int type) { struct ASTnode *tree; // Parse the expression and optimise the resulting AST tree tree = optimise(binexpr(0)); // If there's a cast, get the child and // mark it as having the type from the cast if (tree->op == A_CAST) { tree->left->type = tree->type; tree = tree->left; } // The tree must now have an integer or string literal if (tree->op != A_INTLIT && tree->op != A_STRLIT) fatal("Cannot initialise globals with a general expression"); // If the type is char * and if (type == pointer_to(P_CHAR)) { // We have a string literal, return the label number if (tree->op == A_STRLIT) return (tree->a_intvalue); // We have a zero int literal, so that's a NULL if (tree->op == A_INTLIT && tree->a_intvalue == 0) return (0); } // We only get here with an integer literal. The input type // is an integer type and is wide enough to hold the literal value if (inttype(type) && typesize(type, NULL) >= typesize(tree->type, NULL)) return (tree->a_intvalue); fatal("Type mismatch: literal vs. variable"); return (0); // Keep -Wall happy } // Given a pointer to a symbol that may already exist // return true if this symbol doesn't exist. 
We use // this function to convert externs into globals static int is_new_symbol(struct symtable *sym, int class, int type, struct symtable *ctype) { // There is no existing symbol, thus is new if (sym==NULL) return(1); // global versus extern: if they match that it's not new // and we can convert the class to global if ((sym->class== C_GLOBAL && class== C_EXTERN) || (sym->class== C_EXTERN && class== C_GLOBAL)) { // If the types don't match, there's a problem if (type != sym->type) fatals("Type mismatch between global/extern", sym->name); // Struct/unions, also compare the ctype if (type >= P_STRUCT && ctype != sym->ctype) fatals("Type mismatch between global/extern", sym->name); // If we get to here, the types match, so mark the symbol // as global sym->class= C_GLOBAL; // Return that symbol is not new return(0); } // It must be a duplicate symbol if we get here fatals("Duplicate global variable declaration", sym->name); return(-1); // Keep -Wall happy } // Given the type, name and class of a scalar variable, // parse any initialisation value and allocate storage for it. // Return the variable's symbol table entry. 
static struct symtable *scalar_declaration(char *varname, int type, struct symtable *ctype, int class, struct ASTnode **tree) { struct symtable *sym = NULL; struct ASTnode *varnode, *exprnode; *tree = NULL; // Add this as a known scalar switch (class) { case C_STATIC: case C_EXTERN: case C_GLOBAL: // See if this variable is new or already exists sym= findglob(varname); if (is_new_symbol(sym, class, type, ctype)) sym = addglob(varname, type, ctype, S_VARIABLE, class, 1, 0); break; case C_LOCAL: sym = addlocl(varname, type, ctype, S_VARIABLE, 1); break; case C_PARAM: sym = addparm(varname, type, ctype, S_VARIABLE); break; case C_MEMBER: sym = addmemb(varname, type, ctype, S_VARIABLE, 1); break; } // The variable is being initialised if (Token.token == T_ASSIGN) { // Only possible for a global or local if (class != C_GLOBAL && class != C_LOCAL && class != C_STATIC) fatals("Variable can not be initialised", varname); scan(&Token); // Globals must be assigned a literal value if (class == C_GLOBAL || class == C_STATIC) { // Create one initial value for the variable and // parse this value sym->initlist = (int *) malloc(sizeof(int)); sym->initlist[0] = parse_literal(type); } if (class == C_LOCAL) { // Make an A_IDENT AST node with the variable varnode = mkastleaf(A_IDENT, sym->type, sym->ctype, sym, 0); // Get the expression for the assignment, make into a rvalue exprnode = binexpr(0); exprnode->rvalue = 1; // Ensure the expression's type matches the variable exprnode = modify_type(exprnode, varnode->type, varnode->ctype, 0); if (exprnode == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree *tree = mkastnode(A_ASSIGN, exprnode->type, exprnode->ctype, exprnode, NULL, varnode, NULL, 0); } } // Generate any global space if (class == C_GLOBAL || class == C_STATIC) genglobsym(sym); return (sym); } // Given the type, name and class of an array variable, parse // the size of the array, if any. 
Then parse any initialisation // value and allocate storage for it. // Return the variable's symbol table entry. static struct symtable *array_declaration(char *varname, int type, struct symtable *ctype, int class) { struct symtable *sym; // New symbol table entry int nelems = -1; // Assume the number of elements won't be given int maxelems; // The maximum number of elements in the init list int *initlist; // The list of initial elements int i = 0, j; // Skip past the '[' scan(&Token); // See if we have an array size if (Token.token != T_RBRACKET) { nelems = parse_literal(P_INT); if (nelems <= 0) fatald("Array size is illegal", nelems); } // Ensure we have a following ']' match(T_RBRACKET, "]"); // Add this as a known array. We treat the // array as a pointer to its elements' type switch (class) { case C_STATIC: case C_EXTERN: case C_GLOBAL: // See if this variable is new or already exists sym= findglob(varname); if (is_new_symbol(sym, class, pointer_to(type), ctype)) sym = addglob(varname, pointer_to(type), ctype, S_ARRAY, class, 0, 0); break; case C_LOCAL: sym = addlocl(varname, pointer_to(type), ctype, S_ARRAY, 0); break; default: fatal("Declaration of array parameters is not implemented"); } // Array initialisation if (Token.token == T_ASSIGN) { if (class != C_GLOBAL && class != C_STATIC) fatals("Variable can not be initialised", varname); scan(&Token); // Get the following left curly bracket match(T_LBRACE, "{"); #define TABLE_INCREMENT 10 // If the array already has nelems, allocate that many elements // in the list. Otherwise, start with TABLE_INCREMENT. 
if (nelems != -1) maxelems = nelems; else maxelems = TABLE_INCREMENT; initlist = (int *) malloc(maxelems * sizeof(int)); // Loop getting a new literal value from the list while (1) { // Check we can add the next value, then parse and add it if (nelems != -1 && i == maxelems) fatal("Too many values in initialisation list"); initlist[i++] = parse_literal(type); // Increase the list size if the original size was // not set and we have hit the end of the current list if (nelems == -1 && i == maxelems) { maxelems += TABLE_INCREMENT; initlist = (int *) realloc(initlist, maxelems * sizeof(int)); } // Leave when we hit the right curly bracket if (Token.token == T_RBRACE) { scan(&Token); break; } // Next token must be a comma, then comma(); } // Zero any unused elements in the initlist. // Attach the list to the symbol table entry for (j = i; j < sym->nelems; j++) initlist[j] = 0; if (i > nelems) nelems = i; sym->initlist = initlist; } // Set the size of the array and the number of elements // Only externs can have no elements. if (class != C_EXTERN && nelems<=0) fatals("Array must have non-zero elements", sym->name); sym->nelems = nelems; sym->size = sym->nelems * typesize(type, ctype); // Generate any global space if (class == C_GLOBAL || class == C_STATIC) genglobsym(sym); return (sym); } // Given a pointer to the new function being declared and // a possibly NULL pointer to the function's previous declaration, // parse a list of parameters and cross-check them against the // previous declaration. 
Return the count of parameters static int param_declaration_list(struct symtable *oldfuncsym, struct symtable *newfuncsym) { int type, paramcnt = 0; struct symtable *ctype; struct symtable *protoptr = NULL; struct ASTnode *unused; // Get the pointer to the first prototype parameter if (oldfuncsym != NULL) protoptr = oldfuncsym->member; // Loop getting any parameters while (Token.token != T_RPAREN) { // If the first token is 'void' if (Token.token == T_VOID) { // Peek at the next token. If a ')', the function // has no parameters, so leave the loop. scan(&Peektoken); if (Peektoken.token == T_RPAREN) { // Move the Peektoken into the Token paramcnt = 0; scan(&Token); break; } } // Get the type of the next parameter type = declaration_list(&ctype, C_PARAM, T_COMMA, T_RPAREN, &unused); if (type == -1) fatal("Bad type in parameter list"); // Ensure the type of this parameter matches the prototype if (protoptr != NULL) { if (type != protoptr->type) fatald("Type doesn't match prototype for parameter", paramcnt + 1); protoptr = protoptr->next; } paramcnt++; // Stop when we hit the right parenthesis if (Token.token == T_RPAREN) break; // We need a comma as separator comma(); } if (oldfuncsym != NULL && paramcnt != oldfuncsym->nelems) fatals("Parameter count mismatch for function", oldfuncsym->name); // Return the count of parameters return (paramcnt); } // // function_declaration: type identifier '(' parameter_list ')' ; // | type identifier '(' parameter_list ')' compound_statement ; // // Parse the declaration of function. static struct symtable *function_declaration(char *funcname, int type, struct symtable *ctype, int class) { struct ASTnode *tree, *finalstmt; struct symtable *oldfuncsym, *newfuncsym = NULL; int endlabel, paramcnt; int linenum= Line; // Text has the identifier's name. If this exists and is a // function, get the id. Otherwise, set oldfuncsym to NULL. 
if ((oldfuncsym = findsymbol(funcname)) != NULL) if (oldfuncsym->stype != S_FUNCTION) oldfuncsym = NULL; // If this is a new function declaration, get a // label-id for the end label, and add the function // to the symbol table, if (oldfuncsym == NULL) { endlabel = genlabel(); // Assumption: functions only return scalar types, so NULL below newfuncsym = addglob(funcname, type, NULL, S_FUNCTION, class, 0, endlabel); } // Scan in the '(', any parameters and the ')'. // Pass in any existing function prototype pointer lparen(); paramcnt = param_declaration_list(oldfuncsym, newfuncsym); rparen(); // If this is a new function declaration, update the // function symbol entry with the number of parameters. // Also copy the parameter list into the function's node. if (newfuncsym) { newfuncsym->nelems = paramcnt; newfuncsym->member = Parmhead; oldfuncsym = newfuncsym; } // Clear out the parameter list Parmhead = Parmtail = NULL; // If the declaration ends in a semicolon, only a prototype. if (Token.token == T_SEMI) return (oldfuncsym); // This is not just a prototype. // Set the Functionid global to the function's symbol pointer Functionid = oldfuncsym; // Get the AST tree for the compound statement and mark // that we have parsed no loops or switches yet Looplevel = 0; Switchlevel = 0; lbrace(); tree = compound_statement(0); rbrace(); // If the function type isn't P_VOID ... if (type != P_VOID) { // Error if no statements in the function if (tree == NULL) fatal("No statements in function with non-void type"); // Check that the last AST operation in the // compound statement was a return statement finalstmt = (tree->op == A_GLUE) ? 
tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); } // Build the A_FUNCTION node which has the function's symbol pointer // and the compound statement sub-tree tree = mkastunary(A_FUNCTION, type, ctype, tree, oldfuncsym, endlabel); tree->linenum= linenum; // Do optimisations on the AST tree tree = optimise(tree); // Dump the AST tree if requested if (O_dumpAST) { dumpAST(tree, NOLABEL, 0); fprintf(stdout, "\n\n"); } // Generate the assembly code for it genAST(tree, NOLABEL, NOLABEL, NOLABEL, 0); // Now free the symbols associated with this function freeloclsyms(); return (oldfuncsym); } // Parse composite type declarations: structs or unions. // Either find an existing struct/union declaration, or build // a struct/union symbol table entry and return its pointer. static struct symtable *composite_declaration(int type) { struct symtable *ctype = NULL; struct symtable *m; struct ASTnode *unused; int offset; int t; // Skip the struct/union keyword scan(&Token); // See if there is a following struct/union name if (Token.token == T_IDENT) { // Find any matching composite type if (type == P_STRUCT) ctype = findstruct(Text); else ctype = findunion(Text); scan(&Token); } // If the next token isn't an LBRACE , this is // the usage of an existing struct/union type. // Return the pointer to the type. if (Token.token != T_LBRACE) { if (ctype == NULL) fatals("unknown struct/union type", Text); return (ctype); } // Ensure this struct/union type hasn't been // previously defined if (ctype) fatals("previously defined struct/union", Text); // Build the composite type and skip the left brace if (type == P_STRUCT) ctype = addstruct(Text); else ctype = addunion(Text); scan(&Token); // Scan in the list of members while (1) { // Get the next member. 
m is used as a dummy t = declaration_list(&m, C_MEMBER, T_SEMI, T_RBRACE, &unused); if (t == -1) fatal("Bad type in member list"); if (Token.token == T_SEMI) scan(&Token); if (Token.token == T_RBRACE) break; } // Attach to the struct type's node rbrace(); if (Membhead == NULL) fatals("No members in struct", ctype->name); ctype->member = Membhead; Membhead = Membtail = NULL; // Set the offset of the initial member // and find the first free byte after it m = ctype->member; m->st_posn = 0; offset = typesize(m->type, m->ctype); // Set the position of each successive member in the composite type // Unions are easy. For structs, align the member and find the next free byte for (m = m->next; m != NULL; m = m->next) { // Set the offset for this member if (type == P_STRUCT) m->st_posn = genalign(m->type, offset, 1); else m->st_posn = 0; // Get the offset of the next free byte after this member offset += typesize(m->type, m->ctype); } // Set the overall size of the composite type ctype->size = offset; return (ctype); } // Parse an enum declaration static void enum_declaration(void) { struct symtable *etype = NULL; char *name = NULL; int intval = 0; // Skip the enum keyword. scan(&Token); // If there's a following enum type name, get a // pointer to any existing enum type node. if (Token.token == T_IDENT) { etype = findenumtype(Text); name = strdup(Text); // As it gets tromped soon scan(&Token); } // If the next token isn't a LBRACE, check // that we have an enum type name, then return if (Token.token != T_LBRACE) { if (etype == NULL) fatals("undeclared enum type:", name); return; } // We do have an LBRACE. Skip it scan(&Token); // If we have an enum type name, ensure that it // hasn't been declared before. 
if (etype != NULL) fatals("enum type redeclared:", etype->name); else // Build an enum type node for this identifier etype = addenum(name, C_ENUMTYPE, 0); // Loop to get all the enum values while (1) { // Ensure we have an identifier // Copy it in case there's an int literal coming up ident(); name = strdup(Text); // Ensure this enum value hasn't been declared before etype = findenumval(name); if (etype != NULL) fatals("enum value redeclared:", Text); // If the next token is an '=', skip it and // get the following int literal if (Token.token == T_ASSIGN) { scan(&Token); if (Token.token != T_INTLIT) fatal("Expected int literal after '='"); intval = Token.intvalue; scan(&Token); } // Build an enum value node for this identifier. // Increment the value for the next enum identifier. etype = addenum(name, C_ENUMVAL, intval++); // Bail out on a right curly bracket, else get a comma if (Token.token == T_RBRACE) break; comma(); } scan(&Token); // Skip over the right curly bracket } // Parse a typedef declaration and return the type // and ctype that it represents static int typedef_declaration(struct symtable **ctype) { int type, class = 0; // Skip the typedef keyword. scan(&Token); // Get the actual type following the keyword type = parse_type(ctype, &class); if (class != 0) fatal("Can't have static/extern in a typedef declaration"); // See if the typedef identifier already exists if (findtypedef(Text) != NULL) fatals("redefinition of typedef", Text); // Get any following '*' tokens type = parse_stars(type); // It doesn't exist so add it to the typedef list addtypedef(Text, type, *ctype); scan(&Token); return (type); } // Given a typedef name, return the type it represents static int type_of_typedef(char *name, struct symtable **ctype) { struct symtable *t; // Look up the typedef in the list t = findtypedef(name); if (t == NULL) fatals("unknown type", name); scan(&Token); *ctype = t->ctype; return (t->type); } // Parse the declaration of a variable or function. 
// The type and any following '*'s have been scanned, and we // have the identifier in the Token variable. // The class argument is the symbol's class. // Return a pointer to the symbol's entry in the symbol table static struct symtable *symbol_declaration(int type, struct symtable *ctype, int class, struct ASTnode **tree) { struct symtable *sym = NULL; char *varname = strdup(Text); // Ensure that we have an identifier. // We copied it above so we can scan more tokens in, e.g. // an assignment expression for a local variable. ident(); // Deal with function declarations if (Token.token == T_LPAREN) { return (function_declaration(varname, type, ctype, class)); } // See if this array or scalar variable has already been declared switch (class) { case C_EXTERN: case C_STATIC: case C_GLOBAL: case C_LOCAL: case C_PARAM: if (findlocl(varname) != NULL) fatals("Duplicate local variable declaration", varname); case C_MEMBER: if (findmember(varname) != NULL) fatals("Duplicate struct/union member declaration", varname); } // Add the array or scalar variable to the symbol table if (Token.token == T_LBRACKET) { sym = array_declaration(varname, type, ctype, class); *tree= NULL; // Local arrays are not initialised } else sym = scalar_declaration(varname, type, ctype, class, tree); return (sym); } // Parse a list of symbols where there is an initial type. // Return the type of the symbols. et1 and et2 are end tokens. int declaration_list(struct symtable **ctype, int class, int et1, int et2, struct ASTnode **gluetree) { int inittype, type; struct symtable *sym; struct ASTnode *tree; *gluetree = NULL; // Get the initial type. 
If -1, it was // a composite type definition, return this if ((inittype = parse_type(ctype, &class)) == -1) return (inittype); // Now parse the list of symbols while (1) { // See if this symbol is a pointer type = parse_stars(inittype); // Parse this symbol sym = symbol_declaration(type, *ctype, class, &tree); // We parsed a function, there is no list so leave if (sym->stype == S_FUNCTION) { if (class != C_GLOBAL && class != C_STATIC) fatal("Function definition not at global level"); return (type); } // Glue any AST tree from a local declaration // to build a sequence of assignments to perform if (*gluetree == NULL) *gluetree = tree; else *gluetree = mkastnode(A_GLUE, P_NONE, NULL, *gluetree, NULL, tree, NULL, 0); // We are at the end of the list, leave if (Token.token == et1 || Token.token == et2) return (type); // Otherwise, we need a comma as separator comma(); } return(0); // Keep -Wall happy } // Parse one or more global declarations, // either variables, functions or structs void global_declarations(void) { struct symtable *ctype= NULL; struct ASTnode *unused; // Loop parsing one declaration list until the end of file while (Token.token != T_EOF) { declaration_list(&ctype, C_GLOBAL, T_SEMI, T_EOF, &unused); // Skip any separating semicolons if (Token.token == T_SEMI) scan(&Token); } } ================================================ FILE: 62_Cleanup/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c void reject_token(struct token *t); int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, int type, struct symtable *ctype, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue); struct ASTnode *mkastleaf(int op, int type, struct symtable *ctype, struct symtable *sym, int intvalue); struct ASTnode *mkastunary(int op, int type, struct symtable *ctype, struct ASTnode *left, struct symtable *sym, int 
intvalue); void dumpAST(struct ASTnode *n, int label, int level); // gen.c int genlabel(void); int genAST(struct ASTnode *n, int iflabel, int looptoplabel, int loopendlabel, int parentASTop); void genpreamble(char *filename); void genpostamble(); void genfreeregs(int keepreg); void genglobsym(struct symtable *node); int genglobstr(char *strvalue, int append); void genglobstrend(void); int genprimsize(int type); int genalign(int type, int offset, int direction); void genreturn(int reg, int id); // cg.c int cgprimsize(int type); int cgalign(int type, int offset, int direction); void cgtextseg(); void cgdataseg(); int cgallocreg(void); void cgfreeallregs(int keepreg); void cgfreereg(int reg); void cgspillregs(void); void cgpreamble(char *filename); void cgpostamble(); void cgfuncpreamble(struct symtable *sym); void cgfuncpostamble(struct symtable *sym); int cgloadint(int value, int type); int cgloadvar(struct symtable *sym, int op); int cgloadglobstr(int label); int cgadd(int r1, int r2); int cgsub(int r1, int r2); int cgmul(int r1, int r2); int cgdivmod(int r1, int r2, int op); int cgshlconst(int r, int val); int cgcall(struct symtable *sym, int numargs); void cgcopyarg(int r, int argposn); int cgstorglob(int r, struct symtable *sym); int cgstorlocal(int r, struct symtable *sym); void cgglobsym(struct symtable *node); void cgglobstr(int l, char *strvalue, int append); void cgglobstrend(void); int cgcompare_and_set(int ASTop, int r1, int r2, int type); int cgcompare_and_jump(int ASTop, int r1, int r2, int label, int type); void cglabel(int l); void cgjump(int l); int cgwiden(int r, int oldtype, int newtype); void cgreturn(int reg, struct symtable *sym); int cgaddress(struct symtable *sym); int cgderef(int r, int type); int cgstorderef(int r1, int r2, int type); int cgnegate(int r); int cginvert(int r); int cglognot(int r); void cgloadboolean(int r, int val); int cgboolean(int r, int op, int label); int cgand(int r1, int r2); int cgor(int r1, int r2); int cgxor(int r1, 
int r2); int cgshl(int r1, int r2); int cgshr(int r1, int r2); void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel); void cgmove(int r1, int r2); void cglinenum(int line); // expr.c struct ASTnode *expression_list(int endtoken); struct ASTnode *binexpr(int ptp); // stmt.c struct ASTnode *compound_statement(int inswitch); // misc.c void match(int t, char *what); void semi(void); void lbrace(void); void rbrace(void); void lparen(void); void rparen(void); void ident(void); void comma(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node); struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn); struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn); struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int nelems); struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype); struct symtable *addstruct(char *name); struct symtable *addunion(char *name); struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int nelems); struct symtable *addenum(char *name, int class, int value); struct symtable *addtypedef(char *name, int type, struct symtable *ctype); struct symtable *findglob(char *s); struct symtable *findlocl(char *s); struct symtable *findsymbol(char *s); struct symtable *findmember(char *s); struct symtable *findstruct(char *s); struct symtable *findunion(char *s); struct symtable *findenumtype(char *s); struct symtable *findenumval(char *s); struct symtable *findtypedef(char *s); void clear_symtable(void); void freeloclsyms(void); void freestaticsyms(void); void dumptable(struct symtable *head, char *name, int indent); void dumpsymtables(void); // decl.c int 
parse_type(struct symtable **ctype, int *class); int parse_stars(int type); int parse_cast(struct symtable **ctype); int declaration_list(struct symtable **ctype, int class, int et1, int et2, struct ASTnode **gluetree); void global_declarations(void); // types.c int inttype(int type); int ptrtype(int type); int pointer_to(int type); int value_at(int type); int typesize(int type, struct symtable *ctype); struct ASTnode *modify_type(struct ASTnode *tree, int rtype, struct symtable *rctype, int op); // opt.c struct ASTnode *optimise(struct ASTnode *n); ================================================ FILE: 62_Cleanup/defs.h ================================================ #include #include #include #include #include "incdir.h" // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 enum { TEXTLEN = 512 // Length of identifiers in input }; // Commands and default filenames #define AOUT "a.out" #ifdef __NASM__ #define ASCMD "nasm -g -f elf64 -w-ptr -o " #define LDCMD "cc -g -no-pie -fno-plt -Wall -o " #else #define ASCMD "as -g -o " #define LDCMD "cc -g -o " #endif #define CPPCMD "cpp -nostdinc -isystem " // Token types enum { T_EOF, // Binary operators T_ASSIGN, T_ASPLUS, T_ASMINUS, T_ASSTAR, T_ASSLASH, T_ASMOD, T_QUESTION, T_LOGOR, T_LOGAND, T_OR, T_XOR, T_AMPER, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, T_LSHIFT, T_RSHIFT, T_PLUS, T_MINUS, T_STAR, T_SLASH, T_MOD, // Other operators T_INC, T_DEC, T_INVERT, T_LOGNOT, // Type keywords T_VOID, T_CHAR, T_INT, T_LONG, // Other keywords T_IF, T_ELSE, T_WHILE, T_FOR, T_RETURN, T_STRUCT, T_UNION, T_ENUM, T_TYPEDEF, T_EXTERN, T_BREAK, T_CONTINUE, T_SWITCH, T_CASE, T_DEFAULT, T_SIZEOF, T_STATIC, // Structural tokens T_INTLIT, T_STRLIT, T_SEMI, T_IDENT, T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, T_LBRACKET, T_RBRACKET, T_COMMA, T_DOT, T_ARROW, T_COLON }; // Token structure struct token { int token; // Token type, from the enum list above char *tokstr; // String version of the token int intvalue; // For T_INTLIT, the 
                                // integer value
};

// AST node types. The first few line up
// with the related tokens. The number at the end of each
// row is the value of the first enumerator on that row.
enum {
  A_ASSIGN = 1, A_ASPLUS, A_ASMINUS, A_ASSTAR,                  //  1
  A_ASSLASH, A_ASMOD, A_TERNARY, A_LOGOR,                       //  5
  A_LOGAND, A_OR, A_XOR, A_AND, A_EQ, A_NE, A_LT,               //  9
  A_GT, A_LE, A_GE, A_LSHIFT, A_RSHIFT,                         // 16
  A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_MOD,               // 21
  A_INTLIT, A_STRLIT, A_IDENT, A_GLUE,                          // 26
  A_IF, A_WHILE, A_FUNCTION, A_WIDEN, A_RETURN,                 // 30
  A_FUNCCALL, A_DEREF, A_ADDR, A_SCALE,                         // 35
  A_PREINC, A_PREDEC, A_POSTINC, A_POSTDEC,                     // 39
  A_NEGATE, A_INVERT, A_LOGNOT, A_TOBOOL, A_BREAK,              // 43
  A_CONTINUE, A_SWITCH, A_CASE, A_DEFAULT, A_CAST               // 48
};

// Primitive types. The bottom 4 bits is an integer
// value that represents the level of indirection,
// e.g. 0= no pointer, 1= pointer, 2= pointer pointer etc.
enum {
  P_NONE, P_VOID = 16, P_CHAR = 32, P_INT = 48, P_LONG = 64,
  P_STRUCT=80, P_UNION=96
};

// Structural types
enum {
  S_VARIABLE, S_FUNCTION, S_ARRAY
};

// Storage classes
enum {
  C_GLOBAL = 1,                 // Globally visible symbol
  C_LOCAL,                      // Locally visible symbol
  C_PARAM,                      // Locally visible function parameter
  C_EXTERN,                     // External globally visible symbol
  C_STATIC,                     // Static symbol, visible in one file
  C_STRUCT,                     // A struct
  C_UNION,                      // A union
  C_MEMBER,                     // Member of a struct or union
  C_ENUMTYPE,                   // A named enumeration type
  C_ENUMVAL,                    // A named enumeration value
  C_TYPEDEF                     // A named typedef
};

// Symbol table structure
struct symtable {
  char *name;                   // Name of a symbol
  int type;                     // Primitive type for the symbol
  struct symtable *ctype;       // If struct/union, ptr to that type
  int stype;                    // Structural type for the symbol
  int class;                    // Storage class for the symbol
  int size;                     // Total size in bytes of this symbol
  int nelems;                   // Functions: # params. Arrays: # elements
                                // st_endlabel is an alias for st_posn:
                                // for functions, the end label
#define st_endlabel st_posn
  int st_posn;                  // For locals, the negative offset
                                // from the stack base pointer.
                                // expr.c also reads it as a member's
                                // offset and as an enum value's value
  int *initlist;                // List of initial values
  struct symtable *next;        // Next symbol in one list
  struct symtable *member;      // First member of a function, struct,
                                // union or enum
#ifdef __NASM__
  int extinit;                  // Only present in NASM builds
#endif
};

// Abstract Syntax Tree structure
struct ASTnode {
  int op;                       // "Operation" to be performed on this tree
  int type;                     // Type of any expression this tree generates
  struct symtable *ctype;       // If struct/union, ptr to that type
  int rvalue;                   // True if the node is an rvalue
  struct ASTnode *left;         // Left, middle and right child trees
  struct ASTnode *mid;
  struct ASTnode *right;
  struct symtable *sym;         // For many AST nodes, the pointer to
                                // the symbol in the symbol table
                                // a_intvalue is an alias for a_size:
                                // for A_INTLIT, the integer value
#define a_intvalue a_size
  int a_size;                   // For A_SCALE, the size to scale by
  int linenum;                  // Line number from where this node comes
};

enum {
  NOREG = -1,                   // Use NOREG when the AST generation
                                // functions have no register to return
  NOLABEL = 0                   // Use NOLABEL when we have no label to
                                // pass to genAST()
};

================================================
FILE: 62_Cleanup/expr.c
================================================
#include "defs.h"
#include "data.h"
#include "decl.h"

// Parsing of expressions
// Copyright (c) 2019 Warren Toomey, GPL3

// expression_list: <null>
//        | expression
//        | expression ',' expression_list
//        ;

// Parse a list of zero or more comma-separated expressions and
// return an AST composed of A_GLUE nodes with the left-hand child
// being the sub-tree of previous expressions (or NULL) and the right-hand
// child being the next expression. Each A_GLUE node will have size field
// set to the number of expressions in the tree at this point.
If no // expressions are parsed, NULL is returned struct ASTnode *expression_list(int endtoken) { struct ASTnode *tree = NULL; struct ASTnode *child = NULL; int exprcount = 0; // Loop until the end token while (Token.token != endtoken) { // Parse the next expression and increment the expression count child = binexpr(0); exprcount++; // Build an A_GLUE AST node with the previous tree as the left child // and the new expression as the right child. Store the expression count. tree = mkastnode(A_GLUE, P_NONE, NULL, tree, NULL, child, NULL, exprcount); // Stop when we reach the end token if (Token.token == endtoken) break; // Must have a ',' at this point match(T_COMMA, ","); } // Return the tree of expressions return (tree); } // Parse a function call and return its AST static struct ASTnode *funccall(void) { struct ASTnode *tree; struct symtable *funcptr; // Check that the identifier has been defined as a function, // then make a leaf node for it. if ((funcptr = findsymbol(Text)) == NULL || funcptr->stype != S_FUNCTION) { fatals("Undeclared function", Text); } // Get the '(' lparen(); // Parse the argument expression list tree = expression_list(T_RPAREN); // XXX Check type of each argument against the function's prototype // Build the function call AST node. Store the // function's return type as this node's type. 
// Also record the function's symbol-id tree = mkastunary(A_FUNCCALL, funcptr->type, funcptr->ctype, tree, funcptr, 0); // Get the ')' rparen(); return (tree); } // Parse the index into an array and return an AST tree for it static struct ASTnode *array_access(struct ASTnode *left) { struct ASTnode *right; // Check that the sub-tree is a pointer if (!ptrtype(left->type)) fatal("Not an array or pointer"); // Get the '[' scan(&Token); // Parse the following expression right = binexpr(0); // Get the ']' match(T_RBRACKET, "]"); // Ensure that this is of int type if (!inttype(right->type)) fatal("Array index is not of integer type"); // Make the left tree an rvalue left->rvalue = 1; // Scale the index by the size of the element's type right = modify_type(right, left->type, left->ctype, A_ADD); // Return an AST tree where the array's base has the offset added to it, // and dereference the element. Still an lvalue at this point. left = mkastnode(A_ADD, left->type, left->ctype, left, NULL, right, NULL, 0); left = mkastunary(A_DEREF, value_at(left->type), left->ctype, left, NULL, 0); return (left); } // Parse the member reference of a struct or union // and return an AST tree for it. If withpointer is true, // the access is through a pointer to the member. static struct ASTnode *member_access(struct ASTnode *left, int withpointer) { struct ASTnode *right; struct symtable *typeptr; struct symtable *m; // Check that the left AST tree is a pointer to struct or union if (withpointer && left->type != pointer_to(P_STRUCT) && left->type != pointer_to(P_UNION)) fatal("Expression is not a pointer to a struct/union"); // Or, check that the left AST tree is a struct or union. // If so, change it from an A_IDENT to an A_ADDR so that // we get the base address, not the value at this address. 
if (!withpointer) { if (left->type == P_STRUCT || left->type == P_UNION) left->op = A_ADDR; else fatal("Expression is not a struct/union"); } // Get the details of the composite type typeptr = left->ctype; // Skip the '.' or '->' token and get the member's name scan(&Token); ident(); // Find the matching member's name in the type // Die if we can't find it for (m = typeptr->member; m != NULL; m = m->next) if (!strcmp(m->name, Text)) break; if (m == NULL) fatals("No member found in struct/union: ", Text); // Make the left tree an rvalue left->rvalue = 1; // Build an A_INTLIT node with the offset right = mkastleaf(A_INTLIT, P_INT, NULL, NULL, m->st_posn); // Add the member's offset to the base of the struct/union // and dereference it. Still an lvalue at this point left = mkastnode(A_ADD, pointer_to(m->type), m->ctype, left, NULL, right, NULL, 0); left = mkastunary(A_DEREF, m->type, m->ctype, left, NULL, 0); return (left); } // Parse a parenthesised expression and // return an AST node representing it. static struct ASTnode *paren_expression(int ptp) { struct ASTnode *n; int type = 0; struct symtable *ctype = NULL; // Beginning of a parenthesised expression, skip the '('. scan(&Token); // If the token after is a type identifier, this is a cast expression switch (Token.token) { case T_IDENT: // We have to see if the identifier matches a typedef. // If not, treat it as an expression. if (findtypedef(Text) == NULL) { n = binexpr(0); // ptp is zero as expression inside ( ) break; } case T_VOID: case T_CHAR: case T_INT: case T_LONG: case T_STRUCT: case T_UNION: case T_ENUM: // Get the type inside the parentheses type = parse_cast(&ctype); // Skip the closing ')' and then parse the following expression rparen(); default: n = binexpr(ptp); // Scan in the expression. We pass in ptp // as the cast doesn't change the // expression's precedence } // We now have at least an expression in n, and possibly a non-zero type // in type if there was a cast. 
Skip the closing ')' if there was no cast. if (type == 0) rparen(); else // Otherwise, make a unary AST node for the cast n = mkastunary(A_CAST, type, ctype, n, NULL, 0); return (n); } // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(int ptp) { struct ASTnode *n; struct symtable *enumptr; struct symtable *varptr; int id; int type = 0; int size, class; struct symtable *ctype; switch (Token.token) { case T_STATIC: case T_EXTERN: fatal("Compiler doesn't support static or extern local declarations"); case T_SIZEOF: // Skip the T_SIZEOF and ensure we have a left parenthesis scan(&Token); if (Token.token != T_LPAREN) fatal("Left parenthesis expected after sizeof"); scan(&Token); // Get the type inside the parentheses type = parse_stars(parse_type(&ctype, &class)); // Get the type's size size = typesize(type, ctype); rparen(); // Make a leaf node int literal with the size return (mkastleaf(A_INTLIT, P_INT, NULL, NULL, size)); case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. // Make it a P_CHAR if it's within the P_CHAR range if (Token.intvalue >= 0 && Token.intvalue < 256) n = mkastleaf(A_INTLIT, P_CHAR, NULL, NULL, Token.intvalue); else n = mkastleaf(A_INTLIT, P_INT, NULL, NULL, Token.intvalue); break; case T_STRLIT: // For a STRLIT token, generate the assembly for it. id = genglobstr(Text, 0); // For successive STRLIT tokens, append their contents // to this one while (1) { scan(&Peektoken); if (Peektoken.token != T_STRLIT) break; genglobstr(Text, 1); scan(&Token); // To skip it properly } // Now make a leaf AST node for it. id is the string's label. genglobstrend(); n = mkastleaf(A_STRLIT, pointer_to(P_CHAR), NULL, NULL, id); break; case T_IDENT: // If the identifier matches an enum value, // return an A_INTLIT node if ((enumptr = findenumval(Text)) != NULL) { n = mkastleaf(A_INTLIT, P_INT, NULL, NULL, enumptr->st_posn); break; } // See if this identifier exists as a symbol. 
For arrays, set rvalue to 1. if ((varptr = findsymbol(Text)) == NULL) fatals("Unknown variable or function", Text); switch (varptr->stype) { case S_VARIABLE: n = mkastleaf(A_IDENT, varptr->type, varptr->ctype, varptr, 0); break; case S_ARRAY: n = mkastleaf(A_ADDR, varptr->type, varptr->ctype, varptr, 0); n->rvalue = 1; break; case S_FUNCTION: // Function call, see if the next token is a left parenthesis scan(&Token); if (Token.token != T_LPAREN) fatals("Function name used without parentheses", Text); return (funccall()); default: fatals("Identifier not a scalar or array variable", Text); } break; case T_LPAREN: return (paren_expression(ptp)); default: fatals("Expecting a primary expression, got token", Token.tokstr); } // Scan in the next token and return the leaf node scan(&Token); return (n); } // Parse a postfix expression and return // an AST node representing it. The // identifier is already in Text. static struct ASTnode *postfix(int ptp) { struct ASTnode *n; // Get the primary expression n = primary(ptp); // Loop until there are no more postfix operators while (1) { switch (Token.token) { case T_LBRACKET: // An array reference n = array_access(n); break; case T_DOT: // Access into a struct or union n = member_access(n, 0); break; case T_ARROW: // Pointer access into a struct or union n = member_access(n, 1); break; case T_INC: // Post-increment: skip over the token if (n->rvalue == 1) fatal("Cannot ++ on rvalue"); scan(&Token); // Can't do it twice if (n->op == A_POSTINC || n->op == A_POSTDEC) fatal("Cannot ++ and/or -- more than once"); // and change the AST operation n->op = A_POSTINC; break; case T_DEC: // Post-decrement: skip over the token if (n->rvalue == 1) fatal("Cannot -- on rvalue"); scan(&Token); // Can't do it twice if (n->op == A_POSTINC || n->op == A_POSTDEC) fatal("Cannot ++ and/or -- more than once"); // and change the AST operation n->op = A_POSTDEC; break; default: return (n); } } return (NULL); // Keep -Wall happy } // Convert a binary 
operator token into a binary AST operation. // We rely on a 1:1 mapping from token to AST operation static int binastop(int tokentype) { if (tokentype > T_EOF && tokentype <= T_MOD) return (tokentype); fatals("Syntax error, token", Tstring[tokentype]); return (0); // Keep -Wall happy } // Return true if a token is right-associative, // false otherwise. static int rightassoc(int tokentype) { if (tokentype >= T_ASSIGN && tokentype <= T_ASSLASH) return (1); return (0); } // Operator precedence for each token. Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 10, // T_EOF, T_ASSIGN, T_ASPLUS, 10, 10, // T_ASMINUS, T_ASSTAR, 10, 10, // T_ASSLASH, T_ASMOD, 15, // T_QUESTION, 20, 30, // T_LOGOR, T_LOGAND 40, 50, 60, // T_OR, T_XOR, T_AMPER 70, 70, // T_EQ, T_NE 80, 80, 80, 80, // T_LT, T_GT, T_LE, T_GE 90, 90, // T_LSHIFT, T_RSHIFT 100, 100, // T_PLUS, T_MINUS 110, 110, 110 // T_STAR, T_SLASH, T_MOD }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec; if (tokentype > T_MOD) fatals("Token with no precedence in op_precedence:", Tstring[tokentype]); prec = OpPrec[tokentype]; if (prec == 0) fatals("Syntax error, token", Tstring[tokentype]); return (prec); } // prefix_expression: postfix_expression // | '*' prefix_expression // | '&' prefix_expression // | '-' prefix_expression // | '++' prefix_expression // | '--' prefix_expression // ; // Parse a prefix expression and return // a sub-tree representing it. 
static struct ASTnode *prefix(int ptp) { struct ASTnode *tree; switch (Token.token) { case T_AMPER: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(ptp); // Ensure that it's an identifier if (tree->op != A_IDENT) fatal("& operator must be followed by an identifier"); // Prevent '&' being performed on an array if (tree->sym->stype == S_ARRAY) fatal("& operator cannot be performed on an array"); // Now change the operator to A_ADDR and the type to // a pointer to the original type tree->op = A_ADDR; tree->type = pointer_to(tree->type); break; case T_STAR: // Get the next token and parse it // recursively as a prefix expression. // Make it an rvalue scan(&Token); tree = prefix(ptp); tree->rvalue= 1; // Ensure the tree's type is a pointer if (!ptrtype(tree->type)) fatal("* operator must be followed by an expression of pointer type"); // Prepend an A_DEREF operation to the tree tree = mkastunary(A_DEREF, value_at(tree->type), tree->ctype, tree, NULL, 0); break; case T_MINUS: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(ptp); // Prepend a A_NEGATE operation to the tree and // make the child an rvalue. Because chars are unsigned, // also widen this if needed to int so that it's signed tree->rvalue = 1; if (tree->type == P_CHAR) tree->type = P_INT; tree = mkastunary(A_NEGATE, tree->type, tree->ctype, tree, NULL, 0); break; case T_INVERT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(ptp); // Prepend a A_INVERT operation to the tree and // make the child an rvalue. tree->rvalue = 1; tree = mkastunary(A_INVERT, tree->type, tree->ctype, tree, NULL, 0); break; case T_LOGNOT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(ptp); // Prepend a A_LOGNOT operation to the tree and // make the child an rvalue. 
tree->rvalue = 1; tree = mkastunary(A_LOGNOT, tree->type, tree->ctype, tree, NULL, 0); break; case T_INC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(ptp); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("++ operator must be followed by an identifier"); // Prepend an A_PREINC operation to the tree tree = mkastunary(A_PREINC, tree->type, tree->ctype, tree, NULL, 0); break; case T_DEC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(ptp); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("-- operator must be followed by an identifier"); // Prepend an A_PREDEC operation to the tree tree = mkastunary(A_PREDEC, tree->type, tree->ctype, tree, NULL, 0); break; default: tree = postfix(ptp); } return (tree); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; struct ASTnode *ltemp, *rtemp; int ASTop; int tokentype; // Get the tree on the left. // Fetch the next token at the same time. 
left = prefix(ptp); // If we hit one of several terminating tokens, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON || tokentype == T_RBRACE) { left->rvalue = 1; return (left); } // While the precedence of this token is more than that of the // previous token precedence, or it's right associative and // equal to the previous token's precedence while ((op_precedence(tokentype) > ptp) || (rightassoc(tokentype) && op_precedence(tokentype) == ptp)) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Determine the operation to be performed on the sub-trees ASTop = binastop(tokentype); switch (ASTop) { case A_TERNARY: // Ensure we have a ':' token, scan in the expression after it match(T_COLON, ":"); ltemp = binexpr(0); // Build and return the AST for this statement. Use the middle // expression's type as the return type. XXX We should also // consider the third expression's type. return (mkastnode (A_TERNARY, right->type, right->ctype, left, right, ltemp, NULL, 0)); case A_ASSIGN: // Assignment // Make the right tree into an rvalue right->rvalue = 1; // Ensure the right's type matches the left right = modify_type(right, left->type, left->ctype, 0); if (right == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree. However, switch // left and right around, so that the right expression's // code will be generated before the left expression ltemp = left; left = right; right = ltemp; break; default: // We are not doing a ternary or assignment, so both trees should // be rvalues. Convert both trees into rvalue if they are lvalue trees left->rvalue = 1; right->rvalue = 1; // Ensure the two types are compatible by trying // to modify each tree to match the other's type. 
ltemp = modify_type(left, right->type, right->ctype, ASTop); rtemp = modify_type(right, left->type, left->ctype, ASTop); if (ltemp == NULL && rtemp == NULL) fatal("Incompatible types in binary expression"); if (ltemp != NULL) left = ltemp; if (rtemp != NULL) right = rtemp; } // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. left = mkastnode(binastop(tokentype), left->type, left->ctype, left, NULL, right, NULL, 0); // Some operators produce an int result regardless of their operands switch (binastop(tokentype)) { case A_LOGOR: case A_LOGAND: case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: left->type = P_INT; } // Update the details of the current token. // If we hit a terminating token, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON || tokentype == T_RBRACE) { left->rvalue = 1; return (left); } } // Return the tree we have when the precedence // is the same or lower left->rvalue = 1; return (left); } ================================================ FILE: 62_Cleanup/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number static int labelid = 1; int genlabel(void) { return (labelid++); } static void update_line(struct ASTnode *n) { // Output the line into the assembly if we've // changed the line number in the AST node if (n->linenum != 0 && Line != n->linenum) { Line = n->linenum; cglinenum(Line); } } // Generate the code for an IF statement // and an optional ELSE clause. static int genIF(struct ASTnode *n, int looptoplabel, int loopendlabel) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. 
// When there is no ELSE clause, Lfalse _is_ // the ending label! Lfalse = genlabel(); if (n->right) Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. genAST(n->left, Lfalse, NOLABEL, NOLABEL, n->op); genfreeregs(NOREG); // Generate the true compound statement genAST(n->mid, NOLABEL, looptoplabel, loopendlabel, n->op); genfreeregs(NOREG); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOLABEL, NOLABEL, loopendlabel, n->op); genfreeregs(NOREG); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement static int genWHILE(struct ASTnode *n) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = genlabel(); Lend = genlabel(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. genAST(n->left, Lend, Lstart, Lend, n->op); genfreeregs(NOREG); // Generate the compound statement for the body genAST(n->right, NOLABEL, Lstart, Lend, n->op); genfreeregs(NOREG); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Generate the code for a SWITCH statement static int genSWITCH(struct ASTnode *n) { int *caseval, *caselabel; int Ljumptop, Lend; int i, reg, defaultlabel = 0, casecount = 0; struct ASTnode *c; // Create arrays for the case values and associated labels. // Ensure that we have at least one position in each array. caseval = (int *) malloc((n->a_intvalue + 1) * sizeof(int)); caselabel = (int *) malloc((n->a_intvalue + 1) * sizeof(int)); // Generate labels for the top of the jump table, and the // end of the switch statement. Set a default label for // the end of the switch, in case we don't have a default. 
Ljumptop = genlabel(); Lend = genlabel(); defaultlabel = Lend; // Output the code to calculate the switch condition reg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, 0); cgjump(Ljumptop); genfreeregs(reg); // Walk the right-child linked list to // generate the code for each case for (i = 0, c = n->right; c != NULL; i++, c = c->right) { // Get a label for this case. Store it // and the case value in the arrays. // Record if it is the default case. caselabel[i] = genlabel(); caseval[i] = c->a_intvalue; cglabel(caselabel[i]); if (c->op == A_DEFAULT) defaultlabel = caselabel[i]; else casecount++; // Generate the case code. Pass in the end label for the breaks. // If case has no body, we will fall into the following body. if (c->left) genAST(c->left, NOLABEL, NOLABEL, Lend, 0); genfreeregs(NOREG); } // Ensure the last case jumps past the switch table cgjump(Lend); // Now output the switch table and the end label. cgswitch(reg, casecount, Ljumptop, caselabel, caseval, defaultlabel); cglabel(Lend); return (NOREG); } // Generate the code for an // A_LOGAND or A_LOGOR operation static int gen_logandor(struct ASTnode *n) { // Generate two labels int Lfalse = genlabel(); int Lend = genlabel(); int reg; // Generate the code for the left expression // followed by the jump to the false label reg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, 0); cgboolean(reg, n->op, Lfalse); genfreeregs(NOREG); // Generate the code for the right expression // followed by the jump to the false label reg = genAST(n->right, NOLABEL, NOLABEL, NOLABEL, 0); cgboolean(reg, n->op, Lfalse); genfreeregs(reg); // We didn't jump so set the right boolean value if (n->op == A_LOGAND) { cgloadboolean(reg, 1); cgjump(Lend); cglabel(Lfalse); cgloadboolean(reg, 0); } else { cgloadboolean(reg, 0); cgjump(Lend); cglabel(Lfalse); cgloadboolean(reg, 1); } cglabel(Lend); return (reg); } // Generate the code to copy the arguments of a // function call to its parameters, then call the // function itself. 
Return the register that holds // the function's return value. static int gen_funccall(struct ASTnode *n) { struct ASTnode *gluetree = n->left; int reg; int numargs = 0; // Save the registers before we copy the arguments cgspillregs(); // If there is a list of arguments, walk this list // from the last argument (right-hand child) to the // first while (gluetree) { // Calculate the expression's value reg = genAST(gluetree->right, NOLABEL, NOLABEL, NOLABEL, gluetree->op); // Copy this into the n'th function parameter: size is 1, 2, 3, ... cgcopyarg(reg, gluetree->a_size); // Keep the first (highest) number of arguments if (numargs == 0) numargs = gluetree->a_size; gluetree = gluetree->left; } // Call the function, clean up the stack (based on numargs), // and return its result return (cgcall(n->sym, numargs)); } // Generate code for a ternary expression static int gen_ternary(struct ASTnode *n) { int Lfalse, Lend; int reg, expreg; // Generate two labels: one for the // false expression, and one for the // end of the overall expression Lfalse = genlabel(); Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. genAST(n->left, Lfalse, NOLABEL, NOLABEL, n->op); // genfreeregs(NOREG); // Get a register to hold the result of the two expressions reg = cgallocreg(); // Generate the true expression and the false label. // Move the expression result into the known register. expreg = genAST(n->mid, NOLABEL, NOLABEL, NOLABEL, n->op); cgmove(expreg, reg); cgfreereg(expreg); cgjump(Lend); cglabel(Lfalse); // Generate the false expression and the end label. // Move the expression result into the known register. expreg = genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); cgmove(expreg, reg); cgfreereg(expreg); cglabel(Lend); return (reg); } // Given an AST, an optional label, and the AST op // of the parent, generate assembly code recursively. // Return the register id with the tree's final value. 
//
// Parameters:
//   n            - root of the (sub)tree to generate code for; may be NULL
//   iflabel      - label handed to cgcompare_and_jump() and cgboolean()
//   looptoplabel - label that A_CONTINUE jumps to
//   loopendlabel - label that A_BREAK jumps to
//   parentASTop  - op of the parent node; selects between the
//                  "compare and jump" and "compare and set" forms and
//                  decides whether an A_IDENT below A_DEREF is loaded
// NOREG is returned for an empty tree and for nodes that leave no value.
int genAST(struct ASTnode *n, int iflabel, int looptoplabel,
	   int loopendlabel, int parentASTop) {
  int leftreg = NOREG, rightreg = NOREG;

  // Empty tree, do nothing
  if (n == NULL)
    return (NOREG);

  // Update the line number in the output
  update_line(n);

  // We have some specific AST node handling at the top
  // so that we don't evaluate the child sub-trees immediately
  switch (n->op) {
    case A_IF:
      return (genIF(n, looptoplabel, loopendlabel));
    case A_WHILE:
      return (genWHILE(n));
    case A_SWITCH:
      return (genSWITCH(n));
    case A_FUNCCALL:
      return (gen_funccall(n));
    case A_TERNARY:
      return (gen_ternary(n));
    case A_LOGOR:
      return (gen_logandor(n));
    case A_LOGAND:
      return (gen_logandor(n));
    case A_GLUE:
      // Do each child statement, and free the
      // registers after each child
      if (n->left != NULL)
	genAST(n->left, iflabel, looptoplabel, loopendlabel, n->op);
      genfreeregs(NOREG);
      if (n->right != NULL)
	genAST(n->right, iflabel, looptoplabel, loopendlabel, n->op);
      genfreeregs(NOREG);
      return (NOREG);
    case A_FUNCTION:
      // Generate the function's preamble before the code
      // in the child sub-tree
      cgfuncpreamble(n->sym);
      genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op);
      cgfuncpostamble(n->sym);
      return (NOREG);
  }

  // General AST node handling below

  // Get the left and right sub-tree values
  if (n->left)
    leftreg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op);
  if (n->right)
    rightreg = genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op);

  switch (n->op) {
    case A_ADD:
      return (cgadd(leftreg, rightreg));
    case A_SUBTRACT:
      return (cgsub(leftreg, rightreg));
    case A_MULTIPLY:
      return (cgmul(leftreg, rightreg));
    case A_DIVIDE:
      return (cgdivmod(leftreg, rightreg, A_DIVIDE));
    case A_MOD:
      return (cgdivmod(leftreg, rightreg, A_MOD));
    case A_AND:
      return (cgand(leftreg, rightreg));
    case A_OR:
      return (cgor(leftreg, rightreg));
    case A_XOR:
      return (cgxor(leftreg, rightreg));
    case A_LSHIFT:
      return (cgshl(leftreg, rightreg));
    case A_RSHIFT:
      return (cgshr(leftreg, rightreg));
    case A_EQ:
    case A_NE:
    case A_LT:
    case A_GT:
    case A_LE:
    case A_GE:
      // If the parent AST node is an A_IF, A_WHILE or A_TERNARY,
      // generate a compare followed by a jump. Otherwise, compare
      // registers and set one to 1 or 0 based on the comparison.
      if (parentASTop == A_IF || parentASTop == A_WHILE ||
	  parentASTop == A_TERNARY)
	return (cgcompare_and_jump
		(n->op, leftreg, rightreg, iflabel, n->left->type));
      else
	return (cgcompare_and_set(n->op, leftreg, rightreg, n->left->type));
    case A_INTLIT:
      return (cgloadint(n->a_intvalue, n->type));
    case A_STRLIT:
      return (cgloadglobstr(n->a_intvalue));
    case A_IDENT:
      // Load our value if we are an rvalue
      // or we are being dereferenced
      if (n->rvalue || parentASTop == A_DEREF) {
	return (cgloadvar(n->sym, n->op));
      } else
	return (NOREG);
    case A_ASPLUS:
    case A_ASMINUS:
    case A_ASSTAR:
    case A_ASSLASH:
    case A_ASMOD:
    case A_ASSIGN:

      // For the '+=' and friends operators, generate suitable code
      // and get the register with the result. Then take the left child,
      // make it the right child so that we can fall into the assignment code.
      // A plain A_ASSIGN matches none of these cases and goes straight on.
      switch (n->op) {
	case A_ASPLUS:
	  leftreg = cgadd(leftreg, rightreg);
	  n->right = n->left;
	  break;
	case A_ASMINUS:
	  leftreg = cgsub(leftreg, rightreg);
	  n->right = n->left;
	  break;
	case A_ASSTAR:
	  leftreg = cgmul(leftreg, rightreg);
	  n->right = n->left;
	  break;
	case A_ASSLASH:
	  leftreg = cgdivmod(leftreg, rightreg, A_DIVIDE);
	  n->right = n->left;
	  break;
	case A_ASMOD:
	  leftreg = cgdivmod(leftreg, rightreg, A_MOD);
	  n->right = n->left;
	  break;
      }

      // Now into the assignment code
      // Are we assigning to an identifier or through a pointer?
      switch (n->right->op) {
	case A_IDENT:
	  if (n->right->sym->class == C_GLOBAL ||
	      n->right->sym->class == C_EXTERN ||
	      n->right->sym->class == C_STATIC)
	    return (cgstorglob(leftreg, n->right->sym));
	  else
	    return (cgstorlocal(leftreg, n->right->sym));
	case A_DEREF:
	  return (cgstorderef(leftreg, rightreg, n->right->type));
	default:
	  // There is no break after this call, so the code relies on
	  // fatald() not returning rather than falling into A_WIDEN
	  fatald("Can't A_ASSIGN in genAST(), op", n->op);
      }
    case A_WIDEN:
      // Widen the child's type to the parent's type
      return (cgwiden(leftreg, n->left->type, n->type));
    case A_RETURN:
      cgreturn(leftreg, Functionid);
      return (NOREG);
    case A_ADDR:
      // If we have a symbol, get its address. Otherwise,
      // the left register already has the address because
      // it's a member access
      if (n->sym != NULL)
	return (cgaddress(n->sym));
      else
	return (leftreg);
    case A_DEREF:
      // If we are an rvalue, dereference to get the value we point at,
      // otherwise leave it for A_ASSIGN to store through the pointer
      if (n->rvalue)
	return (cgderef(leftreg, n->left->type));
      else
	return (leftreg);
    case A_SCALE:
      // Small optimisation: use shift if the
      // scale value is a known power of two
      switch (n->a_size) {
	case 2:
	  return (cgshlconst(leftreg, 1));
	case 4:
	  return (cgshlconst(leftreg, 2));
	case 8:
	  return (cgshlconst(leftreg, 3));
	default:
	  // Load a register with the size and
	  // multiply the leftreg by this size
	  rightreg = cgloadint(n->a_size, P_INT);
	  return (cgmul(leftreg, rightreg));
      }
    case A_POSTINC:
    case A_POSTDEC:
      // Load the variable's value into a register
      // and post increment/decrement it.
      // The symbol is taken from this node itself.
      return (cgloadvar(n->sym, n->op));
    case A_PREINC:
    case A_PREDEC:
      // Load the variable's value into a register
      // and pre increment/decrement it.
      // The symbol is taken from the left child.
      return (cgloadvar(n->left->sym, n->op));
    case A_NEGATE:
      return (cgnegate(leftreg));
    case A_INVERT:
      return (cginvert(leftreg));
    case A_LOGNOT:
      return (cglognot(leftreg));
    case A_TOBOOL:
      // If the parent AST node is an A_IF or A_WHILE, generate
      // a compare followed by a jump. Otherwise, set the register
      // to 0 or 1 based on it's zeroeness or non-zeroeness
      return (cgboolean(leftreg, parentASTop, iflabel));
    case A_BREAK:
      cgjump(loopendlabel);
      return (NOREG);
    case A_CONTINUE:
      cgjump(looptoplabel);
      return (NOREG);
    case A_CAST:
      return (leftreg);		// Not much to do
    default:
      fatald("Unknown AST operator", n->op);
  }
  return (NOREG);		// Keep -Wall happy
}

// The functions below are thin wrappers: each one forwards
// its arguments to the cg*() function of the matching name.

void genpreamble(char *filename) {
  cgpreamble(filename);
}

void genpostamble() {
  cgpostamble();
}

void genfreeregs(int keepreg) {
  cgfreeallregs(keepreg);
}

void genglobsym(struct symtable *node) {
  cgglobsym(node);
}

// Generate a global string.
// If append is true, append to
// previous genglobstr() call.
// Return the newly generated label passed to cgglobstr().
int genglobstr(char *strvalue, int append) {
  int l = genlabel();
  cgglobstr(l, strvalue, append);
  return (l);
}

void genglobstrend(void) {
  cgglobstrend();
}

int genprimsize(int type) {
  return (cgprimsize(type));
}

int genalign(int type, int offset, int direction) {
  return (cgalign(type, offset, direction));
}

================================================
FILE: 62_Cleanup/include/ctype.h
================================================
#ifndef _CTYPE_H_
# define _CTYPE_H_

// Prototypes for the character classification and conversion functions
int isalnum(int c);
int isalpha(int c);
int iscntrl(int c);
int isdigit(int c);
int isgraph(int c);
int islower(int c);
int isprint(int c);
int ispunct(int c);
int isspace(int c);
int isupper(int c);
int isxdigit(int c);
int isascii(int c);
int isblank(int c);

int toupper(int c);
int tolower(int c);

#endif	// _CTYPE_H_

================================================
FILE: 62_Cleanup/include/errno.h
================================================
#ifndef _ERRNO_H_
# define _ERRNO_H_

// errno expands to a dereference of the pointer
// returned by __errno_location()
int * __errno_location(void);

#define errno (* __errno_location())

#endif // _ERRNO_H_

================================================
FILE: 62_Cleanup/include/fcntl.h
================================================
#ifndef _FCNTL_H_
# define _FCNTL_H_

// Access mode flags, written as octal constants
#define O_RDONLY        00
#define O_WRONLY        01
#define O_RDWR          02

int open(char *pathname, int
flags); #endif // _FCNTL_H_ ================================================ FILE: 62_Cleanup/include/stddef.h ================================================ #ifndef _STDDEF_H_ # define _STDDEF_H_ #ifndef NULL # define NULL (void *)0 #endif typedef long size_t; #endif //_STDDEF_H_ ================================================ FILE: 62_Cleanup/include/stdio.h ================================================ #ifndef _STDIO_H_ # define _STDIO_H_ #include #ifndef NULL # define NULL (void *)0 #endif #ifndef EOF # define EOF (-1) #endif // This FILE definition will do for now typedef char * FILE; FILE *fopen(char *pathname, char *mode); size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream); size_t fwrite(void *ptr, size_t size, size_t nmemb, FILE *stream); int fclose(FILE *stream); int printf(char *format); int fprintf(FILE *stream, char *format); int sprintf(char *str, char *format); int snprintf(char *str, size_t size, char *format); int fgetc(FILE *stream); int fputc(int c, FILE *stream); int fputs(char *s, FILE *stream); int putc(int c, FILE *stream); int putchar(int c); int puts(char *s); FILE *popen(char *command, char *type); int pclose(FILE *stream); extern FILE *stdin; extern FILE *stdout; extern FILE *stderr; #endif // _STDIO_H_ ================================================ FILE: 62_Cleanup/include/stdlib.h ================================================ #ifndef _STDLIB_H_ # define _STDLIB_H_ void exit(int status); void _Exit(int status); void *malloc(int size); void free(void *ptr); void *calloc(int nmemb, int size); void *realloc(void *ptr, int size); int system(char *command); #endif // _STDLIB_H_ ================================================ FILE: 62_Cleanup/include/string.h ================================================ #ifndef _STRING_H_ # define _STRING_H_ char *strdup(char *s); char *strchr(char *s, int c); char *strrchr(char *s, int c); int strcmp(char *s1, char *s2); int strncmp(char *s1, char *s2, size_t n); char 
*strerror(int errnum); #endif // _STRING_H_ ================================================ FILE: 62_Cleanup/include/unistd.h ================================================ #ifndef _UNISTD_H_ # define _UNISTD_H_ void _exit(int status); int unlink(char *pathname); #endif // _UNISTD_H_ ================================================ FILE: 62_Cleanup/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Given a string with a '.' and at least a 1-character suffix // after the '.', change the suffix to be the given character. // Return the new string or NULL if the original string could // not be modified char *alter_suffix(char *str, char suffix) { char *posn; char *newstr; // Clone the string if ((newstr = strdup(str)) == NULL) return (NULL); // Find the '.' if ((posn = strrchr(newstr, '.')) == NULL) return (NULL); // Ensure there is a suffix posn++; if (*posn == '\0') return (NULL); // Change the suffix and NUL-terminate the string *posn = suffix; posn++; *posn = '\0'; return (newstr); } // Given an input filename, compile that file // down to assembly code. 
Return the new file's name static char *do_compile(char *filename) { char cmd[TEXTLEN]; // Change the input file's suffix to .s Outfilename = alter_suffix(filename, 's'); if (Outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .c on the end\n", filename); exit(1); } // Generate the pre-processor command snprintf(cmd, TEXTLEN, "%s %s %s", CPPCMD, INCDIR, filename); // Open up the pre-processor pipe if ((Infile = popen(cmd, "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", filename, strerror(errno)); exit(1); } Infilename = filename; // Create the output file if ((Outfile = fopen(Outfilename, "w")) == NULL) { fprintf(stderr, "Unable to create %s: %s\n", Outfilename, strerror(errno)); exit(1); } Line = 1; // Reset the scanner Linestart = 1; Putback = '\n'; clear_symtable(); // Clear the symbol table if (O_verbose) printf("compiling %s\n", filename); scan(&Token); // Get the first token from the input Peektoken.token = 0; // and set there is no lookahead token genpreamble(filename); // Output the preamble global_declarations(); // Parse the global declarations genpostamble(); // Output the postamble fclose(Outfile); // Close the output file // Dump the symbol table if requested if (O_dumpsym) { printf("Symbols for %s\n", filename); dumpsymtables(); fprintf(stdout, "\n\n"); } freestaticsyms(); // Free any static symbols in the file return (Outfilename); } // Given an input filename, assemble that file // down to object code. 
Return the object filename char *do_assemble(char *filename) { char cmd[TEXTLEN]; int err; char *outfilename = alter_suffix(filename, 'o'); if (outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .s on the end\n", filename); exit(1); } // Build the assembly command and run it #ifdef __NASM__ char *incfilename = alter_suffix(filename, 'n'); if (incfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .s on the end\n", filename); exit(1); } // sprintf(cmd, "%s %s -p%s %s", ASCMD, outfilename, incfilename, filename); sprintf(cmd, "%s %s %s", ASCMD, outfilename, filename); #else snprintf(cmd, TEXTLEN, "%s %s %s", ASCMD, outfilename, filename); #endif if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Assembly of %s failed\n", filename); exit(1); } return (outfilename); } // Given a list of object files and an output filename, // link all of the object filenames together. void do_link(char *outfilename, char **objlist) { int cnt, size = TEXTLEN; char cmd[TEXTLEN], *cptr; int err; // Start with the linker command and the output file cptr = cmd; cnt = snprintf(cptr, size, "%s %s ", LDCMD, outfilename); cptr += cnt; size -= cnt; // Now append each object file while (*objlist != NULL) { cnt = snprintf(cptr, size, "%s ", *objlist); cptr += cnt; size -= cnt; objlist++; } if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Linking failed\n"); exit(1); } } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s [-vcSTM] [-o outfile] file [file ...]\n", prog); fprintf(stderr, " -v give verbose output of the compilation stages\n"); fprintf(stderr, " -c generate object files but don't link them\n"); fprintf(stderr, " -S generate assembly files but don't link them\n"); fprintf(stderr, " -T dump the AST trees for each input file\n"); fprintf(stderr, " -M dump the symbol table for each input file\n"); fprintf(stderr, " -o outfile, 
produce the outfile executable file\n"); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. enum { MAXOBJ = 100 }; int main(int argc, char **argv) { char *outfilename = AOUT; char *asmfile, *objfile; char *objlist[MAXOBJ]; int i, j, objcnt = 0; // Initialise our variables O_dumpAST = 0; O_dumpsym = 0; O_keepasm = 0; O_assemble = 0; O_verbose = 0; O_dolink = 1; // Scan for command-line options for (i = 1; i < argc; i++) { // No leading '-', stop scanning for options if (*argv[i] != '-') break; // For each option in this argument for (j = 1; (*argv[i] == '-') && argv[i][j]; j++) { switch (argv[i][j]) { case 'o': outfilename = argv[++i]; // Save & skip to next argument break; case 'T': O_dumpAST = 1; break; case 'M': O_dumpsym = 1; break; case 'c': O_assemble = 1; O_keepasm = 0; O_dolink = 0; break; case 'S': O_keepasm = 1; O_assemble = 0; O_dolink = 0; break; case 'v': O_verbose = 1; break; default: usage(argv[0]); } } } // Ensure we have at lease one input file argument if (i >= argc) usage(argv[0]); // Work on each input file in turn while (i < argc) { asmfile = do_compile(argv[i]); // Compile the source file if (O_dolink || O_assemble) { objfile = do_assemble(asmfile); // Assemble it to object forma if (objcnt == (MAXOBJ - 2)) { fprintf(stderr, "Too many object files for the compiler to handle\n"); exit(1); } objlist[objcnt++] = objfile; // Add the object file's name objlist[objcnt] = NULL; // to the list of object files } if (!O_keepasm) // Remove the assembly file if unlink(asmfile); // we don't need to keep it i++; } // Now link all the object files together if (O_dolink) { do_link(outfilename, objlist); // If we don't need to keep the object // files, then remove them if (!O_assemble) { for (i = 0; objlist[i] != NULL; i++) unlink(objlist[i]); } } return (0); } ================================================ FILE: 62_Cleanup/misc.c 
================================================ #include "defs.h" #include "data.h" #include "decl.h" #include #include // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current token is t, // and fetch the next token. Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Match a comma and fetch the next token void comma(void) { match(T_COMMA, "comma"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d of %s\n", s, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d of %s\n", s1, s2, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d of %s\n", s, d, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d of %s\n", s, c, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } ================================================ FILE: 62_Cleanup/opt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST Tree Optimisation Code // Copyright (c) 2019 Warren Toomey, GPL3 // Fold an AST tree with a binary operator // and two A_INTLIT children. 
Return either // the original tree or a new leaf node. static struct ASTnode *fold2(struct ASTnode *n) { int val, leftval, rightval; // Get the values from each child leftval = n->left->a_intvalue; rightval = n->right->a_intvalue; // Perform some of the binary operations. // For any AST op we can't do, return // the original tree. switch (n->op) { case A_ADD: val = leftval + rightval; break; case A_SUBTRACT: val = leftval - rightval; break; case A_MULTIPLY: val = leftval * rightval; break; case A_DIVIDE: // Don't try to divide by zero. if (rightval == 0) return (n); val = leftval / rightval; break; default: return (n); } // Return a leaf node with the new value return (mkastleaf(A_INTLIT, n->type, NULL, NULL, val)); } // Fold an AST tree with a unary operator // and one INTLIT children. Return either // the original tree or a new leaf node. static struct ASTnode *fold1(struct ASTnode *n) { int val; // Get the child value. Do the // operation if recognised. // Return the new leaf node. val = n->left->a_intvalue; switch (n->op) { case A_WIDEN: break; case A_INVERT: val = ~val; break; case A_LOGNOT: val = !val; break; default: return (n); } // Return a leaf node with the new value return (mkastleaf(A_INTLIT, n->type, NULL, NULL, val)); } // Attempt to do constant folding on // the AST tree with the root node n static struct ASTnode *fold(struct ASTnode *n) { if (n == NULL) return (NULL); // Fold on the left child, then // do the same on the right child n->left = fold(n->left); n->right = fold(n->right); // If both children are A_INTLITs, do a fold2() if (n->left && n->left->op == A_INTLIT) { if (n->right && n->right->op == A_INTLIT) n = fold2(n); else // If only the left is A_INTLIT, do a fold1() n = fold1(n); } // Return the possibly modified tree return (n); } // Optimise an AST tree by // constant folding in all sub-trees struct ASTnode *optimise(struct ASTnode *n) { n = fold(n); return (n); } ================================================ FILE: 62_Cleanup/scan.c 
================================================ #include "defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { int i; for (i = 0; s[i] != '\0'; i++) if (s[i] == (char) c) return (i); return (-1); } // Get the next character from the input file. static int next(void) { int c, l; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return (c); } c = fgetc(Infile); // Read from input file while (Linestart && c == '#') { // We've hit a pre-processor statement Linestart = 0; // No longer at the start of the line scan(&Token); // Get the line number into l if (Token.token != T_INTLIT) fatals("Expecting pre-processor line number, got:", Text); l = Token.intvalue; scan(&Token); // Get the filename in Text if (Token.token != T_STRLIT) fatals("Expecting pre-processor file name, got:", Text); if (Text[0] != '<') { // If this is a real filename if (strcmp(Text, Infilename)) // and not the one we have now Infilename = strdup(Text); // save it. Then update the line num Line = l; } while ((c = fgetc(Infile)) != '\n'); // Skip to the end of the line c = fgetc(Infile); // and get the next character Linestart = 1; // Now back at the start of the line } Linestart = 0; // No longer at the start of the line if ('\n' == c) { Line++; // Increment line count Linestart = 1; // Now back at the start of the line } return (c); } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. 
static int skip(void) {
  int ch = next();

  // Keep reading while the character is a space, tab,
  // newline, carriage return or form feed
  while (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' || ch == '\f')
    ch = next();
  return (ch);
}

// Read the hexadecimal digits that follow a '\x' escape
// and return their value. It is a fatal error if there
// are no digits or if the value is above 255.
static int hexchar(void) {
  int ch, digit;
  int value = 0, seen = 0;

  // Collect digits for as long as they are hexadecimal
  ch = next();
  while (isxdigit(ch)) {
    // The digit's value is its position in this string
    digit = chrpos("0123456789abcdef", tolower(ch));
    value = value * 16 + digit;
    seen = 1;
    ch = next();
  }

  // The character that ended the run is not ours
  putback(ch);

  if (seen == 0)
    fatal("missing digits after '\\x'");
  if (value > 255)
    fatal("value out of range after '\\x'");
  return (value);
}

// Return the next character from a character
// or string literal, with any backslash escape
// sequence turned into the character it stands for
static int scanch(void) {
  int i, c, c2;

  // Anything other than a backslash is returned as it is
  c = next();
  if (c != '\\')
    return (c);

  // Work out what follows the backslash
  c = next();
  switch (c) {
    case 'a':
      return ('\a');
    case 'b':
      return ('\b');
    case 'f':
      return ('\f');
    case 'n':
      return ('\n');
    case 'r':
      return ('\r');
    case 't':
      return ('\t');
    case 'v':
      return ('\v');
    case '\\':
      return ('\\');
    case '"':
      return ('"');
    case '\'':
      return ('\'');

      // An octal constant: gather octal digits into c2 and
      // count them in i. Stop at the first character that is
      // not an octal digit, or once a fourth digit turns up.
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
      i = 0;
      c2 = 0;
      while (isdigit(c) && c < '8') {
	i++;
	if (i > 3)
	  break;
	c2 = c2 * 8 + (c - '0');
	c = next();
      }

      putback(c);		// Put back the character that stopped us
      return (c2);
    case 'x':
      return (hexchar());
    default:
      fatalc("unknown escape sequence", c);
  }
  return (c);
}

// Scan and return an integer literal
// value from the input file.
static int scanint(int c) { int k, val = 0, radix = 10; // Assume the radix is 10, but if it starts with 0 if (c == '0') { // and the next character is 'x', it's radix 16 if ((c = next()) == 'x') { radix = 16; c = next(); } else // Otherwise, it's radix 8 radix = 8; } // Convert each character into an int value while ((k = chrpos("0123456789abcdef", tolower(c))) >= 0) { if (k >= radix) fatalc("invalid digit in integer literal", c); val = val * radix + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan in a string literal from the input file, // and store it in buf[]. Return the length of // the string. static int scanstr(char *buf) { int i, c; // Loop while we have enough buffer space for (i = 0; i < TEXTLEN - 1; i++) { // Get the next char and append to buf // Return when we hit the ending double quote if ((c = scanch()) == '"') { buf[i] = 0; return (i); } buf[i] = (char)c; } // Ran out of buf[] space fatal("String literal too long"); return (0); } // Scan an identifier from the input file and // store it in buf[]. Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { fatal("Identifier too long"); } else if (i < lim - 1) { buf[i++] = (char)c; } c = next(); } // We hit a non-valid character, put it back. // NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. 
static int keyword(char *s) { switch (*s) { case 'b': if (!strcmp(s, "break")) return (T_BREAK); break; case 'c': if (!strcmp(s, "case")) return (T_CASE); if (!strcmp(s, "char")) return (T_CHAR); if (!strcmp(s, "continue")) return (T_CONTINUE); break; case 'd': if (!strcmp(s, "default")) return (T_DEFAULT); break; case 'e': if (!strcmp(s, "else")) return (T_ELSE); if (!strcmp(s, "enum")) return (T_ENUM); if (!strcmp(s, "extern")) return (T_EXTERN); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'l': if (!strcmp(s, "long")) return (T_LONG); break; case 'r': if (!strcmp(s, "return")) return (T_RETURN); break; case 's': if (!strcmp(s, "sizeof")) return (T_SIZEOF); if (!strcmp(s, "static")) return (T_STATIC); if (!strcmp(s, "struct")) return (T_STRUCT); if (!strcmp(s, "switch")) return (T_SWITCH); break; case 't': if (!strcmp(s, "typedef")) return (T_TYPEDEF); break; case 'u': if (!strcmp(s, "union")) return (T_UNION); break; case 'v': if (!strcmp(s, "void")) return (T_VOID); break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; } return (0); } // List of token strings, for debugging purposes char *Tstring[] = { "EOF", "=", "+=", "-=", "*=", "/=", "%=", "?", "||", "&&", "|", "^", "&", "==", "!=", "<", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "%", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", "default", "sizeof", "static", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":" }; // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. 
//
// The token number goes into t->token and its printable form into
// t->tokstr. Integer and character literals also set t->intvalue.
// The text of a string literal, keyword or identifier is left in the
// global Text buffer. A pending lookahead token in Peektoken is
// handed back before any input is read.
int scan(struct token *t) {
  int c, tokentype;

  // If we have a lookahead token, return this token
  if (Peektoken.token != 0) {
    t->token = Peektoken.token;
    t->tokstr = Peektoken.tokstr;
    t->intvalue = Peektoken.intvalue;
    Peektoken.token = 0;
    return (1);
  }
  // Skip whitespace
  c = skip();

  // Determine the token based on
  // the input character. For the operators that can be one or
  // two characters long, the second character is read and then
  // put back if it does not belong to the token.
  switch (c) {
    case EOF:
      // End of input: t->tokstr is not updated on this path
      t->token = T_EOF;
      return (0);
    case '+':
      if ((c = next()) == '+') {
	t->token = T_INC;
      } else if (c == '=') {
	t->token = T_ASPLUS;
      } else {
	putback(c);
	t->token = T_PLUS;
      }
      break;
    case '-':
      if ((c = next()) == '-') {
	t->token = T_DEC;
      } else if (c == '>') {
	t->token = T_ARROW;
      } else if (c == '=') {
	t->token = T_ASMINUS;
      } else if (isdigit(c)) {	// Negative int literal
	t->intvalue = -scanint(c);
	t->token = T_INTLIT;
      } else {
	putback(c);
	t->token = T_MINUS;
      }
      break;
    case '*':
      if ((c = next()) == '=') {
	t->token = T_ASSTAR;
      } else {
	putback(c);
	t->token = T_STAR;
      }
      break;
    case '/':
      if ((c = next()) == '=') {
	t->token = T_ASSLASH;
      } else {
	putback(c);
	t->token = T_SLASH;
      }
      break;
    case '%':
      if ((c = next()) == '=') {
	t->token = T_ASMOD;
      } else {
	putback(c);
	t->token = T_MOD;
      }
      break;
    case ';':
      t->token = T_SEMI;
      break;
    case '{':
      t->token = T_LBRACE;
      break;
    case '}':
      t->token = T_RBRACE;
      break;
    case '(':
      t->token = T_LPAREN;
      break;
    case ')':
      t->token = T_RPAREN;
      break;
    case '[':
      t->token = T_LBRACKET;
      break;
    case ']':
      t->token = T_RBRACKET;
      break;
    case '~':
      t->token = T_INVERT;
      break;
    case '^':
      t->token = T_XOR;
      break;
    case ',':
      t->token = T_COMMA;
      break;
    case '.':
      t->token = T_DOT;
      break;
    case ':':
      t->token = T_COLON;
      break;
    case '?':
      t->token = T_QUESTION;
      break;
    case '=':
      if ((c = next()) == '=') {
	t->token = T_EQ;
      } else {
	putback(c);
	t->token = T_ASSIGN;
      }
      break;
    case '!':
      if ((c = next()) == '=') {
	t->token = T_NE;
      } else {
	putback(c);
	t->token = T_LOGNOT;
      }
      break;
    case '<':
      if ((c = next()) == '=') {
	t->token = T_LE;
      } else if (c == '<') {
	t->token = T_LSHIFT;
      } else {
	putback(c);
	t->token = T_LT;
      }
      break;
    case '>':
      if ((c = next()) == '=') {
	t->token = T_GE;
      } else if (c == '>') {
	t->token = T_RSHIFT;
      } else {
	putback(c);
	t->token = T_GT;
      }
      break;
    case '&':
      if ((c = next()) == '&') {
	t->token = T_LOGAND;
      } else {
	putback(c);
	t->token = T_AMPER;
      }
      break;
    case '|':
      if ((c = next()) == '|') {
	t->token = T_LOGOR;
      } else {
	putback(c);
	t->token = T_OR;
      }
      break;
    case '\'':
      // If it's a quote, scan in the
      // literal character value and
      // the trailing quote
      t->intvalue = scanch();
      t->token = T_INTLIT;
      if (next() != '\'')
	fatal("Expected '\\'' at end of char literal");
      break;
    case '"':
      // Scan in a literal string
      scanstr(Text);
      t->token = T_STRLIT;
      break;
    default:
      // If it's a digit, scan the
      // literal integer value in
      if (isdigit(c)) {
	t->intvalue = scanint(c);
	t->token = T_INTLIT;
	break;
      } else if (isalpha(c) || '_' == c) {
	// Read in a keyword or identifier
	scanident(c, Text, TEXTLEN);

	// If it's a recognised keyword, return that token
	if ((tokentype = keyword(Text)) != 0) {
	  t->token = tokentype;
	  break;
	}
	// Not a recognised keyword, so it must be an identifier
	t->token = T_IDENT;
	break;
      }
      // The character isn't part of any recognised token, error
      fatalc("Unrecognised character", c);
  }

  // We found a token
  t->tokstr = Tstring[t->token];
  return (1);
}

================================================
FILE: 62_Cleanup/stmt.c
================================================
#include "defs.h"
#include "data.h"
#include "decl.h"

// Parsing of statements
// Copyright (c) 2019 Warren Toomey, GPL3

// Prototypes
static struct ASTnode *single_statement(void);

// compound_statement:          // empty, i.e.
no statement // | statement // | statement statements // ; // // statement: declaration // | expression_statement // | function_call // | if_statement // | while_statement // | for_statement // | return_statement // ; // if_statement: if_head // | if_head 'else' statement // ; // // if_head: 'if' '(' true_false_expression ')' statement ; // // Parse an IF statement including any // optional ELSE clause and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST->ctype, condAST, NULL, 0); rparen(); // Get the AST for the statement trueAST = single_statement(); // If we have an 'else', skip it // and get the AST for the statement if (Token.token == T_ELSE) { scan(&Token); falseAST = single_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, P_NONE, NULL, condAST, trueAST, falseAST, NULL, 0)); } // while_statement: 'while' '(' true_false_expression ')' statement ; // // Parse a WHILE statement and return its AST static struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST->ctype, condAST, NULL, 0); rparen(); // Get the AST for the statement. 
// Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Build and return the AST for this statement return (mkastnode(A_WHILE, P_NONE, NULL, condAST, NULL, bodyAST, NULL, 0)); } // for_statement: 'for' '(' expression_list ';' // true_false_expression ';' // expression_list ')' statement ; // // Parse a FOR statement and return its AST static struct ASTnode *for_statement(void) { struct ASTnode *condAST, *bodyAST; struct ASTnode *preopAST, *postopAST; struct ASTnode *tree; // Ensure we have 'for' '(' match(T_FOR, "for"); lparen(); // Get the pre_op expression and the ';' preopAST = expression_list(T_SEMI); semi(); // Get the condition and the ';'. // Force a non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST->ctype, condAST, NULL, 0); semi(); // Get the post_op expression and the ')' postopAST = expression_list(T_RPAREN); rparen(); // Get the statement which is the body // Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Glue the statement and the postop tree tree = mkastnode(A_GLUE, P_NONE, NULL, bodyAST, NULL, postopAST, NULL, 0); // Make a WHILE loop with the condition and this new body tree = mkastnode(A_WHILE, P_NONE, NULL, condAST, NULL, tree, NULL, 0); // And glue the preop tree to the A_WHILE tree return (mkastnode(A_GLUE, P_NONE, NULL, preopAST, NULL, tree, NULL, 0)); } // return_statement: 'return' '(' expression ')' ; // // Parse a return statement and return its AST static struct ASTnode *return_statement(void) { struct ASTnode *tree= NULL; // Ensure we have 'return' match(T_RETURN, "return"); // See if we have a return value if (Token.token == T_LPAREN) { // Can't return a value if function returns P_VOID if (Functionid->type == P_VOID) fatal("Can't return from a void function"); // Skip the left parenthesis lparen(); // Parse 
the following expression tree = binexpr(0); // Ensure this is compatible with the function's type tree = modify_type(tree, Functionid->type, Functionid->ctype, 0); if (tree == NULL) fatal("Incompatible type to return"); // Get the ')' rparen(); } // Add on the A_RETURN node tree = mkastunary(A_RETURN, P_NONE, NULL, tree, NULL, 0); // Get the ';' semi(); return (tree); } // break_statement: 'break' ; // // Parse a break statement and return its AST static struct ASTnode *break_statement(void) { if (Looplevel == 0 && Switchlevel == 0) fatal("no loop or switch to break out from"); scan(&Token); semi(); return (mkastleaf(A_BREAK, P_NONE, NULL, NULL, 0)); } // continue_statement: 'continue' ; // // Parse a continue statement and return its AST static struct ASTnode *continue_statement(void) { if (Looplevel == 0) fatal("no loop to continue to"); scan(&Token); semi(); return (mkastleaf(A_CONTINUE, P_NONE, NULL, NULL, 0)); } // Parse a switch statement and return its AST static struct ASTnode *switch_statement(void) { struct ASTnode *left, *body, *n, *c; struct ASTnode *casetree = NULL, *casetail; int inloop = 1, casecount = 0; int seendefault = 0; int ASTop, casevalue; // Skip the 'switch' and '(' scan(&Token); lparen(); // Get the switch expression, the ')' and the '{' left = binexpr(0); rparen(); lbrace(); // Ensure that this is of int type if (!inttype(left->type)) fatal("Switch expression is not of integer type"); // Build an A_SWITCH subtree with the expression as // the child n = mkastunary(A_SWITCH, P_NONE, NULL, left, NULL, 0); // Now parse the cases Switchlevel++; while (inloop) { switch (Token.token) { // Leave the loop when we hit a '}' case T_RBRACE: if (casecount == 0) fatal("No cases in switch"); inloop = 0; break; case T_CASE: case T_DEFAULT: // Ensure this isn't after a previous 'default' if (seendefault) fatal("case or default after existing default"); // Set the AST operation. 
Scan the case value if required if (Token.token == T_DEFAULT) { ASTop = A_DEFAULT; seendefault = 1; scan(&Token); } else { ASTop = A_CASE; scan(&Token); left = binexpr(0); // Ensure the case value is an integer literal if (left->op != A_INTLIT) fatal("Expecting integer literal for case value"); casevalue = left->a_intvalue; // Walk the list of existing case values to ensure // that there isn't a duplicate case value for (c = casetree; c != NULL; c = c->right) if (casevalue == c->a_intvalue) fatal("Duplicate case value"); } // Scan the ':' and increment the casecount match(T_COLON, ":"); casecount++; // If the next token is a T_CASE, the existing case will fall // into the next case. Otherwise, parse the case body. if (Token.token == T_CASE) body = NULL; else body = compound_statement(1); // Build a sub-tree with any compound statement as the left child // and link it in to the growing A_CASE tree if (casetree == NULL) { casetree = casetail = mkastunary(ASTop, P_NONE, NULL, body, NULL, casevalue); } else { casetail->right = mkastunary(ASTop, P_NONE, NULL, body, NULL, casevalue); casetail = casetail->right; } break; default: fatals("Unexpected token in switch", Token.tokstr); } } Switchlevel--; // We have a sub-tree with the cases and any default. Put the // case count into the A_SWITCH node and attach the case tree. n->a_intvalue = casecount; n->right = casetree; rbrace(); return (n); } // Parse a single statement and return its AST. static struct ASTnode *single_statement(void) { struct ASTnode *stmt; struct symtable *ctype; int linenum= Line; switch (Token.token) { case T_SEMI: // An empty statement semi(); break; case T_LBRACE: // We have a '{', so this is a compound statement lbrace(); stmt = compound_statement(0); stmt->linenum= linenum; rbrace(); return (stmt); case T_IDENT: // We have to see if the identifier matches a typedef. // If not, treat it as an expression. // Otherwise, fall down to the parse_type() call. 
if (findtypedef(Text) == NULL) { stmt = binexpr(0); stmt->linenum= linenum; semi(); return (stmt); } case T_CHAR: case T_INT: case T_LONG: case T_STRUCT: case T_UNION: case T_ENUM: case T_TYPEDEF: // The beginning of a variable declaration list. declaration_list(&ctype, C_LOCAL, T_SEMI, T_EOF, &stmt); semi(); return (stmt); // Any assignments from the declarations case T_IF: stmt= if_statement(); stmt->linenum= linenum; return(stmt); case T_WHILE: stmt= while_statement(); stmt->linenum= linenum; return(stmt); case T_FOR: stmt= for_statement(); stmt->linenum= linenum; return(stmt); case T_RETURN: stmt= return_statement(); stmt->linenum= linenum; return(stmt); case T_BREAK: stmt= break_statement(); stmt->linenum= linenum; return(stmt); case T_CONTINUE: stmt= continue_statement(); stmt->linenum= linenum; return(stmt); case T_SWITCH: stmt= switch_statement(); stmt->linenum= linenum; return(stmt); default: // For now, see if this is an expression. // This catches assignment statements. stmt = binexpr(0); stmt->linenum= linenum; semi(); return (stmt); } return (NULL); // Keep -Wall happy } // Parse a compound statement // and return its AST. If inswitch is true, // we look for a '}', 'case' or 'default' token // to end the parsing. Otherwise, look for // just a '}' to end the parsing. struct ASTnode *compound_statement(int inswitch) { struct ASTnode *left = NULL; struct ASTnode *tree; while (1) { // Leave if we've hit the end token. 
We do this first to allow // an empty compound statement if (Token.token == T_RBRACE) return (left); if (inswitch && (Token.token == T_CASE || Token.token == T_DEFAULT)) return (left); // Parse a single statement tree = single_statement(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, P_NONE, NULL, left, NULL, tree, NULL, 0); } } return (NULL); // Keep -Wall happy } ================================================ FILE: 62_Cleanup/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 // Append a node to the singly-linked list pointed to by head or tail void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node) { // Check for valid pointers if (head == NULL || tail == NULL || node == NULL) fatal("Either head, tail or node is NULL in appendsym"); // Append to the list if (*tail) { (*tail)->next = node; *tail = node; } else *head = *tail = node; node->next = NULL; } // Create a symbol node to be added to a symbol table list. // Set up the node's: // + type: char, int etc. // + ctype: composite type pointer for struct/union // + structural type: var, function, array etc. 
// + size: number of elements, or endlabel: end label for a function // + posn: Position information for local symbols // Return a pointer to the new node struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn) { // Get a new node struct symtable *node = (struct symtable *) malloc(sizeof(struct symtable)); if (node == NULL) fatal("Unable to malloc a symbol table node in newsym"); // Fill in the values if (name == NULL) node->name = NULL; else node->name = strdup(name); node->type = type; node->ctype = ctype; node->stype = stype; node->class = class; node->nelems = nelems; // For pointers and integer types, set the size // of the symbol. structs and union declarations // manually set this up themselves. if (ptrtype(type) || inttype(type)) node->size = nelems * typesize(type, ctype); node->st_posn = posn; node->next = NULL; node->member = NULL; node->initlist = NULL; #ifdef __NASM__ node->extinit = 0; #endif return (node); } // Add a symbol to the global symbol list struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn) { struct symtable *sym = newsym(name, type, ctype, stype, class, nelems, posn); // For structs and unions, copy the size from the type node if (type == P_STRUCT || type == P_UNION) sym->size = ctype->size; appendsym(&Globhead, &Globtail, sym); return (sym); } // Add a symbol to the local symbol list struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int nelems) { struct symtable *sym = newsym(name, type, ctype, stype, C_LOCAL, nelems, 0); // For structs and unions, copy the size from the type node if (type == P_STRUCT || type == P_UNION) sym->size = ctype->size; appendsym(&Loclhead, &Locltail, sym); return (sym); } // Add a symbol to the parameter list struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype) { struct symtable *sym = newsym(name, type, ctype, stype, C_PARAM, 1, 0); 
appendsym(&Parmhead, &Parmtail, sym); return (sym); } // Add a symbol to the temporary member list struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int nelems) { struct symtable *sym = newsym(name, type, ctype, stype, C_MEMBER, nelems, 0); // For structs and unions, copy the size from the type node if (type == P_STRUCT || type == P_UNION) sym->size = ctype->size; appendsym(&Membhead, &Membtail, sym); return (sym); } // Add a struct to the struct list struct symtable *addstruct(char *name) { struct symtable *sym = newsym(name, P_STRUCT, NULL, 0, C_STRUCT, 0, 0); appendsym(&Structhead, &Structtail, sym); return (sym); } // Add a struct to the union list struct symtable *addunion(char *name) { struct symtable *sym = newsym(name, P_UNION, NULL, 0, C_UNION, 0, 0); appendsym(&Unionhead, &Uniontail, sym); return (sym); } // Add an enum type or value to the enum list. // Class is C_ENUMTYPE or C_ENUMVAL. // Use posn to store the int value. struct symtable *addenum(char *name, int class, int value) { struct symtable *sym = newsym(name, P_INT, NULL, 0, class, 0, value); appendsym(&Enumhead, &Enumtail, sym); return (sym); } // Add a typedef to the typedef list struct symtable *addtypedef(char *name, int type, struct symtable *ctype) { struct symtable *sym = newsym(name, type, ctype, 0, C_TYPEDEF, 0, 0); appendsym(&Typehead, &Typetail, sym); return (sym); } // Search for a symbol in a specific list. // Return a pointer to the found node or NULL if not found. // If class is not zero, also match on the given class static struct symtable *findsyminlist(char *s, struct symtable *list, int class) { for (; list != NULL; list = list->next) if ((list->name != NULL) && !strcmp(s, list->name)) if (class == 0 || class == list->class) return (list); return (NULL); } // Determine if the symbol s is in the global symbol table. // Return a pointer to the found node or NULL if not found. 
struct symtable *findglob(char *s) { return (findsyminlist(s, Globhead, 0)); } // Determine if the symbol s is in the local symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findlocl(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } return (findsyminlist(s, Loclhead, 0)); } // Determine if the symbol s is in the symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findsymbol(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } // Otherwise, try the local and global symbol lists node = findsyminlist(s, Loclhead, 0); if (node) return (node); return (findsyminlist(s, Globhead, 0)); } // Find a member in the member list // Return a pointer to the found node or NULL if not found. struct symtable *findmember(char *s) { return (findsyminlist(s, Membhead, 0)); } // Find a struct in the struct list // Return a pointer to the found node or NULL if not found. struct symtable *findstruct(char *s) { return (findsyminlist(s, Structhead, 0)); } // Find a struct in the union list // Return a pointer to the found node or NULL if not found. struct symtable *findunion(char *s) { return (findsyminlist(s, Unionhead, 0)); } // Find an enum type in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumtype(char *s) { return (findsyminlist(s, Enumhead, C_ENUMTYPE)); } // Find an enum value in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumval(char *s) { return (findsyminlist(s, Enumhead, C_ENUMVAL)); } // Find a type in the tyedef list // Return a pointer to the found node or NULL if not found. 
struct symtable *findtypedef(char *s) { return (findsyminlist(s, Typehead, 0)); } // Reset the contents of the symbol table void clear_symtable(void) { Globhead = Globtail = NULL; Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Membhead = Membtail = NULL; Structhead = Structtail = NULL; Unionhead = Uniontail = NULL; Enumhead = Enumtail = NULL; Typehead = Typetail = NULL; } // Clear all the entries in the local symbol table void freeloclsyms(void) { Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Functionid = NULL; } // Remove all static symbols from the global symbol table void freestaticsyms(void) { // g points at current node, prev at the previous one struct symtable *g, *prev = NULL; // Walk the global table looking for static entries for (g = Globhead; g != NULL; g = g->next) { if (g->class == C_STATIC) { // If there's a previous node, rearrange the prev pointer // to skip over the current node. If not, g is the head, // so do the same to Globhead if (prev != NULL) prev->next = g->next; else Globhead->next = g->next; // If g is the tail, point Globtail at the previous node // (if there is one), or Globhead if (g == Globtail) { if (prev != NULL) Globtail = prev; else Globtail = Globhead; } } } // Point prev at g before we move up to the next node prev = g; } // Dump a single symbol static void dumpsym(struct symtable *sym, int indent) { int i; for (i = 0; i < indent; i++) printf(" "); switch (sym->type & (~0xf)) { case P_VOID: printf("void "); break; case P_CHAR: printf("char "); break; case P_INT: printf("int "); break; case P_LONG: printf("long "); break; case P_STRUCT: if (sym->ctype != NULL) printf("struct %s ", sym->ctype->name); else printf("struct %s ", sym->name); break; case P_UNION: if (sym->ctype != NULL) printf("union %s ", sym->ctype->name); else printf("union %s ", sym->name); break; default: printf("unknown type "); } for (i = 0; i < (sym->type & 0xf); i++) printf("*"); printf("%s", sym->name); switch (sym->stype) { case 
S_VARIABLE: break; case S_FUNCTION: printf("()"); break; case S_ARRAY: printf("[]"); break; default: printf(" unknown stype"); } switch (sym->class) { case C_GLOBAL: printf(": global"); break; case C_LOCAL: printf(": local"); break; case C_PARAM: printf(": param"); break; case C_EXTERN: printf(": extern"); break; case C_STATIC: printf(": static"); break; case C_STRUCT: printf(": struct"); break; case C_UNION: printf(": union"); break; case C_MEMBER: printf(": member"); break; case C_ENUMTYPE: printf(": enumtype"); break; case C_ENUMVAL: printf(": enumval"); break; case C_TYPEDEF: printf(": typedef"); break; default: printf(": unknown class"); } switch (sym->stype) { case S_VARIABLE: if (sym->class == C_ENUMVAL) printf(", value %d\n", sym->st_posn); else printf(", size %d\n", sym->size); break; case S_FUNCTION: printf(", %d params\n", sym->nelems); break; case S_ARRAY: printf(", %d elems, size %d\n", sym->nelems, sym->size); break; } switch (sym->type & (~0xf)) { case P_STRUCT: case P_UNION: dumptable(sym->member, NULL, 4); } switch (sym->stype) { case S_FUNCTION: dumptable(sym->member, NULL, 4); } } // Dump one symbol table void dumptable(struct symtable *head, char *name, int indent) { struct symtable *sym; if (head != NULL && name != NULL) printf("%s\n--------\n", name); for (sym = head; sym != NULL; sym = sym->next) dumpsym(sym, indent); } void dumpsymtables(void) { dumptable(Globhead, "Global", 0); printf("\n"); dumptable(Enumhead, "Enums", 0); printf("\n"); dumptable(Typehead, "Typedefs", 0); } ================================================ FILE: 62_Cleanup/tests/err.input031.c ================================================ Expecting a primary expression, got token:+ on line 5 of input031.c ================================================ FILE: 62_Cleanup/tests/err.input032.c ================================================ Unknown variable or function:pizza on line 4 of input032.c ================================================ FILE: 
62_Cleanup/tests/err.input033.c ================================================ Incompatible type to return on line 4 of input033.c ================================================ FILE: 62_Cleanup/tests/err.input034.c ================================================ For now, declaration of non-global arrays is not implemented on line 4 of input034.c ================================================ FILE: 62_Cleanup/tests/err.input035.c ================================================ Duplicate local variable declaration:a on line 4 of input035.c ================================================ FILE: 62_Cleanup/tests/err.input036.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input036.c ================================================ FILE: 62_Cleanup/tests/err.input037.c ================================================ Expected:comma on line 3 of input037.c ================================================ FILE: 62_Cleanup/tests/err.input038.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input038.c ================================================ FILE: 62_Cleanup/tests/err.input039.c ================================================ No statements in function with non-void type on line 4 of input039.c ================================================ FILE: 62_Cleanup/tests/err.input040.c ================================================ No return for function with non-void type on line 4 of input040.c ================================================ FILE: 62_Cleanup/tests/err.input041.c ================================================ Can't return from a void function on line 3 of input041.c ================================================ FILE: 62_Cleanup/tests/err.input042.c ================================================ Unknown variable or function:fred on line 3 of input042.c ================================================ FILE: 
62_Cleanup/tests/err.input043.c ================================================ Unknown variable or function:b on line 3 of input043.c ================================================ FILE: 62_Cleanup/tests/err.input044.c ================================================ Unknown variable or function:z on line 3 of input044.c ================================================ FILE: 62_Cleanup/tests/err.input045.c ================================================ & operator must be followed by an identifier on line 3 of input045.c ================================================ FILE: 62_Cleanup/tests/err.input046.c ================================================ * operator must be followed by an expression of pointer type on line 3 of input046.c ================================================ FILE: 62_Cleanup/tests/err.input047.c ================================================ ++ operator must be followed by an identifier on line 3 of input047.c ================================================ FILE: 62_Cleanup/tests/err.input048.c ================================================ -- operator must be followed by an identifier on line 3 of input048.c ================================================ FILE: 62_Cleanup/tests/err.input049.c ================================================ Incompatible expression in assignment on line 6 of input049.c ================================================ FILE: 62_Cleanup/tests/err.input050.c ================================================ Incompatible types in binary expression on line 6 of input050.c ================================================ FILE: 62_Cleanup/tests/err.input051.c ================================================ Expected '\'' at end of char literal on line 4 of input051.c ================================================ FILE: 62_Cleanup/tests/err.input052.c ================================================ Unrecognised character:$ on line 5 of input052.c ================================================ FILE: 
62_Cleanup/tests/err.input056.c ================================================ unknown struct/union type:var1 on line 2 of input056.c ================================================ FILE: 62_Cleanup/tests/err.input057.c ================================================ previously defined struct/union:fred on line 2 of input057.c ================================================ FILE: 62_Cleanup/tests/err.input059.c ================================================ Unknown variable or function:y on line 3 of input059.c ================================================ FILE: 62_Cleanup/tests/err.input060.c ================================================ Expression is not a struct/union on line 3 of input060.c ================================================ FILE: 62_Cleanup/tests/err.input061.c ================================================ Expression is not a pointer to a struct/union on line 3 of input061.c ================================================ FILE: 62_Cleanup/tests/err.input064.c ================================================ undeclared enum type::fred on line 1 of input064.c ================================================ FILE: 62_Cleanup/tests/err.input065.c ================================================ enum type redeclared::fred on line 2 of input065.c ================================================ FILE: 62_Cleanup/tests/err.input066.c ================================================ enum value redeclared::z on line 2 of input066.c ================================================ FILE: 62_Cleanup/tests/err.input068.c ================================================ redefinition of typedef:FOO on line 2 of input068.c ================================================ FILE: 62_Cleanup/tests/err.input069.c ================================================ unknown type:FLOO on line 2 of input069.c ================================================ FILE: 62_Cleanup/tests/err.input072.c ================================================ no loop or 
switch to break out from on line 1 of input072.c ================================================ FILE: 62_Cleanup/tests/err.input073.c ================================================ no loop to continue to on line 1 of input073.c ================================================ FILE: 62_Cleanup/tests/err.input075.c ================================================ Unexpected token in switch:if on line 4 of input075.c ================================================ FILE: 62_Cleanup/tests/err.input076.c ================================================ No cases in switch on line 3 of input076.c ================================================ FILE: 62_Cleanup/tests/err.input077.c ================================================ case or default after existing default on line 6 of input077.c ================================================ FILE: 62_Cleanup/tests/err.input078.c ================================================ case or default after existing default on line 6 of input078.c ================================================ FILE: 62_Cleanup/tests/err.input079.c ================================================ Duplicate case value on line 6 of input079.c ================================================ FILE: 62_Cleanup/tests/err.input085.c ================================================ Bad type in parameter list on line 1 of input085.c ================================================ FILE: 62_Cleanup/tests/err.input086.c ================================================ Function definition not at global level on line 2 of input086.c ================================================ FILE: 62_Cleanup/tests/err.input087.c ================================================ Bad type in member list on line 4 of input087.c ================================================ FILE: 62_Cleanup/tests/err.input092.c ================================================ Type mismatch: literal vs. 
variable on line 1 of input092.c ================================================ FILE: 62_Cleanup/tests/err.input093.c ================================================ Unknown variable or function:fred on line 1 of input093.c ================================================ FILE: 62_Cleanup/tests/err.input094.c ================================================ Type mismatch: literal vs. variable on line 1 of input094.c ================================================ FILE: 62_Cleanup/tests/err.input095.c ================================================ Variable can not be initialised:x on line 1 of input095.c ================================================ FILE: 62_Cleanup/tests/err.input096.c ================================================ Array size is illegal:0 on line 1 of input096.c ================================================ FILE: 62_Cleanup/tests/err.input097.c ================================================ For now, declaration of non-global arrays is not implemented on line 2 of input097.c ================================================ FILE: 62_Cleanup/tests/err.input098.c ================================================ Too many values in initialisation list on line 1 of input098.c ================================================ FILE: 62_Cleanup/tests/err.input102.c ================================================ Cannot cast to a struct, union or void type on line 3 of input102.c ================================================ FILE: 62_Cleanup/tests/err.input103.c ================================================ Cannot cast to a struct, union or void type on line 3 of input103.c ================================================ FILE: 62_Cleanup/tests/err.input104.c ================================================ Cannot cast to a struct, union or void type on line 2 of input104.c ================================================ FILE: 62_Cleanup/tests/err.input105.c ================================================ Incompatible expression in 
assignment on line 4 of input105.c ================================================ FILE: 62_Cleanup/tests/err.input118.c ================================================ Compiler doesn't support static or extern local declarations on line 2 of input118.c ================================================ FILE: 62_Cleanup/tests/err.input124.c ================================================ Cannot ++ on rvalue on line 6 of input124.c ================================================ FILE: 62_Cleanup/tests/err.input126.c ================================================ Unknown variable or function:ptr on line 7 of input126.c ================================================ FILE: 62_Cleanup/tests/err.input129.c ================================================ Cannot ++ and/or -- more than once on line 6 of input129.c ================================================ FILE: 62_Cleanup/tests/err.input141.c ================================================ Declaration of array parameters is not implemented on line 4 of input141.c ================================================ FILE: 62_Cleanup/tests/err.input142.c ================================================ Array must have non-zero elements:fred on line 1 of input142.c ================================================ FILE: 62_Cleanup/tests/input001.c ================================================ int printf(char *fmt); void main() { printf("%d\n", 12 * 3); printf("%d\n", 18 - 2 * 4); printf("%d\n", 1 + 2 + 9 - 5/2 + 3*5); } ================================================ FILE: 62_Cleanup/tests/input002.c ================================================ int printf(char *fmt); void main() { int fred; int jim; fred= 5; jim= 12; printf("%d\n", fred + jim); } ================================================ FILE: 62_Cleanup/tests/input003.c ================================================ int printf(char *fmt); void main() { int x; x= 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 
1; printf("%d\n", x); x= x + 1; printf("%d\n", x); } ================================================ FILE: 62_Cleanup/tests/input004.c ================================================ int printf(char *fmt); void main() { int x; x= 7 < 9; printf("%d\n", x); x= 7 <= 9; printf("%d\n", x); x= 7 != 9; printf("%d\n", x); x= 7 == 7; printf("%d\n", x); x= 7 >= 7; printf("%d\n", x); x= 7 <= 7; printf("%d\n", x); x= 9 > 7; printf("%d\n", x); x= 9 >= 7; printf("%d\n", x); x= 9 != 7; printf("%d\n", x); } ================================================ FILE: 62_Cleanup/tests/input005.c ================================================ int printf(char *fmt); void main() { int i; int j; i=6; j=12; if (i < j) { printf("%d\n", i); } else { printf("%d\n", j); } } ================================================ FILE: 62_Cleanup/tests/input006.c ================================================ int printf(char *fmt); void main() { int i; i=1; while (i <= 10) { printf("%d\n", i); i= i + 1; } } ================================================ FILE: 62_Cleanup/tests/input007.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 62_Cleanup/tests/input008.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 62_Cleanup/tests/input009.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { printf("%d\n", 2 * b - a); } } ================================================ FILE: 62_Cleanup/tests/input010.c ================================================ int printf(char *fmt); void main() { int i; char j; j= 20; printf("%d\n", j); i= 10; 
printf("%d\n", i); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 2; j= j + 1) { printf("%d\n", j); } } ================================================ FILE: 62_Cleanup/tests/input011.c ================================================ int printf(char *fmt); int main() { int i; char j; long k; i= 10; printf("%d\n", i); j= 20; printf("%d\n", j); k= 30; printf("%d\n", k); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 4; j= j + 1) { printf("%d\n", j); } for (k= 1; k <= 5; k= k + 1) { printf("%d\n", k); } return(i); printf("%d\n", 12345); return(3); } ================================================ FILE: 62_Cleanup/tests/input012.c ================================================ int printf(char *fmt); int fred() { return(5); } void main() { int x; x= fred(2); printf("%d\n", x); } ================================================ FILE: 62_Cleanup/tests/input013.c ================================================ int printf(char *fmt); int fred() { return(56); } void main() { int dummy; int result; dummy= printf("%d\n", 23); result= fred(10); dummy= printf("%d\n", result); } ================================================ FILE: 62_Cleanup/tests/input014.c ================================================ int printf(char *fmt); int fred() { return(20); } int main() { int result; printf("%d\n", 10); result= fred(15); printf("%d\n", result); printf("%d\n", fred(15)+10); return(0); } ================================================ FILE: 62_Cleanup/tests/input015.c ================================================ int printf(char *fmt); int main() { char a; char *b; char c; int d; int *e; int f; a= 18; printf("%d\n", a); b= &a; c= *b; printf("%d\n", c); d= 12; printf("%d\n", d); e= &d; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 62_Cleanup/tests/input016.c ================================================ int printf(char *fmt); int c; int d; int *e; int f; int main() { c= 12; d=18; 
printf("%d\n", c); e= &c + 1; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 62_Cleanup/tests/input017.c ================================================ int printf(char *fmt); int main() { char a; char *b; int d; int *e; b= &a; *b= 19; printf("%d\n", a); e= &d; *e= 12; printf("%d\n", d); return(0); } ================================================ FILE: 62_Cleanup/tests/input018.c ================================================ int printf(char *fmt); int main() { int a; int b; a= b= 34; printf("%d\n", a); printf("%d\n", b); return(0); } ================================================ FILE: 62_Cleanup/tests/input018a.c ================================================ int printf(char *fmt); int a; int *b; char c; char *d; int main() { b= &a; *b= 15; printf("%d\n", a); d= &c; *d= 16; printf("%d\n", c); return(0); } ================================================ FILE: 62_Cleanup/tests/input019.c ================================================ int printf(char *fmt); int a; int b; int c; int d; int e; int main() { a= 2; b= 4; c= 3; d= 2; e= (a+b) * (c+d); printf("%d\n", e); return(0); } ================================================ FILE: 62_Cleanup/tests/input020.c ================================================ int printf(char *fmt); int a; int b[25]; int main() { b[3]= 12; a= b[3]; printf("%d\n", a); return(0); } ================================================ FILE: 62_Cleanup/tests/input021.c ================================================ int printf(char *fmt); char c; char *str; int main() { c= '\n'; printf("%d\n", c); for (str= "Hello world\n"; *str != 0; str= str + 1) { printf("%c", *str); } return(0); } ================================================ FILE: 62_Cleanup/tests/input022.c ================================================ int printf(char *fmt); char a; char b; char c; int d; int e; int f; long g; long h; long i; int main() { b= 5; c= 7; a= b + c++; printf("%d\n", a); e= 5; f= 7; d= e + f++; 
printf("%d\n", d); h= 5; i= 7; g= h + i++; printf("%d\n", g); a= b-- + c; printf("%d\n", a); d= e-- + f; printf("%d\n", d); g= h-- + i; printf("%d\n", g); a= ++b + c; printf("%d\n", a); d= ++e + f; printf("%d\n", d); g= ++h + i; printf("%d\n", g); a= b * --c; printf("%d\n", a); d= e * --f; printf("%d\n", d); g= h * --i; printf("%d\n", g); return(0); } ================================================ FILE: 62_Cleanup/tests/input023.c ================================================ int printf(char *fmt); char *str; int x; int main() { x= -23; printf("%d\n", x); printf("%d\n", -10 * -10); x= 1; x= ~x; printf("%d\n", x); x= 2 > 5; printf("%d\n", x); x= !x; printf("%d\n", x); x= !x; printf("%d\n", x); x= 13; if (x) { printf("%d\n", 13); } x= 0; if (!x) { printf("%d\n", 14); } for (str= "Hello world\n"; *str; str++) { printf("%c", *str); } return(0); } ================================================ FILE: 62_Cleanup/tests/input024.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { a= 42; b= 19; printf("%d\n", a & b); printf("%d\n", a | b); printf("%d\n", a ^ b); printf("%d\n", 1 << 3); printf("%d\n", 63 >> 3); return(0); } ================================================ FILE: 62_Cleanup/tests/input025.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { char z; int y; int x; x= 10; y= 20; z= 30; printf("%d\n", x); printf("%d\n", y); printf("%d\n", z); a= 5; b= 15; c= 25; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); return(0); } ================================================ FILE: 62_Cleanup/tests/input026.c ================================================ int printf(char *fmt); int main(int a, char b, long c, int d, int e, int f, int g, int h) { int i; int j; int k; a= 13; printf("%d\n", a); b= 23; printf("%d\n", b); c= 34; printf("%d\n", c); d= 44; printf("%d\n", d); e= 54; printf("%d\n", e); f= 64; printf("%d\n", f); g= 74; printf("%d\n", g); 
h= 84; printf("%d\n", h); i= 94; printf("%d\n", i); j= 95; printf("%d\n", j); k= 96; printf("%d\n", k); return(0); } ================================================ FILE: 62_Cleanup/tests/input027.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int param5(int a, int b, int c, int d, int e) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param2(int a, int b) { int c; int d; int e; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param0() { int a; int b; int c; int d; int e; a= 1; b= 2; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int main() { param8(1,2,3,4,5,6,7,8); param5(1,2,3,4,5); param2(1,2); param0(); return(0); } ================================================ FILE: 62_Cleanup/tests/input028.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 62_Cleanup/tests/input029.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h); int fred(int a, int b, int c); int main(); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { 
printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 62_Cleanup/tests/input030.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input030.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 62_Cleanup/tests/input031.c ================================================ int printf(char *fmt); int main() { int x; x= 2 + + 3 - * / ; } ================================================ FILE: 62_Cleanup/tests/input032.c ================================================ int printf(char *fmt); int main() { pizza cow llama sausage; } ================================================ FILE: 62_Cleanup/tests/input033.c ================================================ int printf(char *fmt); int main() { char *z; return(z); } ================================================ FILE: 62_Cleanup/tests/input035.c ================================================ int printf(char *fmt); int fred(int a, int b) { int a; return(a); } ================================================ FILE: 62_Cleanup/tests/input036.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c); ================================================ FILE: 62_Cleanup/tests/input037.c ================================================ int printf(char *fmt); int fred(int a, char b +, int 
z); ================================================ FILE: 62_Cleanup/tests/input038.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c, int g); ================================================ FILE: 62_Cleanup/tests/input039.c ================================================ int printf(char *fmt); int main() { int a; } ================================================ FILE: 62_Cleanup/tests/input040.c ================================================ int printf(char *fmt); int main() { int a; a= 5; } ================================================ FILE: 62_Cleanup/tests/input041.c ================================================ int printf(char *fmt); void fred() { return(5); } ================================================ FILE: 62_Cleanup/tests/input042.c ================================================ int printf(char *fmt); int main() { fred(5); } ================================================ FILE: 62_Cleanup/tests/input043.c ================================================ int printf(char *fmt); int main() { int a; a= b[4]; } ================================================ FILE: 62_Cleanup/tests/input044.c ================================================ int printf(char *fmt); int main() { int a; a= z; } ================================================ FILE: 62_Cleanup/tests/input045.c ================================================ int printf(char *fmt); int main() { int a; a= &5; } ================================================ FILE: 62_Cleanup/tests/input046.c ================================================ int printf(char *fmt); int main() { int a; a= *5; } ================================================ FILE: 62_Cleanup/tests/input047.c ================================================ int printf(char *fmt); int main() { int a; a= ++5; } ================================================ FILE: 62_Cleanup/tests/input048.c 
================================================ int printf(char *fmt); int main() { int a; a= --5; } ================================================ FILE: 62_Cleanup/tests/input049.c ================================================ int printf(char *fmt); int main() { int x; char y; y= x; } ================================================ FILE: 62_Cleanup/tests/input050.c ================================================ int printf(char *fmt); int main() { char *a; char *b; a= a + b; } ================================================ FILE: 62_Cleanup/tests/input051.c ================================================ int printf(char *fmt); int main() { char a; a= 'fred'; } ================================================ FILE: 62_Cleanup/tests/input052.c ================================================ int printf(char *fmt); int main() { int a; a= $5.00; } ================================================ FILE: 62_Cleanup/tests/input053.c ================================================ int printf(char *fmt); int main() { printf("Hello world, %d\n", 23); return(0); } ================================================ FILE: 62_Cleanup/tests/input054.c ================================================ int printf(char *fmt); int main() { int i; for (i=0; i < 20; i++) { printf("Hello world, %d\n", i); } return(0); } ================================================ FILE: 62_Cleanup/tests/input055.c ================================================ int printf(char *fmt); int main(int argc, char **argv) { int i; char *argument; printf("Hello world\n"); for (i=0; i < argc; i++) { argument= *argv; argv= argv + 1; printf("Argument %d is %s\n", i, argument); } return(0); } ================================================ FILE: 62_Cleanup/tests/input056.c ================================================ struct foo { int x; }; struct mary var1; ================================================ FILE: 62_Cleanup/tests/input057.c ================================================ struct 
fred { int x; } ; struct fred { char y; } ; ================================================ FILE: 62_Cleanup/tests/input058.c ================================================ int printf(char *fmt); struct fred { int x; char y; long z; }; struct fred var2; struct fred *varptr; int main() { long result; var2.x= 12; printf("%d\n", var2.x); var2.y= 'c'; printf("%d\n", var2.y); var2.z= 4005; printf("%d\n", var2.z); result= var2.x + var2.y + var2.z; printf("%d\n", result); varptr= &var2; result= varptr->x + varptr->y + varptr->z; printf("%d\n", result); return(0); } ================================================ FILE: 62_Cleanup/tests/input059.c ================================================ int main() { int x; x= y.foo; } ================================================ FILE: 62_Cleanup/tests/input060.c ================================================ int main() { int x; x= x.foo; } ================================================ FILE: 62_Cleanup/tests/input061.c ================================================ int main() { int x; x= x->foo; } ================================================ FILE: 62_Cleanup/tests/input062.c ================================================ int printf(char *fmt); union fred { char w; int x; int y; long z; }; union fred var1; union fred *varptr; int main() { var1.x= 65; printf("%d\n", var1.x); var1.x= 66; printf("%d\n", var1.x); printf("%d\n", var1.y); printf("The next two depend on the endian of the platform\n"); printf("%d\n", var1.w); printf("%d\n", var1.z); varptr= &var1; varptr->x= 67; printf("%d\n", varptr->x); printf("%d\n", varptr->y); return(0); } ================================================ FILE: 62_Cleanup/tests/input063.c ================================================ int printf(char *fmt); enum fred { apple=1, banana, carrot, pear=10, peach, mango, papaya }; enum jane { aple=1, bnana, crrot, par=10, pech, mago, paaya }; enum fred var1; enum jane var2; enum fred var3; int main() { var1= carrot + pear + mango; 
printf("%d\n", var1); return(0); } ================================================ FILE: 62_Cleanup/tests/input064.c ================================================ enum fred var3; ================================================ FILE: 62_Cleanup/tests/input065.c ================================================ enum fred { x, y, z }; enum fred { a, b }; ================================================ FILE: 62_Cleanup/tests/input066.c ================================================ enum fred { x, y, z }; enum mary { a, b, z }; ================================================ FILE: 62_Cleanup/tests/input067.c ================================================ int printf(char *fmt); typedef int FOO; FOO var1; struct bar { int x; int y} ; typedef struct bar BAR; BAR var2; int main() { var1= 5; printf("%d\n", var1); var2.x= 7; var2.y= 10; printf("%d\n", var2.x + var2.y); return(0); } ================================================ FILE: 62_Cleanup/tests/input068.c ================================================ typedef int FOO; typedef char FOO; ================================================ FILE: 62_Cleanup/tests/input069.c ================================================ typedef int FOO; FLOO y; ================================================ FILE: 62_Cleanup/tests/input070.c ================================================ #include <stdio.h> typedef int FOO; int main() { FOO x; x= 56; printf("%d\n", x); return(0); } ================================================ FILE: 62_Cleanup/tests/input071.c ================================================ #include <stdio.h> int main() { int x; x = 0; while (x < 100) { if (x == 5) { x = x + 2; continue; } printf("%d\n", x); if (x == 14) { break; } x = x + 1; } printf("Done\n"); return (0); } ================================================ FILE: 62_Cleanup/tests/input072.c ================================================ int main() { break; } ================================================ FILE: 62_Cleanup/tests/input073.c 
================================================ int main() { continue; } ================================================ FILE: 62_Cleanup/tests/input074.c ================================================ #include <stdio.h> int main() { int x; int y; y= 0; for (x=0; x < 5; x++) { switch(x) { case 1: { y= 5; break; } case 2: { y= 7; break; } case 3: { y= 9; } default: { y= 100; } } printf("%d\n", y); } return(0); } ================================================ FILE: 62_Cleanup/tests/input075.c ================================================ int main() { int x; switch(x) { if (x<5); } } ================================================ FILE: 62_Cleanup/tests/input076.c ================================================ int main() { int x; switch(x) { } } ================================================ FILE: 62_Cleanup/tests/input077.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } case 4: { x= 2; } } } ================================================ FILE: 62_Cleanup/tests/input078.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } default: { x= 2; } } } ================================================ FILE: 62_Cleanup/tests/input079.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } case 2: { x= 2; } case 1: { x= 2; } default: { x= 2; } } } ================================================ FILE: 62_Cleanup/tests/input080.c ================================================ #include <stdio.h> int x; int y; int main() { for (x=0, y=1; x < 6; x++, y=y+2) { printf("%d %d\n", x, y); } return(0); } ================================================ FILE: 62_Cleanup/tests/input081.c ================================================ #include <stdio.h> int x; int y; int main() { x= 0; y=1; for (;x<5;) { printf("%d %d\n", x, y); x=x+1; y=y+2; } return(0); } ================================================ FILE: 
62_Cleanup/tests/input082.c ================================================ #include <stdio.h> int main() { int x; x= 10; // Dangling else test if (x > 5) if (x > 15) printf("x > 15\n"); else printf("15 >= x > 5\n"); else printf("5 >= x\n"); return(0); } ================================================ FILE: 62_Cleanup/tests/input083.c ================================================ #include <stdio.h> int main() { int x; // Dangling else test. // We should not print anything for x<= 5 for (x=0; x < 12; x++) if (x > 5) if (x > 10) printf("10 < %2d\n", x); else printf(" 5 < %2d <= 10\n", x); return(0); } ================================================ FILE: 62_Cleanup/tests/input084.c ================================================ #include <stdio.h> int main() { int x, y; x=2; y=3; printf("%d %d\n", x, y); char a, *b; a= 'f'; b= &a; printf("%c %c\n", a, *b); return(0); } ================================================ FILE: 62_Cleanup/tests/input085.c ================================================ int main(struct foo { int x; }; , int a) { return(0); } ================================================ FILE: 62_Cleanup/tests/input086.c ================================================ int main() { int fred() { return(5); } int x; x=2; return(x); } ================================================ FILE: 62_Cleanup/tests/input087.c ================================================ struct foo { int x; int y; struct blah { int g; }; }; ================================================ FILE: 62_Cleanup/tests/input088.c ================================================ #include <stdio.h> struct foo { int x; int y; } fred, mary; struct foo james; int main() { fred.x= 5; mary.y= 6; printf("%d %d\n", fred.x, mary.y); return(0); } ================================================ FILE: 62_Cleanup/tests/input089.c ================================================ #include <stdio.h> int x= 23; char y= 'H'; char *z= "Hello world"; int main() { printf("%d %c %s\n", x, y, z); return(0); } 
================================================ FILE: 62_Cleanup/tests/input090.c ================================================ #include <stdio.h> int a = 23, b = 100; char y = 'H', *z = "Hello world"; int main() { printf("%d %d %c %s\n", a, b, y, z); return (0); } ================================================ FILE: 62_Cleanup/tests/input091.c ================================================ #include <stdio.h> int fred[] = { 1, 2, 3, 4, 5 }; int jim[10] = { 1, 2, 3, 4, 5 }; int main() { int i; for (i=0; i < 5; i++) printf("%d\n", fred[i]); for (i=0; i < 10; i++) printf("%d\n", jim[i]); return(0); } ================================================ FILE: 62_Cleanup/tests/input092.c ================================================ char x= 3000; ================================================ FILE: 62_Cleanup/tests/input093.c ================================================ char x= fred; ================================================ FILE: 62_Cleanup/tests/input094.c ================================================ char *s= 54; ================================================ FILE: 62_Cleanup/tests/input095.c ================================================ int fred(int x=2) { return(x); } ================================================ FILE: 62_Cleanup/tests/input096.c ================================================ int fred[0]; ================================================ FILE: 62_Cleanup/tests/input098.c ================================================ int fred[3]= { 1, 2, 3, 4, 5 }; ================================================ FILE: 62_Cleanup/tests/input099.c ================================================ #include <stdio.h> // List of token strings, for debugging purposes. 
// As yet, we can't store a NULL into the list char *Tstring[] = { "EOF", "=", "||", "&&", "|", "^", "&", "==", "!=", ",", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", "default", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":", "" }; int main() { int i; char *str; i=0; while (1) { str= Tstring[i]; if (*str == 0) break; printf("%s\n", str); i++; } return(0); } ================================================ FILE: 62_Cleanup/tests/input100.c ================================================ #include <stdio.h> int main() { int x= 3, y=14; int z= 2 * x + y; char *str= "Hello world"; printf("%s %d %d\n", str, x+y, z); return(0); } ================================================ FILE: 62_Cleanup/tests/input101.c ================================================ #include <stdio.h> int main() { int x= 65535; char y; char *str; y= (char )x; printf("0x%x\n", y); str= (char *)0; printf("0x%lx\n", (long)str); return(0); } ================================================ FILE: 62_Cleanup/tests/input102.c ================================================ int main() { struct foo { int p; }; int y= (struct foo) x; } ================================================ FILE: 62_Cleanup/tests/input103.c ================================================ int main() { union foo { int p; }; int y= (union foo) x; } ================================================ FILE: 62_Cleanup/tests/input104.c ================================================ int main() { int y= (void) x; } ================================================ FILE: 62_Cleanup/tests/input105.c ================================================ int main() { int x; char *y; y= (char) x; } ================================================ FILE: 62_Cleanup/tests/input106.c ================================================ #include <stdio.h> char 
*y= (char *)0; int main() { printf("0x%lx\n", (long)y); return(0); } ================================================ FILE: 62_Cleanup/tests/input107.c ================================================ #include <stdio.h> char *y[] = { "fish", "cow", NULL }; char *z= NULL; int main() { int i; char *ptr; for (i=0; i < 3; i++) { ptr= y[i]; if (ptr != (char *)0) printf("%s\n", y[i]); else printf("NULL\n"); } return(0); } ================================================ FILE: 62_Cleanup/tests/input108.c ================================================ int main() { char *str= (void *)0; return(0); } ================================================ FILE: 62_Cleanup/tests/input109.c ================================================ #include <stdio.h> int main() { int x= 17 - 1; printf("%d\n", x); return(0); } ================================================ FILE: 62_Cleanup/tests/input110.c ================================================ #include <stdio.h> int x; int y; int main() { x= 3; y= 15; y += x; printf("%d\n", y); x= 3; y= 15; y -= x; printf("%d\n", y); x= 3; y= 15; y *= x; printf("%d\n", y); x= 3; y= 15; y /= x; printf("%d\n", y); return(0); } ================================================ FILE: 62_Cleanup/tests/input111.c ================================================ #include <stdio.h> int main() { int x= 2000 + 3 + 4 * 5 + 6; printf("%d\n", x); return(0); } ================================================ FILE: 62_Cleanup/tests/input112.c ================================================ #include <stdio.h> char* y = NULL; int x= 10 + 6; int fred [ 2 + 3 ]; int main() { fred[2]= x; printf("%d\n", fred[2]); return(0); } ================================================ FILE: 62_Cleanup/tests/input113.c ================================================ #include <stdio.h> void fred(void); void fred(void) { printf("fred says hello\n"); } int main(void) { fred(); return(0); } ================================================ FILE: 62_Cleanup/tests/input114.c ================================================ #include <stdio.h> int main() 
{ int x= 0x4a; printf("%c\n", x); return(0); } ================================================ FILE: 62_Cleanup/tests/input115.c ================================================ #include <stdio.h> struct foo { int x; char y; long z; }; typedef struct foo blah; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol struct symtable *ctype; // If struct/union, ptr to that type int stype; // Structural type for the symbol int class; // Storage class for the symbol int size; // Total size in bytes of this symbol int nelems; // Functions: # params. Arrays: # elements #define st_endlabel st_posn // For functions, the end label int st_posn; // For locals, the negative offset // from the stack base pointer int *initlist; // List of initial values struct symtable *next; // Next symbol in one list struct symtable *member; // First member of a function, struct, }; // union or enum // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates int rvalue; // True if the node is an rvalue struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; struct symtable *sym; // For many AST nodes, the pointer to // the symbol in the symbol table #define a_intvalue a_size // For A_INTLIT, the integer value int a_size; // For A_SCALE, the size to scale by }; int main() { printf("%ld\n", sizeof(char)); printf("%ld\n", sizeof(int)); printf("%ld\n", sizeof(long)); printf("%ld\n", sizeof(char *)); printf("%ld\n", sizeof(blah)); printf("%ld\n", sizeof(struct symtable)); printf("%ld\n", sizeof(struct ASTnode)); return(0); } ================================================ FILE: 62_Cleanup/tests/input116.c ================================================ #include <stdio.h> static int counter=0; static int fred(void) { return(counter++); } int main(void) { int i; for (i=0; i < 5; i++) printf("%d\n", fred()); 
return(0); } ================================================ FILE: 62_Cleanup/tests/input117.c ================================================ #include <stdio.h> static char *fred(void) { return("Hello"); } int main(void) { printf("%s\n", fred()); return(0); } ================================================ FILE: 62_Cleanup/tests/input118.c ================================================ int main(void) { static int x; } ================================================ FILE: 62_Cleanup/tests/input119.c ================================================ #include <stdio.h> int x; int y= 3; int main() { x= y != 3 ? 6 : 8; printf("%d\n", x); x= (y == 3) ? 6 : 8; printf("%d\n", x); return(0); } ================================================ FILE: 62_Cleanup/tests/input120.c ================================================ #include <stdio.h> int x; int y= 3; int main() { for (y= 0; y < 10; y++) { x= y > 4 ? y + 2 : y + 9; printf("%d\n", x); } return(0); } ================================================ FILE: 62_Cleanup/tests/input121.c ================================================ #include <stdio.h> int x; int y= 3; int main() { for (y= 0; y < 10; y++) { x= (y < 4) ? y + 2 : (y > 7) ? 
1000 : y + 9; printf("%d\n", x); } return(0); } ================================================ FILE: 62_Cleanup/tests/input122.c ================================================ #include <stdio.h> int x, y, z1, z2; int main() { for (x= 0; x <= 1; x++) { for (y= 0; y <= 1; y++) { z1= x || y; z2= x && y; printf("x %d, y %d, x || y %d, x && y %d\n", x, y, z1, z2); } } //z= x || y; return(0); } ================================================ FILE: 62_Cleanup/tests/input123.c ================================================ #include <stdio.h> int main() { int x; for (x=0; x < 20; x++) switch(x) { case 2: case 3: case 5: case 7: case 11: printf("%2d infant prime\n", x); break; case 13: case 17: case 19: printf("%2d teen prime\n", x); break; case 0: case 1: case 4: case 6: case 8: case 9: case 10: case 12: printf("%2d infant composite\n", x); break; default: printf("%2d teen composite\n", x); break; } return(0); } ================================================ FILE: 62_Cleanup/tests/input124.c ================================================ #include <stdio.h> int ary[5]; int main() { ary++; return(0); } ================================================ FILE: 62_Cleanup/tests/input125.c ================================================ #include <stdio.h> int ary[5]; int *ptr; int x; int main() { ary[3]= 2008; ptr= ary; // Load ary's address into ptr x= ary[3]; printf("%d\n", x); x= ptr[3]; printf("%d\n", x); // Treat ptr as an array return(0); } ================================================ FILE: 62_Cleanup/tests/input126.c ================================================ #include <stdio.h> int ary[5]; int main() { ary[3]= 2008; ptr= &ary; return(0); } ================================================ FILE: 62_Cleanup/tests/input127.c ================================================ #include <stdio.h> int ary[5]; void fred(int *ptr) { // Receive a pointer printf("%d\n", ptr[3]); } int main() { ary[3]= 2008; printf("%d\n", ary[3]); fred(ary); // Pass ary as a pointer return(0); } 
================================================ FILE: 62_Cleanup/tests/input128.c ================================================ #include <stdio.h> struct foo { int val; struct foo *next; }; struct foo head, mid, tail; int main() { struct foo *ptr; tail.val= 20; tail.next= NULL; mid.val= 15; mid.next= &tail; head.val= 10; head.next= &mid; ptr= &head; printf("%d %d\n", head.val, ptr->val); printf("%d %d\n", mid.val, ptr->next->val); printf("%d %d\n", tail.val, ptr->next->next->val); return(0); } ================================================ FILE: 62_Cleanup/tests/input129.c ================================================ #include <stdio.h> int x= 6; int main() { printf("%d\n", x++ ++); return(0); } ================================================ FILE: 62_Cleanup/tests/input130.c ================================================ #include <stdio.h> char *x= "foo"; int main() { printf("Hello " "world" "\n"); return(0); } ================================================ FILE: 62_Cleanup/tests/input131.c ================================================ #include <stdio.h> void donothing() { } int main() { int x=0; printf("Doing nothing... 
"); donothing(); printf("nothing done\n"); while (++x < 100) ; printf("x is now %d\n", x); return(0); } ================================================ FILE: 62_Cleanup/tests/input132.c ================================================ extern int fred; int fred; int mary; extern int mary; int main() { return(0); } ================================================ FILE: 62_Cleanup/tests/input133.c ================================================ #include extern int fred[]; int fred[23]; char mary[100]; extern char mary[]; void main() { printf("OK\n"); } ================================================ FILE: 62_Cleanup/tests/input134.c ================================================ #include char y = 'a'; char *x; int main() { x= &y; if (x && y == 'a') printf("1st match\n"); x= NULL; if (x && y == 'a') printf("2nd match\n"); x= &y; y='b'; if (x && y == 'a') printf("3rd match\n"); return(0); } ================================================ FILE: 62_Cleanup/tests/input135.c ================================================ #include void fred() { int x= 5; printf("testing x\n"); if (x > 4) return; printf("x below 5\n"); } int main() { fred(); return(0); } ================================================ FILE: 62_Cleanup/tests/input136.c ================================================ #include int add(int x, int y) { return(x+y); } int main() { int result; result= 3 * add(2,3) - 5 * add(4,6); printf("%d\n", result); return(0); } ================================================ FILE: 62_Cleanup/tests/input137.c ================================================ #include int a=1, b=2, c=3, d=4, e=5, f=6, g=7, h=8; int main() { int x; x= ((((((a + b) + c) + d) + e) + f) + g) + h; x= a + (b + (c + (d + (e + (f + (g + h)))))); printf("x is %d\n", x); return(0); } ================================================ FILE: 62_Cleanup/tests/input138.c ================================================ #include int x, y, z; int a=1; int *aptr; int main() { // See if generic AND works 
for (x=0; x <= 1; x++) for (y=0; y <= 1; y++) { z= x && y; printf("%d %d | %d\n", x, y, z); } // See if generic AND works for (x=0; x <= 1; x++) for (y=0; y <= 1; y++) { z= x || y; printf("%d %d | %d\n", x, y, z); } // Now some lazy evaluation aptr= NULL; if (aptr && *aptr == 1) printf("aptr points at 1\n"); else printf("aptr is NULL or doesn't point at 1\n"); aptr= &a; if (aptr && *aptr == 1) printf("aptr points at 1\n"); else printf("aptr is NULL or doesn't point at 1\n"); return(0); } ================================================ FILE: 62_Cleanup/tests/input139.c ================================================ #include int same(int x) { return(x); } int main() { int a= 3; if (same(a) && same(a) >= same(a)) printf("same apparently\n"); return(0); } ================================================ FILE: 62_Cleanup/tests/input140.c ================================================ #include int main() { int i; int ary[5]; char z; // Write below the array z= 'H'; // Fill the array for (i=0; i < 5; i++) ary[i]= i * i; // Write above the array i=14; // Print out the array for (i=0; i < 5; i++) printf("%d\n", ary[i]); // See if either side is OK printf("%d %c\n", i, z); return(0); } ================================================ FILE: 62_Cleanup/tests/input141.c ================================================ static int fred[5]; int jim; int foo(int mary[6]) { return(5); } ================================================ FILE: 62_Cleanup/tests/input142.c ================================================ static int fred[]; int jim; ================================================ FILE: 62_Cleanup/tests/input143.c ================================================ #include char foo; char *a, *b, *c; int main() { a= b= c= NULL; if (a==NULL || b==NULL || c==NULL) printf("One of the three is NULL\n"); a= &foo; if (a==NULL || b==NULL || c==NULL) printf("One of the three is NULL\n"); b= &foo; if (a==NULL || b==NULL || c==NULL) printf("One of the three is NULL\n"); c= &foo; 
if (a==NULL || b==NULL || c==NULL) printf("One of the three is NULL\n"); else printf("All three are non-NULL\n"); return(0); } ================================================ FILE: 62_Cleanup/tests/input144.c ================================================ #include #include #include char *filename= "fred"; int main() { fprintf(stdout, "Unable to open %s: %s\n", filename, strerror(errno)); return(0); } ================================================ FILE: 62_Cleanup/tests/input145.c ================================================ #include char *str= "qwertyuiop"; int list[]= {3, 5, 7, 9, 11, 13, 15}; int *lptr; int main() { printf("%c\n", *str); str= str + 1; printf("%c\n", *str); str += 1; printf("%c\n", *str); str += 1; printf("%c\n", *str); str -= 1; printf("%c\n", *str); lptr= list; printf("%d\n", *lptr); lptr= lptr + 1; printf("%d\n", *lptr); lptr += 1; printf("%d\n", *lptr); lptr += 1; printf("%d\n", *lptr); lptr -= 1; printf("%d\n", *lptr); return(0); } ================================================ FILE: 62_Cleanup/tests/input146.c ================================================ #include char *str= "qwertyuiop"; int list[]= {3, 5, 7, 9, 11, 13, 15}; int *lptr; int main() { printf("%c\n", *str); str= str + 1; printf("%c\n", *str); str += 1; printf("%c\n", *str); str += 1; printf("%c\n", *str); str -= 1; printf("%c\n", *str); str++; printf("%c\n", *str); str--; printf("%c\n", *str); ++str; printf("%c\n", *str); --str; printf("%c\n\n", *str); lptr= list; printf("%d\n", *lptr); lptr= lptr + 1; printf("%d\n", *lptr); lptr += 1; printf("%d\n", *lptr); lptr += 1; printf("%d\n", *lptr); lptr -= 1; printf("%d\n", *lptr); lptr++ ; printf("%d\n", *lptr); lptr-- ; printf("%d\n", *lptr); ++lptr ; printf("%d\n", *lptr); --lptr ; printf("%d\n", *lptr); return(0); } ================================================ FILE: 62_Cleanup/tests/input147.c ================================================ #include int a; int main() { printf("%d\n", 24 % 9); printf("%d\n", 31 % 
11); a= 24; a %= 9; printf("%d\n",a); a= 31; a %= 11; printf("%d\n",a); return(0); } ================================================ FILE: 62_Cleanup/tests/input148.c ================================================ #include char *argv[]= { "unused", "-fish", "-cat", "owl" }; int argc= 4; int main() { int i; for (i = 1; i < argc; i++) { printf("i is %d\n", i); if (*argv[i] != '-') break; } while (i < argc) { printf("leftover %s\n", argv[i]); i++; } return (0); } ================================================ FILE: 62_Cleanup/tests/input149.c ================================================ #include static int localOffset=0; static int newlocaloffset(int size) { localOffset += (size > 4) ? size : 4; return (-localOffset); } int main() { int i, r; for (i=1; i <= 12; i++) { r= newlocaloffset(i); printf("%d %d\n", i, r); } return(0); } ================================================ FILE: 62_Cleanup/tests/input150.c ================================================ #include #include struct Svalue { char *thing; int vreg; int intval; }; struct IR { int label; int op; struct Svalue dst; struct Svalue src1; struct Svalue src2; int jmplabel; }; struct foo { int a; int b; struct Svalue *c; int d; }; struct IR *fred; struct foo jane; int main() { fred= (struct IR *)malloc(sizeof(struct IR)); fred->label= 1; fred->op= 2; fred->dst.thing= NULL; fred->dst.vreg=3; fred->dst.intval=4; fred->src1.thing= NULL; fred->src1.vreg=5; fred->src1.intval=6; fred->src2.thing= NULL; fred->src2.vreg=7; fred->src2.intval=8; fred->jmplabel= 9; printf("%d %d %d\n", fred->label, fred->op, fred->dst.vreg); printf("%d %d %d\n", fred->dst.intval, fred->src1.vreg, fred->src1.intval); printf("%d %d %d\n\n", fred->src2.vreg, fred->src2.intval, fred->jmplabel); jane.c= (struct Svalue *)malloc(sizeof(struct Svalue)); jane.a= 1; jane.b= 2; jane.d= 4; jane.c->thing= "fish"; jane.c->vreg= 3; jane.c->intval= 5; printf("%d %d %d\n", jane.a, jane.b, jane.c->vreg); printf("%d %d %s\n", jane.d, 
jane.c->intval, jane.c->thing); return(0); } ================================================ FILE: 62_Cleanup/tests/mktests ================================================ #!/bin/sh # Make the output files for each test # Build our compiler if needed if [ ! -f ../cwj ] then (cd ..; make install) fi for i in input*c do if [ ! -f "out.$i" -a ! -f "err.$i" ] then ../cwj -o out $i 2> "err.$i" # If the err file is empty if [ ! -s "err.$i" ] then rm -f "err.$i" cc -o out $i ./out > "out.$i" fi fi rm -f out out.s done ================================================ FILE: 62_Cleanup/tests/out.input001.c ================================================ 36 10 25 ================================================ FILE: 62_Cleanup/tests/out.input002.c ================================================ 17 ================================================ FILE: 62_Cleanup/tests/out.input003.c ================================================ 1 2 3 4 5 ================================================ FILE: 62_Cleanup/tests/out.input004.c ================================================ 1 1 1 1 1 1 1 1 1 ================================================ FILE: 62_Cleanup/tests/out.input005.c ================================================ 6 ================================================ FILE: 62_Cleanup/tests/out.input006.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 62_Cleanup/tests/out.input007.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 62_Cleanup/tests/out.input008.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 62_Cleanup/tests/out.input009.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 62_Cleanup/tests/out.input010.c 
================================================ 20 10 1 2 3 4 5 253 254 255 0 1 ================================================ FILE: 62_Cleanup/tests/out.input011.c ================================================ 10 20 30 1 2 3 4 5 253 254 255 0 1 2 3 1 2 3 4 5 ================================================ FILE: 62_Cleanup/tests/out.input012.c ================================================ 5 ================================================ FILE: 62_Cleanup/tests/out.input013.c ================================================ 23 56 ================================================ FILE: 62_Cleanup/tests/out.input014.c ================================================ 10 20 30 ================================================ FILE: 62_Cleanup/tests/out.input015.c ================================================ 18 18 12 12 ================================================ FILE: 62_Cleanup/tests/out.input016.c ================================================ 12 18 ================================================ FILE: 62_Cleanup/tests/out.input017.c ================================================ 19 12 ================================================ FILE: 62_Cleanup/tests/out.input018.c ================================================ 34 34 ================================================ FILE: 62_Cleanup/tests/out.input018a.c ================================================ 15 16 ================================================ FILE: 62_Cleanup/tests/out.input019.c ================================================ 30 ================================================ FILE: 62_Cleanup/tests/out.input020.c ================================================ 12 ================================================ FILE: 62_Cleanup/tests/out.input021.c ================================================ 10 Hello world ================================================ FILE: 62_Cleanup/tests/out.input022.c ================================================ 12 12 12 13 13 13 13 13 13 
35 35 35 ================================================ FILE: 62_Cleanup/tests/out.input023.c ================================================ -23 100 -2 0 1 0 13 14 Hello world ================================================ FILE: 62_Cleanup/tests/out.input024.c ================================================ 2 59 57 8 7 ================================================ FILE: 62_Cleanup/tests/out.input025.c ================================================ 10 20 30 5 15 25 ================================================ FILE: 62_Cleanup/tests/out.input026.c ================================================ 13 23 34 44 54 64 74 84 94 95 96 ================================================ FILE: 62_Cleanup/tests/out.input027.c ================================================ 1 2 3 4 5 6 7 8 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 ================================================ FILE: 62_Cleanup/tests/out.input028.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 62_Cleanup/tests/out.input029.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 62_Cleanup/tests/out.input030.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input030.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 62_Cleanup/tests/out.input053.c ================================================ Hello world, 23 ================================================ FILE: 62_Cleanup/tests/out.input054.c ================================================ Hello world, 0 Hello world, 1 Hello world, 2 Hello world, 3 Hello 
world, 4 Hello world, 5 Hello world, 6 Hello world, 7 Hello world, 8 Hello world, 9 Hello world, 10 Hello world, 11 Hello world, 12 Hello world, 13 Hello world, 14 Hello world, 15 Hello world, 16 Hello world, 17 Hello world, 18 Hello world, 19 ================================================ FILE: 62_Cleanup/tests/out.input055.c ================================================ Hello world Argument 0 is ./out ================================================ FILE: 62_Cleanup/tests/out.input058.c ================================================ 12 99 4005 4116 4116 ================================================ FILE: 62_Cleanup/tests/out.input062.c ================================================ 65 66 66 The next two depend on the endian of the platform 66 66 67 67 ================================================ FILE: 62_Cleanup/tests/out.input063.c ================================================ 25 ================================================ FILE: 62_Cleanup/tests/out.input067.c ================================================ 5 17 ================================================ FILE: 62_Cleanup/tests/out.input070.c ================================================ 56 ================================================ FILE: 62_Cleanup/tests/out.input071.c ================================================ 0 1 2 3 4 7 8 9 10 11 12 13 14 Done ================================================ FILE: 62_Cleanup/tests/out.input074.c ================================================ 100 5 7 100 100 ================================================ FILE: 62_Cleanup/tests/out.input080.c ================================================ 0 1 1 3 2 5 3 7 4 9 5 11 ================================================ FILE: 62_Cleanup/tests/out.input081.c ================================================ 0 1 1 3 2 5 3 7 4 9 ================================================ FILE: 62_Cleanup/tests/out.input082.c ================================================ 15 >= x > 5 
================================================ FILE: 62_Cleanup/tests/out.input083.c ================================================ 5 < 6 <= 10 5 < 7 <= 10 5 < 8 <= 10 5 < 9 <= 10 5 < 10 <= 10 10 < 11 ================================================ FILE: 62_Cleanup/tests/out.input084.c ================================================ 2 3 f f ================================================ FILE: 62_Cleanup/tests/out.input088.c ================================================ 5 6 ================================================ FILE: 62_Cleanup/tests/out.input089.c ================================================ 23 H Hello world ================================================ FILE: 62_Cleanup/tests/out.input090.c ================================================ 23 100 H Hello world ================================================ FILE: 62_Cleanup/tests/out.input091.c ================================================ 1 2 3 4 5 1 2 3 4 5 0 0 0 0 0 ================================================ FILE: 62_Cleanup/tests/out.input099.c ================================================ EOF = || && | ^ & == != , > <= >= << >> + - * / ++ -- ~ ! void char int long if else while for return struct union enum typedef extern break continue switch case default intlit strlit ; identifier { } ( ) [ ] , . 
-> : ================================================ FILE: 62_Cleanup/tests/out.input100.c ================================================ Hello world 17 20 ================================================ FILE: 62_Cleanup/tests/out.input101.c ================================================ 0xff 0x0 ================================================ FILE: 62_Cleanup/tests/out.input106.c ================================================ 0x0 ================================================ FILE: 62_Cleanup/tests/out.input107.c ================================================ fish cow NULL ================================================ FILE: 62_Cleanup/tests/out.input108.c ================================================ ================================================ FILE: 62_Cleanup/tests/out.input109.c ================================================ 16 ================================================ FILE: 62_Cleanup/tests/out.input110.c ================================================ 18 12 45 5 ================================================ FILE: 62_Cleanup/tests/out.input111.c ================================================ 2029 ================================================ FILE: 62_Cleanup/tests/out.input112.c ================================================ 16 ================================================ FILE: 62_Cleanup/tests/out.input113.c ================================================ fred says hello ================================================ FILE: 62_Cleanup/tests/out.input114.c ================================================ J ================================================ FILE: 62_Cleanup/tests/out.input115.c ================================================ 1 4 8 8 13 64 48 ================================================ FILE: 62_Cleanup/tests/out.input116.c ================================================ 0 1 2 3 4 ================================================ FILE: 62_Cleanup/tests/out.input117.c 
================================================ Hello ================================================ FILE: 62_Cleanup/tests/out.input119.c ================================================ 8 6 ================================================ FILE: 62_Cleanup/tests/out.input120.c ================================================ 9 10 11 12 13 7 8 9 10 11 ================================================ FILE: 62_Cleanup/tests/out.input121.c ================================================ 2 3 4 5 13 14 15 16 1000 1000 ================================================ FILE: 62_Cleanup/tests/out.input122.c ================================================ x 0, y 0, x || y 0, x && y 0 x 0, y 1, x || y 1, x && y 0 x 1, y 0, x || y 1, x && y 0 x 1, y 1, x || y 1, x && y 1 ================================================ FILE: 62_Cleanup/tests/out.input123.c ================================================ 0 infant composite 1 infant composite 2 infant prime 3 infant prime 4 infant composite 5 infant prime 6 infant composite 7 infant prime 8 infant composite 9 infant composite 10 infant composite 11 infant prime 12 infant composite 13 teen prime 14 teen composite 15 teen composite 16 teen composite 17 teen prime 18 teen composite 19 teen prime ================================================ FILE: 62_Cleanup/tests/out.input125.c ================================================ 2008 2008 ================================================ FILE: 62_Cleanup/tests/out.input127.c ================================================ 2008 2008 ================================================ FILE: 62_Cleanup/tests/out.input128.c ================================================ 10 10 15 15 20 20 ================================================ FILE: 62_Cleanup/tests/out.input130.c ================================================ Hello world ================================================ FILE: 62_Cleanup/tests/out.input131.c ================================================ Doing 
nothing... nothing done x is now 100 ================================================ FILE: 62_Cleanup/tests/out.input132.c ================================================ ================================================ FILE: 62_Cleanup/tests/out.input133.c ================================================ OK ================================================ FILE: 62_Cleanup/tests/out.input134.c ================================================ 1st match ================================================ FILE: 62_Cleanup/tests/out.input135.c ================================================ testing x ================================================ FILE: 62_Cleanup/tests/out.input136.c ================================================ -35 ================================================ FILE: 62_Cleanup/tests/out.input137.c ================================================ x is 36 ================================================ FILE: 62_Cleanup/tests/out.input138.c ================================================ 0 0 | 0 0 1 | 0 1 0 | 0 1 1 | 1 0 0 | 0 0 1 | 1 1 0 | 1 1 1 | 1 aptr is NULL or doesn't point at 1 aptr points at 1 ================================================ FILE: 62_Cleanup/tests/out.input139.c ================================================ same apparently ================================================ FILE: 62_Cleanup/tests/out.input140.c ================================================ 0 1 4 9 16 5 H ================================================ FILE: 62_Cleanup/tests/out.input143.c ================================================ One of the three is NULL One of the three is NULL One of the three is NULL All three are non-NULL ================================================ FILE: 62_Cleanup/tests/out.input144.c ================================================ Unable to open fred: Success ================================================ FILE: 62_Cleanup/tests/out.input145.c ================================================ q w e r e 3 5 7 9 7 
================================================ FILE: 62_Cleanup/tests/out.input146.c ================================================ q w e r e r e r e 3 5 7 9 7 9 7 9 7 ================================================ FILE: 62_Cleanup/tests/out.input147.c ================================================ 6 9 6 9 ================================================ FILE: 62_Cleanup/tests/out.input148.c ================================================ i is 1 i is 2 i is 3 leftover owl ================================================ FILE: 62_Cleanup/tests/out.input149.c ================================================ 1 -4 2 -8 3 -12 4 -16 5 -21 6 -27 7 -34 8 -42 9 -51 10 -61 11 -72 12 -84 ================================================ FILE: 62_Cleanup/tests/out.input150.c ================================================ 1 2 3 4 5 6 7 8 9 1 2 3 4 5 fish ================================================ FILE: 62_Cleanup/tests/runtests ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! -f ../cwj ] then (cd ..; make install) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" # Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../cwj -o out $i ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. 
Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../cwj $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.s "trial.$i" done ================================================ FILE: 62_Cleanup/tests/runtests0 ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! -f ../cwj0 ] then (cd ..; make install; make cwj0) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" # Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../cwj0 -o out $i ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../cwj0 $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.s "trial.$i" done ================================================ FILE: 62_Cleanup/tests/runtests0n ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! 
-f ../compn ] then (cd ..; make install; make compn0) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do bn=$(echo $i | cut -d. -f1) if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" # Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../compn0 -o out $i ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../compn0 $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.o out.s ${bn}.s "trial.$i" done ================================================ FILE: 62_Cleanup/tests/runtestsn ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! -f ../compn ] then (cd ..; make install) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do bn=$(echo $i | cut -d. -f1) if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" 
# Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../compn -o out $i ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../compn $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.o out.s ${bn}.s "trial.$i" done ================================================ FILE: 62_Cleanup/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct symtable *ctype, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->type = type; n->ctype = ctype; n->left = left; n->mid = mid; n->right = right; n->sym = sym; n->a_intvalue = intvalue; n->linenum= 0; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, struct symtable *ctype, struct symtable *sym, int intvalue) { return (mkastnode(op, type, ctype, NULL, NULL, NULL, sym, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, int type, struct symtable 
*ctype, struct ASTnode *left, struct symtable *sym, int intvalue) { return (mkastnode(op, type, ctype, left, NULL, NULL, sym, intvalue)); } // Generate and return a new label number // just for AST dumping purposes static int dumpid = 1; static int gendumplabel(void) { return (dumpid++); } // Given an AST tree, print it out and follow the // traversal of the tree that genAST() follows void dumpAST(struct ASTnode *n, int label, int level) { int Lfalse, Lstart, Lend; int i; switch (n->op) { case A_IF: Lfalse = gendumplabel(); for (i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_IF"); if (n->right) { Lend = gendumplabel(); fprintf(stdout, ", end L%d", Lend); } fprintf(stdout, "\n"); dumpAST(n->left, Lfalse, level + 2); dumpAST(n->mid, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); return; case A_WHILE: Lstart = gendumplabel(); for (i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "A_WHILE, start L%d\n", Lstart); Lend = gendumplabel(); dumpAST(n->left, Lend, level + 2); dumpAST(n->right, NOLABEL, level + 2); return; } // Reset level to -2 for A_GLUE if (n->op == A_GLUE) level = -2; // General AST node handling if (n->left) dumpAST(n->left, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); for (i = 0; i < level; i++) fprintf(stdout, " "); switch (n->op) { case A_GLUE: fprintf(stdout, "\n\n"); return; case A_FUNCTION: fprintf(stdout, "A_FUNCTION %s\n", n->sym->name); return; case A_ADD: fprintf(stdout, "A_ADD\n"); return; case A_SUBTRACT: fprintf(stdout, "A_SUBTRACT\n"); return; case A_MULTIPLY: fprintf(stdout, "A_MULTIPLY\n"); return; case A_DIVIDE: fprintf(stdout, "A_DIVIDE\n"); return; case A_EQ: fprintf(stdout, "A_EQ\n"); return; case A_NE: fprintf(stdout, "A_NE\n"); return; case A_LT: fprintf(stdout, "A_LE\n"); return; case A_GT: fprintf(stdout, "A_GT\n"); return; case A_LE: fprintf(stdout, "A_LE\n"); return; case A_GE: fprintf(stdout, "A_GE\n"); return; case A_INTLIT: fprintf(stdout, 
"A_INTLIT %d\n", n->a_intvalue); return; case A_STRLIT: fprintf(stdout, "A_STRLIT rval label L%d\n", n->a_intvalue); return; case A_IDENT: if (n->rvalue) fprintf(stdout, "A_IDENT rval %s\n", n->sym->name); else fprintf(stdout, "A_IDENT %s\n", n->sym->name); return; case A_ASSIGN: fprintf(stdout, "A_ASSIGN\n"); return; case A_WIDEN: fprintf(stdout, "A_WIDEN\n"); return; case A_RETURN: fprintf(stdout, "A_RETURN\n"); return; case A_FUNCCALL: fprintf(stdout, "A_FUNCCALL %s\n", n->sym->name); return; case A_ADDR: fprintf(stdout, "A_ADDR %s\n", n->sym->name); return; case A_DEREF: if (n->rvalue) fprintf(stdout, "A_DEREF rval\n"); else fprintf(stdout, "A_DEREF\n"); return; case A_SCALE: fprintf(stdout, "A_SCALE %d\n", n->a_size); return; case A_PREINC: fprintf(stdout, "A_PREINC %s\n", n->sym->name); return; case A_PREDEC: fprintf(stdout, "A_PREDEC %s\n", n->sym->name); return; case A_POSTINC: fprintf(stdout, "A_POSTINC\n"); return; case A_POSTDEC: fprintf(stdout, "A_POSTDEC\n"); return; case A_NEGATE: fprintf(stdout, "A_NEGATE\n"); return; case A_BREAK: fprintf(stdout, "A_BREAK\n"); return; case A_CONTINUE: fprintf(stdout, "A_CONTINUE\n"); return; case A_CASE: fprintf(stdout, "A_CASE %d\n", n->a_intvalue); return; case A_DEFAULT: fprintf(stdout, "A_DEFAULT\n"); return; case A_SWITCH: fprintf(stdout, "A_SWITCH\n"); return; case A_CAST: fprintf(stdout, "A_CAST %d\n", n->type); return; case A_ASPLUS: fprintf(stdout, "A_ASPLUS\n"); return; case A_ASMINUS: fprintf(stdout, "A_ASMINUS\n"); return; case A_ASSTAR: fprintf(stdout, "A_ASSTAR\n"); return; case A_ASSLASH: fprintf(stdout, "A_ASSLASH\n"); return; case A_TOBOOL: fprintf(stdout, "A_TOBOOL\n"); return; case A_LOGOR: fprintf(stdout, "A_LOGOR\n"); return; case A_LOGAND: fprintf(stdout, "A_LOGAND\n"); return; case A_AND: fprintf(stdout, "A_AND\n"); return; case A_ASMOD: fprintf(stdout, "A_ASMOD\n"); return; case A_INVERT: fprintf(stdout, "A_INVERT\n"); return; case A_LOGNOT: fprintf(stdout, "A_LOGNOT\n"); return; case 
A_LSHIFT: fprintf(stdout, "A_LSHIFT\n"); return; case A_MOD: fprintf(stdout, "A_MOD\n"); return; case A_OR: fprintf(stdout, "A_OR\n"); return; case A_RSHIFT: fprintf(stdout, "A_RSHIFT\n"); return; case A_TERNARY: fprintf(stdout, "A_TERNARY\n"); return; case A_XOR: fprintf(stdout, "A_XOR\n"); return; default: fatald("Unknown dumpAST operator", n->op); } } ================================================ FILE: 62_Cleanup/types.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Types and type handling // Copyright (c) 2019 Warren Toomey, GPL3 // Return true if a type is an int type // of any size, false otherwise int inttype(int type) { return (((type & 0xf) == 0) && (type >= P_CHAR && type <= P_LONG)); } // Return true if a type is of pointer type int ptrtype(int type) { return ((type & 0xf) != 0); } // Given a primitive type, return // the type which is a pointer to it int pointer_to(int type) { if ((type & 0xf) == 0xf) fatald("Unrecognised in pointer_to: type", type); return (type + 1); } // Given a primitive pointer type, return // the type which it points to int value_at(int type) { if ((type & 0xf) == 0x0) fatald("Unrecognised in value_at: type", type); return (type - 1); } // Given a type and a composite type pointer, return // the size of this type in bytes int typesize(int type, struct symtable *ctype) { if (type == P_STRUCT || type == P_UNION) return (ctype->size); return (genprimsize(type)); } // Given an AST tree and a type which we want it to become, // possibly modify the tree by widening or scaling so that // it is compatible with this type. Return the original tree // if no changes occurred, a modified tree, or NULL if the // tree is not compatible with the given type. // If this will be part of a binary operation, the AST op is not zero. 
struct ASTnode *modify_type(struct ASTnode *tree, int rtype, struct symtable *rctype, int op) { int ltype; int lsize, rsize; ltype = tree->type; // For A_LOGOR and A_LOGAND, both types have to be int or pointer types if (op==A_LOGOR || op==A_LOGAND) { if (!inttype(ltype) && !ptrtype(ltype)) return(NULL); if (!inttype(ltype) && !ptrtype(rtype)) return(NULL); return (tree); } // XXX No idea on these yet if (ltype == P_STRUCT || ltype == P_UNION) fatal("Don't know how to do this yet"); if (rtype == P_STRUCT || rtype == P_UNION) fatal("Don't know how to do this yet"); // Compare scalar int types if (inttype(ltype) && inttype(rtype)) { // Both types same, nothing to do if (ltype == rtype) return (tree); // Get the sizes for each type lsize = typesize(ltype, NULL); rsize = typesize(rtype, NULL); // The tree's type size is too big and we can't narrow if (lsize > rsize) return (NULL); // Widen to the right if (rsize > lsize) return (mkastunary(A_WIDEN, rtype, NULL, tree, NULL, 0)); } // For pointers if (ptrtype(ltype) && ptrtype(rtype)) { // We can compare them if (op >= A_EQ && op <= A_GE) return (tree); // A comparison of the same type for a non-binary operation is OK, // or when the left tree is of `void *` type. 
if (op == 0 && (ltype == rtype || ltype == pointer_to(P_VOID))) return (tree); } // We can scale only on add and subtract operations if (op == A_ADD || op == A_SUBTRACT || op == A_ASPLUS || op == A_ASMINUS) { // Left is int type, right is pointer type and the size // of the original type is >1: scale the left if (inttype(ltype) && ptrtype(rtype)) { rsize = genprimsize(value_at(rtype)); if (rsize > 1) return (mkastunary(A_SCALE, rtype, rctype, tree, NULL, rsize)); else return (tree); // Size 1, no need to scale } } // If we get here, the types are not compatible return (NULL); } ================================================ FILE: 63_QBE/Makefile ================================================ # Define the location of the include directory # and the location to install the compiler binary INCDIR=/tmp/include BINDIR=/tmp HSRCS= data.h decl.h defs.h incdir.h SRCS= cg.c decl.c expr.c gen.c main.c misc.c \ opt.c scan.c stmt.c sym.c tree.c types.c cwj: $(SRCS) $(HSRCS) cc -o cwj -g -Wall $(SRCS) incdir.h: echo "#define INCDIR \"$(INCDIR)\"" > incdir.h install: cwj mkdir -p $(INCDIR) rsync -a include/. 
$(INCDIR) cp cwj $(BINDIR) chmod +x $(BINDIR)/cwj clean: rm -f cwj cwj[0-9] *.o *.s *.q out a.out incdir.h test: install tests/runtests (cd tests; chmod +x runtests; ./runtests) # Run the tests, stop on the first failure stoptest: install tests/runtests (cd tests; chmod +x runtests; ./runtests stop) # Run the tests with the # compiler that compiled itself test2: install tests/runtests2 cwj2 (cd tests; chmod +x runtests2; ./runtests2) # Try to do the triple test triple: cwj3 size cwj[23] # Paranoid: quadruple test quad: cwj4 size cwj[234] cwj4: cwj3 $(SRCS) $(HSRCS) ./cwj3 -o cwj4 $(SRCS) cwj3: cwj2 $(SRCS) $(HSRCS) ./cwj2 -o cwj3 $(SRCS) cwj2: install $(SRCS) $(HSRCS) ./cwj -o cwj2 $(SRCS) ================================================ FILE: 63_QBE/Readme.md ================================================ # Part 63: A QBE Backend I left the last version of the compiler back at the end of 2019 with a plan on working on an improved register allocation scheme. Well, several things got in the road of that, including a personal tragedy in the middle of 2020. Just a few weeks ago (in mid-December 2021), I came across a project called [QBE](https://c9x.me/compile/) written by Quentin Carbonneaux. This tool describes an intermediate language which is eminently suitable for a compiler like mine to output. This intermediate language is then translated down to real assembly code. As well, QBE provides and performs: * An SSA-based intermediate language * A linear register allocator with hinting * Copy elimination * Sparse conditional constant propagation * Dead instruction elimination * Registerization of small stack slots * Split spiller and register allocator thanks to the use of the SSA form * Smart spilling heuristic based on loop analysis Essentially, QBE performs many of the back-end register and code optimisations that a compiler should do. 
And, given that Quentin has already written the code, I decided to discard my existing x86-64 code generator and write a code generator that outputs the QBE intermediate language. The result is this version of the `acwj` compiler. It still passes the triple test. However, the assembly code output using the QBE backend is about half the size as the assembly code output by version 62 of the compiler. If you want to try this version of the `acwj` compiler out, then you need to download and compile [QBE](https://c9x.me/compile/), and install the `qbe` executable somewhere on your $PATH. The `acwj` compiler will output intermediate code to a file ending with the `.q` suffix. It then invokes `qbe` to translate this to assembly code, and then continues with the usual steps to assemble and link this resulting code. So, let's begin! ## The QBE Intermediate Language Now, what I *really* would like to do is to explain to you how QBE implements the [static single assignment form](https://en.wikipedia.org/wiki/Static_single_assignment_form), register allocation, dead code elimination etc. However, I don't yet have a good grasp of these things myself. Perhaps someone could go through the QBE source code and explain how it works in the way that I've done with the `acwj` compiler. Instead, I'm going to do a bit of a walk-through on the intermediate language that QBE uses and explain how I'm targetting this language in `cg.c`, my new code generator. ## Temporary Locations, not Registers The QBE intermediate language is an abstract language and not the assembly language of a real CPU. Therefore, it doesn't have to have the same limitations such as a fixed set of registers. Instead, there are an infinite number of *temporary* locations, each with its own name. Temporary locations which are globally visible start with the `$` character, and those which are visible only within a function start with the `%` character. 
Temporary locations do not need to be defined in advance: they can be created on the fly. However, when created, each temporary location is defined to have one of several *types*. These types (and their suffixes) are: * 8-bit bytes (*b*) * 16-bit halfwords (*h*) * 32-bit words (*w*) * 64-bit longs (*l*) QBE also provides **s**ingle precision floats, **d**ouble precision floats and a way to define aggregate types. I don't use these in `acwj`, so I won't discuss them. However, you can read about them in the [reference document for QBE's intermediate language](https://c9x.me/compile/doc/il.html). Local temporary variables can be created by performing the usual actions in an assembly language. Some examples are: ``` %b0 =w copy 5 # Create %b0 as a word temporary and # initialise it with the value 5 %fred =w add %c, %d # Add two temporaries and store in the # %fred word temporary %p =h call ntohs(h %foo) # Call ntohs() with the value of the %foo # temporary and save the halfword result # in the %p temporary ``` ## Mixing Types Each temporary has a type. This means that you do have to do some conversion between types. For example, you can't do this: ``` %x =w copy 5 # int x = 5; %y =l copy %x # long y = x; ``` When you want to widen a value from a smaller type to a larger type, you need to decide if the smaller type was *signed* or *unsigned*. Example: ``` %x =w copy -5 # int x = -5; 32-bit value 0xfffffffb %y =l extsw %x # long y = x; 0xfffffffffffffffb %z =l extuw %x # long z = (unsigned) x; 0x00000000fffffffb ``` On the other hand, you can store a wide value into a smaller temporary location; QBE simply truncates off the most significant bits. 
## Our First Example

So here is a hand-translation of this C program:

```c
#include <stdio.h>

int main() {
  int x= 5;
  long y= x;
  int z= (int)y;
  printf("%d %ld %d\n", x, y, z);
  return(0);
}
```

into the QBE intermediate language:

```
data $L19 = { b "%d %ld %d\n" }
export function w $main() {
@L20
  %x =w copy 5
  %y =l extsw %x
  %z =w copy %y
  call $printf(l $L19, w %x, l %y, w %z)
  ret 0
}
```

There are some things which I haven't described yet. The string literal `"%d %ld %d\n"` is stored as a sequence of **b**ytes in a global temporary called `$L19`. Technically, `$L19` is the address of the first byte of the string.

`main()` is defined as a non-local function (hence the `$`) which returns a 32-bit **w**ord. The `export` keyword indicates that the function is visible outside this file.

The `@L20` is a label, just like a normal assembly label. QBE requires that each function has a starting label.

Finally, the `ret` operation returns from the function. There can only be one `ret` operation in any function. It must be the last line in the function, and any value that it is given must match the function's type.

## acwj's QBE Output

Now let's look at how `acwj` compiles the above C program down to the QBE intermediate language:

```
export function w $main() {
@L20
  %.t1 =w copy 5
  %x =w copy %.t1                # x = 5;
  %.t2 =w copy %x
  %.t3 =l extsw %.t2
  %y =l copy %.t3                # y = x;
  %.t4 =l copy %y
  %.t5 =w copy %.t4
  %z =w copy %.t5                # z = (int) y;
  %.t6 =w copy %z
  %.t7 =l copy %y                # Put the arguments into "registers"
  %.t8 =w copy %x
  %.t9 =l copy $L19              # Call printf(), get result back
  %.t10 =w call $printf(l %.t9, w %.t8, l %.t7, w %.t6, )
  %.t11 =w copy 0
  %.ret =w copy %.t11            # Set the return value to 0
  jmp @L18
@L18
  ret %.ret
}
```

Pretty suboptimal, huh?! `acwj` still believes that variables like `x` and `y` live in memory and that "registers" have to be used to move data between the variables. I'm using temporary names starting with ".t" so there won't be any conflict with actual C variable names.
The `return(0)` gets translated into code that copies the value into the `%.ret` temporary and then jumps to the last line in the function. Obviously, that jump is not required here. So, overall, the code that `acwj` outputs is pretty inefficient. That's why I had wanted to add some optimisations to `acwj`. The nice thing, now, is that QBE does a great job at dead code elimination and code optimisation. Here is the x86-64 translation that QBE performs on the above intermediate code: ```asm .text .globl main main: pushq %rbp movq %rsp, %rbp # Set up the frame & stack pointers movl $5, %ecx # Copy 5 into three arguments movl $5, %edx movl $5, %esi leaq L19(%rip), %rdi # Load the address of the string callq printf # Call printf() movl $0, %eax # Set the main() return value leave ret # and return from main() ``` Lovely. Everything is stored in registers and there's no use of the stack for any of the local variables. ## Locals with Addresses QBE does a great job of keeping as much data in registers as possible. But there are times when this is not possible. Consider when we need to get the address of a variable, e.g. ```c int main() { int x= 5; int *p = &x; printf("%d %lx\n", x, (long)p); return(0); } ``` The `x` variable definitely needs to be stored in memory so that we can obtain the address to store in `p`. To do this, we use the QBE operations that allocate and access memory: ``` export function w $main() { @L20 %x =l alloc8 1 # Allocate 8 bytes for x %.t1 =w copy 5 storew %.t1, %x # Store 5 as a 32-bit value in x %.t2 =l copy %x # Get the address of x %p =l copy %.t2 %.t3 =l copy %p %.t5 =w loadsw %x # Get the 32-bit value at x %.t6 =l copy $L19 %.t7 =w call $printf(l %.t6, w %.t5, l %.t3) %.t8 =w copy 0 %.ret =w copy %.t8 jmp @L18 @L18 ret %.ret } ``` `%x` is now treated as a pointer to eight bytes on the stack. I chose to allocate in groups of eight bytes as this helps to keep 8-byte longs and pointers correctly aligned. 
We now need to use the `store` and `load` operations to write to and read from the memory locations that `%x` points to. The above intermediate code gets translated by QBE to:

```
main:
        pushq %rbp
        movq %rsp, %rbp
        subq $16, %rsp             # Make space on the stack
        movl $5, -8(%rbp)          # Store 5 on the stack as x
        leaq -8(%rbp), %rdx        # Get the address of x
        movl $5, %esi              # Optimisation: use literal 5
        leaq L19(%rip), %rdi       # instead of accessing the stack
        callq printf
        movl $0, %eax
        leave
        ret
```

And that's a pretty optimal translation of `acwj`'s intermediate language!

## QBE and chars

QBE doesn't treat 8-bit bytes or 16-bit halfwords as primary types: there are no byte or halfword temporary locations. Instead, these have to be stored on the stack or on the heap. So, `acwj` compiles this C code:

```c
int main() {
  char x= 65;
  printf("%c\n", x);
  return(0);
}
```

to:

```
export function w $main() {
@L20
  %x =l alloc4 1                # Allocate 4 bytes on the stack
  %.t1 =w copy 65
  storew %.t1, %x               # Store 65 as a 32-bit word
  %.t2 =w loadub %x             # Reload it as an 8-bit unsigned byte
  ...
}
```

## Comparisons and Conditional Jumps

QBE has instructions to compare two temporaries and set a third temporary to 1 if the comparison is true, 0 otherwise. The instructions are:

  * `ceq` for equality
  * `cne` for inequality
  * `csle` for signed lower or equal
  * `cslt` for signed lower
  * `csge` for signed greater or equal
  * `csgt` for signed greater
  * `cule` for unsigned lower or equal
  * `cult` for unsigned lower
  * `cuge` for unsigned greater or equal
  * `cugt` for unsigned greater

followed by the type letter of the two arguments. So, the C code:

```c
  int x= 5;
  int y= 6;
  int z= x>y;
```

can be compiled down to the intermediate code:

```
  %x =w copy 5
  %y =w copy 6
  %z =w csgtw %x, %y
```

QBE has only one conditional jump instruction: `jnz`. When the named temporary location is non-zero, `jnz` jumps to the first label. Otherwise it jumps to the second label. There have to be two labels for `jnz`.
Using this, we can translate this C code:

```c
  if (5>6)
    z= 100;
  else
    z= 200;
```

to:

```
@L19
  %.t1 =w csgtw 5, 6               # Compare 5>6, store result in %.t1
  jnz %.t1, @Ltrue, @Lfalse        # Jump to @Ltrue if true, @Lfalse otherwise
@Ltrue
  %z =w copy 100                   # Set z to 100, then skip the else code
  jmp @L18                         # using the absolute jump instruction, jmp
@Lfalse
  %z =w copy 200                   # Set z to 200
@L18
  ...
```

Using the comparison instructions and `jnz`, we can implement IF, FOR and WHILE constructs.

## Structs and Arrays

These are pretty straightforward. To access a field in a struct, take the base address and add on the offset of the field. For array elements, we need to scale the element's index by the size of each element. So this C code:

```c
struct foo {
  int field1;
  int field2;
} x;

int main() {
  x.field2= 45;
  return(0);
}
```

is compiled by `acwj` to:

```
export data $x = align 8 { l 0 }
export function w $main() {
@L19
  %.t1 =w copy 45
  %.t2 =l copy $x                  # Get the base address of x
  %.t3 =l copy 4
  %.t2 =l add %.t2, %.t3           # Add 4 to it
  storew %.t1, %.t2                # Store 45 at this address
  ...
}
```

## Comparing the Old and QBE Code Sizes

QBE provides an easier target for the compiler writer than the underlying machine's assembly code. QBE also optimises the assembly code that it creates from the intermediate language. And QBE has at least two machine targets: x86-64 and ARM-64, which makes the front-end compiler a portable one.

Let's use the old and new `acwj` compilers to compile each C file in the compiler itself.
We can then compare the code size in bytes that gets produced by each version: ``` Version 62 QBE File ----------------------------- 18079 6961 cg.o 14200 7440 decl.o 11735 4815 expr.o 10063 4349 gen.o 4965 2571 main.o 1492 522 misc.o 1248 424 opt.o 9466 4495 scan.o 6531 2888 stmt.o 7770 3611 sym.o 3617 1711 tree.o 2473 1777 types.o 106964 44257 Self-compiled cwj binary ``` ## Summary I'm very glad that I actually targetted a real machine when I wrote `acwj` because it forced me to cover such topics as register allocation, argument passing to functions, alignment of values etc. But the quality of assembly code that `acwj` produced was pretty terrible. And now, I'm glad that I found a way to produce high-quality assembly output by using the [QBE intermediate language](https://c9x.me/compile/doc/il.html) for the front-end of the compiler. ## What's Next I don't know if there's a next after this. The compiler passes all the tests, it compiles itself, and the code that it produces is now pretty good. I would like to learn about the concepts that QBE embodies, e.g. SSA and register allocation. So, perhaps I'll go off and do some research and write that up. [Next step](../64_6809_Target/Readme.md) ================================================ FILE: 63_QBE/cg.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Code generator for x86-64 using the QBE intermediate language. // Copyright (c) 2019 Warren Toomey, GPL3 // Switch to the text segment void cgtextseg() { } // Switch to the data segment void cgdataseg() { } // Given a scalar type value, return the // character that matches the QBE type. // Because chars are stored on the stack, // we can return 'w' for P_CHAR. 
char cgqbetype(int type) {
  char qbetype = 0;		// Stays 0 if fatald() ever returns

  // Every pointer is a QBE long
  if (ptrtype(type))
    return ('l');

  switch (type) {
    case P_VOID:
      qbetype = ' ';
      break;
    case P_CHAR:
    case P_INT:
      qbetype = 'w';
      break;
    case P_LONG:
      qbetype = 'l';
      break;
    default:
      fatald("Bad type in cgqbetype:", type);
  }
  return (qbetype);
}

// Given a scalar type value, return the
// size of the QBE type in bytes.
int cgprimsize(int type) {
  int bytes = 0;		// Stays 0 if fatald() ever returns

  // Every pointer occupies eight bytes
  if (ptrtype(type))
    return (8);

  switch (type) {
    case P_CHAR:
      bytes = 1;
      break;
    case P_INT:
      bytes = 4;
      break;
    case P_LONG:
      bytes = 8;
      break;
    default:
      fatald("Bad type in cgprimsize:", type);
  }
  return (bytes);
}

// Given a scalar type, an existing memory offset
// (which hasn't been allocated to anything yet)
// and a direction (1 is up, -1 is down), calculate
// and return a suitably aligned memory offset
// for this scalar type. This could be the original
// offset, or it could be above/below the original
int cgalign(int type, int offset, int direction) {
  int mask;

  // We don't need to do this on x86-64, but let's
  // align chars on any offset ...
  if (type == P_CHAR)
    return (offset);

  // ... and align ints/pointers on a 4-byte alignment.
  // The mask is the alignment less one.
  mask = 4 - 1;
  return ((offset + direction * mask) & ~mask);
}

// Allocate a QBE temporary: they are
// simply numbered from 1 upwards
static int nexttemp = 0;
int cgalloctemp(void) {
  nexttemp = nexttemp + 1;
  return (nexttemp);
}

// Print out the assembly preamble
// for one output file: nothing is needed
void cgpreamble(char *filename) {
}

// Nothing to do for the end of a file
void cgpostamble() {
}

// Boolean flag: has there been a switch statement
// in this function yet?
static int used_switch; // Print out a function preamble void cgfuncpreamble(struct symtable *sym) { char *name = sym->name; struct symtable *parm, *locvar; int size, bigsize; int label; // Output the function's name and return type if (sym->class == C_GLOBAL) fprintf(Outfile, "export "); fprintf(Outfile, "function %c $%s(", cgqbetype(sym->type), name); // Output the parameter names and types. For any parameters which // need addresses, change their name as we copy their value below for (parm = sym->member; parm != NULL; parm = parm->next) { if (parm->st_hasaddr == 1) fprintf(Outfile, "%c %%.p%s, ", cgqbetype(parm->type), parm->name); else fprintf(Outfile, "%c %%%s, ", cgqbetype(parm->type), parm->name); } fprintf(Outfile, ") {\n"); // Get a label for the function start label = genlabel(); cglabel(label); // For any parameters which need addresses, allocate memory // on the stack for them. QBE won't let us do alloc1, so // we allocate 4 bytes for chars. Copy the value from the // parameter to the new memory location. // of the parameter for (parm = sym->member; parm != NULL; parm = parm->next) { if (parm->st_hasaddr == 1) { size = cgprimsize(parm->type); bigsize = (size == 1) ? 4 : size; fprintf(Outfile, " %%%s =l alloc%d 1\n", parm->name, bigsize); // Copy to the allocated memory switch (size) { case 1: fprintf(Outfile, " storeb %%.p%s, %%%s\n", parm->name, parm->name); break; case 4: fprintf(Outfile, " storew %%.p%s, %%%s\n", parm->name, parm->name); break; case 8: fprintf(Outfile, " storel %%.p%s, %%%s\n", parm->name, parm->name); } } } // Allocate memory for any local variables that need to be on the // stack. There are two reasons for this. The first is for locals // where their address is used. 
The second is for char variables // We need to do this as QBE can only truncate down to 8 bits // for locations in memory for (locvar = Loclhead; locvar != NULL; locvar = locvar->next) { if (locvar->st_hasaddr == 1) { // Get the total size for all elements (if an array). // Round up to the nearest multiple of 8, to ensure that // pointers are aligned on 8-byte boundaries size = locvar->size * locvar->nelems; size = (size + 7) >> 3; fprintf(Outfile, " %%%s =l alloc8 %d\n", locvar->name, size); } else if (locvar->type == P_CHAR) { locvar->st_hasaddr = 1; fprintf(Outfile, " %%%s =l alloc4 1\n", locvar->name); } } used_switch = 0; // We haven't output the switch handling code yet } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->st_endlabel); // Return a value if the function's type isn't void if (sym->type != P_VOID) fprintf(Outfile, " ret %%.ret\n}\n"); else fprintf(Outfile, " ret\n}\n"); } // Load an integer literal value into a temporary. // Return the number of the temporary. int cgloadint(int value, int type) { // Get a new temporary int t = cgalloctemp(); fprintf(Outfile, " %%.t%d =%c copy %d\n", t, cgqbetype(type), value); return (t); } // Load a value from a variable into a temporary. // Return the number of the temporary. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadvar(struct symtable *sym, int op) { int r, posttemp, offset = 1; char qbeprefix; // Get a new temporary r = cgalloctemp(); // If the symbol is a pointer, use the size // of the type that it points to as any // increment or decrement. If not, it's one. if (ptrtype(sym->type)) offset = typesize(value_at(sym->type), sym->ctype); // Negate the offset for decrements if (op == A_PREDEC || op == A_POSTDEC) offset = -offset; // Get the relevant QBE prefix for the symbol qbeprefix = ((sym->class == C_GLOBAL) || (sym->class == C_STATIC) || (sym->class == C_EXTERN)) ? 
'$' : '%'; // If we have a pre-operation if (op == A_PREINC || op == A_PREDEC) { if (sym->st_hasaddr || qbeprefix == '$') { // Get a new temporary posttemp = cgalloctemp(); switch (sym->size) { case 1: fprintf(Outfile, " %%.t%d =w loadub %c%s\n", posttemp, qbeprefix, sym->name); fprintf(Outfile, " %%.t%d =w add %%.t%d, %d\n", posttemp, posttemp, offset); fprintf(Outfile, " storeb %%.t%d, %c%s\n", posttemp, qbeprefix, sym->name); break; case 4: fprintf(Outfile, " %%.t%d =w loadsw %c%s\n", posttemp, qbeprefix, sym->name); fprintf(Outfile, " %%.t%d =w add %%.t%d, %d\n", posttemp, posttemp, offset); fprintf(Outfile, " storew %%.t%d, %c%s\n", posttemp, qbeprefix, sym->name); break; case 8: fprintf(Outfile, " %%.t%d =l loadl %c%s\n", posttemp, qbeprefix, sym->name); fprintf(Outfile, " %%.t%d =l add %%.t%d, %d\n", posttemp, posttemp, offset); fprintf(Outfile, " storel %%.t%d, %c%s\n", posttemp, qbeprefix, sym->name); } } else fprintf(Outfile, " %c%s =%c add %c%s, %d\n", qbeprefix, sym->name, cgqbetype(sym->type), qbeprefix, sym->name, offset); } // Now load the output temporary with the value if (sym->st_hasaddr || qbeprefix == '$') { switch (sym->size) { case 1: fprintf(Outfile, " %%.t%d =w loadub %c%s\n", r, qbeprefix, sym->name); break; case 4: fprintf(Outfile, " %%.t%d =w loadsw %c%s\n", r, qbeprefix, sym->name); break; case 8: fprintf(Outfile, " %%.t%d =l loadl %c%s\n", r, qbeprefix, sym->name); } } else fprintf(Outfile, " %%.t%d =%c copy %c%s\n", r, cgqbetype(sym->type), qbeprefix, sym->name); // If we have a post-operation if (op == A_POSTINC || op == A_POSTDEC) { if (sym->st_hasaddr || qbeprefix == '$') { // Get a new temporary posttemp = cgalloctemp(); switch (sym->size) { case 1: fprintf(Outfile, " %%.t%d =w loadub %c%s\n", posttemp, qbeprefix, sym->name); fprintf(Outfile, " %%.t%d =w add %%.t%d, %d\n", posttemp, posttemp, offset); fprintf(Outfile, " storeb %%.t%d, %c%s\n", posttemp, qbeprefix, sym->name); break; case 4: fprintf(Outfile, " %%.t%d =w loadsw 
%c%s\n", posttemp, qbeprefix, sym->name); fprintf(Outfile, " %%.t%d =w add %%.t%d, %d\n", posttemp, posttemp, offset); fprintf(Outfile, " storew %%.t%d, %c%s\n", posttemp, qbeprefix, sym->name); break; case 8: fprintf(Outfile, " %%.t%d =l loadl %c%s\n", posttemp, qbeprefix, sym->name); fprintf(Outfile, " %%.t%d =l add %%.t%d, %d\n", posttemp, posttemp, offset); fprintf(Outfile, " storel %%.t%d, %c%s\n", posttemp, qbeprefix, sym->name); } } else fprintf(Outfile, " %c%s =%c add %c%s, %d\n", qbeprefix, sym->name, cgqbetype(sym->type), qbeprefix, sym->name, offset); } // Return the temporary with the value return (r); } // Given the label number of a global string, // load its address into a new temporary int cgloadglobstr(int label) { // Get a new temporary int r = cgalloctemp(); fprintf(Outfile, " %%.t%d =l copy $L%d\n", r, label); return (r); } // Add two temporaries together and return // the number of the temporary with the result int cgadd(int r1, int r2, int type) { fprintf(Outfile, " %%.t%d =%c add %%.t%d, %%.t%d\n", r1, cgqbetype(type), r1, r2); return (r1); } // Subtract the second temporary from the first and // return the number of the temporary with the result int cgsub(int r1, int r2, int type) { fprintf(Outfile, " %%.t%d =%c sub %%.t%d, %%.t%d\n", r1, cgqbetype(type), r1, r2); return (r1); } // Multiply two temporaries together and return // the number of the temporary with the result int cgmul(int r1, int r2, int type) { fprintf(Outfile, " %%.t%d =%c mul %%.t%d, %%.t%d\n", r1, cgqbetype(type), r1, r2); return (r1); } // Divide or modulo the first temporary by the second and // return the number of the temporary with the result int cgdivmod(int r1, int r2, int op, int type) { if (op == A_DIVIDE) fprintf(Outfile, " %%.t%d =%c div %%.t%d, %%.t%d\n", r1, cgqbetype(type), r1, r2); else fprintf(Outfile, " %%.t%d =%c rem %%.t%d, %%.t%d\n", r1, cgqbetype(type), r1, r2); return (r1); } // Bitwise AND two temporaries int cgand(int r1, int r2, int type) { 
fprintf(Outfile, " %%.t%d =%c and %%.t%d, %%.t%d\n", r1, cgqbetype(type), r1, r2); return (r1); } // Bitwise OR two temporaries int cgor(int r1, int r2, int type) { fprintf(Outfile, " %%.t%d =%c or %%.t%d, %%.t%d\n", r1, cgqbetype(type), r1, r2); return (r1); } // Bitwise XOR two temporaries int cgxor(int r1, int r2, int type) { fprintf(Outfile, " %%.t%d =%c xor %%.t%d, %%.t%d\n", r1, cgqbetype(type), r1, r2); return (r1); } // Shift left r1 by r2 bits int cgshl(int r1, int r2, int type) { fprintf(Outfile, " %%.t%d =%c shl %%.t%d, %%.t%d\n", r1, cgqbetype(type), r1, r2); return (r1); } // Shift right r1 by r2 bits int cgshr(int r1, int r2, int type) { fprintf(Outfile, " %%.t%d =%c shr %%.t%d, %%.t%d\n", r1, cgqbetype(type), r1, r2); return (r1); } // Negate a temporary's value int cgnegate(int r, int type) { fprintf(Outfile, " %%.t%d =%c sub 0, %%.t%d\n", r, cgqbetype(type), r); return (r); } // Invert a temporary's value int cginvert(int r, int type) { fprintf(Outfile, " %%.t%d =%c xor %%.t%d, -1\n", r, cgqbetype(type), r); return (r); } // Logically negate a temporary's value int cglognot(int r, int type) { char q = cgqbetype(type); fprintf(Outfile, " %%.t%d =%c ceq%c %%.t%d, 0\n", r, q, q, r); return (r); } // Load a boolean value (only 0 or 1) // into the given temporary void cgloadboolean(int r, int val, int type) { fprintf(Outfile, " %%.t%d =%c copy %d\n", r, cgqbetype(type), val); } // Convert an integer value to a boolean value. 
Jump if // it's an IF, WHILE, LOGAND or LOGOR operation int cgboolean(int r, int op, int label, int type) { // Get a label for the next instruction int label2 = genlabel(); // Get a new temporary for the comparison int r2 = cgalloctemp(); // Convert temporary to boolean value fprintf(Outfile, " %%.t%d =l cne%c %%.t%d, 0\n", r2, cgqbetype(type), r); switch (op) { case A_IF: case A_WHILE: case A_LOGAND: fprintf(Outfile, " jnz %%.t%d, @L%d, @L%d\n", r2, label2, label); break; case A_LOGOR: fprintf(Outfile, " jnz %%.t%d, @L%d, @L%d\n", r2, label, label2); break; } // Output the label for the next instruction cglabel(label2); return (r2); } // Call a function with the given symbol id. // Return the temprary with the result int cgcall(struct symtable *sym, int numargs, int *arglist, int *typelist) { int outr; int i; // Get a new temporary for the return result outr = cgalloctemp(); // Call the function if (sym->type == P_VOID) fprintf(Outfile, " call $%s(", sym->name); else fprintf(Outfile, " %%.t%d =%c call $%s(", outr, cgqbetype(sym->type), sym->name); // Output the list of arguments for (i = numargs - 1; i >= 0; i--) { fprintf(Outfile, "%c %%.t%d, ", cgqbetype(typelist[i]), arglist[i]); } fprintf(Outfile, ")\n"); return (outr); } // Shift a temporary left by a constant. 
As we only // use this for address calculations, extend the // type to be a QBE 'l' if required int cgshlconst(int r, int val, int type) { int r2 = cgalloctemp(); int r3 = cgalloctemp(); if (cgprimsize(type) < 8) { fprintf(Outfile, " %%.t%d =l extsw %%.t%d\n", r2, r); fprintf(Outfile, " %%.t%d =l shl %%.t%d, %d\n", r3, r2, val); } else fprintf(Outfile, " %%.t%d =l shl %%.t%d, %d\n", r3, r, val); return (r3); } // Store a temporary's value into a global variable int cgstorglob(int r, struct symtable *sym) { // We can store to bytes in memory char q = cgqbetype(sym->type); if (sym->type == P_CHAR) q = 'b'; fprintf(Outfile, " store%c %%.t%d, $%s\n", q, r, sym->name); return (r); } // Store a temporary's value into a local variable int cgstorlocal(int r, struct symtable *sym) { // If the variable is on the stack, use store instructions if (sym->st_hasaddr) { fprintf(Outfile, " store%c %%.t%d, %%%s\n", cgqbetype(sym->type), r, sym->name); } else { fprintf(Outfile, " %%%s =%c copy %%.t%d\n", sym->name, cgqbetype(sym->type), r); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size, type; int initvalue; int i; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the variable (or its elements if an array) // and the type of the variable if (node->stype == S_ARRAY) { size = typesize(value_at(node->type), node->ctype); type = value_at(node->type); } else { size = node->size; type = node->type; } // Generate the global identity and the label cgdataseg(); if (node->class == C_GLOBAL) fprintf(Outfile, "export "); if ((node->type == P_STRUCT) || (node->type == P_UNION)) fprintf(Outfile, "data $%s = align 8 { ", node->name); else fprintf(Outfile, "data $%s = align %d { ", node->name, cgprimsize(type)); // Output space for one or more elements for (i = 0; i < node->nelems; i++) { // Get any initial value initvalue = 0; if (node->initlist != NULL) initvalue = node->initlist[i]; // Generate the 
space for this type switch (size) { case 1: fprintf(Outfile, "b %d, ", initvalue); break; case 4: fprintf(Outfile, "w %d, ", initvalue); break; case 8: // Generate the pointer to a string literal. Treat a zero value // as actually zero, not the label L0 if (node->initlist != NULL && type == pointer_to(P_CHAR) && initvalue != 0) fprintf(Outfile, "l $L%d, ", initvalue); else fprintf(Outfile, "l %d, ", initvalue); break; default: fprintf(Outfile, "z %d, ", size); } } fprintf(Outfile, "}\n"); } // Generate a global string and its label. // Don't output the label if append is true. void cgglobstr(int l, char *strvalue, int append) { char *cptr; if (!append) fprintf(Outfile, "data $L%d = { ", l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "b %d, ", *cptr); } } // NUL terminate a global string void cgglobstrend(void) { fprintf(Outfile, " b 0 }\n"); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "ceq", "cne", "cslt", "csgt", "csle", "csge" }; // Compare two temporaries and set if true. int cgcompare_and_set(int ASTop, int r1, int r2, int type) { int r3; char q = cgqbetype(type); // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); // Get a new temporary for the comparison r3 = cgalloctemp(); fprintf(Outfile, " %%.t%d =%c %s%c %%.t%d, %%.t%d\n", r3, q, cmplist[ASTop - A_EQ], q, r1, r2); return (r3); } // Generate a label void cglabel(int l) { fprintf(Outfile, "@L%d\n", l); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, " jmp @L%d\n", l); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "cne", "ceq", "csge", "csle", "csgt", "cslt" }; // Compare two temporaries and jump if false. 
int cgcompare_and_jump(int ASTop, int r1, int r2, int label, int type) { int label2; int r3; char q = cgqbetype(type); // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); // Get a label for the next instruction label2 = genlabel(); // Get a new temporary for the comparison r3 = cgalloctemp(); fprintf(Outfile, " %%.t%d =%c %s%c %%.t%d, %%.t%d\n", r3, q, invcmplist[ASTop - A_EQ], q, r1, r2); fprintf(Outfile, " jnz %%.t%d, @L%d, @L%d\n", r3, label, label2); cglabel(label2); return (NOREG); } // Widen the value in the temporary from the old // to the new type, and return a temporary with // this new value int cgwiden(int r, int oldtype, int newtype) { char oldq = cgqbetype(oldtype); char newq = cgqbetype(newtype); // Get a new temporary int t = cgalloctemp(); switch (oldtype) { case P_CHAR: fprintf(Outfile, " %%.t%d =%c extub %%.t%d\n", t, newq, r); break; default: fprintf(Outfile, " %%.t%d =%c exts%c %%.t%d\n", t, newq, oldq, r); } return (t); } // Generate code to return a value from a function void cgreturn(int reg, struct symtable *sym) { // Only return a value if we have a value to return if (reg != NOREG) fprintf(Outfile, " %%.ret =%c copy %%.t%d\n", cgqbetype(sym->type), reg); cgjump(sym->st_endlabel); } // Generate code to load the address of an // identifier. Return a new temporary int cgaddress(struct symtable *sym) { int r = cgalloctemp(); char qbeprefix = ((sym->class == C_GLOBAL) || (sym->class == C_STATIC) || (sym->class == C_EXTERN)) ? 
'$' : '%'; fprintf(Outfile, " %%.t%d =l copy %c%s\n", r, qbeprefix, sym->name); return (r); } // Dereference a pointer to get the value // it points at into a new temporary int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); // Get temporary for the return result int ret = cgalloctemp(); switch (size) { case 1: fprintf(Outfile, " %%.t%d =w loadub %%.t%d\n", ret, r); break; case 4: fprintf(Outfile, " %%.t%d =w loadsw %%.t%d\n", ret, r); break; case 8: fprintf(Outfile, " %%.t%d =l loadl %%.t%d\n", ret, r); break; default: fatald("Can't cgderef on type:", type); } return (ret); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { // Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, " storeb %%.t%d, %%.t%d\n", r1, r2); break; case 4: fprintf(Outfile, " storew %%.t%d, %%.t%d\n", r1, r2); break; case 8: fprintf(Outfile, " storel %%.t%d, %%.t%d\n", r1, r2); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } // Move value between temporaries void cgmove(int r1, int r2, int type) { fprintf(Outfile, " %%.t%d =%c copy %%.t%d\n", r2, cgqbetype(type), r1); } // Output a gdb directive to say on which // source code line number the following // assembly code came from void cglinenum(int line) { // fprintf(Outfile, "\t.loc 1 %d 0\n", line); } // Change a temporary value from its old // type to a new type. 
int cgcast(int t, int oldtype, int newtype) { // Get temporary for the return result int ret = cgalloctemp(); int oldsize, newsize; char qnew; // If the new type is a pointer if (ptrtype(newtype)) { // Nothing to do if the old type is also a pointer if (ptrtype(oldtype)) return (t); // Otherwise, widen from a primitive type to a pointer return (cgwiden(t, oldtype, newtype)); } // New type is not a pointer // Get the new QBE type // and the type sizes in bytes qnew = cgqbetype(newtype); oldsize = cgprimsize(oldtype); newsize = cgprimsize(newtype); // Nothing to do if the two are the same size if (newsize == oldsize) return (t); // If the new size is smaller, we can copy and QBE will truncate it, // otherwise use the QBE cast operation if (newsize < oldsize) fprintf(Outfile, " %%.t%d =%c copy %%.t%d\n", ret, qnew, t); else fprintf(Outfile, " %%.t%d =%c cast %%.t%d\n", ret, qnew, t); return (ret); } ================================================ FILE: 63_QBE/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Linestart; // True if at start of a line extern_ int Putback; // Character put back by scanner extern_ struct symtable *Functionid; // Symbol ptr of the current function extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ char *Infilename; // Name of file we are parsing extern_ char *Outfilename; // Name of file we opened as Outfile extern_ struct token Token; // Last token scanned extern_ struct token Peektoken; // A look-ahead token extern_ char Text[TEXTLEN + 1]; // Last identifier scanned extern_ int Looplevel; // Depth of nested loops extern_ int Switchlevel; // Depth of nested switches extern char *Tstring[]; // List of token strings // Symbol table lists extern_ struct symtable *Globhead, *Globtail; // Global variables and functions extern_ struct symtable 
*Loclhead, *Locltail; // Local variables extern_ struct symtable *Parmhead, *Parmtail; // Local parameters extern_ struct symtable *Membhead, *Membtail; // Temp list of struct/union members extern_ struct symtable *Structhead, *Structtail; // List of struct types extern_ struct symtable *Unionhead, *Uniontail; // List of union types extern_ struct symtable *Enumhead, *Enumtail; // List of enum types and values extern_ struct symtable *Typehead, *Typetail; // List of typedefs // Command-line flags extern_ int O_dumpAST; // If true, dump the AST trees extern_ int O_dumpsym; // If true, dump the symbol table extern_ int O_keepasm; // If true, keep any assembly files extern_ int O_assemble; // If true, assemble the assembly files extern_ int O_dolink; // If true, link the object files extern_ int O_verbose; // If true, print info on compilation stages ================================================ FILE: 63_QBE/decl.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 static struct symtable *composite_declaration(int type); static int typedef_declaration(struct symtable **ctype); static int type_of_typedef(char *name, struct symtable **ctype); static void enum_declaration(void); // Parse the current token and return a primitive type enum value, // a pointer to any composite type and possibly modify // the class of the type. 
int parse_type(struct symtable **ctype, int *class) { int type = 0, exstatic = 1; // See if the class has been changed to extern or static while (exstatic) { switch (Token.token) { case T_EXTERN: if (*class == C_STATIC) fatal("Illegal to have extern and static at the same time"); *class = C_EXTERN; scan(&Token); break; case T_STATIC: if (*class == C_LOCAL) fatal("Compiler doesn't support static local declarations"); if (*class == C_EXTERN) fatal("Illegal to have extern and static at the same time"); *class = C_STATIC; scan(&Token); break; default: exstatic = 0; } } // Now work on the actual type keyword switch (Token.token) { case T_VOID: type = P_VOID; scan(&Token); break; case T_CHAR: type = P_CHAR; scan(&Token); break; case T_INT: type = P_INT; scan(&Token); break; case T_LONG: type = P_LONG; scan(&Token); break; // For the following, if we have a ';' after the // parsing then there is no type, so return -1. // Example: struct x {int y; int z}; case T_STRUCT: type = P_STRUCT; *ctype = composite_declaration(P_STRUCT); if (Token.token == T_SEMI) type = -1; break; case T_UNION: type = P_UNION; *ctype = composite_declaration(P_UNION); if (Token.token == T_SEMI) type = -1; break; case T_ENUM: type = P_INT; // Enums are really ints enum_declaration(); if (Token.token == T_SEMI) type = -1; break; case T_TYPEDEF: type = typedef_declaration(ctype); if (Token.token == T_SEMI) type = -1; break; case T_IDENT: type = type_of_typedef(Text, ctype); break; default: fatals("Illegal type, token", Token.tokstr); } return (type); } // Given a type parsed by parse_type(), scan in any following // '*' tokens and return the new type int parse_stars(int type) { while (1) { if (Token.token != T_STAR) break; type = pointer_to(type); scan(&Token); } return (type); } // Parse a type which appears inside a cast int parse_cast(struct symtable **ctype) { int type = 0, class = 0; // Get the type inside the parentheses type = parse_stars(parse_type(ctype, &class)); // Do some error checking. 
I'm sure more can be done if (type == P_STRUCT || type == P_UNION || type == P_VOID) fatal("Cannot cast to a struct, union or void type"); return (type); } // Given a type, parse an expression of literals and ensure // that the type of this expression matches the given type. // Parse any type cast that precedes the expression. // If an integer literal, return this value. // If a string literal, return the label number of the string. int parse_literal(int type) { struct ASTnode *tree; // Parse the expression and optimise the resulting AST tree tree = optimise(binexpr(0)); // If there's a cast, get the child and // mark it as having the type from the cast if (tree->op == A_CAST) { tree->left->type = tree->type; tree = tree->left; } // The tree must now have an integer or string literal if (tree->op != A_INTLIT && tree->op != A_STRLIT) fatal("Cannot initialise globals with a general expression"); // If the type is char * and if (type == pointer_to(P_CHAR)) { // We have a string literal, return the label number if (tree->op == A_STRLIT) return (tree->a_intvalue); // We have a zero int literal, so that's a NULL if (tree->op == A_INTLIT && tree->a_intvalue == 0) return (0); } // We only get here with an integer literal. The input type // is an integer type and is wide enough to hold the literal value if (inttype(type) && typesize(type, NULL) >= typesize(tree->type, NULL)) return (tree->a_intvalue); fatal("Type mismatch: literal vs. variable"); return (0); // Keep -Wall happy } // Given a pointer to a symbol that may already exist // return true if this symbol doesn't exist. 
We use // this function to convert externs into globals static int is_new_symbol(struct symtable *sym, int class, int type, struct symtable *ctype) { // There is no existing symbol, thus is new if (sym == NULL) return (1); // global versus extern: if they match that it's not new // and we can convert the class to global if ((sym->class == C_GLOBAL && class == C_EXTERN) || (sym->class == C_EXTERN && class == C_GLOBAL)) { // If the types don't match, there's a problem if (type != sym->type) fatals("Type mismatch between global/extern", sym->name); // Struct/unions, also compare the ctype if (type >= P_STRUCT && ctype != sym->ctype) fatals("Type mismatch between global/extern", sym->name); // If we get to here, the types match, so mark the symbol // as global sym->class = C_GLOBAL; // Return that symbol is not new return (0); } // It must be a duplicate symbol if we get here fatals("Duplicate global variable declaration", sym->name); return (-1); // Keep -Wall happy } // Given the type, name and class of a scalar variable, // parse any initialisation value and allocate storage for it. // Return the variable's symbol table entry. 
static struct symtable *scalar_declaration(char *varname, int type, struct symtable *ctype, int class, struct ASTnode **tree) { struct symtable *sym = NULL; struct ASTnode *varnode, *exprnode; *tree = NULL; // Add this as a known scalar switch (class) { case C_STATIC: case C_EXTERN: case C_GLOBAL: // See if this variable is new or already exists sym = findglob(varname); if (is_new_symbol(sym, class, type, ctype)) sym = addglob(varname, type, ctype, S_VARIABLE, class, 1, 0); break; case C_LOCAL: sym = addlocl(varname, type, ctype, S_VARIABLE, 1); break; case C_PARAM: sym = addparm(varname, type, ctype, S_VARIABLE); break; case C_MEMBER: sym = addmemb(varname, type, ctype, S_VARIABLE, 1); break; } // The variable is being initialised if (Token.token == T_ASSIGN) { // Only possible for a global or local if (class != C_GLOBAL && class != C_LOCAL && class != C_STATIC) fatals("Variable can not be initialised", varname); scan(&Token); // Globals must be assigned a literal value if (class == C_GLOBAL || class == C_STATIC) { // Create one initial value for the variable and // parse this value sym->initlist = (int *) malloc(sizeof(int)); sym->initlist[0] = parse_literal(type); } if (class == C_LOCAL) { // Make an A_IDENT AST node with the variable varnode = mkastleaf(A_IDENT, sym->type, sym->ctype, sym, 0); // Get the expression for the assignment, make into a rvalue exprnode = binexpr(0); exprnode->rvalue = 1; // Ensure the expression's type matches the variable exprnode = modify_type(exprnode, varnode->type, varnode->ctype, 0); if (exprnode == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree *tree = mkastnode(A_ASSIGN, exprnode->type, exprnode->ctype, exprnode, NULL, varnode, NULL, 0); } } // Generate any global space if (class == C_GLOBAL || class == C_STATIC) genglobsym(sym); return (sym); } // Given the type, name and class of an array variable, parse // the size of the array, if any. 
Then parse any initialisation // value and allocate storage for it. // Return the variable's symbol table entry. static struct symtable *array_declaration(char *varname, int type, struct symtable *ctype, int class) { struct symtable *sym = NULL; // New symbol table entry int nelems = -1; // Assume the number of elements won't be given int maxelems; // The maximum number of elements in the init list int *initlist; // The list of initial elements int i = 0, j; // Skip past the '[' scan(&Token); // See if we have an array size if (Token.token != T_RBRACKET) { nelems = parse_literal(P_INT); if (nelems <= 0) fatald("Array size is illegal", nelems); } // Ensure we have a following ']' match(T_RBRACKET, "]"); // Add this as a known array. We treat the // array as a pointer to its elements' type switch (class) { case C_STATIC: case C_EXTERN: case C_GLOBAL: // See if this variable is new or already exists sym = findglob(varname); if (is_new_symbol(sym, class, pointer_to(type), ctype)) sym = addglob(varname, pointer_to(type), ctype, S_ARRAY, class, 0, 0); break; case C_LOCAL: // Add the array to the local symbol table. Mark it as having an address sym = addlocl(varname, pointer_to(type), ctype, S_ARRAY, 0); sym->st_hasaddr = 1; break; default: fatal("Declaration of array parameters is not implemented"); } // Array initialisation if (Token.token == T_ASSIGN) { if (class != C_GLOBAL && class != C_STATIC) fatals("Variable can not be initialised", varname); scan(&Token); // Get the following left curly bracket match(T_LBRACE, "{"); #define TABLE_INCREMENT 10 // If the array already has nelems, allocate that many elements // in the list. Otherwise, start with TABLE_INCREMENT. 
if (nelems != -1) maxelems = nelems; else maxelems = TABLE_INCREMENT; initlist = (int *) malloc(maxelems * sizeof(int)); // Loop getting a new literal value from the list while (1) { // Check we can add the next value, then parse and add it if (nelems != -1 && i == maxelems) fatal("Too many values in initialisation list"); initlist[i++] = parse_literal(type); // Increase the list size if the original size was // not set and we have hit the end of the current list if (nelems == -1 && i == maxelems) { maxelems += TABLE_INCREMENT; initlist = (int *) realloc(initlist, maxelems * sizeof(int)); } // Leave when we hit the right curly bracket if (Token.token == T_RBRACE) { scan(&Token); break; } // Next token must be a comma, then comma(); } // Zero any unused elements in the initlist. // Attach the list to the symbol table entry for (j = i; j < sym->nelems; j++) initlist[j] = 0; if (i > nelems) nelems = i; sym->initlist = initlist; } // Set the size of the array and the number of elements // Only externs can have no elements. if (class != C_EXTERN && nelems <= 0) fatals("Array must have non-zero elements", sym->name); sym->nelems = nelems; sym->size = sym->nelems * typesize(type, ctype); // Generate any global space if (class == C_GLOBAL || class == C_STATIC) genglobsym(sym); return (sym); } // Given a pointer to the new function being declared and // a possibly NULL pointer to the function's previous declaration, // parse a list of parameters and cross-check them against the // previous declaration. 
Return the count of parameters static int param_declaration_list(struct symtable *oldfuncsym, struct symtable *newfuncsym) { int type, paramcnt = 0; struct symtable *ctype; struct symtable *protoptr = NULL; struct ASTnode *unused; // Get the pointer to the first prototype parameter if (oldfuncsym != NULL) protoptr = oldfuncsym->member; // Loop getting any parameters while (Token.token != T_RPAREN) { // If the first token is 'void' if (Token.token == T_VOID) { // Peek at the next token. If a ')', the function // has no parameters, so leave the loop. scan(&Peektoken); if (Peektoken.token == T_RPAREN) { // Move the Peektoken into the Token paramcnt = 0; scan(&Token); break; } } // Get the type of the next parameter type = declaration_list(&ctype, C_PARAM, T_COMMA, T_RPAREN, &unused); if (type == -1) fatal("Bad type in parameter list"); // Ensure the type of this parameter matches the prototype if (protoptr != NULL) { if (type != protoptr->type) fatald("Type doesn't match prototype for parameter", paramcnt + 1); protoptr = protoptr->next; } paramcnt++; // Stop when we hit the right parenthesis if (Token.token == T_RPAREN) break; // We need a comma as separator comma(); } if (oldfuncsym != NULL && paramcnt != oldfuncsym->nelems) fatals("Parameter count mismatch for function", oldfuncsym->name); // Return the count of parameters return (paramcnt); } // // function_declaration: type identifier '(' parameter_list ')' ; // | type identifier '(' parameter_list ')' compound_statement ; // // Parse the declaration of function. static struct symtable *function_declaration(char *funcname, int type, struct symtable *ctype, int class) { struct ASTnode *tree, *finalstmt; struct symtable *oldfuncsym, *newfuncsym = NULL; int endlabel = 0, paramcnt; int linenum = Line; // Text has the identifier's name. If this exists and is a // function, get the id. Otherwise, set oldfuncsym to NULL. 
if ((oldfuncsym = findsymbol(funcname)) != NULL) if (oldfuncsym->stype != S_FUNCTION) oldfuncsym = NULL; // If this is a new function declaration, get a // label-id for the end label, and add the function // to the symbol table, if (oldfuncsym == NULL) { endlabel = genlabel(); // Assumption: functions only return scalar types, so NULL below newfuncsym = addglob(funcname, type, NULL, S_FUNCTION, class, 0, endlabel); } // Scan in the '(', any parameters and the ')'. // Pass in any existing function prototype pointer lparen(); paramcnt = param_declaration_list(oldfuncsym, newfuncsym); rparen(); // If this is a new function declaration, update the // function symbol entry with the number of parameters. // Also copy the parameter list into the function's node. if (newfuncsym) { newfuncsym->nelems = paramcnt; newfuncsym->member = Parmhead; oldfuncsym = newfuncsym; } // Clear out the parameter list Parmhead = Parmtail = NULL; // If the declaration ends in a semicolon, only a prototype. if (Token.token == T_SEMI) return (oldfuncsym); // This is not just a prototype. // Set the Functionid global to the function's symbol pointer Functionid = oldfuncsym; // Get the AST tree for the compound statement and mark // that we have parsed no loops or switches yet Looplevel = 0; Switchlevel = 0; lbrace(); tree = compound_statement(0); rbrace(); // If the function type isn't P_VOID ... if (type != P_VOID) { // Error if no statements in the function if (tree == NULL) fatal("No statements in function with non-void type"); // Check that the last AST operation in the // compound statement was a return statement finalstmt = (tree->op == A_GLUE) ? 
tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); } // Build the A_FUNCTION node which has the function's symbol pointer // and the compound statement sub-tree tree = mkastunary(A_FUNCTION, type, ctype, tree, oldfuncsym, endlabel); tree->linenum = linenum; // Do optimisations on the AST tree tree = optimise(tree); // Dump the AST tree if requested if (O_dumpAST) { dumpAST(tree, NOLABEL, 0); fprintf(stdout, "\n\n"); } // Generate the assembly code for it genAST(tree, NOLABEL, NOLABEL, NOLABEL, 0); // Now free the symbols associated with this function freeloclsyms(); return (oldfuncsym); } // Parse composite type declarations: structs or unions. // Either find an existing struct/union declaration, or build // a struct/union symbol table entry and return its pointer. static struct symtable *composite_declaration(int type) { struct symtable *ctype = NULL; struct symtable *m; struct ASTnode *unused; int offset; int t; // Skip the struct/union keyword scan(&Token); // See if there is a following struct/union name if (Token.token == T_IDENT) { // Find any matching composite type if (type == P_STRUCT) ctype = findstruct(Text); else ctype = findunion(Text); scan(&Token); } // If the next token isn't an LBRACE , this is // the usage of an existing struct/union type. // Return the pointer to the type. if (Token.token != T_LBRACE) { if (ctype == NULL) fatals("unknown struct/union type", Text); return (ctype); } // Ensure this struct/union type hasn't been // previously defined if (ctype) fatals("previously defined struct/union", Text); // Build the composite type and skip the left brace if (type == P_STRUCT) ctype = addstruct(Text); else ctype = addunion(Text); scan(&Token); // Scan in the list of members while (1) { // Get the next member. 
m is used as a dummy t = declaration_list(&m, C_MEMBER, T_SEMI, T_RBRACE, &unused); if (t == -1) fatal("Bad type in member list"); if (Token.token == T_SEMI) scan(&Token); if (Token.token == T_RBRACE) break; } // Attach to the struct type's node rbrace(); if (Membhead == NULL) fatals("No members in struct", ctype->name); ctype->member = Membhead; Membhead = Membtail = NULL; // Set the offset of the initial member // and find the first free byte after it m = ctype->member; m->st_posn = 0; offset = typesize(m->type, m->ctype); // Set the position of each successive member in the composite type // Unions are easy. For structs, align the member and find the next free byte for (m = m->next; m != NULL; m = m->next) { // Set the offset for this member if (type == P_STRUCT) m->st_posn = genalign(m->type, offset, 1); else m->st_posn = 0; // Get the offset of the next free byte after this member offset += typesize(m->type, m->ctype); } // Set the overall size of the composite type ctype->size = offset; return (ctype); } // Parse an enum declaration static void enum_declaration(void) { struct symtable *etype = NULL; char *name = NULL; int intval = 0; // Skip the enum keyword. scan(&Token); // If there's a following enum type name, get a // pointer to any existing enum type node. if (Token.token == T_IDENT) { etype = findenumtype(Text); name = strdup(Text); // As it gets tromped soon scan(&Token); } // If the next token isn't a LBRACE, check // that we have an enum type name, then return if (Token.token != T_LBRACE) { if (etype == NULL) fatals("undeclared enum type:", name); return; } // We do have an LBRACE. Skip it scan(&Token); // If we have an enum type name, ensure that it // hasn't been declared before. 
if (etype != NULL) fatals("enum type redeclared:", etype->name); else // Build an enum type node for this identifier etype = addenum(name, C_ENUMTYPE, 0); // Loop to get all the enum values while (1) { // Ensure we have an identifier // Copy it in case there's an int literal coming up ident(); name = strdup(Text); // Ensure this enum value hasn't been declared before etype = findenumval(name); if (etype != NULL) fatals("enum value redeclared:", Text); // If the next token is an '=', skip it and // get the following int literal if (Token.token == T_ASSIGN) { scan(&Token); if (Token.token != T_INTLIT) fatal("Expected int literal after '='"); intval = Token.intvalue; scan(&Token); } // Build an enum value node for this identifier. // Increment the value for the next enum identifier. etype = addenum(name, C_ENUMVAL, intval++); // Bail out on a right curly bracket, else get a comma if (Token.token == T_RBRACE) break; comma(); } scan(&Token); // Skip over the right curly bracket } // Parse a typedef declaration and return the type // and ctype that it represents static int typedef_declaration(struct symtable **ctype) { int type, class = 0; // Skip the typedef keyword. scan(&Token); // Get the actual type following the keyword type = parse_type(ctype, &class); if (class != 0) fatal("Can't have static/extern in a typedef declaration"); // See if the typedef identifier already exists if (findtypedef(Text) != NULL) fatals("redefinition of typedef", Text); // Get any following '*' tokens type = parse_stars(type); // It doesn't exist so add it to the typedef list addtypedef(Text, type, *ctype); scan(&Token); return (type); } // Given a typedef name, return the type it represents static int type_of_typedef(char *name, struct symtable **ctype) { struct symtable *t; // Look up the typedef in the list t = findtypedef(name); if (t == NULL) fatals("unknown type", name); scan(&Token); *ctype = t->ctype; return (t->type); } // Parse the declaration of a variable or function. 
// The type and any following '*'s have been scanned, and we // have the identifier in the Token variable. // The class argument is the symbol's class. // Return a pointer to the symbol's entry in the symbol table static struct symtable *symbol_declaration(int type, struct symtable *ctype, int class, struct ASTnode **tree) { struct symtable *sym = NULL; char *varname = strdup(Text); // Ensure that we have an identifier. // We copied it above so we can scan more tokens in, e.g. // an assignment expression for a local variable. ident(); // Deal with function declarations if (Token.token == T_LPAREN) { return (function_declaration(varname, type, ctype, class)); } // See if this array or scalar variable has already been declared switch (class) { case C_EXTERN: case C_STATIC: case C_GLOBAL: case C_LOCAL: case C_PARAM: if (findlocl(varname) != NULL) fatals("Duplicate local variable declaration", varname); case C_MEMBER: if (findmember(varname) != NULL) fatals("Duplicate struct/union member declaration", varname); } // Add the array or scalar variable to the symbol table if (Token.token == T_LBRACKET) { sym = array_declaration(varname, type, ctype, class); *tree = NULL; // Local arrays are not initialised } else sym = scalar_declaration(varname, type, ctype, class, tree); return (sym); } // Parse a list of symbols where there is an initial type. // Return the type of the symbols. et1 and et2 are end tokens. int declaration_list(struct symtable **ctype, int class, int et1, int et2, struct ASTnode **gluetree) { int inittype, type; struct symtable *sym; struct ASTnode *tree = NULL; *gluetree = NULL; // Get the initial type. 
If -1, it was // a composite type definition, return this if ((inittype = parse_type(ctype, &class)) == -1) return (inittype); // Now parse the list of symbols while (1) { // See if this symbol is a pointer type = parse_stars(inittype); // Parse this symbol sym = symbol_declaration(type, *ctype, class, &tree); // We parsed a function, there is no list so leave if (sym->stype == S_FUNCTION) { if (class != C_GLOBAL && class != C_STATIC) fatal("Function definition not at global level"); return (type); } // Glue any AST tree from a local declaration // to build a sequence of assignments to perform if (*gluetree == NULL) *gluetree = tree; else *gluetree = mkastnode(A_GLUE, P_NONE, NULL, *gluetree, NULL, tree, NULL, 0); // We are at the end of the list, leave if (Token.token == et1 || Token.token == et2) return (type); // Otherwise, we need a comma as separator comma(); } return (0); // Keep -Wall happy } // Parse one or more global declarations, // either variables, functions or structs void global_declarations(void) { struct symtable *ctype = NULL; struct ASTnode *unused; // Loop parsing one declaration list until the end of file while (Token.token != T_EOF) { declaration_list(&ctype, C_GLOBAL, T_SEMI, T_EOF, &unused); // Skip any separating semicolons if (Token.token == T_SEMI) scan(&Token); } } ================================================ FILE: 63_QBE/decl.h ================================================ // Function prototypes for all compiler files // Copyright (c) 2019 Warren Toomey, GPL3 // scan.c void reject_token(struct token *t); int scan(struct token *t); // tree.c struct ASTnode *mkastnode(int op, int type, struct symtable *ctype, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue); struct ASTnode *mkastleaf(int op, int type, struct symtable *ctype, struct symtable *sym, int intvalue); struct ASTnode *mkastunary(int op, int type, struct symtable *ctype, struct ASTnode *left, struct symtable *sym, int 
intvalue); void dumpAST(struct ASTnode *n, int label, int level); // gen.c int genlabel(void); int genAST(struct ASTnode *n, int iflabel, int looptoplabel, int loopendlabel, int parentASTop); void genpreamble(char *filename); void genpostamble(); void genfreeregs(int keepreg); void genglobsym(struct symtable *node); int genglobstr(char *strvalue, int append); void genglobstrend(void); int genprimsize(int type); int genalign(int type, int offset, int direction); void genreturn(int reg, int id); // cg.c int cgprimsize(int type); int cgalign(int type, int offset, int direction); void cgtextseg(); void cgdataseg(); int cgalloctemp(void); void cgfreeallregs(int keepreg); void cgfreereg(int reg); void cgspillregs(void); void cgpreamble(char *filename); void cgpostamble(); void cgfuncpreamble(struct symtable *sym); void cgfuncpostamble(struct symtable *sym); int cgloadint(int value, int type); int cgloadvar(struct symtable *sym, int op); int cgloadglobstr(int label); int cgadd(int r1, int r2, int type); int cgsub(int r1, int r2, int type); int cgmul(int r1, int r2, int type); int cgdivmod(int r1, int r2, int op, int type); int cgshlconst(int r, int val, int type); int cgcall(struct symtable *sym, int numargs, int *arglist, int *typelist); void cgcopyarg(int r, int argposn); int cgstorglob(int r, struct symtable *sym); int cgstorlocal(int r, struct symtable *sym); void cgglobsym(struct symtable *node); void cgglobstr(int l, char *strvalue, int append); void cgglobstrend(void); int cgcompare_and_set(int ASTop, int r1, int r2, int type); int cgcompare_and_jump(int ASTop, int r1, int r2, int label, int type); void cglabel(int l); void cgjump(int l); int cgwiden(int r, int oldtype, int newtype); void cgreturn(int reg, struct symtable *sym); int cgaddress(struct symtable *sym); int cgderef(int r, int type); int cgstorderef(int r1, int r2, int type); int cgnegate(int r, int type); int cginvert(int r, int type); int cglognot(int r, int type); void cgloadboolean(int r, int val, 
int type); int cgboolean(int r, int op, int label, int type); int cgand(int r1, int r2, int type); int cgor(int r1, int r2, int type); int cgxor(int r1, int r2, int type); int cgshl(int r1, int r2, int type); int cgshr(int r1, int r2, int type); void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel); void cgmove(int r1, int r2, int type); void cglinenum(int line); int cgcast(int t, int oldtype, int newtype); // expr.c struct ASTnode *expression_list(int endtoken); struct ASTnode *binexpr(int ptp); // stmt.c struct ASTnode *compound_statement(int inswitch); // misc.c void match(int t, char *what); void semi(void); void lbrace(void); void rbrace(void); void lparen(void); void rparen(void); void ident(void); void comma(void); void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); // sym.c void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node); struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn); struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn); struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int nelems); struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype); struct symtable *addstruct(char *name); struct symtable *addunion(char *name); struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int nelems); struct symtable *addenum(char *name, int class, int value); struct symtable *addtypedef(char *name, int type, struct symtable *ctype); struct symtable *findglob(char *s); struct symtable *findlocl(char *s); struct symtable *findsymbol(char *s); struct symtable *findmember(char *s); struct symtable *findstruct(char *s); struct symtable *findunion(char *s); struct symtable *findenumtype(char *s); struct symtable 
*findenumval(char *s); struct symtable *findtypedef(char *s); void clear_symtable(void); void freeloclsyms(void); void freestaticsyms(void); void dumptable(struct symtable *head, char *name, int indent); void dumpsymtables(void); // decl.c int parse_type(struct symtable **ctype, int *class); int parse_stars(int type); int parse_cast(struct symtable **ctype); int declaration_list(struct symtable **ctype, int class, int et1, int et2, struct ASTnode **gluetree); void global_declarations(void); // types.c int inttype(int type); int ptrtype(int type); int pointer_to(int type); int value_at(int type); int typesize(int type, struct symtable *ctype); struct ASTnode *modify_type(struct ASTnode *tree, int rtype, struct symtable *rctype, int op); // opt.c struct ASTnode *optimise(struct ASTnode *n); ================================================ FILE: 63_QBE/defs.h ================================================ #include #include #include #include #include "incdir.h" // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 enum { TEXTLEN = 512 // Length of identifiers in input }; // Commands and default filenames #define AOUT "a.out" #define ASCMD "as -g -o " #define QBECMD "qbe -o " #define LDCMD "cc -g -no-pie -o " #define CPPCMD "cpp -nostdinc -isystem " // Token types enum { T_EOF, // Binary operators T_ASSIGN, T_ASPLUS, T_ASMINUS, // 1 T_ASSTAR, T_ASSLASH, T_ASMOD, // 4 T_QUESTION, T_LOGOR, T_LOGAND, // 7 T_OR, T_XOR, T_AMPER, // 10 T_EQ, T_NE, // 13 T_LT, T_GT, T_LE, T_GE, // 15 T_LSHIFT, T_RSHIFT, // 19 T_PLUS, T_MINUS, T_STAR, T_SLASH, T_MOD, // 21 // Other operators T_INC, T_DEC, T_INVERT, T_LOGNOT, // 26 // Type keywords T_VOID, T_CHAR, T_INT, T_LONG, // 30 // Other keywords T_IF, T_ELSE, T_WHILE, T_FOR, T_RETURN, // 34 T_STRUCT, T_UNION, T_ENUM, T_TYPEDEF, // 39 T_EXTERN, T_BREAK, T_CONTINUE, T_SWITCH, // 43 T_CASE, T_DEFAULT, T_SIZEOF, T_STATIC, // 47 // Structural tokens T_INTLIT, T_STRLIT, T_SEMI, T_IDENT, // 51 T_LBRACE, T_RBRACE, 
T_LPAREN, T_RPAREN, // 55 T_LBRACKET, T_RBRACKET, T_COMMA, T_DOT, // 59 T_ARROW, T_COLON // 63 }; // Token structure struct token { int token; // Token type, from the enum list above char *tokstr; // String version of the token int intvalue; // For T_INTLIT, the integer value }; // AST node types. The first few line up // with the related tokens enum { A_ASSIGN = 1, A_ASPLUS, A_ASMINUS, A_ASSTAR, // 1 A_ASSLASH, A_ASMOD, A_TERNARY, A_LOGOR, // 5 A_LOGAND, A_OR, A_XOR, A_AND, A_EQ, A_NE, A_LT, // 9 A_GT, A_LE, A_GE, A_LSHIFT, A_RSHIFT, // 16 A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_MOD, // 21 A_INTLIT, A_STRLIT, A_IDENT, A_GLUE, // 26 A_IF, A_WHILE, A_FUNCTION, A_WIDEN, A_RETURN, // 30 A_FUNCCALL, A_DEREF, A_ADDR, A_SCALE, // 35 A_PREINC, A_PREDEC, A_POSTINC, A_POSTDEC, // 39 A_NEGATE, A_INVERT, A_LOGNOT, A_TOBOOL, A_BREAK, // 43 A_CONTINUE, A_SWITCH, A_CASE, A_DEFAULT, A_CAST // 48 }; // Primitive types. The bottom 4 bits is an integer // value that represents the level of indirection, // e.g. 0= no pointer, 1= pointer, 2= pointer pointer etc. 
enum {
  P_NONE, P_VOID = 16, P_CHAR = 32, P_INT = 48, P_LONG = 64,
  P_STRUCT=80, P_UNION=96
};

// Structural types
enum {
  S_VARIABLE, S_FUNCTION, S_ARRAY
};

// Storage classes
enum {
  C_GLOBAL = 1,                 // Globally visible symbol
  C_LOCAL,                      // Locally visible symbol
  C_PARAM,                      // Locally visible function parameter
  C_EXTERN,                     // External globally visible symbol
  C_STATIC,                     // Static symbol, visible in one file
  C_STRUCT,                     // A struct
  C_UNION,                      // A union
  C_MEMBER,                     // Member of a struct or union
  C_ENUMTYPE,                   // A named enumeration type
  C_ENUMVAL,                    // A named enumeration value
  C_TYPEDEF                     // A named typedef
};

// Symbol table structure.
// Note that st_endlabel and st_hasaddr are macro aliases of the
// single st_posn field: the same int is interpreted differently
// depending on what kind of symbol the entry describes.
struct symtable {
  char *name;                   // Name of a symbol
  int type;                     // Primitive type for the symbol
  struct symtable *ctype;       // If struct/union, ptr to that type
  int stype;                    // Structural type for the symbol
  int class;                    // Storage class for the symbol
  int size;                     // Total size in bytes of this symbol
  int nelems;                   // Functions: # params. Arrays: # elements
#define st_endlabel st_posn     // For functions, the end label
#define st_hasaddr st_posn      // For locals, 1 if any A_ADDR operation
  int st_posn;                  // For struct members, the offset of
                                // the member from the base of the struct
  int *initlist;                // List of initial values
  struct symtable *next;        // Next symbol in one list
  struct symtable *member;      // First member of a function, struct,
};                              // union or enum

// Abstract Syntax Tree structure.
// a_intvalue is a macro alias of a_size, so an A_INTLIT's value and
// an A_SCALE's scale factor live in the same field.
struct ASTnode {
  int op;                       // "Operation" to be performed on this tree
  int type;                     // Type of any expression this tree generates
  struct symtable *ctype;       // If struct/union, ptr to that type
  int rvalue;                   // True if the node is an rvalue
  struct ASTnode *left;         // Left, middle and right child trees
  struct ASTnode *mid;
  struct ASTnode *right;
  struct symtable *sym;         // For many AST nodes, the pointer to
                                // the symbol in the symbol table
#define a_intvalue a_size       // For A_INTLIT, the integer value
  int a_size;                   // For A_SCALE, the size to scale by
  int linenum;                  // Line number from where this node comes
};

enum {
  NOREG = -1,                   // Use NOREG when the AST generation
                                // functions have no temporary to return
  NOLABEL = 0                   // Use NOLABEL when we have no label to
                                // pass to genAST()
};

================================================
FILE: 63_QBE/expr.c
================================================
#include "defs.h"
#include "data.h"
#include "decl.h"

// Parsing of expressions
// Copyright (c) 2019 Warren Toomey, GPL3

// expression_list:
//     | expression
//     | expression ',' expression_list
//     ;

// Parse a list of zero or more comma-separated expressions and
// return an AST composed of A_GLUE nodes with the left-hand child
// being the sub-tree of previous expressions (or NULL) and the right-hand
// child being the next expression. Each A_GLUE node will have size field
// set to the number of expressions in the tree at this point. If no
// expressions are parsed, NULL is returned.
//
// endtoken is the token type that terminates the list; it is left
// as the current token on return (it is tested but never consumed here).
struct ASTnode *expression_list(int endtoken) {
  struct ASTnode *tree = NULL;
  struct ASTnode *child = NULL;
  int exprcount = 0;

  // Loop until the end token
  while (Token.token != endtoken) {

    // Parse the next expression and increment the expression count
    child = binexpr(0);
    exprcount++;

    // Build an A_GLUE AST node with the previous tree as the left child
    // and the new expression as the right child. Store the expression count.
    tree = mkastnode(A_GLUE, P_NONE, NULL, tree, NULL, child, NULL, exprcount);

    // Stop when we reach the end token
    if (Token.token == endtoken)
      break;

    // Must have a ',' at this point
    match(T_COMMA, ",");
  }

  // Return the tree of expressions
  return (tree);
}

// Parse a function call and return its AST.
// The function's name is looked up from the global Text buffer, and
// the current token is expected to be the '(' that opens the argument list.
static struct ASTnode *funccall(void) {
  struct ASTnode *tree;
  struct symtable *funcptr;

  // Check that the identifier has been defined as a function,
  // then make a leaf node for it.
  if ((funcptr = findsymbol(Text)) == NULL || funcptr->stype != S_FUNCTION) {
    fatals("Undeclared function", Text);
  }

  // Get the '('
  lparen();

  // Parse the argument expression list
  tree = expression_list(T_RPAREN);

  // XXX Check type of each argument against the function's prototype

  // Build the function call AST node. Store the
  // function's return type as this node's type.
  // Also record the function's symbol-id
  tree = mkastunary(A_FUNCCALL, funcptr->type, funcptr->ctype, tree, funcptr, 0);

  // Get the ')'
  rparen();
  return (tree);
}

// Parse the index into an array and return an AST tree for it.
// left is the already-parsed expression being indexed; the current
// token on entry is the '[' (it is skipped with a plain scan()).
static struct ASTnode *array_access(struct ASTnode *left) {
  struct ASTnode *right;

  // Check that the sub-tree is a pointer
  if (!ptrtype(left->type))
    fatal("Not an array or pointer");

  // Get the '['
  scan(&Token);

  // Parse the following expression
  right = binexpr(0);

  // Get the ']'
  match(T_RBRACKET, "]");

  // Ensure that this is of int type
  if (!inttype(right->type))
    fatal("Array index is not of integer type");

  // Make the left tree an rvalue
  left->rvalue = 1;

  // Scale the index by the size of the element's type
  right = modify_type(right, left->type, left->ctype, A_ADD);

  // Return an AST tree where the array's base has the offset added to it,
  // and dereference the element. Still an lvalue at this point.
  left = mkastnode(A_ADD, left->type, left->ctype, left, NULL, right, NULL, 0);
  left = mkastunary(A_DEREF, value_at(left->type), left->ctype, left, NULL, 0);
  return (left);
}

// Parse the member reference of a struct or union
// and return an AST tree for it. If withpointer is true,
// the access is through a pointer to the member.
static struct ASTnode *member_access(struct ASTnode *left, int withpointer) { struct ASTnode *right; struct symtable *typeptr; struct symtable *m; // Check that the left AST tree is a pointer to struct or union if (withpointer && left->type != pointer_to(P_STRUCT) && left->type != pointer_to(P_UNION)) fatal("Expression is not a pointer to a struct/union"); // Or, check that the left AST tree is a struct or union. // If so, change it from an A_IDENT to an A_ADDR so that // we get the base address, not the value at this address. if (!withpointer) { if (left->type == P_STRUCT || left->type == P_UNION) left->op = A_ADDR; else fatal("Expression is not a struct/union"); } // Get the details of the composite type typeptr = left->ctype; // Skip the '.' or '->' token and get the member's name scan(&Token); ident(); // Find the matching member's name in the type // Die if we can't find it for (m = typeptr->member; m != NULL; m = m->next) if (!strcmp(m->name, Text)) break; if (m == NULL) fatals("No member found in struct/union: ", Text); // Make the left tree an rvalue left->rvalue = 1; // Build an A_INTLIT node with the offset right = mkastleaf(A_INTLIT, P_LONG, NULL, NULL, m->st_posn); // Add the member's offset to the base of the struct/union // and dereference it. Still an lvalue at this point left = mkastnode(A_ADD, pointer_to(m->type), m->ctype, left, NULL, right, NULL, 0); left = mkastunary(A_DEREF, m->type, m->ctype, left, NULL, 0); return (left); } // Parse a parenthesised expression and // return an AST node representing it. static struct ASTnode *paren_expression(int ptp) { struct ASTnode *n; int type = 0; struct symtable *ctype = NULL; // Beginning of a parenthesised expression, skip the '('. scan(&Token); // If the token after is a type identifier, this is a cast expression switch (Token.token) { case T_IDENT: // We have to see if the identifier matches a typedef. // If not, treat it as an expression. 
if (findtypedef(Text) == NULL) { n = binexpr(0); // ptp is zero as expression inside ( ) break; } case T_VOID: case T_CHAR: case T_INT: case T_LONG: case T_STRUCT: case T_UNION: case T_ENUM: // Get the type inside the parentheses type = parse_cast(&ctype); // Skip the closing ')' and then parse the following expression rparen(); default: n = binexpr(ptp); // Scan in the expression. We pass in ptp // as the cast doesn't change the // expression's precedence } // We now have at least an expression in n, and possibly a non-zero type // in type if there was a cast. Skip the closing ')' if there was no cast. if (type == 0) rparen(); else // Otherwise, make a unary AST node for the cast n = mkastunary(A_CAST, type, ctype, n, NULL, 0); return (n); } // Parse a primary factor and return an // AST node representing it. static struct ASTnode *primary(int ptp) { struct ASTnode *n; struct symtable *enumptr; struct symtable *varptr; int id; int type = 0; int size, class; struct symtable *ctype; switch (Token.token) { case T_STATIC: case T_EXTERN: fatal("Compiler doesn't support static or extern local declarations"); case T_SIZEOF: // Skip the T_SIZEOF and ensure we have a left parenthesis scan(&Token); if (Token.token != T_LPAREN) fatal("Left parenthesis expected after sizeof"); scan(&Token); // Get the type inside the parentheses type = parse_stars(parse_type(&ctype, &class)); // Get the type's size size = typesize(type, ctype); rparen(); // Make a leaf node int literal with the size return (mkastleaf(A_INTLIT, P_INT, NULL, NULL, size)); case T_INTLIT: // For an INTLIT token, make a leaf AST node for it. // Make it a P_CHAR if it's within the P_CHAR range if (Token.intvalue >= 0 && Token.intvalue < 256) n = mkastleaf(A_INTLIT, P_CHAR, NULL, NULL, Token.intvalue); else n = mkastleaf(A_INTLIT, P_INT, NULL, NULL, Token.intvalue); break; case T_STRLIT: // For a STRLIT token, generate the assembly for it. 
id = genglobstr(Text, 0); // For successive STRLIT tokens, append their contents // to this one while (1) { scan(&Peektoken); if (Peektoken.token != T_STRLIT) break; genglobstr(Text, 1); scan(&Token); // To skip it properly } // Now make a leaf AST node for it. id is the string's label. genglobstrend(); n = mkastleaf(A_STRLIT, pointer_to(P_CHAR), NULL, NULL, id); break; case T_IDENT: // If the identifier matches an enum value, // return an A_INTLIT node if ((enumptr = findenumval(Text)) != NULL) { n = mkastleaf(A_INTLIT, P_INT, NULL, NULL, enumptr->st_posn); break; } // See if this identifier exists as a symbol. For arrays, set rvalue to 1. if ((varptr = findsymbol(Text)) == NULL) fatals("Unknown variable or function", Text); switch (varptr->stype) { case S_VARIABLE: n = mkastleaf(A_IDENT, varptr->type, varptr->ctype, varptr, 0); break; case S_ARRAY: n = mkastleaf(A_ADDR, varptr->type, varptr->ctype, varptr, 0); n->rvalue = 1; break; case S_FUNCTION: // Function call, see if the next token is a left parenthesis scan(&Token); if (Token.token != T_LPAREN) fatals("Function name used without parentheses", Text); return (funccall()); default: fatals("Identifier not a scalar or array variable", Text); } break; case T_LPAREN: return (paren_expression(ptp)); default: fatals("Expecting a primary expression, got token", Token.tokstr); } // Scan in the next token and return the leaf node scan(&Token); return (n); } // Parse a postfix expression and return // an AST node representing it. The // identifier is already in Text. 
static struct ASTnode *postfix(int ptp) { struct ASTnode *n; // Get the primary expression n = primary(ptp); // Loop until there are no more postfix operators while (1) { switch (Token.token) { case T_LBRACKET: // An array reference n = array_access(n); break; case T_DOT: // Access into a struct or union n = member_access(n, 0); break; case T_ARROW: // Pointer access into a struct or union n = member_access(n, 1); break; case T_INC: // Post-increment: skip over the token if (n->rvalue == 1) fatal("Cannot ++ on rvalue"); scan(&Token); // Can't do it twice if (n->op == A_POSTINC || n->op == A_POSTDEC) fatal("Cannot ++ and/or -- more than once"); // and change the AST operation n->op = A_POSTINC; break; case T_DEC: // Post-decrement: skip over the token if (n->rvalue == 1) fatal("Cannot -- on rvalue"); scan(&Token); // Can't do it twice if (n->op == A_POSTINC || n->op == A_POSTDEC) fatal("Cannot ++ and/or -- more than once"); // and change the AST operation n->op = A_POSTDEC; break; default: return (n); } } return (NULL); // Keep -Wall happy } // Convert a binary operator token into a binary AST operation. // We rely on a 1:1 mapping from token to AST operation static int binastop(int tokentype) { if (tokentype > T_EOF && tokentype <= T_MOD) return (tokentype); fatals("Syntax error, token", Tstring[tokentype]); return (0); // Keep -Wall happy } // Return true if a token is right-associative, // false otherwise. static int rightassoc(int tokentype) { if (tokentype >= T_ASSIGN && tokentype <= T_ASSLASH) return (1); return (0); } // Operator precedence for each token. 
Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 10, // T_EOF, T_ASSIGN, T_ASPLUS, 10, 10, // T_ASMINUS, T_ASSTAR, 10, 10, // T_ASSLASH, T_ASMOD, 15, // T_QUESTION, 20, 30, // T_LOGOR, T_LOGAND 40, 50, 60, // T_OR, T_XOR, T_AMPER 70, 70, // T_EQ, T_NE 80, 80, 80, 80, // T_LT, T_GT, T_LE, T_GE 90, 90, // T_LSHIFT, T_RSHIFT 100, 100, // T_PLUS, T_MINUS 110, 110, 110 // T_STAR, T_SLASH, T_MOD }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec; if (tokentype > T_MOD) fatals("Token with no precedence in op_precedence:", Tstring[tokentype]); prec = OpPrec[tokentype]; if (prec == 0) fatals("Syntax error, token", Tstring[tokentype]); return (prec); } // prefix_expression: postfix_expression // | '*' prefix_expression // | '&' prefix_expression // | '-' prefix_expression // | '++' prefix_expression // | '--' prefix_expression // ; // Parse a prefix expression and return // a sub-tree representing it. static struct ASTnode *prefix(int ptp) { struct ASTnode *tree = NULL; switch (Token.token) { case T_AMPER: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(ptp); // Ensure that it's an identifier if (tree->op != A_IDENT) fatal("& operator must be followed by an identifier"); // Prevent '&' being performed on an array if (tree->sym->stype == S_ARRAY) fatal("& operator cannot be performed on an array"); // Now change the operator to A_ADDR and the type to // a pointer to the original type. Mark the identifier // as needing a real memory address tree->op = A_ADDR; tree->type = pointer_to(tree->type); tree->sym->st_hasaddr = 1; break; case T_STAR: // Get the next token and parse it // recursively as a prefix expression. 
// Make it an rvalue scan(&Token); tree = prefix(ptp); tree->rvalue = 1; // Ensure the tree's type is a pointer if (!ptrtype(tree->type)) fatal("* operator must be followed by an expression of pointer type"); // Prepend an A_DEREF operation to the tree tree = mkastunary(A_DEREF, value_at(tree->type), tree->ctype, tree, NULL, 0); break; case T_MINUS: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(ptp); // Prepend a A_NEGATE operation to the tree and // make the child an rvalue. Because chars are unsigned, // also widen this if needed to int so that it's signed tree->rvalue = 1; if (tree->type == P_CHAR) tree->type = P_INT; tree = mkastunary(A_NEGATE, tree->type, tree->ctype, tree, NULL, 0); break; case T_INVERT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(ptp); // Prepend a A_INVERT operation to the tree and // make the child an rvalue. tree->rvalue = 1; tree = mkastunary(A_INVERT, tree->type, tree->ctype, tree, NULL, 0); break; case T_LOGNOT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(ptp); // Prepend a A_LOGNOT operation to the tree and // make the child an rvalue. 
tree->rvalue = 1; tree = mkastunary(A_LOGNOT, tree->type, tree->ctype, tree, NULL, 0); break; case T_INC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(ptp); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("++ operator must be followed by an identifier"); // Prepend an A_PREINC operation to the tree tree = mkastunary(A_PREINC, tree->type, tree->ctype, tree, NULL, 0); break; case T_DEC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(ptp); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("-- operator must be followed by an identifier"); // Prepend an A_PREDEC operation to the tree tree = mkastunary(A_PREDEC, tree->type, tree->ctype, tree, NULL, 0); break; default: tree = postfix(ptp); } return (tree); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; struct ASTnode *ltemp, *rtemp; int ASTop; int tokentype; // Get the tree on the left. // Fetch the next token at the same time. 
left = prefix(ptp); // If we hit one of several terminating tokens, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON || tokentype == T_RBRACE) { left->rvalue = 1; return (left); } // While the precedence of this token is more than that of the // previous token precedence, or it's right associative and // equal to the previous token's precedence while ((op_precedence(tokentype) > ptp) || (rightassoc(tokentype) && op_precedence(tokentype) == ptp)) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Determine the operation to be performed on the sub-trees ASTop = binastop(tokentype); switch (ASTop) { case A_TERNARY: // Ensure we have a ':' token, scan in the expression after it match(T_COLON, ":"); ltemp = binexpr(0); // Build and return the AST for this statement. Use the middle // expression's type as the return type. XXX We should also // consider the third expression's type. return (mkastnode (A_TERNARY, right->type, right->ctype, left, right, ltemp, NULL, 0)); case A_ASSIGN: // Assignment // Make the right tree into an rvalue right->rvalue = 1; // Ensure the right's type matches the left right = modify_type(right, left->type, left->ctype, 0); if (right == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree. However, switch // left and right around, so that the right expression's // code will be generated before the left expression ltemp = left; left = right; right = ltemp; break; default: // We are not doing a ternary or assignment, so both trees should // be rvalues. Convert both trees into rvalue if they are lvalue trees left->rvalue = 1; right->rvalue = 1; // Ensure the two types are compatible by trying // to modify each tree to match the other's type. 
ltemp = modify_type(left, right->type, right->ctype, ASTop); rtemp = modify_type(right, left->type, left->ctype, ASTop); if (ltemp == NULL && rtemp == NULL) fatal("Incompatible types in binary expression"); if (ltemp != NULL) left = ltemp; if (rtemp != NULL) right = rtemp; } // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. left = mkastnode(binastop(tokentype), left->type, left->ctype, left, NULL, right, NULL, 0); // Some operators produce an int result regardless of their operands switch (binastop(tokentype)) { case A_LOGOR: case A_LOGAND: case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: left->type = P_INT; } // Update the details of the current token. // If we hit a terminating token, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON || tokentype == T_RBRACE) { left->rvalue = 1; return (left); } } // Return the tree we have when the precedence // is the same or lower left->rvalue = 1; return (left); } ================================================ FILE: 63_QBE/gen.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 // Generate and return a new label number static int labelid = 1; int genlabel(void) { return (labelid++); } static void update_line(struct ASTnode *n) { // Output the line into the assembly if we've // changed the line number in the AST node if (n->linenum != 0 && Line != n->linenum) { Line = n->linenum; cglinenum(Line); } } // Generate the code for an IF statement // and an optional ELSE clause. static int genIF(struct ASTnode *n, int looptoplabel, int loopendlabel) { int Lfalse, Lend = 0; int r, r2; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. 
// When there is no ELSE clause, Lfalse _is_ // the ending label! Lfalse = genlabel(); if (n->right) Lend = genlabel(); // Generate the condition code r = genAST(n->left, Lfalse, NOLABEL, NOLABEL, n->op); // Test to see if the condition is true. If not, jump to the false label r2 = cgloadint(1, P_INT); cgcompare_and_jump(A_EQ, r, r2, Lfalse, P_INT); // Generate the true compound statement genAST(n->mid, NOLABEL, looptoplabel, loopendlabel, n->op); // If there is an optional ELSE clause, // generate the jump to skip to the end if (n->right) { // QBE doesn't like two jump instructions in a row, and // a break at the end of a true IF section causes this. The // solution is to insert a label before the IF jump. cglabel(genlabel()); cgjump(Lend); } // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (n->right) { genAST(n->right, NOLABEL, NOLABEL, loopendlabel, n->op); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement static int genWHILE(struct ASTnode *n) { int Lstart, Lend; int r, r2; // Generate the start and end labels // and output the start label Lstart = genlabel(); Lend = genlabel(); cglabel(Lstart); // Generate the condition code r = genAST(n->left, Lend, Lstart, Lend, n->op); // Test to see if the condition is true. 
If not, jump to the end label r2 = cgloadint(1, P_INT); cgcompare_and_jump(A_EQ, r, r2, Lend, P_INT); // Generate the compound statement for the body genAST(n->right, NOLABEL, Lstart, Lend, n->op); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Generate the code for a SWITCH statement static int genSWITCH(struct ASTnode *n) { int *caselabel; int Lend; int Lcode = 0; int i, reg, r2, type; struct ASTnode *c; // Create an array for the case labels caselabel = (int *) malloc((n->a_intvalue + 1) * sizeof(int)); if (caselabel == NULL) fatal("malloc failed in genSWITCH"); // Because QBE doesn't yet support jump tables, // we simply evaluate the switch condition and // then do successive comparisons and jumps, // just like we were doing successive if/elses // Generate a label for the end of the switch statement. Lend = genlabel(); // Generate labels for each case. Put the end label in // as the entry after all the cases for (i = 0, c = n->right; c != NULL; i++, c = c->right) caselabel[i] = genlabel(); caselabel[i] = Lend; // Output the code to calculate the switch condition reg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, 0); type = n->left->type; // Walk the right-child linked list to // generate the code for each case for (i = 0, c = n->right; c != NULL; i++, c = c->right) { // Generate a label for the actual code that the cases will fall down to if (Lcode == 0) Lcode = genlabel(); // Output the label for this case's test cglabel(caselabel[i]); // Do the comparison and jump, but not if it's the default case if (c->op != A_DEFAULT) { // Jump to the next case if the value doesn't match the case value r2 = cgloadint(c->a_intvalue, type); cgcompare_and_jump(A_EQ, reg, r2, caselabel[i + 1], type); // Otherwise, jump to the code to handle this case cgjump(Lcode); } // Generate the case code. Pass in the end label for the breaks. // If case has no body, we will fall into the following body. 
// Reset Lcode so we will create a new code label on the next loop. if (c->left) { cglabel(Lcode); genAST(c->left, NOLABEL, NOLABEL, Lend, 0); Lcode = 0; } } // Now output the end label. cglabel(Lend); return (NOREG); } // Generate the code for an // A_LOGAND or A_LOGOR operation static int gen_logandor(struct ASTnode *n) { // Generate two labels int Lfalse = genlabel(); int Lend = genlabel(); int reg; int type; // Generate the code for the left expression // followed by the jump to the false label reg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, 0); type = n->left->type; cgboolean(reg, n->op, Lfalse, type); // Generate the code for the right expression // followed by the jump to the false label reg = genAST(n->right, NOLABEL, NOLABEL, NOLABEL, 0); type = n->right->type; cgboolean(reg, n->op, Lfalse, type); // We didn't jump so set the right boolean value if (n->op == A_LOGAND) { cgloadboolean(reg, 1, type); cgjump(Lend); cglabel(Lfalse); cgloadboolean(reg, 0, type); } else { cgloadboolean(reg, 0, type); cgjump(Lend); cglabel(Lfalse); cgloadboolean(reg, 1, type); } cglabel(Lend); return (reg); } // Generate the code to calculate the arguments of a // function call, then call the function with these // arguments. Return the temoprary that holds // the function's return value. static int gen_funccall(struct ASTnode *n) { struct ASTnode *gluetree; int i = 0, numargs = 0; int *arglist = NULL; int *typelist = NULL; // Determine the actual number of arguments for (gluetree = n->left; gluetree != NULL; gluetree = gluetree->left) { numargs++; } // Allocate memory to hold the list of argument temporaries. 
// We need to walk the list of arguments to determine the size for (i = 0, gluetree = n->left; gluetree != NULL; gluetree = gluetree->left) i++; if (i != 0) { arglist = (int *) malloc(i * sizeof(int)); if (arglist == NULL) fatal("malloc failed in gen_funccall"); typelist = (int *) malloc(i * sizeof(int)); if (typelist == NULL) fatal("malloc failed in gen_funccall"); } // If there is a list of arguments, walk this list // from the last argument (right-hand child) to the first. // Also cache the type of each expression for (i = 0, gluetree = n->left; gluetree != NULL; gluetree = gluetree->left) { // Calculate the expression's value arglist[i] = genAST(gluetree->right, NOLABEL, NOLABEL, NOLABEL, gluetree->op); typelist[i++] = gluetree->right->type; } // Call the function and return its result return (cgcall(n->sym, numargs, arglist, typelist)); } // Generate code for a ternary expression static int gen_ternary(struct ASTnode *n) { int Lfalse, Lend; int reg, expreg; int r, r2; // Generate two labels: one for the // false expression, and one for the // end of the overall expression Lfalse = genlabel(); Lend = genlabel(); // Generate the condition code r = genAST(n->left, Lfalse, NOLABEL, NOLABEL, n->op); // Test to see if the condition is true. If not, jump to the false label r2 = cgloadint(1, P_INT); cgcompare_and_jump(A_EQ, r, r2, Lfalse, P_INT); // Get a temporary to hold the result of the two expressions reg = cgalloctemp(); // Generate the true expression and the false label. // Move the expression result into the known temporary. expreg = genAST(n->mid, NOLABEL, NOLABEL, NOLABEL, n->op); cgmove(expreg, reg, n->mid->type); cgjump(Lend); cglabel(Lfalse); // Generate the false expression and the end label. // Move the expression result into the known temporary. 
expreg = genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); cgmove(expreg, reg, n->right->type); cglabel(Lend); return (reg); } // Given an AST, an optional label, and the AST op // of the parent, generate assembly code recursively. // Return the temporary id with the tree's final value. int genAST(struct ASTnode *n, int iflabel, int looptoplabel, int loopendlabel, int parentASTop) { int leftreg = NOREG, rightreg = NOREG; int lefttype = P_VOID, type = P_VOID; struct symtable *leftsym = NULL; // Empty tree, do nothing if (n == NULL) return (NOREG); // Update the line number in the output update_line(n); // We have some specific AST node handling at the top // so that we don't evaluate the child sub-trees immediately switch (n->op) { case A_IF: return (genIF(n, looptoplabel, loopendlabel)); case A_WHILE: return (genWHILE(n)); case A_SWITCH: return (genSWITCH(n)); case A_FUNCCALL: return (gen_funccall(n)); case A_TERNARY: return (gen_ternary(n)); case A_LOGOR: return (gen_logandor(n)); case A_LOGAND: return (gen_logandor(n)); case A_GLUE: // Do each child statement, and free the // temporaries after each child if (n->left != NULL) genAST(n->left, iflabel, looptoplabel, loopendlabel, n->op); if (n->right != NULL) genAST(n->right, iflabel, looptoplabel, loopendlabel, n->op); return (NOREG); case A_FUNCTION: // Generate the function's preamble before the code // in the child sub-tree cgfuncpreamble(n->sym); genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op); cgfuncpostamble(n->sym); return (NOREG); } // General AST node handling below // Get the left and right sub-tree values. 
Also get the type if (n->left) { lefttype = type = n->left->type; leftsym = n->left->sym; leftreg = genAST(n->left, NOLABEL, NOLABEL, NOLABEL, n->op); } if (n->right) { type = n->right->type; rightreg = genAST(n->right, NOLABEL, NOLABEL, NOLABEL, n->op); } switch (n->op) { case A_ADD: return (cgadd(leftreg, rightreg, type)); case A_SUBTRACT: return (cgsub(leftreg, rightreg, type)); case A_MULTIPLY: return (cgmul(leftreg, rightreg, type)); case A_DIVIDE: return (cgdivmod(leftreg, rightreg, A_DIVIDE, type)); case A_MOD: return (cgdivmod(leftreg, rightreg, A_MOD, type)); case A_AND: return (cgand(leftreg, rightreg, type)); case A_OR: return (cgor(leftreg, rightreg, type)); case A_XOR: return (cgxor(leftreg, rightreg, type)); case A_LSHIFT: return (cgshl(leftreg, rightreg, type)); case A_RSHIFT: return (cgshr(leftreg, rightreg, type)); case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: return (cgcompare_and_set(n->op, leftreg, rightreg, lefttype)); case A_INTLIT: return (cgloadint(n->a_intvalue, n->type)); case A_STRLIT: return (cgloadglobstr(n->a_intvalue)); case A_IDENT: // Load our value if we are an rvalue // or we are being dereferenced if (n->rvalue || parentASTop == A_DEREF) { return (cgloadvar(n->sym, n->op)); } else return (NOREG); case A_ASPLUS: case A_ASMINUS: case A_ASSTAR: case A_ASSLASH: case A_ASMOD: case A_ASSIGN: // For the '+=' and friends operators, generate suitable code // and get the temporary with the result. Then take the left child, // make it the right child so that we can fall into the assignment code. 
switch (n->op) { case A_ASPLUS: leftreg = cgadd(leftreg, rightreg, type); n->right = n->left; break; case A_ASMINUS: leftreg = cgsub(leftreg, rightreg, type); n->right = n->left; break; case A_ASSTAR: leftreg = cgmul(leftreg, rightreg, type); n->right = n->left; break; case A_ASSLASH: leftreg = cgdivmod(leftreg, rightreg, A_DIVIDE, type); n->right = n->left; break; case A_ASMOD: leftreg = cgdivmod(leftreg, rightreg, A_MOD, type); n->right = n->left; break; } // Now into the assignment code // Are we assigning to an identifier or through a pointer? switch (n->right->op) { case A_IDENT: if (n->right->sym->class == C_GLOBAL || n->right->sym->class == C_EXTERN || n->right->sym->class == C_STATIC) return (cgstorglob(leftreg, n->right->sym)); else return (cgstorlocal(leftreg, n->right->sym)); case A_DEREF: return (cgstorderef(leftreg, rightreg, n->right->type)); default: fatald("Can't A_ASSIGN in genAST(), op", n->op); } case A_WIDEN: // Widen the child's type to the parent's type return (cgwiden(leftreg, lefttype, n->type)); case A_RETURN: cgreturn(leftreg, Functionid); return (NOREG); case A_ADDR: // If we have a symbol, get its address. 
Otherwise, // the left temporary already has the address because // it's a member access if (n->sym != NULL) return (cgaddress(n->sym)); else return (leftreg); case A_DEREF: // If we are an rvalue, dereference to get the value we point at, // otherwise leave it for A_ASSIGN to store through the pointer if (n->rvalue) return (cgderef(leftreg, lefttype)); else return (leftreg); case A_SCALE: // Small optimisation: use shift if the // scale value is a known power of two switch (n->a_size) { case 2: return (cgshlconst(leftreg, 1, type)); case 4: return (cgshlconst(leftreg, 2, type)); case 8: return (cgshlconst(leftreg, 3, type)); default: // Load a temporary with the size and // multiply the leftreg by this size rightreg = cgloadint(n->a_size, P_INT); return (cgmul(leftreg, rightreg, type)); } case A_POSTINC: case A_POSTDEC: // Load and decrement the variable's value into a temporary // and post increment/decrement it return (cgloadvar(n->sym, n->op)); case A_PREINC: case A_PREDEC: // Load and decrement the variable's value into a temporary // and pre increment/decrement it return (cgloadvar(leftsym, n->op)); case A_NEGATE: return (cgnegate(leftreg, type)); case A_INVERT: return (cginvert(leftreg, type)); case A_LOGNOT: return (cglognot(leftreg, type)); case A_TOBOOL: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, set the temporary // to 0 or 1 based on it's zeroeness or non-zeroeness return (cgboolean(leftreg, parentASTop, iflabel, type)); case A_BREAK: cgjump(loopendlabel); return (NOREG); case A_CONTINUE: cgjump(looptoplabel); return (NOREG); case A_CAST: return (cgcast(leftreg, lefttype, n->type)); default: fatald("Unknown AST operator", n->op); } return (NOREG); // Keep -Wall happy } void genpreamble(char *filename) { cgpreamble(filename); } void genpostamble() { cgpostamble(); } void genglobsym(struct symtable *node) { cgglobsym(node); } // Generate a global string. 
// If append is true, append to // previous genglobstr() call. int genglobstr(char *strvalue, int append) { int l = genlabel(); cgglobstr(l, strvalue, append); return (l); } void genglobstrend(void) { cgglobstrend(); } int genprimsize(int type) { return (cgprimsize(type)); } int genalign(int type, int offset, int direction) { return (cgalign(type, offset, direction)); } ================================================ FILE: 63_QBE/include/ctype.h ================================================ #ifndef _CTYPE_H_ # define _CTYPE_H_ int isalnum(int c); int isalpha(int c); int iscntrl(int c); int isdigit(int c); int isgraph(int c); int islower(int c); int isprint(int c); int ispunct(int c); int isspace(int c); int isupper(int c); int isxdigit(int c); int isascii(int c); int isblank(int c); int toupper(int c); int tolower(int c); #endif // _CTYPE_H_ ================================================ FILE: 63_QBE/include/errno.h ================================================ #ifndef _ERRNO_H_ # define _ERRNO_H_ int * __errno_location(void); #define errno (* __errno_location()) #endif // _ERRNO_H_ ================================================ FILE: 63_QBE/include/fcntl.h ================================================ #ifndef _FCNTL_H_ # define _FCNTL_H_ #define O_RDONLY 00 #define O_WRONLY 01 #define O_RDWR 02 int open(char *pathname, int flags); #endif // _FCNTL_H_ ================================================ FILE: 63_QBE/include/stddef.h ================================================ #ifndef _STDDEF_H_ # define _STDDEF_H_ #ifndef NULL # define NULL (void *)0 #endif typedef long size_t; #endif //_STDDEF_H_ ================================================ FILE: 63_QBE/include/stdio.h ================================================ #ifndef _STDIO_H_ # define _STDIO_H_ #include #ifndef NULL # define NULL (void *)0 #endif #ifndef EOF # define EOF (-1) #endif // This FILE definition will do for now typedef char * FILE; FILE *fopen(char *pathname, char *mode); 
size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream); size_t fwrite(void *ptr, size_t size, size_t nmemb, FILE *stream); int fclose(FILE *stream); int printf(char *format); int fprintf(FILE *stream, char *format); int sprintf(char *str, char *format); int snprintf(char *str, size_t size, char *format); int fgetc(FILE *stream); int fputc(int c, FILE *stream); int fputs(char *s, FILE *stream); int putc(int c, FILE *stream); int putchar(int c); int puts(char *s); FILE *popen(char *command, char *type); int pclose(FILE *stream); int fflush(FILE *stream); extern FILE *stdin; extern FILE *stdout; extern FILE *stderr; #endif // _STDIO_H_ ================================================ FILE: 63_QBE/include/stdlib.h ================================================ #ifndef _STDLIB_H_ # define _STDLIB_H_ void exit(int status); void _Exit(int status); void *malloc(int size); void free(void *ptr); void *calloc(int nmemb, int size); void *realloc(void *ptr, int size); int system(char *command); #endif // _STDLIB_H_ ================================================ FILE: 63_QBE/include/string.h ================================================ #ifndef _STRING_H_ # define _STRING_H_ char *strdup(char *s); char *strchr(char *s, int c); char *strrchr(char *s, int c); int strcmp(char *s1, char *s2); int strncmp(char *s1, char *s2, size_t n); char *strerror(int errnum); #endif // _STRING_H_ ================================================ FILE: 63_QBE/include/unistd.h ================================================ #ifndef _UNISTD_H_ # define _UNISTD_H_ void _exit(int status); int unlink(char *pathname); #endif // _UNISTD_H_ ================================================ FILE: 63_QBE/main.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include #include // Compiler setup and top-level execution // Copyright (c) 2019 Warren Toomey, GPL3 // Given a string with a '.' 
and at least a 1-character suffix // after the '.', change the suffix to be the given character. // Return the new string or NULL if the original string could // not be modified char *alter_suffix(char *str, char suffix) { char *posn; char *newstr; // Clone the string if ((newstr = strdup(str)) == NULL) return (NULL); // Find the '.' if ((posn = strrchr(newstr, '.')) == NULL) return (NULL); // Ensure there is a suffix posn++; if (*posn == '\0') return (NULL); // Change the suffix and NUL-terminate the string *posn = suffix; posn++; *posn = '\0'; return (newstr); } // Given an input filename, compile that file // down to assembly code. Return the new file's name static char *do_compile(char *filename) { char cmd[TEXTLEN]; // Change the input file's suffix to .q Outfilename = alter_suffix(filename, 'q'); if (Outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .c on the end\n", filename); exit(1); } // Generate the pre-processor command snprintf(cmd, TEXTLEN, "%s %s %s", CPPCMD, INCDIR, filename); // Open up the pre-processor pipe if ((Infile = popen(cmd, "r")) == NULL) { fprintf(stderr, "Unable to open %s: %s\n", filename, strerror(errno)); exit(1); } Infilename = filename; // Create the output file if ((Outfile = fopen(Outfilename, "w")) == NULL) { fprintf(stderr, "Unable to create %s: %s\n", Outfilename, strerror(errno)); exit(1); } Line = 1; // Reset the scanner Linestart = 1; Putback = '\n'; clear_symtable(); // Clear the symbol table if (O_verbose) printf("compiling %s\n", filename); scan(&Token); // Get the first token from the input Peektoken.token = 0; // and set there is no lookahead token genpreamble(filename); // Output the preamble global_declarations(); // Parse the global declarations genpostamble(); // Output the postamble fclose(Outfile); // Close the output file // Dump the symbol table if requested if (O_dumpsym) { printf("Symbols for %s\n", filename); dumpsymtables(); fprintf(stdout, "\n\n"); } freestaticsyms(); // Free any static 
symbols in the file return (Outfilename); } // Given an input filename, run QBE on the file and // produce an assembly file. Return the object filename char *do_qbe(char *filename) { char cmd[TEXTLEN]; int err; char *outfilename = alter_suffix(filename, 's'); if (outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .qbe on the end\n", filename); exit(1); } // Build the QBE command and run it snprintf(cmd, TEXTLEN, "%s %s %s", QBECMD, outfilename, filename); if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "QBE translation of %s failed\n", filename); exit(1); } return (outfilename); } // Given an input filename, assemble that file // down to object code. Return the object filename char *do_assemble(char *filename) { char cmd[TEXTLEN]; int err; char *outfilename = alter_suffix(filename, 'o'); if (outfilename == NULL) { fprintf(stderr, "Error: %s has no suffix, try .s on the end\n", filename); exit(1); } // Build the assembly command and run it snprintf(cmd, TEXTLEN, "%s %s %s", ASCMD, outfilename, filename); if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Assembly of %s failed\n", filename); exit(1); } return (outfilename); } // Given a list of object files and an output filename, // link all of the object filenames together. 
void do_link(char *outfilename, char **objlist) { int cnt, size = TEXTLEN; char cmd[TEXTLEN], *cptr; int err; // Start with the linker command and the output file cptr = cmd; cnt = snprintf(cptr, size, "%s %s ", LDCMD, outfilename); cptr += cnt; size -= cnt; // Now append each object file while (*objlist != NULL) { cnt = snprintf(cptr, size, "%s ", *objlist); cptr += cnt; size -= cnt; objlist++; } if (O_verbose) printf("%s\n", cmd); err = system(cmd); if (err != 0) { fprintf(stderr, "Linking failed\n"); exit(1); } } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s [-vcSTM] [-o outfile] file [file ...]\n", prog); fprintf(stderr, " -v give verbose output of the compilation stages\n"); fprintf(stderr, " -c generate object files but don't link them\n"); fprintf(stderr, " -S generate assembly files but don't link them\n"); fprintf(stderr, " -T dump the AST trees for each input file\n"); fprintf(stderr, " -M dump the symbol table for each input file\n"); fprintf(stderr, " -o outfile, produce the outfile executable file\n"); exit(1); } // Main program: check arguments and print a usage // if we don't have an argument. Open up the input // file and call scanfile() to scan the tokens in it. 
enum { MAXOBJ = 100 }; int main(int argc, char **argv) { char *outfilename = AOUT; char *qbefile, *asmfile, *objfile; char *objlist[MAXOBJ]; int i, j, objcnt = 0; // Initialise our variables O_dumpAST = 0; O_dumpsym = 0; O_keepasm = 0; O_assemble = 0; O_verbose = 0; O_dolink = 1; // Scan for command-line options for (i = 1; i < argc; i++) { // No leading '-', stop scanning for options if (*argv[i] != '-') break; // For each option in this argument for (j = 1; (*argv[i] == '-') && argv[i][j]; j++) { switch (argv[i][j]) { case 'o': outfilename = argv[++i]; // Save & skip to next argument break; case 'T': O_dumpAST = 1; break; case 'M': O_dumpsym = 1; break; case 'c': O_assemble = 1; O_keepasm = 0; O_dolink = 0; break; case 'S': O_keepasm = 1; O_assemble = 0; O_dolink = 0; break; case 'v': O_verbose = 1; break; default: usage(argv[0]); } } } // Ensure we have at lease one input file argument if (i >= argc) usage(argv[0]); // Work on each input file in turn while (i < argc) { qbefile = do_compile(argv[i]); // Compile the source file asmfile = do_qbe(qbefile); if (O_dolink || O_assemble) { objfile = do_assemble(asmfile); // Assemble it to object forma if (objcnt == (MAXOBJ - 2)) { fprintf(stderr, "Too many object files for the compiler to handle\n"); exit(1); } objlist[objcnt++] = objfile; // Add the object file's name objlist[objcnt] = NULL; // to the list of object files } if (!O_keepasm) { // Remove the QBE and assembly files unlink(qbefile); // if we don't need to keep them unlink(asmfile); } i++; } // Now link all the object files together if (O_dolink) { do_link(outfilename, objlist); // If we don't need to keep the object // files, then remove them if (!O_assemble) { for (i = 0; objlist[i] != NULL; i++) unlink(objlist[i]); } } return (0); } ================================================ FILE: 63_QBE/misc.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" #include #include // Miscellaneous functions // 
Copyright (c) 2019 Warren Toomey, GPL3 // Ensure that the current token is t, // and fetch the next token. Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifer and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Match a comma and fetch the next token void comma(void) { match(T_COMMA, "comma"); } // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d of %s\n", s, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d of %s\n", s1, s2, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d of %s\n", s, d, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d of %s\n", s, c, Line, Infilename); fclose(Outfile); unlink(Outfilename); exit(1); } ================================================ FILE: 63_QBE/opt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST Tree Optimisation Code // Copyright (c) 2019 Warren Toomey, GPL3 // Fold an AST tree with a binary operator // and two A_INTLIT children. Return either // the original tree or a new leaf node. 
static struct ASTnode *fold2(struct ASTnode *n) { int val, leftval, rightval; // Get the values from each child leftval = n->left->a_intvalue; rightval = n->right->a_intvalue; // Perform some of the binary operations. // For any AST op we can't do, return // the original tree. switch (n->op) { case A_ADD: val = leftval + rightval; break; case A_SUBTRACT: val = leftval - rightval; break; case A_MULTIPLY: val = leftval * rightval; break; case A_DIVIDE: // Don't try to divide by zero. if (rightval == 0) return (n); val = leftval / rightval; break; default: return (n); } // Return a leaf node with the new value return (mkastleaf(A_INTLIT, n->type, NULL, NULL, val)); } // Fold an AST tree with a unary operator // and one INTLIT children. Return either // the original tree or a new leaf node. static struct ASTnode *fold1(struct ASTnode *n) { int val; // Get the child value. Do the // operation if recognised. // Return the new leaf node. val = n->left->a_intvalue; switch (n->op) { case A_WIDEN: break; case A_INVERT: val = ~val; break; case A_LOGNOT: val = !val; break; default: return (n); } // Return a leaf node with the new value return (mkastleaf(A_INTLIT, n->type, NULL, NULL, val)); } // Attempt to do constant folding on // the AST tree with the root node n static struct ASTnode *fold(struct ASTnode *n) { if (n == NULL) return (NULL); // Fold on the left child, then // do the same on the right child n->left = fold(n->left); n->right = fold(n->right); // If both children are A_INTLITs, do a fold2() if (n->left && n->left->op == A_INTLIT) { if (n->right && n->right->op == A_INTLIT) n = fold2(n); else // If only the left is A_INTLIT, do a fold1() n = fold1(n); } // Return the possibly modified tree return (n); } // Optimise an AST tree by // constant folding in all sub-trees struct ASTnode *optimise(struct ASTnode *n) { n = fold(n); return (n); } ================================================ FILE: 63_QBE/scan.c ================================================ #include 
"defs.h" #include "data.h" #include "decl.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { int i; for (i = 0; s[i] != '\0'; i++) if (s[i] == (char) c) return (i); return (-1); } // Get the next character from the input file. static int next(void) { int c, l; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return (c); } c = fgetc(Infile); // Read from input file while (Linestart && c == '#') { // We've hit a pre-processor statement Linestart = 0; // No longer at the start of the line scan(&Token); // Get the line number into l if (Token.token != T_INTLIT) fatals("Expecting pre-processor line number, got:", Text); l = Token.intvalue; scan(&Token); // Get the filename in Text if (Token.token != T_STRLIT) fatals("Expecting pre-processor file name, got:", Text); if (Text[0] != '<') { // If this is a real filename if (strcmp(Text, Infilename)) // and not the one we have now Infilename = strdup(Text); // save it. Then update the line num Line = l; } while ((c = fgetc(Infile)) != '\n'); // Skip to the end of the line c = fgetc(Infile); // and get the next character Linestart = 1; // Now back at the start of the line } Linestart = 0; // No longer at the start of the line if ('\n' == c) { Line++; // Increment line count Linestart = 1; // Now back at the start of the line } return (c); } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. 
static int skip(void) {
  int ch;

  // Keep reading while the character is one of
  // space, tab, newline, carriage return or form feed
  ch = next();
  while (chrpos(" \t\n\r\f", ch) >= 0)
    ch = next();
  return (ch);
}

// Read the hexadecimal digits that follow "\x" in a
// character or string literal and return their value.
// It is a fatal error if there are no digits or if the
// value is larger than 255.
static int hexchar(void) {
  int ch;
  int value = 0;
  int ndigits = 0;

  // Accumulate the value one hex digit at a time
  for (ch = next(); isxdigit(ch); ch = next()) {
    value = value * 16 + chrpos("0123456789abcdef", tolower(ch));
    ndigits++;
  }

  // The character that ended the loop isn't ours
  putback(ch);

  if (ndigits == 0)
    fatal("missing digits after '\\x'");
  if (value > 255)
    fatal("value out of range after '\\x'");
  return (value);
}

// Return the next character from a character
// or string literal, translating any escape
// sequence that begins with a backslash
static int scanch(void) {
  int c;
  int ndigits, value;

  // Anything other than a backslash stands for itself
  c = next();
  if (c != '\\')
    return (c);

  // Work out which escape sequence this is
  c = next();
  switch (c) {
    case 'a':
      return ('\a');
    case 'b':
      return ('\b');
    case 'f':
      return ('\f');
    case 'n':
      return ('\n');
    case 'r':
      return ('\r');
    case 't':
      return ('\t');
    case 'v':
      return ('\v');
    case '\\':
      return ('\\');
    case '"':
      return ('"');
    case '\'':
      return ('\'');

      // An octal constant: gather octal digits into value,
      // counting them in ndigits, and stop at the first
      // non-octal character or when a fourth digit turns up.
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
      ndigits = 0;
      value = 0;
      while (isdigit(c) && c < '8') {
        ndigits++;
        if (ndigits > 3)
          break;
        value = value * 8 + (c - '0');
        c = next();
      }

      putback(c);               // Give back the character we stopped on
      return (value);
    case 'x':
      return (hexchar());
    default:
      fatalc("unknown escape sequence", c);
  }
  return (c);                   // Only here so the function ends in a return
}

// Scan and return an integer literal
// value from the input file.
static int scanint(int c) { int k, val = 0, radix = 10; // Assume the radix is 10, but if it starts with 0 if (c == '0') { // and the next character is 'x', it's radix 16 if ((c = next()) == 'x') { radix = 16; c = next(); } else // Otherwise, it's radix 8 radix = 8; } // Convert each character into an int value while ((k = chrpos("0123456789abcdef", tolower(c))) >= 0) { if (k >= radix) fatalc("invalid digit in integer literal", c); val = val * radix + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan in a string literal from the input file, // and store it in buf[]. Return the length of // the string. static int scanstr(char *buf) { int i, c; // Loop while we have enough buffer space for (i = 0; i < TEXTLEN - 1; i++) { // Get the next char and append to buf // Return when we hit the ending double quote if ((c = scanch()) == '"') { buf[i] = 0; return (i); } buf[i] = (char) c; } // Ran out of buf[] space fatal("String literal too long"); return (0); } // Scan an identifier from the input file and // store it in buf[]. Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { fatal("Identifier too long"); } else if (i < lim - 1) { buf[i++] = (char) c; } c = next(); } // We hit a non-valid character, put it back. // NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. 
static int keyword(char *s) {
  // Each case groups the keywords that share a first letter.
  // A word that matches none of them falls out of the switch
  // and gets the 0 at the bottom.
  switch (*s) {
    case 'b':
      if (!strcmp(s, "break"))
        return (T_BREAK);
      break;
    case 'c':
      if (!strcmp(s, "case"))
        return (T_CASE);
      if (!strcmp(s, "char"))
        return (T_CHAR);
      if (!strcmp(s, "continue"))
        return (T_CONTINUE);
      break;
    case 'd':
      if (!strcmp(s, "default"))
        return (T_DEFAULT);
      break;
    case 'e':
      if (!strcmp(s, "else"))
        return (T_ELSE);
      if (!strcmp(s, "enum"))
        return (T_ENUM);
      if (!strcmp(s, "extern"))
        return (T_EXTERN);
      break;
    case 'f':
      if (!strcmp(s, "for"))
        return (T_FOR);
      break;
    case 'i':
      if (!strcmp(s, "if"))
        return (T_IF);
      if (!strcmp(s, "int"))
        return (T_INT);
      break;
    case 'l':
      if (!strcmp(s, "long"))
        return (T_LONG);
      break;
    case 'r':
      if (!strcmp(s, "return"))
        return (T_RETURN);
      break;
    case 's':
      if (!strcmp(s, "sizeof"))
        return (T_SIZEOF);
      if (!strcmp(s, "static"))
        return (T_STATIC);
      if (!strcmp(s, "struct"))
        return (T_STRUCT);
      if (!strcmp(s, "switch"))
        return (T_SWITCH);
      break;
    case 't':
      if (!strcmp(s, "typedef"))
        return (T_TYPEDEF);
      break;
    case 'u':
      if (!strcmp(s, "union"))
        return (T_UNION);
      break;
    case 'v':
      if (!strcmp(s, "void"))
        return (T_VOID);
      break;
    case 'w':
      if (!strcmp(s, "while"))
        return (T_WHILE);
      break;
  }
  return (0);
}

// List of token strings, for debugging purposes.
// scan() indexes this array with the token number to set
// each token's tokstr, so the entries here presumably have
// to stay in the same order as the T_ token values -- confirm
// against the token enum in defs.h before adding a token.
char *Tstring[] = {
  "EOF", "=", "+=", "-=", "*=", "/=", "%=",
  "?", "||", "&&", "|", "^", "&",
  "==", "!=", "<", ">", "<=", ">=", "<<", ">>",
  "+", "-", "*", "/", "%", "++", "--", "~", "!",
  "void", "char", "int", "long",
  "if", "else", "while", "for", "return",
  "struct", "union", "enum", "typedef",
  "extern", "break", "continue", "switch",
  "case", "default", "sizeof", "static",
  "intlit", "strlit", ";", "identifier",
  "{", "}", "(", ")", "[", "]", ",", ".",
  "->", ":"
};

// Scan and return the next token found in the input.
// Return 1 if token valid, 0 if no tokens left.
int scan(struct token *t) {
  // Fills in t->token, plus t->intvalue for integer and
  // character literals. The text of a string literal,
  // keyword or identifier is left in the global Text buffer.
  // Every token except T_EOF also gets t->tokstr set.
  int c, tokentype;

  // If we have a lookahead token, return this token
  if (Peektoken.token != 0) {
    t->token = Peektoken.token;
    t->tokstr = Peektoken.tokstr;
    t->intvalue = Peektoken.intvalue;
    Peektoken.token = 0;
    return (1);
  }

  // Skip whitespace
  c = skip();

  // Determine the token based on
  // the input character. For the operators that can be
  // more than one character long, read one more character
  // and put it back if it isn't part of the operator.
  switch (c) {
    case EOF:
      t->token = T_EOF;
      return (0);
    case '+':
      if ((c = next()) == '+') {
        t->token = T_INC;
      } else if (c == '=') {
        t->token = T_ASPLUS;
      } else {
        putback(c);
        t->token = T_PLUS;
      }
      break;
    case '-':
      // NOTE(review): a '-' directly followed by a digit is
      // always scanned as a negative literal, so "a-1" gives
      // an identifier and then the literal -1 rather than a
      // subtraction -- confirm the parser copes with this.
      if ((c = next()) == '-') {
        t->token = T_DEC;
      } else if (c == '>') {
        t->token = T_ARROW;
      } else if (c == '=') {
        t->token = T_ASMINUS;
      } else if (isdigit(c)) {  // Negative int literal
        t->intvalue = -scanint(c);
        t->token = T_INTLIT;
      } else {
        putback(c);
        t->token = T_MINUS;
      }
      break;
    case '*':
      if ((c = next()) == '=') {
        t->token = T_ASSTAR;
      } else {
        putback(c);
        t->token = T_STAR;
      }
      break;
    case '/':
      if ((c = next()) == '=') {
        t->token = T_ASSLASH;
      } else {
        putback(c);
        t->token = T_SLASH;
      }
      break;
    case '%':
      if ((c = next()) == '=') {
        t->token = T_ASMOD;
      } else {
        putback(c);
        t->token = T_MOD;
      }
      break;
    case ';':
      t->token = T_SEMI;
      break;
    case '{':
      t->token = T_LBRACE;
      break;
    case '}':
      t->token = T_RBRACE;
      break;
    case '(':
      t->token = T_LPAREN;
      break;
    case ')':
      t->token = T_RPAREN;
      break;
    case '[':
      t->token = T_LBRACKET;
      break;
    case ']':
      t->token = T_RBRACKET;
      break;
    case '~':
      t->token = T_INVERT;
      break;
    case '^':
      t->token = T_XOR;
      break;
    case ',':
      t->token = T_COMMA;
      break;
    case '.':
      t->token = T_DOT;
      break;
    case ':':
      t->token = T_COLON;
      break;
    case '?':
      t->token = T_QUESTION;
      break;
    case '=':
      if ((c = next()) == '=') {
        t->token = T_EQ;
      } else {
        putback(c);
        t->token = T_ASSIGN;
      }
      break;
    case '!':
      if ((c = next()) == '=') {
        t->token = T_NE;
      } else {
        putback(c);
        t->token = T_LOGNOT;
      }
      break;
    case '<':
      if ((c = next()) == '=') {
        t->token = T_LE;
      } else if (c == '<') {
        t->token = T_LSHIFT;
      } else {
        putback(c);
        t->token = T_LT;
      }
      break;
    case '>':
      if ((c = next()) == '=') {
        t->token = T_GE;
      } else if (c == '>') {
        t->token = T_RSHIFT;
      } else {
        putback(c);
        t->token = T_GT;
      }
      break;
    case '&':
      if ((c = next()) == '&') {
        t->token = T_LOGAND;
      } else {
        putback(c);
        t->token = T_AMPER;
      }
      break;
    case '|':
      if ((c = next()) == '|') {
        t->token = T_LOGOR;
      } else {
        putback(c);
        t->token = T_OR;
      }
      break;
    case '\'':
      // If it's a quote, scan in the
      // literal character value and
      // the trailing quote
      t->intvalue = scanch();
      t->token = T_INTLIT;
      if (next() != '\'')
        fatal("Expected '\\'' at end of char literal");
      break;
    case '"':
      // Scan in a literal string
      scanstr(Text);
      t->token = T_STRLIT;
      break;
    default:
      // If it's a digit, scan the
      // literal integer value in
      if (isdigit(c)) {
        t->intvalue = scanint(c);
        t->token = T_INTLIT;
        break;
      } else if (isalpha(c) || '_' == c) {
        // Read in a keyword or identifier
        scanident(c, Text, TEXTLEN);

        // If it's a recognised keyword, return that token
        if ((tokentype = keyword(Text)) != 0) {
          t->token = tokentype;
          break;
        }
        // Not a recognised keyword, so it must be an identifier
        t->token = T_IDENT;
        break;
      }
      // The character isn't part of any recognised token, error
      fatalc("Unrecognised character", c);
  }

  // We found a token
  t->tokstr = Tstring[t->token];
  return (1);
}

================================================
FILE: 63_QBE/stmt.c
================================================
#include "defs.h"
#include "data.h"
#include "decl.h"

// Parsing of statements
// Copyright (c) 2019 Warren Toomey, GPL3

// Prototypes
static struct ASTnode *single_statement(void);

// compound_statement: // empty, i.e.
no statement // | statement // | statement statements // ; // // statement: declaration // | expression_statement // | function_call // | if_statement // | while_statement // | for_statement // | return_statement // ; // if_statement: if_head // | if_head 'else' statement // ; // // if_head: 'if' '(' true_false_expression ')' statement ; // // Parse an IF statement including any // optional ELSE clause and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST->ctype, condAST, NULL, 0); rparen(); // Get the AST for the statement trueAST = single_statement(); // If we have an 'else', skip it // and get the AST for the statement if (Token.token == T_ELSE) { scan(&Token); falseAST = single_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, P_NONE, NULL, condAST, trueAST, falseAST, NULL, 0)); } // while_statement: 'while' '(' true_false_expression ')' statement ; // // Parse a WHILE statement and return its AST static struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Force a // non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST->ctype, condAST, NULL, 0); rparen(); // Get the AST for the statement. 
// Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Build and return the AST for this statement return (mkastnode(A_WHILE, P_NONE, NULL, condAST, NULL, bodyAST, NULL, 0)); } // for_statement: 'for' '(' expression_list ';' // true_false_expression ';' // expression_list ')' statement ; // // Parse a FOR statement and return its AST static struct ASTnode *for_statement(void) { struct ASTnode *condAST, *bodyAST; struct ASTnode *preopAST, *postopAST; struct ASTnode *tree; // Ensure we have 'for' '(' match(T_FOR, "for"); lparen(); // Get the pre_op expression and the ';' preopAST = expression_list(T_SEMI); semi(); // Get the condition and the ';'. // Force a non-comparison to be boolean // the tree's operation is a comparison. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST->ctype, condAST, NULL, 0); semi(); // Get the post_op expression and the ')' postopAST = expression_list(T_RPAREN); rparen(); // Get the statement which is the body // Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Glue the statement and the postop tree tree = mkastnode(A_GLUE, P_NONE, NULL, bodyAST, NULL, postopAST, NULL, 0); // Make a WHILE loop with the condition and this new body tree = mkastnode(A_WHILE, P_NONE, NULL, condAST, NULL, tree, NULL, 0); // And glue the preop tree to the A_WHILE tree return (mkastnode(A_GLUE, P_NONE, NULL, preopAST, NULL, tree, NULL, 0)); } // return_statement: 'return' '(' expression ')' ; // // Parse a return statement and return its AST static struct ASTnode *return_statement(void) { struct ASTnode *tree = NULL; // Ensure we have 'return' match(T_RETURN, "return"); // See if we have a return value if (Token.token == T_LPAREN) { // Can't return a value if function returns P_VOID if (Functionid->type == P_VOID) fatal("Can't return from a void function"); // Skip the left parenthesis lparen(); // 
Parse the following expression tree = binexpr(0); // Ensure this is compatible with the function's type tree = modify_type(tree, Functionid->type, Functionid->ctype, 0); if (tree == NULL) fatal("Incompatible type to return"); // Get the ')' rparen(); } // Add on the A_RETURN node tree = mkastunary(A_RETURN, P_NONE, NULL, tree, NULL, 0); // Get the ';' semi(); return (tree); } // break_statement: 'break' ; // // Parse a break statement and return its AST static struct ASTnode *break_statement(void) { if (Looplevel == 0 && Switchlevel == 0) fatal("no loop or switch to break out from"); scan(&Token); semi(); return (mkastleaf(A_BREAK, P_NONE, NULL, NULL, 0)); } // continue_statement: 'continue' ; // // Parse a continue statement and return its AST static struct ASTnode *continue_statement(void) { if (Looplevel == 0) fatal("no loop to continue to"); scan(&Token); semi(); return (mkastleaf(A_CONTINUE, P_NONE, NULL, NULL, 0)); } // Parse a switch statement and return its AST static struct ASTnode *switch_statement(void) { struct ASTnode *left, *body, *n, *c; struct ASTnode *casetree = NULL, *casetail = NULL; int inloop = 1, casecount = 0; int seendefault = 0; int ASTop, casevalue = 0; // Skip the 'switch' and '(' scan(&Token); lparen(); // Get the switch expression, the ')' and the '{' left = binexpr(0); rparen(); lbrace(); // Ensure that this is of int type if (!inttype(left->type)) fatal("Switch expression is not of integer type"); // Build an A_SWITCH subtree with the expression as // the child n = mkastunary(A_SWITCH, P_NONE, NULL, left, NULL, 0); // Now parse the cases Switchlevel++; while (inloop) { switch (Token.token) { // Leave the loop when we hit a '}' case T_RBRACE: if (casecount == 0) fatal("No cases in switch"); inloop = 0; break; case T_CASE: case T_DEFAULT: // Ensure this isn't after a previous 'default' if (seendefault) fatal("case or default after existing default"); // Set the AST operation. 
Scan the case value if required if (Token.token == T_DEFAULT) { ASTop = A_DEFAULT; seendefault = 1; scan(&Token); } else { ASTop = A_CASE; scan(&Token); left = binexpr(0); // Ensure the case value is an integer literal if (left->op != A_INTLIT) fatal("Expecting integer literal for case value"); casevalue = left->a_intvalue; // Walk the list of existing case values to ensure // that there isn't a duplicate case value for (c = casetree; c != NULL; c = c->right) if (casevalue == c->a_intvalue) fatal("Duplicate case value"); } // Scan the ':' and increment the casecount match(T_COLON, ":"); casecount++; // If the next token is a T_CASE, the existing case will fall // into the next case. Otherwise, parse the case body. if (Token.token == T_CASE) body = NULL; else body = compound_statement(1); // Build a sub-tree with any compound statement as the left child // and link it in to the growing A_CASE tree if (casetree == NULL) { casetree = casetail = mkastunary(ASTop, P_NONE, NULL, body, NULL, casevalue); } else { casetail->right = mkastunary(ASTop, P_NONE, NULL, body, NULL, casevalue); casetail = casetail->right; } break; default: fatals("Unexpected token in switch", Token.tokstr); } } Switchlevel--; // We have a sub-tree with the cases and any default. Put the // case count into the A_SWITCH node and attach the case tree. n->a_intvalue = casecount; n->right = casetree; rbrace(); return (n); } // Parse a single statement and return its AST. static struct ASTnode *single_statement(void) { struct ASTnode *stmt; struct symtable *ctype; int linenum = Line; switch (Token.token) { case T_SEMI: // An empty statement semi(); break; case T_LBRACE: // We have a '{', so this is a compound statement lbrace(); stmt = compound_statement(0); stmt->linenum = linenum; rbrace(); return (stmt); case T_IDENT: // We have to see if the identifier matches a typedef. // If not, treat it as an expression. // Otherwise, fall down to the parse_type() call. 
if (findtypedef(Text) == NULL) { stmt = binexpr(0); stmt->linenum = linenum; semi(); return (stmt); } case T_CHAR: case T_INT: case T_LONG: case T_STRUCT: case T_UNION: case T_ENUM: case T_TYPEDEF: // The beginning of a variable declaration list. declaration_list(&ctype, C_LOCAL, T_SEMI, T_EOF, &stmt); semi(); return (stmt); // Any assignments from the declarations case T_IF: stmt = if_statement(); stmt->linenum = linenum; return (stmt); case T_WHILE: stmt = while_statement(); stmt->linenum = linenum; return (stmt); case T_FOR: stmt = for_statement(); stmt->linenum = linenum; return (stmt); case T_RETURN: stmt = return_statement(); stmt->linenum = linenum; return (stmt); case T_BREAK: stmt = break_statement(); stmt->linenum = linenum; return (stmt); case T_CONTINUE: stmt = continue_statement(); stmt->linenum = linenum; return (stmt); case T_SWITCH: stmt = switch_statement(); stmt->linenum = linenum; return (stmt); default: // For now, see if this is an expression. // This catches assignment statements. stmt = binexpr(0); stmt->linenum = linenum; semi(); return (stmt); } return (NULL); // Keep -Wall happy } // Parse a compound statement // and return its AST. If inswitch is true, // we look for a '}', 'case' or 'default' token // to end the parsing. Otherwise, look for // just a '}' to end the parsing. struct ASTnode *compound_statement(int inswitch) { struct ASTnode *left = NULL; struct ASTnode *tree; while (1) { // Leave if we've hit the end token. 
We do this first to allow // an empty compound statement if (Token.token == T_RBRACE) return (left); if (inswitch && (Token.token == T_CASE || Token.token == T_DEFAULT)) return (left); // Parse a single statement tree = single_statement(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else left = mkastnode(A_GLUE, P_NONE, NULL, left, NULL, tree, NULL, 0); } } return (NULL); // Keep -Wall happy } ================================================ FILE: 63_QBE/sym.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Symbol table functions // Copyright (c) 2019 Warren Toomey, GPL3 // Append a node to the singly-linked list pointed to by head or tail void appendsym(struct symtable **head, struct symtable **tail, struct symtable *node) { // Check for valid pointers if (head == NULL || tail == NULL || node == NULL) fatal("Either head, tail or node is NULL in appendsym"); // Append to the list if (*tail) { (*tail)->next = node; *tail = node; } else *head = *tail = node; node->next = NULL; } // Create a symbol node to be added to a symbol table list. // Set up the node's: // + type: char, int etc. // + ctype: composite type pointer for struct/union // + structural type: var, function, array etc. 
// + size: number of elements, or endlabel: end label for a function // + posn: Position information for local symbols // Return a pointer to the new node struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn) { // Get a new node struct symtable *node = (struct symtable *) malloc(sizeof(struct symtable)); if (node == NULL) fatal("Unable to malloc a symbol table node in newsym"); // Fill in the values if (name == NULL) node->name = NULL; else node->name = strdup(name); node->type = type; node->ctype = ctype; node->stype = stype; node->class = class; node->nelems = nelems; // For pointers and integer types, set the size // of the symbol. structs and union declarations // manually set this up themselves. if (ptrtype(type) || inttype(type)) node->size = nelems * typesize(type, ctype); node->st_posn = posn; node->next = NULL; node->member = NULL; node->initlist = NULL; return (node); } // Add a symbol to the global symbol list struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn) { struct symtable *sym = newsym(name, type, ctype, stype, class, nelems, posn); // For structs and unions, copy the size from the type node if (type == P_STRUCT || type == P_UNION) sym->size = ctype->size; appendsym(&Globhead, &Globtail, sym); return (sym); } // Add a symbol to the local symbol list struct symtable *addlocl(char *name, int type, struct symtable *ctype, int stype, int nelems) { struct symtable *sym = newsym(name, type, ctype, stype, C_LOCAL, nelems, 0); // For structs and unions, copy the size from the type node if (type == P_STRUCT || type == P_UNION) sym->size = ctype->size; appendsym(&Loclhead, &Locltail, sym); return (sym); } // Add a symbol to the parameter list struct symtable *addparm(char *name, int type, struct symtable *ctype, int stype) { struct symtable *sym = newsym(name, type, ctype, stype, C_PARAM, 1, 0); appendsym(&Parmhead, &Parmtail, sym); 
return (sym); } // Add a symbol to the temporary member list struct symtable *addmemb(char *name, int type, struct symtable *ctype, int stype, int nelems) { struct symtable *sym = newsym(name, type, ctype, stype, C_MEMBER, nelems, 0); // For structs and unions, copy the size from the type node if (type == P_STRUCT || type == P_UNION) sym->size = ctype->size; appendsym(&Membhead, &Membtail, sym); return (sym); } // Add a struct to the struct list struct symtable *addstruct(char *name) { struct symtable *sym = newsym(name, P_STRUCT, NULL, 0, C_STRUCT, 0, 0); appendsym(&Structhead, &Structtail, sym); return (sym); } // Add a struct to the union list struct symtable *addunion(char *name) { struct symtable *sym = newsym(name, P_UNION, NULL, 0, C_UNION, 0, 0); appendsym(&Unionhead, &Uniontail, sym); return (sym); } // Add an enum type or value to the enum list. // Class is C_ENUMTYPE or C_ENUMVAL. // Use posn to store the int value. struct symtable *addenum(char *name, int class, int value) { struct symtable *sym = newsym(name, P_INT, NULL, 0, class, 0, value); appendsym(&Enumhead, &Enumtail, sym); return (sym); } // Add a typedef to the typedef list struct symtable *addtypedef(char *name, int type, struct symtable *ctype) { struct symtable *sym = newsym(name, type, ctype, 0, C_TYPEDEF, 0, 0); appendsym(&Typehead, &Typetail, sym); return (sym); } // Search for a symbol in a specific list. // Return a pointer to the found node or NULL if not found. // If class is not zero, also match on the given class static struct symtable *findsyminlist(char *s, struct symtable *list, int class) { for (; list != NULL; list = list->next) if ((list->name != NULL) && !strcmp(s, list->name)) if (class == 0 || class == list->class) return (list); return (NULL); } // Determine if the symbol s is in the global symbol table. // Return a pointer to the found node or NULL if not found. 
struct symtable *findglob(char *s) { return (findsyminlist(s, Globhead, 0)); } // Determine if the symbol s is in the local symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findlocl(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } return (findsyminlist(s, Loclhead, 0)); } // Determine if the symbol s is in the symbol table. // Return a pointer to the found node or NULL if not found. struct symtable *findsymbol(char *s) { struct symtable *node; // Look for a parameter if we are in a function's body if (Functionid) { node = findsyminlist(s, Functionid->member, 0); if (node) return (node); } // Otherwise, try the local and global symbol lists node = findsyminlist(s, Loclhead, 0); if (node) return (node); return (findsyminlist(s, Globhead, 0)); } // Find a member in the member list // Return a pointer to the found node or NULL if not found. struct symtable *findmember(char *s) { return (findsyminlist(s, Membhead, 0)); } // Find a struct in the struct list // Return a pointer to the found node or NULL if not found. struct symtable *findstruct(char *s) { return (findsyminlist(s, Structhead, 0)); } // Find a struct in the union list // Return a pointer to the found node or NULL if not found. struct symtable *findunion(char *s) { return (findsyminlist(s, Unionhead, 0)); } // Find an enum type in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumtype(char *s) { return (findsyminlist(s, Enumhead, C_ENUMTYPE)); } // Find an enum value in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumval(char *s) { return (findsyminlist(s, Enumhead, C_ENUMVAL)); } // Find a type in the tyedef list // Return a pointer to the found node or NULL if not found. 
struct symtable *findtypedef(char *s) { return (findsyminlist(s, Typehead, 0)); } // Reset the contents of the symbol table void clear_symtable(void) { Globhead = Globtail = NULL; Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Membhead = Membtail = NULL; Structhead = Structtail = NULL; Unionhead = Uniontail = NULL; Enumhead = Enumtail = NULL; Typehead = Typetail = NULL; } // Clear all the entries in the local symbol table void freeloclsyms(void) { Loclhead = Locltail = NULL; Parmhead = Parmtail = NULL; Functionid = NULL; } // Remove all static symbols from the global symbol table void freestaticsyms(void) { // g points at current node, prev at the previous one struct symtable *g, *prev = NULL; // Walk the global table looking for static entries for (g = Globhead; g != NULL; g = g->next) { if (g->class == C_STATIC) { // If there's a previous node, rearrange the prev pointer // to skip over the current node. If not, g is the head, // so do the same to Globhead if (prev != NULL) prev->next = g->next; else Globhead->next = g->next; // If g is the tail, point Globtail at the previous node // (if there is one), or Globhead if (g == Globtail) { if (prev != NULL) Globtail = prev; else Globtail = Globhead; } } } // Point prev at g before we move up to the next node prev = g; } // Dump a single symbol static void dumpsym(struct symtable *sym, int indent) { int i; for (i = 0; i < indent; i++) printf(" "); switch (sym->type & (~0xf)) { case P_VOID: printf("void "); break; case P_CHAR: printf("char "); break; case P_INT: printf("int "); break; case P_LONG: printf("long "); break; case P_STRUCT: if (sym->ctype != NULL) printf("struct %s ", sym->ctype->name); else printf("struct %s ", sym->name); break; case P_UNION: if (sym->ctype != NULL) printf("union %s ", sym->ctype->name); else printf("union %s ", sym->name); break; default: printf("unknown type "); } for (i = 0; i < (sym->type & 0xf); i++) printf("*"); printf("%s", sym->name); switch (sym->stype) { case 
S_VARIABLE: break; case S_FUNCTION: printf("()"); break; case S_ARRAY: printf("[]"); break; default: printf(" unknown stype"); } switch (sym->class) { case C_GLOBAL: printf(": global"); break; case C_LOCAL: printf(": local"); break; case C_PARAM: printf(": param"); break; case C_EXTERN: printf(": extern"); break; case C_STATIC: printf(": static"); break; case C_STRUCT: printf(": struct"); break; case C_UNION: printf(": union"); break; case C_MEMBER: printf(": member"); break; case C_ENUMTYPE: printf(": enumtype"); break; case C_ENUMVAL: printf(": enumval"); break; case C_TYPEDEF: printf(": typedef"); break; default: printf(": unknown class"); } switch (sym->stype) { case S_VARIABLE: if (sym->class == C_ENUMVAL) printf(", value %d\n", sym->st_posn); else printf(", size %d\n", sym->size); break; case S_FUNCTION: printf(", %d params\n", sym->nelems); break; case S_ARRAY: printf(", %d elems, size %d\n", sym->nelems, sym->size); break; } switch (sym->type & (~0xf)) { case P_STRUCT: case P_UNION: dumptable(sym->member, NULL, 4); } switch (sym->stype) { case S_FUNCTION: dumptable(sym->member, NULL, 4); } } // Dump one symbol table void dumptable(struct symtable *head, char *name, int indent) { struct symtable *sym; if (head != NULL && name != NULL) printf("%s\n--------\n", name); for (sym = head; sym != NULL; sym = sym->next) dumpsym(sym, indent); } void dumpsymtables(void) { dumptable(Globhead, "Global", 0); printf("\n"); dumptable(Enumhead, "Enums", 0); printf("\n"); dumptable(Typehead, "Typedefs", 0); } ================================================ FILE: 63_QBE/tests/err.input031.c ================================================ Expecting a primary expression, got token:+ on line 5 of input031.c ================================================ FILE: 63_QBE/tests/err.input032.c ================================================ Unknown variable or function:pizza on line 4 of input032.c ================================================ FILE: 63_QBE/tests/err.input033.c 
================================================ Incompatible type to return on line 4 of input033.c ================================================ FILE: 63_QBE/tests/err.input034.c ================================================ For now, declaration of non-global arrays is not implemented on line 4 of input034.c ================================================ FILE: 63_QBE/tests/err.input035.c ================================================ Duplicate local variable declaration:a on line 4 of input035.c ================================================ FILE: 63_QBE/tests/err.input036.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input036.c ================================================ FILE: 63_QBE/tests/err.input037.c ================================================ Expected:comma on line 3 of input037.c ================================================ FILE: 63_QBE/tests/err.input038.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input038.c ================================================ FILE: 63_QBE/tests/err.input039.c ================================================ No statements in function with non-void type on line 4 of input039.c ================================================ FILE: 63_QBE/tests/err.input040.c ================================================ No return for function with non-void type on line 4 of input040.c ================================================ FILE: 63_QBE/tests/err.input041.c ================================================ Can't return from a void function on line 3 of input041.c ================================================ FILE: 63_QBE/tests/err.input042.c ================================================ Unknown variable or function:fred on line 3 of input042.c ================================================ FILE: 63_QBE/tests/err.input043.c ================================================ 
Unknown variable or function:b on line 3 of input043.c ================================================ FILE: 63_QBE/tests/err.input044.c ================================================ Unknown variable or function:z on line 3 of input044.c ================================================ FILE: 63_QBE/tests/err.input045.c ================================================ & operator must be followed by an identifier on line 3 of input045.c ================================================ FILE: 63_QBE/tests/err.input046.c ================================================ * operator must be followed by an expression of pointer type on line 3 of input046.c ================================================ FILE: 63_QBE/tests/err.input047.c ================================================ ++ operator must be followed by an identifier on line 3 of input047.c ================================================ FILE: 63_QBE/tests/err.input048.c ================================================ -- operator must be followed by an identifier on line 3 of input048.c ================================================ FILE: 63_QBE/tests/err.input049.c ================================================ Incompatible expression in assignment on line 6 of input049.c ================================================ FILE: 63_QBE/tests/err.input050.c ================================================ Incompatible types in binary expression on line 6 of input050.c ================================================ FILE: 63_QBE/tests/err.input051.c ================================================ Expected '\'' at end of char literal on line 4 of input051.c ================================================ FILE: 63_QBE/tests/err.input052.c ================================================ Unrecognised character:$ on line 5 of input052.c ================================================ FILE: 63_QBE/tests/err.input056.c ================================================ unknown struct/union type:var1 on line 
2 of input056.c ================================================ FILE: 63_QBE/tests/err.input057.c ================================================ previously defined struct/union:fred on line 2 of input057.c ================================================ FILE: 63_QBE/tests/err.input059.c ================================================ Unknown variable or function:y on line 3 of input059.c ================================================ FILE: 63_QBE/tests/err.input060.c ================================================ Expression is not a struct/union on line 3 of input060.c ================================================ FILE: 63_QBE/tests/err.input061.c ================================================ Expression is not a pointer to a struct/union on line 3 of input061.c ================================================ FILE: 63_QBE/tests/err.input064.c ================================================ undeclared enum type::fred on line 1 of input064.c ================================================ FILE: 63_QBE/tests/err.input065.c ================================================ enum type redeclared::fred on line 2 of input065.c ================================================ FILE: 63_QBE/tests/err.input066.c ================================================ enum value redeclared::z on line 2 of input066.c ================================================ FILE: 63_QBE/tests/err.input068.c ================================================ redefinition of typedef:FOO on line 2 of input068.c ================================================ FILE: 63_QBE/tests/err.input069.c ================================================ unknown type:FLOO on line 2 of input069.c ================================================ FILE: 63_QBE/tests/err.input072.c ================================================ no loop or switch to break out from on line 1 of input072.c ================================================ FILE: 63_QBE/tests/err.input073.c 
================================================ no loop to continue to on line 1 of input073.c ================================================ FILE: 63_QBE/tests/err.input075.c ================================================ Unexpected token in switch:if on line 4 of input075.c ================================================ FILE: 63_QBE/tests/err.input076.c ================================================ No cases in switch on line 3 of input076.c ================================================ FILE: 63_QBE/tests/err.input077.c ================================================ case or default after existing default on line 6 of input077.c ================================================ FILE: 63_QBE/tests/err.input078.c ================================================ case or default after existing default on line 6 of input078.c ================================================ FILE: 63_QBE/tests/err.input079.c ================================================ Duplicate case value on line 6 of input079.c ================================================ FILE: 63_QBE/tests/err.input085.c ================================================ Bad type in parameter list on line 1 of input085.c ================================================ FILE: 63_QBE/tests/err.input086.c ================================================ Function definition not at global level on line 2 of input086.c ================================================ FILE: 63_QBE/tests/err.input087.c ================================================ Bad type in member list on line 4 of input087.c ================================================ FILE: 63_QBE/tests/err.input092.c ================================================ Type mismatch: literal vs. 
variable on line 1 of input092.c ================================================ FILE: 63_QBE/tests/err.input093.c ================================================ Unknown variable or function:fred on line 1 of input093.c ================================================ FILE: 63_QBE/tests/err.input094.c ================================================ Type mismatch: literal vs. variable on line 1 of input094.c ================================================ FILE: 63_QBE/tests/err.input095.c ================================================ Variable can not be initialised:x on line 1 of input095.c ================================================ FILE: 63_QBE/tests/err.input096.c ================================================ Array size is illegal:0 on line 1 of input096.c ================================================ FILE: 63_QBE/tests/err.input097.c ================================================ For now, declaration of non-global arrays is not implemented on line 2 of input097.c ================================================ FILE: 63_QBE/tests/err.input098.c ================================================ Too many values in initialisation list on line 1 of input098.c ================================================ FILE: 63_QBE/tests/err.input102.c ================================================ Cannot cast to a struct, union or void type on line 3 of input102.c ================================================ FILE: 63_QBE/tests/err.input103.c ================================================ Cannot cast to a struct, union or void type on line 3 of input103.c ================================================ FILE: 63_QBE/tests/err.input104.c ================================================ Cannot cast to a struct, union or void type on line 2 of input104.c ================================================ FILE: 63_QBE/tests/err.input105.c ================================================ Incompatible expression in assignment on line 4 of input105.c 
================================================ FILE: 63_QBE/tests/err.input118.c ================================================ Compiler doesn't support static or extern local declarations on line 2 of input118.c ================================================ FILE: 63_QBE/tests/err.input124.c ================================================ Cannot ++ on rvalue on line 6 of input124.c ================================================ FILE: 63_QBE/tests/err.input126.c ================================================ Unknown variable or function:ptr on line 7 of input126.c ================================================ FILE: 63_QBE/tests/err.input129.c ================================================ Cannot ++ and/or -- more than once on line 6 of input129.c ================================================ FILE: 63_QBE/tests/err.input141.c ================================================ Declaration of array parameters is not implemented on line 4 of input141.c ================================================ FILE: 63_QBE/tests/err.input142.c ================================================ Array must have non-zero elements:fred on line 1 of input142.c ================================================ FILE: 63_QBE/tests/input001.c ================================================ int printf(char *fmt); void main() { printf("%d\n", 12 * 3); printf("%d\n", 18 - 2 * 4); printf("%d\n", 1 + 2 + 9 - 5/2 + 3*5); } ================================================ FILE: 63_QBE/tests/input002.c ================================================ int printf(char *fmt); void main() { int fred; int jim; fred= 5; jim= 12; printf("%d\n", fred + jim); } ================================================ FILE: 63_QBE/tests/input003.c ================================================ int printf(char *fmt); void main() { int x; x= 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); } 
================================================ FILE: 63_QBE/tests/input004.c ================================================ int printf(char *fmt); void main() { int x; x= 7 < 9; printf("%d\n", x); x= 7 <= 9; printf("%d\n", x); x= 7 != 9; printf("%d\n", x); x= 7 == 7; printf("%d\n", x); x= 7 >= 7; printf("%d\n", x); x= 7 <= 7; printf("%d\n", x); x= 9 > 7; printf("%d\n", x); x= 9 >= 7; printf("%d\n", x); x= 9 != 7; printf("%d\n", x); } ================================================ FILE: 63_QBE/tests/input005.c ================================================ int printf(char *fmt); void main() { int i; int j; i=6; j=12; if (i < j) { printf("%d\n", i); } else { printf("%d\n", j); } } ================================================ FILE: 63_QBE/tests/input006.c ================================================ int printf(char *fmt); void main() { int i; i=1; while (i <= 10) { printf("%d\n", i); i= i + 1; } } ================================================ FILE: 63_QBE/tests/input007.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 63_QBE/tests/input008.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 63_QBE/tests/input009.c ================================================ int printf(char *fmt); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { printf("%d\n", 2 * b - a); } } ================================================ FILE: 63_QBE/tests/input010.c ================================================ int printf(char *fmt); void main() { int i; char j; j= 20; printf("%d\n", j); i= 10; printf("%d\n", i); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 2; 
j= j + 1) { printf("%d\n", j); } } ================================================ FILE: 63_QBE/tests/input011.c ================================================ int printf(char *fmt); int main() { int i; char j; long k; i= 10; printf("%d\n", i); j= 20; printf("%d\n", j); k= 30; printf("%d\n", k); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 4; j= j + 1) { printf("%d\n", j); } for (k= 1; k <= 5; k= k + 1) { printf("%d\n", k); } return(i); } ================================================ FILE: 63_QBE/tests/input012.c ================================================ int printf(char *fmt); int fred() { return(5); } void main() { int x; x= fred(2); printf("%d\n", x); } ================================================ FILE: 63_QBE/tests/input013.c ================================================ int printf(char *fmt); int fred() { return(56); } void main() { int dummy; int result; dummy= printf("%d\n", 23); result= fred(10); dummy= printf("%d\n", result); } ================================================ FILE: 63_QBE/tests/input014.c ================================================ int printf(char *fmt); int fred() { return(20); } int main() { int result; printf("%d\n", 10); result= fred(15); printf("%d\n", result); printf("%d\n", fred(15)+10); return(0); } ================================================ FILE: 63_QBE/tests/input015.c ================================================ int printf(char *fmt); int main() { char a; char *b; char c; int d; int *e; int f; a= 18; printf("%d\n", a); b= &a; c= *b; printf("%d\n", c); d= 12; printf("%d\n", d); e= &d; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 63_QBE/tests/input016.c ================================================ int printf(char *fmt); int c; int d; int *e; int f; int main() { c= 12; d=18; printf("%d\n", c); e= &c + 1; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 63_QBE/tests/input017.c 
================================================ int printf(char *fmt); int main() { char a; char *b; int d; int *e; b= &a; *b= 19; printf("%d\n", a); e= &d; *e= 12; printf("%d\n", d); return(0); } ================================================ FILE: 63_QBE/tests/input018.c ================================================ int printf(char *fmt); int main() { int a; int b; a= b= 34; printf("%d\n", a); printf("%d\n", b); return(0); } ================================================ FILE: 63_QBE/tests/input018a.c ================================================ int printf(char *fmt); int a; int *b; char c; char *d; int main() { b= &a; *b= 15; printf("%d\n", a); d= &c; *d= 16; printf("%d\n", c); return(0); } ================================================ FILE: 63_QBE/tests/input019.c ================================================ int printf(char *fmt); int a; int b; int c; int d; int e; int main() { a= 2; b= 4; c= 3; d= 2; e= (a+b) * (c+d); printf("%d\n", e); return(0); } ================================================ FILE: 63_QBE/tests/input020.c ================================================ int printf(char *fmt); int a; int b[25]; int main() { b[3]= 12; a= b[3]; printf("%d\n", a); return(0); } ================================================ FILE: 63_QBE/tests/input021.c ================================================ int printf(char *fmt); char c; char *str; int main() { c= '\n'; printf("%d\n", c); for (str= "Hello world\n"; *str != 0; str= str + 1) { printf("%c", *str); } return(0); } ================================================ FILE: 63_QBE/tests/input022.c ================================================ int printf(char *fmt); char a; char b; char c; int d; int e; int f; long g; long h; long i; int main() { b= 5; c= 7; a= b + c++; printf("%d\n", a); e= 5; f= 7; d= e + f++; printf("%d\n", d); h= 5; i= 7; g= h + i++; printf("%d\n", g); a= b-- + c; printf("%d\n", a); d= e-- + f; printf("%d\n", d); g= h-- + i; printf("%d\n", g); a= ++b + c; 
printf("%d\n", a); d= ++e + f; printf("%d\n", d); g= ++h + i; printf("%d\n", g); a= b * --c; printf("%d\n", a); d= e * --f; printf("%d\n", d); g= h * --i; printf("%d\n", g); return(0); } ================================================ FILE: 63_QBE/tests/input023.c ================================================ int printf(char *fmt); char *str; int x; int main() { x= -23; printf("%d\n", x); printf("%d\n", -10 * -10); x= 1; x= ~x; printf("%d\n", x); x= 2 > 5; printf("%d\n", x); x= !x; printf("%d\n", x); x= !x; printf("%d\n", x); x= 13; if (x) { printf("%d\n", 13); } x= 0; if (!x) { printf("%d\n", 14); } for (str= "Hello world\n"; *str; str++) { printf("%c", *str); } return(0); } ================================================ FILE: 63_QBE/tests/input024.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { a= 42; b= 19; printf("%d\n", a & b); printf("%d\n", a | b); printf("%d\n", a ^ b); printf("%d\n", 1 << 3); printf("%d\n", 63 >> 3); return(0); } ================================================ FILE: 63_QBE/tests/input025.c ================================================ int printf(char *fmt); int a; int b; int c; int main() { char z; int y; int x; x= 10; y= 20; z= 30; printf("%d\n", x); printf("%d\n", y); printf("%d\n", z); a= 5; b= 15; c= 25; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); return(0); } ================================================ FILE: 63_QBE/tests/input026.c ================================================ int printf(char *fmt); int main(int a, char b, long c, int d, int e, int f, int g, int h) { int i; int j; int k; a= 13; printf("%d\n", a); b= 23; printf("%d\n", b); c= 34; printf("%d\n", c); d= 44; printf("%d\n", d); e= 54; printf("%d\n", e); f= 64; printf("%d\n", f); g= 74; printf("%d\n", g); h= 84; printf("%d\n", h); i= 94; printf("%d\n", i); j= 95; printf("%d\n", j); k= 96; printf("%d\n", k); return(0); } ================================================ FILE: 
63_QBE/tests/input027.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int param5(int a, int b, int c, int d, int e) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param2(int a, int b) { int c; int d; int e; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param0() { int a; int b; int c; int d; int e; a= 1; b= 2; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int main() { param8(1,2,3,4,5,6,7,8); param5(1,2,3,4,5); param2(1,2); param0(); return(0); } ================================================ FILE: 63_QBE/tests/input028.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 63_QBE/tests/input029.c ================================================ int printf(char *fmt); int param8(int a, int b, int c, int d, int e, int f, int g, int h); int fred(int a, int b, int c); int main(); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, 
int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 63_QBE/tests/input030.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input030.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 63_QBE/tests/input031.c ================================================ int printf(char *fmt); int main() { int x; x= 2 + + 3 - * / ; } ================================================ FILE: 63_QBE/tests/input032.c ================================================ int printf(char *fmt); int main() { pizza cow llama sausage; } ================================================ FILE: 63_QBE/tests/input033.c ================================================ int printf(char *fmt); int main() { char *z; return(z); } ================================================ FILE: 63_QBE/tests/input035.c ================================================ int printf(char *fmt); int fred(int a, int b) { int a; return(a); } ================================================ FILE: 63_QBE/tests/input036.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c); ================================================ FILE: 63_QBE/tests/input037.c ================================================ int printf(char *fmt); int fred(int a, char b +, int z); ================================================ FILE: 63_QBE/tests/input038.c ================================================ int printf(char *fmt); int fred(int a, char b, int c); int fred(int a, int b, char c, 
int g); ================================================ FILE: 63_QBE/tests/input039.c ================================================ int printf(char *fmt); int main() { int a; } ================================================ FILE: 63_QBE/tests/input040.c ================================================ int printf(char *fmt); int main() { int a; a= 5; } ================================================ FILE: 63_QBE/tests/input041.c ================================================ int printf(char *fmt); void fred() { return(5); } ================================================ FILE: 63_QBE/tests/input042.c ================================================ int printf(char *fmt); int main() { fred(5); } ================================================ FILE: 63_QBE/tests/input043.c ================================================ int printf(char *fmt); int main() { int a; a= b[4]; } ================================================ FILE: 63_QBE/tests/input044.c ================================================ int printf(char *fmt); int main() { int a; a= z; } ================================================ FILE: 63_QBE/tests/input045.c ================================================ int printf(char *fmt); int main() { int a; a= &5; } ================================================ FILE: 63_QBE/tests/input046.c ================================================ int printf(char *fmt); int main() { int a; a= *5; } ================================================ FILE: 63_QBE/tests/input047.c ================================================ int printf(char *fmt); int main() { int a; a= ++5; } ================================================ FILE: 63_QBE/tests/input048.c ================================================ int printf(char *fmt); int main() { int a; a= --5; } ================================================ FILE: 63_QBE/tests/input049.c ================================================ int printf(char *fmt); int main() { int x; char y; y= x; } 
================================================ FILE: 63_QBE/tests/input050.c ================================================ int printf(char *fmt); int main() { char *a; char *b; a= a + b; } ================================================ FILE: 63_QBE/tests/input051.c ================================================ int printf(char *fmt); int main() { char a; a= 'fred'; } ================================================ FILE: 63_QBE/tests/input052.c ================================================ int printf(char *fmt); int main() { int a; a= $5.00; } ================================================ FILE: 63_QBE/tests/input053.c ================================================ int printf(char *fmt); int main() { printf("Hello world, %d\n", 23); return(0); } ================================================ FILE: 63_QBE/tests/input054.c ================================================ int printf(char *fmt); int main() { int i; for (i=0; i < 20; i++) { printf("Hello world, %d\n", i); } return(0); } ================================================ FILE: 63_QBE/tests/input055.c ================================================ int printf(char *fmt); int main(int argc, char **argv) { int i; char *argument; printf("Hello world\n"); for (i=0; i < argc; i++) { argument= *argv; argv= argv + 1; printf("Argument %d is %s\n", i, argument); } return(0); } ================================================ FILE: 63_QBE/tests/input056.c ================================================ struct foo { int x; }; struct mary var1; ================================================ FILE: 63_QBE/tests/input057.c ================================================ struct fred { int x; } ; struct fred { char y; } ; ================================================ FILE: 63_QBE/tests/input058.c ================================================ int printf(char *fmt); struct fred { int x; char y; long z; }; struct fred var2; struct fred *varptr; int main() { long result; var2.x= 12; printf("%d\n", 
var2.x); var2.y= 'c'; printf("%d\n", var2.y); var2.z= 4005; printf("%d\n", var2.z); result= var2.x + var2.y + var2.z; printf("%d\n", result); varptr= &var2; result= varptr->x + varptr->y + varptr->z; printf("%d\n", result); return(0); } ================================================ FILE: 63_QBE/tests/input059.c ================================================ int main() { int x; x= y.foo; } ================================================ FILE: 63_QBE/tests/input060.c ================================================ int main() { int x; x= x.foo; } ================================================ FILE: 63_QBE/tests/input061.c ================================================ int main() { int x; x= x->foo; } ================================================ FILE: 63_QBE/tests/input062.c ================================================ int printf(char *fmt); union fred { char w; int x; int y; long z; }; union fred var1; union fred *varptr; int main() { var1.x= 65; printf("%d\n", var1.x); var1.x= 66; printf("%d\n", var1.x); printf("%d\n", var1.y); printf("The next two depend on the endian of the platform\n"); printf("%d\n", var1.w); printf("%d\n", var1.z); varptr= &var1; varptr->x= 67; printf("%d\n", varptr->x); printf("%d\n", varptr->y); return(0); } ================================================ FILE: 63_QBE/tests/input063.c ================================================ int printf(char *fmt); enum fred { apple=1, banana, carrot, pear=10, peach, mango, papaya }; enum jane { aple=1, bnana, crrot, par=10, pech, mago, paaya }; enum fred var1; enum jane var2; enum fred var3; int main() { var1= carrot + pear + mango; printf("%d\n", var1); return(0); } ================================================ FILE: 63_QBE/tests/input064.c ================================================ enum fred var3; ================================================ FILE: 63_QBE/tests/input065.c ================================================ enum fred { x, y, z }; enum fred { a, b }; 
================================================ FILE: 63_QBE/tests/input066.c ================================================ enum fred { x, y, z }; enum mary { a, b, z }; ================================================ FILE: 63_QBE/tests/input067.c ================================================ int printf(char *fmt); typedef int FOO; FOO var1; struct bar { int x; int y} ; typedef struct bar BAR; BAR var2; int main() { var1= 5; printf("%d\n", var1); var2.x= 7; var2.y= 10; printf("%d\n", var2.x + var2.y); return(0); } ================================================ FILE: 63_QBE/tests/input068.c ================================================ typedef int FOO; typedef char FOO; ================================================ FILE: 63_QBE/tests/input069.c ================================================ typedef int FOO; FLOO y; ================================================ FILE: 63_QBE/tests/input070.c ================================================ #include <stdio.h> typedef int FOO; int main() { FOO x; x= 56; printf("%d\n", x); return(0); } ================================================ FILE: 63_QBE/tests/input071.c ================================================ #include <stdio.h> int main() { int x; x = 0; while (x < 100) { if (x == 5) { x = x + 2; continue; } printf("%d\n", x); if (x == 14) { break; } x = x + 1; } printf("Done\n"); return (0); } ================================================ FILE: 63_QBE/tests/input072.c ================================================ int main() { break; } ================================================ FILE: 63_QBE/tests/input073.c ================================================ int main() { continue; } ================================================ FILE: 63_QBE/tests/input074.c ================================================ #include <stdio.h> int main() { int x; int y; y= 0; for (x=0; x < 5; x++) { switch(x) { case 1: { y= 5; break; } case 2: { y= 7; break; } case 3: { y= 9; } default: { y= 100; } } printf("%d\n", y); } return(0); } 
================================================ FILE: 63_QBE/tests/input075.c ================================================ int main() { int x; switch(x) { if (x<5); } } ================================================ FILE: 63_QBE/tests/input076.c ================================================ int main() { int x; switch(x) { } } ================================================ FILE: 63_QBE/tests/input077.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } case 4: { x= 2; } } } ================================================ FILE: 63_QBE/tests/input078.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } default: { x= 2; } } } ================================================ FILE: 63_QBE/tests/input079.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } case 2: { x= 2; } case 1: { x= 2; } default: { x= 2; } } } ================================================ FILE: 63_QBE/tests/input080.c ================================================ #include <stdio.h> int x; int y; int main() { for (x=0, y=1; x < 6; x++, y=y+2) { printf("%d %d\n", x, y); } return(0); } ================================================ FILE: 63_QBE/tests/input081.c ================================================ #include <stdio.h> int x; int y; int main() { x= 0; y=1; for (;x<5;) { printf("%d %d\n", x, y); x=x+1; y=y+2; } return(0); } ================================================ FILE: 63_QBE/tests/input082.c ================================================ #include <stdio.h> int main() { int x; x= 10; // Dangling else test if (x > 5) if (x > 15) printf("x > 15\n"); else printf("15 >= x > 5\n"); else printf("5 >= x\n"); return(0); } ================================================ FILE: 63_QBE/tests/input083.c ================================================ #include <stdio.h> int main() { int x; // Dangling else test. 
// We should not print anything for x<= 5 for (x=0; x < 12; x++) if (x > 5) if (x > 10) printf("10 < %2d\n", x); else printf(" 5 < %2d <= 10\n", x); return(0); } ================================================ FILE: 63_QBE/tests/input084.c ================================================ #include <stdio.h> int main() { int x, y; x=2; y=3; printf("%d %d\n", x, y); char a, *b; a= 'f'; b= &a; printf("%c %c\n", a, *b); return(0); } ================================================ FILE: 63_QBE/tests/input085.c ================================================ int main(struct foo { int x; }; , int a) { return(0); } ================================================ FILE: 63_QBE/tests/input086.c ================================================ int main() { int fred() { return(5); } int x; x=2; return(x); } ================================================ FILE: 63_QBE/tests/input087.c ================================================ struct foo { int x; int y; struct blah { int g; }; }; ================================================ FILE: 63_QBE/tests/input088.c ================================================ #include <stdio.h> struct foo { int x; int y; } fred, mary; struct foo james; int main() { fred.x= 5; mary.y= 6; printf("%d %d\n", fred.x, mary.y); return(0); } ================================================ FILE: 63_QBE/tests/input089.c ================================================ #include <stdio.h> int x= 23; char y= 'H'; char *z= "Hello world"; int main() { printf("%d %c %s\n", x, y, z); return(0); } ================================================ FILE: 63_QBE/tests/input090.c ================================================ #include <stdio.h> int a = 23, b = 100; char y = 'H', *z = "Hello world"; int main() { printf("%d %d %c %s\n", a, b, y, z); return (0); } ================================================ FILE: 63_QBE/tests/input091.c ================================================ #include <stdio.h> int fred[] = { 1, 2, 3, 4, 5 }; int jim[10] = { 1, 2, 3, 4, 5 }; int main() { int i; for (i=0; i < 5; 
i++) printf("%d\n", fred[i]); for (i=0; i < 10; i++) printf("%d\n", jim[i]); return(0); } ================================================ FILE: 63_QBE/tests/input092.c ================================================ char x= 3000; ================================================ FILE: 63_QBE/tests/input093.c ================================================ char x= fred; ================================================ FILE: 63_QBE/tests/input094.c ================================================ char *s= 54; ================================================ FILE: 63_QBE/tests/input095.c ================================================ int fred(int x=2) { return(x); } ================================================ FILE: 63_QBE/tests/input096.c ================================================ int fred[0]; ================================================ FILE: 63_QBE/tests/input098.c ================================================ int fred[3]= { 1, 2, 3, 4, 5 }; ================================================ FILE: 63_QBE/tests/input099.c ================================================ #include <stdio.h> // List of token strings, for debugging purposes. 
// As yet, we can't store a NULL into the list char *Tstring[] = { "EOF", "=", "||", "&&", "|", "^", "&", "==", "!=", ",", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", "default", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":", "" }; int main() { int i; char *str; i=0; while (1) { str= Tstring[i]; if (*str == 0) break; printf("%s\n", str); i++; } return(0); } ================================================ FILE: 63_QBE/tests/input100.c ================================================ #include <stdio.h> int main() { int x= 3, y=14; int z= 2 * x + y; char *str= "Hello world"; printf("%s %d %d\n", str, x+y, z); return(0); } ================================================ FILE: 63_QBE/tests/input101.c ================================================ #include <stdio.h> int main() { int x= 65535; char y; char *str; y= (char )x; printf("0x%x\n", y); str= (char *)0; printf("0x%lx\n", (long)str); return(0); } ================================================ FILE: 63_QBE/tests/input102.c ================================================ int main() { struct foo { int p; }; int y= (struct foo) x; } ================================================ FILE: 63_QBE/tests/input103.c ================================================ int main() { union foo { int p; }; int y= (union foo) x; } ================================================ FILE: 63_QBE/tests/input104.c ================================================ int main() { int y= (void) x; } ================================================ FILE: 63_QBE/tests/input105.c ================================================ int main() { int x; char *y; y= (char) x; } ================================================ FILE: 63_QBE/tests/input106.c ================================================ #include <stdio.h> char *y= (char *)0; int main() { 
printf("0x%lx\n", (long)y); return(0); } ================================================ FILE: 63_QBE/tests/input107.c ================================================ #include <stdio.h> char *y[] = { "fish", "cow", NULL }; char *z= NULL; int main() { int i; char *ptr; for (i=0; i < 3; i++) { ptr= y[i]; if (ptr != (char *)0) printf("%s\n", y[i]); else printf("NULL\n"); } return(0); } ================================================ FILE: 63_QBE/tests/input108.c ================================================ int main() { char *str= (void *)0; return(0); } ================================================ FILE: 63_QBE/tests/input109.c ================================================ #include <stdio.h> int main() { int x= 17 - 1; printf("%d\n", x); return(0); } ================================================ FILE: 63_QBE/tests/input110.c ================================================ #include <stdio.h> int x; int y; int main() { x= 3; y= 15; y += x; printf("%d\n", y); x= 3; y= 15; y -= x; printf("%d\n", y); x= 3; y= 15; y *= x; printf("%d\n", y); x= 3; y= 15; y /= x; printf("%d\n", y); return(0); } ================================================ FILE: 63_QBE/tests/input111.c ================================================ #include <stdio.h> int main() { int x= 2000 + 3 + 4 * 5 + 6; printf("%d\n", x); return(0); } ================================================ FILE: 63_QBE/tests/input112.c ================================================ #include <stdio.h> char* y = NULL; int x= 10 + 6; int fred [ 2 + 3 ]; int main() { fred[2]= x; printf("%d\n", fred[2]); return(0); } ================================================ FILE: 63_QBE/tests/input113.c ================================================ #include <stdio.h> void fred(void); void fred(void) { printf("fred says hello\n"); } int main(void) { fred(); return(0); } ================================================ FILE: 63_QBE/tests/input114.c ================================================ #include <stdio.h> int main() { int x= 0x4a; printf("%c\n", x); return(0); } 
================================================ FILE: 63_QBE/tests/input115.c ================================================ #include <stdio.h> struct foo { int x; char y; long z; }; typedef struct foo blah; // Symbol table structure struct symtable { char *name; // Name of a symbol int type; // Primitive type for the symbol struct symtable *ctype; // If struct/union, ptr to that type int stype; // Structural type for the symbol int class; // Storage class for the symbol int size; // Total size in bytes of this symbol int nelems; // Functions: # params. Arrays: # elements #define st_endlabel st_posn // For functions, the end label int st_posn; // For locals, the negative offset // from the stack base pointer int *initlist; // List of initial values struct symtable *next; // Next symbol in one list struct symtable *member; // First member of a function, struct, }; // union or enum // Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates int rvalue; // True if the node is an rvalue struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; struct symtable *sym; // For many AST nodes, the pointer to // the symbol in the symbol table #define a_intvalue a_size // For A_INTLIT, the integer value int a_size; // For A_SCALE, the size to scale by }; int main() { printf("%ld\n", sizeof(char)); printf("%ld\n", sizeof(int)); printf("%ld\n", sizeof(long)); printf("%ld\n", sizeof(char *)); printf("%ld\n", sizeof(blah)); printf("%ld\n", sizeof(struct symtable)); printf("%ld\n", sizeof(struct ASTnode)); return(0); } ================================================ FILE: 63_QBE/tests/input116.c ================================================ #include <stdio.h> static int counter=0; static int fred(void) { return(counter++); } int main(void) { int i; for (i=0; i < 5; i++) printf("%d\n", fred()); return(0); } 
================================================ FILE: 63_QBE/tests/input117.c ================================================ #include <stdio.h> static char *fred(void) { return("Hello"); } int main(void) { printf("%s\n", fred()); return(0); } ================================================ FILE: 63_QBE/tests/input118.c ================================================ int main(void) { static int x; } ================================================ FILE: 63_QBE/tests/input119.c ================================================ #include <stdio.h> int x; int y= 3; int main() { x= y != 3 ? 6 : 8; printf("%d\n", x); x= (y == 3) ? 6 : 8; printf("%d\n", x); return(0); } ================================================ FILE: 63_QBE/tests/input120.c ================================================ #include <stdio.h> int x; int y= 3; int main() { for (y= 0; y < 10; y++) { x= y > 4 ? y + 2 : y + 9; printf("%d\n", x); } return(0); } ================================================ FILE: 63_QBE/tests/input121.c ================================================ #include <stdio.h> int x; int y= 3; int main() { for (y= 0; y < 10; y++) { x= (y < 4) ? y + 2 : (y > 7) ? 
1000 : y + 9; printf("%d\n", x); } return(0); } ================================================ FILE: 63_QBE/tests/input122.c ================================================ #include <stdio.h> int x, y, z1, z2; int main() { for (x= 0; x <= 1; x++) { for (y= 0; y <= 1; y++) { z1= x || y; z2= x && y; printf("x %d, y %d, x || y %d, x && y %d\n", x, y, z1, z2); } } //z= x || y; return(0); } ================================================ FILE: 63_QBE/tests/input123.c ================================================ #include <stdio.h> int main() { int x; for (x=0; x < 20; x++) switch(x) { case 2: case 3: case 5: case 7: case 11: printf("%2d infant prime\n", x); break; case 13: case 17: case 19: printf("%2d teen prime\n", x); break; case 0: case 1: case 4: case 6: case 8: case 9: case 10: case 12: printf("%2d infant composite\n", x); break; default: printf("%2d teen composite\n", x); break; } return(0); } ================================================ FILE: 63_QBE/tests/input124.c ================================================ #include int ary[5]; int main() { ary++; return(0); } ================================================ FILE: 63_QBE/tests/input125.c ================================================ #include <stdio.h> int ary[5]; int *ptr; int x; int main() { ary[3]= 2008; ptr= ary; // Load ary's address into ptr x= ary[3]; printf("%d\n", x); x= ptr[3]; printf("%d\n", x); // Treat ptr as an array return(0); } ================================================ FILE: 63_QBE/tests/input126.c ================================================ #include int ary[5]; int main() { ary[3]= 2008; ptr= &ary; return(0); } ================================================ FILE: 63_QBE/tests/input127.c ================================================ #include <stdio.h> int ary[5]; void fred(int *ptr) { // Receive a pointer printf("%d\n", ptr[3]); } int main() { ary[3]= 2008; printf("%d\n", ary[3]); fred(ary); // Pass ary as a pointer return(0); } ================================================ FILE: 
63_QBE/tests/input128.c ================================================ #include struct foo { int val; struct foo *next; }; struct foo head, mid, tail; int main() { struct foo *ptr; tail.val= 20; tail.next= NULL; mid.val= 15; mid.next= &tail; head.val= 10; head.next= &mid; ptr= &head; printf("%d %d\n", head.val, ptr->val); printf("%d %d\n", mid.val, ptr->next->val); printf("%d %d\n", tail.val, ptr->next->next->val); return(0); } ================================================ FILE: 63_QBE/tests/input129.c ================================================ #include int x= 6; int main() { printf("%d\n", x++ ++); return(0); } ================================================ FILE: 63_QBE/tests/input130.c ================================================ #include char *x= "foo"; int main() { printf("Hello " "world" "\n"); return(0); } ================================================ FILE: 63_QBE/tests/input131.c ================================================ #include void donothing() { } int main() { int x=0; printf("Doing nothing... 
"); donothing(); printf("nothing done\n"); while (++x < 100) ; printf("x is now %d\n", x); return(0); } ================================================ FILE: 63_QBE/tests/input132.c ================================================ extern int fred; int fred; int mary; extern int mary; int main() { return(0); } ================================================ FILE: 63_QBE/tests/input133.c ================================================ #include extern int fred[]; int fred[23]; char mary[100]; extern char mary[]; void main() { printf("OK\n"); } ================================================ FILE: 63_QBE/tests/input134.c ================================================ #include char y = 'a'; char *x; int main() { x= &y; if (x && y == 'a') printf("1st match\n"); x= NULL; if (x && y == 'a') printf("2nd match\n"); x= &y; y='b'; if (x && y == 'a') printf("3rd match\n"); return(0); } ================================================ FILE: 63_QBE/tests/input135.c ================================================ #include void fred() { int x= 5; printf("testing x\n"); if (x > 4) return; printf("x below 5\n"); } int main() { fred(); return(0); } ================================================ FILE: 63_QBE/tests/input136.c ================================================ #include int add(int x, int y) { return(x+y); } int main() { int result; result= 3 * add(2,3) - 5 * add(4,6); printf("%d\n", result); return(0); } ================================================ FILE: 63_QBE/tests/input137.c ================================================ #include int a=1, b=2, c=3, d=4, e=5, f=6, g=7, h=8; int main() { int x; x= ((((((a + b) + c) + d) + e) + f) + g) + h; x= a + (b + (c + (d + (e + (f + (g + h)))))); printf("x is %d\n", x); return(0); } ================================================ FILE: 63_QBE/tests/input138.c ================================================ #include int x, y, z; int a=1; int *aptr; int main() { // See if generic AND works for (x=0; x <= 1; x++) for 
(y=0; y <= 1; y++) { z= x && y; printf("%d %d | %d\n", x, y, z); } // See if generic AND works for (x=0; x <= 1; x++) for (y=0; y <= 1; y++) { z= x || y; printf("%d %d | %d\n", x, y, z); } // Now some lazy evaluation aptr= NULL; if (aptr && *aptr == 1) printf("aptr points at 1\n"); else printf("aptr is NULL or doesn't point at 1\n"); aptr= &a; if (aptr && *aptr == 1) printf("aptr points at 1\n"); else printf("aptr is NULL or doesn't point at 1\n"); return(0); } ================================================ FILE: 63_QBE/tests/input139.c ================================================ #include int same(int x) { return(x); } int main() { int a= 3; if (same(a) && same(a) >= same(a)) printf("same apparently\n"); return(0); } ================================================ FILE: 63_QBE/tests/input140.c ================================================ #include int main() { int i; int ary[5]; char z; // Write below the array z= 'H'; // Fill the array for (i=0; i < 5; i++) ary[i]= i * i; // Write above the array i=14; // Print out the array for (i=0; i < 5; i++) printf("%d\n", ary[i]); // See if either side is OK printf("%d %c\n", i, z); return(0); } ================================================ FILE: 63_QBE/tests/input141.c ================================================ static int fred[5]; int jim; int foo(int mary[6]) { return(5); } ================================================ FILE: 63_QBE/tests/input142.c ================================================ static int fred[]; int jim; ================================================ FILE: 63_QBE/tests/input143.c ================================================ #include char foo; char *a, *b, *c; int main() { a= b= c= NULL; if (a==NULL || b==NULL || c==NULL) printf("One of the three is NULL\n"); a= &foo; if (a==NULL || b==NULL || c==NULL) printf("One of the three is NULL\n"); b= &foo; if (a==NULL || b==NULL || c==NULL) printf("One of the three is NULL\n"); c= &foo; if (a==NULL || b==NULL || c==NULL) printf("One 
of the three is NULL\n"); else printf("All three are non-NULL\n"); return(0); } ================================================ FILE: 63_QBE/tests/input144.c ================================================ #include #include #include char *filename= "fred"; int main() { fprintf(stdout, "Unable to open %s: %s\n", filename, strerror(errno)); return(0); } ================================================ FILE: 63_QBE/tests/input145.c ================================================ #include char *str= "qwertyuiop"; int list[]= {3, 5, 7, 9, 11, 13, 15}; int *lptr; int main() { printf("%c\n", *str); str= str + 1; printf("%c\n", *str); str += 1; printf("%c\n", *str); str += 1; printf("%c\n", *str); str -= 1; printf("%c\n", *str); lptr= list; printf("%d\n", *lptr); lptr= lptr + 1; printf("%d\n", *lptr); lptr += 1; printf("%d\n", *lptr); lptr += 1; printf("%d\n", *lptr); lptr -= 1; printf("%d\n", *lptr); return(0); } ================================================ FILE: 63_QBE/tests/input146.c ================================================ #include char *str= "qwertyuiop"; int list[]= {3, 5, 7, 9, 11, 13, 15}; int *lptr; int main() { printf("%c\n", *str); str= str + 1; printf("%c\n", *str); str += 1; printf("%c\n", *str); str += 1; printf("%c\n", *str); str -= 1; printf("%c\n", *str); str++; printf("%c\n", *str); str--; printf("%c\n", *str); ++str; printf("%c\n", *str); --str; printf("%c\n\n", *str); lptr= list; printf("%d\n", *lptr); lptr= lptr + 1; printf("%d\n", *lptr); lptr += 1; printf("%d\n", *lptr); lptr += 1; printf("%d\n", *lptr); lptr -= 1; printf("%d\n", *lptr); lptr++ ; printf("%d\n", *lptr); lptr-- ; printf("%d\n", *lptr); ++lptr ; printf("%d\n", *lptr); --lptr ; printf("%d\n", *lptr); return(0); } ================================================ FILE: 63_QBE/tests/input147.c ================================================ #include int a; int main() { printf("%d\n", 24 % 9); printf("%d\n", 31 % 11); a= 24; a %= 9; printf("%d\n",a); a= 31; a %= 11; 
printf("%d\n",a); return(0); } ================================================ FILE: 63_QBE/tests/input148.c ================================================ #include char *argv[]= { "unused", "-fish", "-cat", "owl" }; int argc= 4; int main() { int i; for (i = 1; i < argc; i++) { printf("i is %d\n", i); if (*argv[i] != '-') break; } while (i < argc) { printf("leftover %s\n", argv[i]); i++; } return (0); } ================================================ FILE: 63_QBE/tests/input149.c ================================================ #include static int localOffset=0; static int newlocaloffset(int size) { localOffset += (size > 4) ? size : 4; return (-localOffset); } int main() { int i, r; for (i=1; i <= 12; i++) { r= newlocaloffset(i); printf("%d %d\n", i, r); } return(0); } ================================================ FILE: 63_QBE/tests/input150.c ================================================ #include #include struct Svalue { char *thing; int vreg; int intval; }; struct IR { int label; int op; struct Svalue dst; struct Svalue src1; struct Svalue src2; int jmplabel; }; struct foo { int a; int b; struct Svalue *c; int d; }; struct IR *fred; struct foo jane; int main() { fred= (struct IR *)malloc(sizeof(struct IR)); fred->label= 1; fred->op= 2; fred->dst.thing= NULL; fred->dst.vreg=3; fred->dst.intval=4; fred->src1.thing= NULL; fred->src1.vreg=5; fred->src1.intval=6; fred->src2.thing= NULL; fred->src2.vreg=7; fred->src2.intval=8; fred->jmplabel= 9; printf("%d %d %d\n", fred->label, fred->op, fred->dst.vreg); printf("%d %d %d\n", fred->dst.intval, fred->src1.vreg, fred->src1.intval); printf("%d %d %d\n\n", fred->src2.vreg, fred->src2.intval, fred->jmplabel); jane.c= (struct Svalue *)malloc(sizeof(struct Svalue)); jane.a= 1; jane.b= 2; jane.d= 4; jane.c->thing= "fish"; jane.c->vreg= 3; jane.c->intval= 5; printf("%d %d %d\n", jane.a, jane.b, jane.c->vreg); printf("%d %d %s\n", jane.d, jane.c->intval, jane.c->thing); return(0); } 
================================================ FILE: 63_QBE/tests/input151.c ================================================ #include int main() { int x; int y; y= 0; for (x=0; x < 5; x++) { switch(x) { case 1: { y= 5; break; } case 2: { y= 7; break; } case 3: { y= 9; } } printf("%d\n", y); } return(0); } ================================================ FILE: 63_QBE/tests/input152.c ================================================ #include void fred(int x) { int a = 2; int *b = &x; printf("%d %d %d\n", x, a, *b); } int main() { fred(4); return(0); } ================================================ FILE: 63_QBE/tests/input153.c ================================================ #include enum { C_GLOBAL = 1, C_LOCAL, C_PARAM, C_EXTERN, C_STATIC, C_STRUCT, C_UNION, C_MEMBER, C_ENUMTYPE, C_ENUMVAL, C_TYPEDEF }; int main() { int class; int b; char q; for (class = C_GLOBAL; class <= C_TYPEDEF; class ++) { q = ((class == C_GLOBAL) || (class == C_STATIC) || (class == C_EXTERN)) ? '$' : '%'; printf("class %d prefix %c\n", class, q); } return(0); } ================================================ FILE: 63_QBE/tests/input154.c ================================================ #include int main() { int x= 5; long y= x; int z= (int)y; printf("%d %ld %d\n", x, y, z); return(0); } ================================================ FILE: 63_QBE/tests/mktests ================================================ #!/bin/sh # Make the output files for each test # Build our compiler if needed if [ ! -f ../cwj ] then (cd ..; make install) fi for i in input*c do if [ ! -f "out.$i" -a ! -f "err.$i" ] then ../cwj -o out $i 2> "err.$i" # If the err file is empty if [ ! 
-s "err.$i" ] then rm -f "err.$i" cc -o out $i ./out > "out.$i" fi fi rm -f out out.s done ================================================ FILE: 63_QBE/tests/out.input001.c ================================================ 36 10 25 ================================================ FILE: 63_QBE/tests/out.input002.c ================================================ 17 ================================================ FILE: 63_QBE/tests/out.input003.c ================================================ 1 2 3 4 5 ================================================ FILE: 63_QBE/tests/out.input004.c ================================================ 1 1 1 1 1 1 1 1 1 ================================================ FILE: 63_QBE/tests/out.input005.c ================================================ 6 ================================================ FILE: 63_QBE/tests/out.input006.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 63_QBE/tests/out.input007.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 63_QBE/tests/out.input008.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 63_QBE/tests/out.input009.c ================================================ 1 2 3 4 5 6 7 8 9 10 ================================================ FILE: 63_QBE/tests/out.input010.c ================================================ 20 10 1 2 3 4 5 253 254 255 0 1 ================================================ FILE: 63_QBE/tests/out.input011.c ================================================ 10 20 30 1 2 3 4 5 253 254 255 0 1 2 3 1 2 3 4 5 ================================================ FILE: 63_QBE/tests/out.input012.c ================================================ 5 ================================================ FILE: 63_QBE/tests/out.input013.c 
================================================ 23 56 ================================================ FILE: 63_QBE/tests/out.input014.c ================================================ 10 20 30 ================================================ FILE: 63_QBE/tests/out.input015.c ================================================ 18 18 12 12 ================================================ FILE: 63_QBE/tests/out.input016.c ================================================ 12 18 ================================================ FILE: 63_QBE/tests/out.input017.c ================================================ 19 12 ================================================ FILE: 63_QBE/tests/out.input018.c ================================================ 34 34 ================================================ FILE: 63_QBE/tests/out.input018a.c ================================================ 15 16 ================================================ FILE: 63_QBE/tests/out.input019.c ================================================ 30 ================================================ FILE: 63_QBE/tests/out.input020.c ================================================ 12 ================================================ FILE: 63_QBE/tests/out.input021.c ================================================ 10 Hello world ================================================ FILE: 63_QBE/tests/out.input022.c ================================================ 12 12 12 13 13 13 13 13 13 35 35 35 ================================================ FILE: 63_QBE/tests/out.input023.c ================================================ -23 100 -2 0 1 0 13 14 Hello world ================================================ FILE: 63_QBE/tests/out.input024.c ================================================ 2 59 57 8 7 ================================================ FILE: 63_QBE/tests/out.input025.c ================================================ 10 20 30 5 15 25 ================================================ FILE: 
63_QBE/tests/out.input026.c ================================================ 13 23 34 44 54 64 74 84 94 95 96 ================================================ FILE: 63_QBE/tests/out.input027.c ================================================ 1 2 3 4 5 6 7 8 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 ================================================ FILE: 63_QBE/tests/out.input028.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 63_QBE/tests/out.input029.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 63_QBE/tests/out.input030.c ================================================ int printf(char *fmt); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input030.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 63_QBE/tests/out.input053.c ================================================ Hello world, 23 ================================================ FILE: 63_QBE/tests/out.input054.c ================================================ Hello world, 0 Hello world, 1 Hello world, 2 Hello world, 3 Hello world, 4 Hello world, 5 Hello world, 6 Hello world, 7 Hello world, 8 Hello world, 9 Hello world, 10 Hello world, 11 Hello world, 12 Hello world, 13 Hello world, 14 Hello world, 15 Hello world, 16 Hello world, 17 Hello world, 18 Hello world, 19 ================================================ FILE: 63_QBE/tests/out.input055.c ================================================ Hello world Argument 0 is ./out ================================================ FILE: 63_QBE/tests/out.input058.c ================================================ 12 99 4005 4116 4116 
================================================ FILE: 63_QBE/tests/out.input062.c ================================================ 65 66 66 The next two depend on the endian of the platform 66 66 67 67 ================================================ FILE: 63_QBE/tests/out.input063.c ================================================ 25 ================================================ FILE: 63_QBE/tests/out.input067.c ================================================ 5 17 ================================================ FILE: 63_QBE/tests/out.input070.c ================================================ 56 ================================================ FILE: 63_QBE/tests/out.input071.c ================================================ 0 1 2 3 4 7 8 9 10 11 12 13 14 Done ================================================ FILE: 63_QBE/tests/out.input074.c ================================================ 100 5 7 100 100 ================================================ FILE: 63_QBE/tests/out.input080.c ================================================ 0 1 1 3 2 5 3 7 4 9 5 11 ================================================ FILE: 63_QBE/tests/out.input081.c ================================================ 0 1 1 3 2 5 3 7 4 9 ================================================ FILE: 63_QBE/tests/out.input082.c ================================================ 15 >= x > 5 ================================================ FILE: 63_QBE/tests/out.input083.c ================================================ 5 < 6 <= 10 5 < 7 <= 10 5 < 8 <= 10 5 < 9 <= 10 5 < 10 <= 10 10 < 11 ================================================ FILE: 63_QBE/tests/out.input084.c ================================================ 2 3 f f ================================================ FILE: 63_QBE/tests/out.input088.c ================================================ 5 6 ================================================ FILE: 63_QBE/tests/out.input089.c ================================================ 23 H Hello 
world ================================================ FILE: 63_QBE/tests/out.input090.c ================================================ 23 100 H Hello world ================================================ FILE: 63_QBE/tests/out.input091.c ================================================ 1 2 3 4 5 1 2 3 4 5 0 0 0 0 0 ================================================ FILE: 63_QBE/tests/out.input099.c ================================================ EOF = || && | ^ & == != , > <= >= << >> + - * / ++ -- ~ ! void char int long if else while for return struct union enum typedef extern break continue switch case default intlit strlit ; identifier { } ( ) [ ] , . -> : ================================================ FILE: 63_QBE/tests/out.input100.c ================================================ Hello world 17 20 ================================================ FILE: 63_QBE/tests/out.input101.c ================================================ 0xff 0x0 ================================================ FILE: 63_QBE/tests/out.input106.c ================================================ 0x0 ================================================ FILE: 63_QBE/tests/out.input107.c ================================================ fish cow NULL ================================================ FILE: 63_QBE/tests/out.input108.c ================================================ ================================================ FILE: 63_QBE/tests/out.input109.c ================================================ 16 ================================================ FILE: 63_QBE/tests/out.input110.c ================================================ 18 12 45 5 ================================================ FILE: 63_QBE/tests/out.input111.c ================================================ 2029 ================================================ FILE: 63_QBE/tests/out.input112.c ================================================ 16 ================================================ FILE: 
63_QBE/tests/out.input113.c ================================================ fred says hello ================================================ FILE: 63_QBE/tests/out.input114.c ================================================ J ================================================ FILE: 63_QBE/tests/out.input115.c ================================================ 1 4 8 8 13 64 48 ================================================ FILE: 63_QBE/tests/out.input116.c ================================================ 0 1 2 3 4 ================================================ FILE: 63_QBE/tests/out.input117.c ================================================ Hello ================================================ FILE: 63_QBE/tests/out.input119.c ================================================ 8 6 ================================================ FILE: 63_QBE/tests/out.input120.c ================================================ 9 10 11 12 13 7 8 9 10 11 ================================================ FILE: 63_QBE/tests/out.input121.c ================================================ 2 3 4 5 13 14 15 16 1000 1000 ================================================ FILE: 63_QBE/tests/out.input122.c ================================================ x 0, y 0, x || y 0, x && y 0 x 0, y 1, x || y 1, x && y 0 x 1, y 0, x || y 1, x && y 0 x 1, y 1, x || y 1, x && y 1 ================================================ FILE: 63_QBE/tests/out.input123.c ================================================ 0 infant composite 1 infant composite 2 infant prime 3 infant prime 4 infant composite 5 infant prime 6 infant composite 7 infant prime 8 infant composite 9 infant composite 10 infant composite 11 infant prime 12 infant composite 13 teen prime 14 teen composite 15 teen composite 16 teen composite 17 teen prime 18 teen composite 19 teen prime ================================================ FILE: 63_QBE/tests/out.input125.c ================================================ 2008 2008 
================================================ FILE: 63_QBE/tests/out.input127.c ================================================ 2008 2008 ================================================ FILE: 63_QBE/tests/out.input128.c ================================================ 10 10 15 15 20 20 ================================================ FILE: 63_QBE/tests/out.input130.c ================================================ Hello world ================================================ FILE: 63_QBE/tests/out.input131.c ================================================ Doing nothing... nothing done x is now 100 ================================================ FILE: 63_QBE/tests/out.input132.c ================================================ ================================================ FILE: 63_QBE/tests/out.input133.c ================================================ OK ================================================ FILE: 63_QBE/tests/out.input134.c ================================================ 1st match ================================================ FILE: 63_QBE/tests/out.input135.c ================================================ testing x ================================================ FILE: 63_QBE/tests/out.input136.c ================================================ -35 ================================================ FILE: 63_QBE/tests/out.input137.c ================================================ x is 36 ================================================ FILE: 63_QBE/tests/out.input138.c ================================================ 0 0 | 0 0 1 | 0 1 0 | 0 1 1 | 1 0 0 | 0 0 1 | 1 1 0 | 1 1 1 | 1 aptr is NULL or doesn't point at 1 aptr points at 1 ================================================ FILE: 63_QBE/tests/out.input139.c ================================================ same apparently ================================================ FILE: 63_QBE/tests/out.input140.c ================================================ 0 1 4 9 16 5 H 
================================================ FILE: 63_QBE/tests/out.input143.c ================================================ One of the three is NULL One of the three is NULL One of the three is NULL All three are non-NULL ================================================ FILE: 63_QBE/tests/out.input144.c ================================================ Unable to open fred: Success ================================================ FILE: 63_QBE/tests/out.input145.c ================================================ q w e r e 3 5 7 9 7 ================================================ FILE: 63_QBE/tests/out.input146.c ================================================ q w e r e r e r e 3 5 7 9 7 9 7 9 7 ================================================ FILE: 63_QBE/tests/out.input147.c ================================================ 6 9 6 9 ================================================ FILE: 63_QBE/tests/out.input148.c ================================================ i is 1 i is 2 i is 3 leftover owl ================================================ FILE: 63_QBE/tests/out.input149.c ================================================ 1 -4 2 -8 3 -12 4 -16 5 -21 6 -27 7 -34 8 -42 9 -51 10 -61 11 -72 12 -84 ================================================ FILE: 63_QBE/tests/out.input150.c ================================================ 1 2 3 4 5 6 7 8 9 1 2 3 4 5 fish ================================================ FILE: 63_QBE/tests/out.input151.c ================================================ 0 5 7 9 9 ================================================ FILE: 63_QBE/tests/out.input152.c ================================================ 4 2 4 ================================================ FILE: 63_QBE/tests/out.input153.c ================================================ class 1 prefix $ class 2 prefix % class 3 prefix % class 4 prefix $ class 5 prefix $ class 6 prefix % class 7 prefix % class 8 prefix % class 9 prefix % class 10 prefix % class 11 prefix % 
================================================ FILE: 63_QBE/tests/out.input154.c ================================================ 5 5 5 ================================================ FILE: 63_QBE/tests/runtests ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! -f ../cwj ] then (cd ..; make install) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!"; exit 1 # Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../cwj -o out $i ./out > trial.$i # Compare this against the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" # Stop if our 1st argument is "stop". # The argument is quoted so that a run with # no arguments compares an empty string # instead of breaking the test expression if [ "$1" = "stop" ] then exit 1 fi echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../cwj $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.s "trial.$i" done ================================================ FILE: 63_QBE/tests/runtests2 ================================================ #!/bin/sh # Run each test and compare # against known good output # Build our compiler if needed if [ ! -f ../cwj2 ] then (cd ..; make install; make cwj2) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! 
-f "err.$i" ] then echo "Can't run test on $i, no output file!" # Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../cwj2 -o out $i ./out > trial.$i # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../cwj2 $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" -eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.s "trial.$i" done ================================================ FILE: 63_QBE/tree.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // AST tree functions // Copyright (c) 2019 Warren Toomey, GPL3 // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct symtable *ctype, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->op = op; n->type = type; n->ctype = ctype; n->left = left; n->mid = mid; n->right = right; n->sym = sym; n->a_intvalue = intvalue; n->linenum = 0; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, struct symtable *ctype, struct symtable *sym, int intvalue) { return (mkastnode(op, type, ctype, NULL, NULL, NULL, sym, intvalue)); } // Make a unary AST node: only one child struct ASTnode 
*mkastunary(int op, int type, struct symtable *ctype, struct ASTnode *left, struct symtable *sym, int intvalue) { return (mkastnode(op, type, ctype, left, NULL, NULL, sym, intvalue)); } // Generate and return a new label number // just for AST dumping purposes static int dumpid = 1; static int gendumplabel(void) { return (dumpid++); } // List of AST node names static char *astname[] = { NULL, "ASSIGN", "ASPLUS", "ASMINUS", "ASSTAR", "ASSLASH", "ASMOD", "TERNARY", "LOGOR", "LOGAND", "OR", "XOR", "AND", "EQ", "NE", "LT", "GT", "LE", "GE", "LSHIFT", "RSHIFT", "ADD", "SUBTRACT", "MULTIPLY", "DIVIDE", "MOD", "INTLIT", "STRLIT", "IDENT", "GLUE", "IF", "WHILE", "FUNCTION", "WIDEN", "RETURN", "FUNCCALL", "DEREF", "ADDR", "SCALE", "PREINC", "PREDEC", "POSTINC", "POSTDEC", "NEGATE", "INVERT", "LOGNOT", "TOBOOL", "BREAK", "CONTINUE", "SWITCH", "CASE", "DEFAULT", "CAST" }; // Given an AST tree, print it out and follow the // traversal of the tree that genAST() follows void dumpAST(struct ASTnode *n, int label, int level) { int Lfalse, Lstart, Lend; int i; if (n == NULL) fatal("NULL AST node"); if (n->op > A_CAST) fatald("Unknown dumpAST operator", n->op); // Deal with IF and WHILE statements specifically switch (n->op) { case A_IF: Lfalse = gendumplabel(); for (i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "IF"); if (n->right) { Lend = gendumplabel(); fprintf(stdout, ", end L%d", Lend); } fprintf(stdout, "\n"); dumpAST(n->left, Lfalse, level + 2); dumpAST(n->mid, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); return; case A_WHILE: Lstart = gendumplabel(); for (i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "WHILE, start L%d\n", Lstart); Lend = gendumplabel(); dumpAST(n->left, Lend, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); return; } // Reset level to -2 for A_GLUE nodes if (n->op == A_GLUE) { level -= 2; } else { // General AST node handling for (i = 0; i < level; i++) fprintf(stdout, " "); 
fprintf(stdout, "%s", astname[n->op]); switch (n->op) { case A_FUNCTION: case A_FUNCCALL: case A_ADDR: case A_PREINC: case A_PREDEC: if (n->sym != NULL) fprintf(stdout, " %s", n->sym->name); break; case A_INTLIT: fprintf(stdout, " %d", n->a_intvalue); break; case A_STRLIT: fprintf(stdout, " rval label L%d", n->a_intvalue); break; case A_IDENT: if (n->rvalue) fprintf(stdout, " rval %s", n->sym->name); else fprintf(stdout, " %s", n->sym->name); break; case A_DEREF: if (n->rvalue) fprintf(stdout, " rval"); break; case A_SCALE: fprintf(stdout, " %d", n->a_size); break; case A_CASE: fprintf(stdout, " %d", n->a_intvalue); break; case A_CAST: fprintf(stdout, " %d", n->type); break; } fprintf(stdout, "\n"); } // General AST node handling if (n->left) dumpAST(n->left, NOLABEL, level + 2); if (n->mid) dumpAST(n->mid, NOLABEL, level + 2); if (n->right) dumpAST(n->right, NOLABEL, level + 2); } ================================================ FILE: 63_QBE/types.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" // Types and type handling // Copyright (c) 2019 Warren Toomey, GPL3 // Return true if a type is an int type // of any size, false otherwise int inttype(int type) { return (((type & 0xf) == 0) && (type >= P_CHAR && type <= P_LONG)); } // Return true if a type is of pointer type int ptrtype(int type) { return ((type & 0xf) != 0); } // Given a primitive type, return // the type which is a pointer to it int pointer_to(int type) { if ((type & 0xf) == 0xf) fatald("Unrecognised in pointer_to: type", type); return (type + 1); } // Given a primitive pointer type, return // the type which it points to int value_at(int type) { if ((type & 0xf) == 0x0) fatald("Unrecognised in value_at: type", type); return (type - 1); } // Given a type and a composite type pointer, return // the size of this type in bytes int typesize(int type, struct symtable *ctype) { if (type == P_STRUCT || type == P_UNION) return (ctype->size); return 
(genprimsize(type)); } // Given an AST tree and a type which we want it to become, // possibly modify the tree by widening or scaling so that // it is compatible with this type. Return the original tree // if no changes occurred, a modified tree, or NULL if the // tree is not compatible with the given type. // If this will be part of a binary operation, the AST op is not zero. struct ASTnode *modify_type(struct ASTnode *tree, int rtype, struct symtable *rctype, int op) { int ltype; int lsize, rsize; ltype = tree->type; // For A_LOGOR and A_LOGAND, both types have to be int or pointer types if (op == A_LOGOR || op == A_LOGAND) { if (!inttype(ltype) && !ptrtype(ltype)) return (NULL); if (!inttype(ltype) && !ptrtype(rtype)) return (NULL); return (tree); } // XXX No idea on these yet if (ltype == P_STRUCT || ltype == P_UNION) fatal("Don't know how to do this yet"); if (rtype == P_STRUCT || rtype == P_UNION) fatal("Don't know how to do this yet"); // Compare scalar int types if (inttype(ltype) && inttype(rtype)) { // Both types same, nothing to do if (ltype == rtype) return (tree); // Get the sizes for each type lsize = typesize(ltype, NULL); rsize = typesize(rtype, NULL); // The tree's type size is too big and we can't narrow if (lsize > rsize) return (NULL); // Widen to the right if (rsize > lsize) return (mkastunary(A_WIDEN, rtype, NULL, tree, NULL, 0)); } // For pointers if (ptrtype(ltype) && ptrtype(rtype)) { // We can compare them if (op >= A_EQ && op <= A_GE) return (tree); // A comparison of the same type for a non-binary operation is OK, // or when the left tree is of `void *` type. 
if (op == 0 && (ltype == rtype || ltype == pointer_to(P_VOID))) return (tree); } // We can scale only on add and subtract operations if (op == A_ADD || op == A_SUBTRACT || op == A_ASPLUS || op == A_ASMINUS) { // Left is int type, right is pointer type and the size // of the original type is >1: scale the left if (inttype(ltype) && ptrtype(rtype)) { rsize = genprimsize(value_at(rtype)); if (rsize > 1) return (mkastunary(A_SCALE, rtype, rctype, tree, NULL, rsize)); else // No need to scale, but we need to widen to pointer size return (mkastunary(A_WIDEN, rtype, NULL, tree, NULL, 0)); } } // If we get here, the types are not compatible return (NULL); } ================================================ FILE: 64_6809_Target/6809triple_test ================================================ #!/bin/sh # # Build the 6809 compiler binaries. Also create front-end shell # scripts so that we can run them as if they were native programs. # # Do this twice to run the triple test on the 6809 compiler binaries. 
# make clean l1dirs.h mkdir L1 wcc -o L1/wcc wcc.c cc -o L1/cpeep cpeep.c wcc -m6809 -o L1/_cscan scan.c misc.c wcc -m6809 -o L1/_detok detok.c tstring.c wcc -m6809 -o L1/_detree -DDETREE detree.c misc.c tree.c wcc -m6809 -o L1/_desym desym.c wcc -m6809 -o L1/_cparse6809 -DWRITESYMS decl.c expr.c misc.c opt.c \ parse.c stmt.c sym.c tree.c targ6809.c tstring.c types.c wcc -m6809 -o L1/_cgen6809 -DSPLITSWITCH cg6809.c cgen.c gen.c misc.c sym.c \ targ6809.c tree.c types.c rm -f l1dirs.h dirs.h # Make the front-end shell scripts dir=`pwd`/L1 for i in cscan detok detree desym cparse6809 cgen6809 do cat << EOF > L1/$i #!/bin/sh exec emu6809 $dir/_$i \$* EOF chmod +x L1/$i done # exit 0 # Now we do it all again for L2 make l2dirs.h mkdir L2 wcc -o L2/wcc wcc.c cc -o L2/cpeep cpeep.c L1/wcc -m6809 -v -o L2/_cscan scan.c misc.c L1/wcc -m6809 -v -o L2/_detok detok.c tstring.c L1/wcc -m6809 -v -o L2/_detree -DDETREE detree.c misc.c tree.c L1/wcc -m6809 -v -o L2/_desym desym.c L1/wcc -m6809 -v -o L2/_cparse6809 -DWRITESYMS decl.c expr.c misc.c opt.c \ parse.c stmt.c sym.c tree.c targ6809.c tstring.c types.c L1/wcc -m6809 -v -o L2/_cgen6809 -DSPLITSWITCH cg6809.c cgen.c gen.c misc.c sym.c \ targ6809.c tree.c types.c rm -f l2dirs.h dirs.h # Make the front-end shell scripts dir=`pwd`/L2 for i in cscan detok detree desym cparse6809 cgen6809 do cat << EOF > L2/$i #!/bin/sh exec emu6809 $dir/_$i \$* EOF chmod +x L2/$i done exit 0 ================================================ FILE: 64_6809_Target/Makefile ================================================ # Define the location of the include directory # and the location to install the compiler binary. # You will need to make TOPDIR and make it writable by you. 
# TOPDIR=/opt/wcc BINDIR=$(TOPDIR)/bin INCQBEDIR=$(TOPDIR)/include/qbe INC6809DIR=$(TOPDIR)/include/6809 LIB6809DIR=$(TOPDIR)/lib/6809 CFLAGS=-g -Wall # CFLAGS+= --coverage # then gcov *gcno # Header files and C files for the QBE and 6809 parser phase # PARSEH= cg.h data.h decl.h defs.h expr.h gen.h misc.h opt.h \ parse.h stmt.h sym.h target.h tree.h types.h PARSEC6809= decl.c expr.c misc.c opt.c parse.c stmt.c sym.c tree.c \ targ6809.c tstring.c types.c PARSECQBE= decl.c expr.c misc.c opt.c parse.c stmt.c sym.c tree.c \ targqbe.c tstring.c types.c # Header files and C files for the QBE and 6809 code generator phase # GENH= cg.h data.h defs.h gen.h misc.h sym.h target.h tree.h types.h GENC6809= cg6809.c cgen.c gen.c misc.c sym.c targ6809.c tree.c types.c GENCQBE= cgqbe.c cgen.c gen.c misc.c sym.c targqbe.c tree.c types.c # These executables are compiled by the existing C compiler on your system. # all: wcc cscan detok detree desym cpeep \ cparse6809 cgen6809 cparseqbe cgenqbe wcc: wcc.c wcc.h l0dirs.h cc -o wcc $(CFLAGS) wcc.c cscan: scan.c defs.h misc.h misc.c cc -o cscan $(CFLAGS) scan.c misc.c cpeep: cpeep.c cc -o cpeep $(CFLAGS) cpeep.c cparse6809: $(PARSEC6809) $(PARSEH) cc -o cparse6809 $(CFLAGS) -DWRITESYMS $(PARSEC6809) cgen6809: $(GENC6809) $(GENH) cc -o cgen6809 $(CFLAGS) $(GENC6809) cparseqbe: $(PARSECQBE) $(PARSEH) cc -o cparseqbe $(CFLAGS) -DWRITESYMS $(PARSECQBE) cgenqbe: $(GENCQBE) $(GENH) cc -o cgenqbe $(CFLAGS) $(GENCQBE) desym: desym.c defs.h types.h cc -o desym $(CFLAGS) desym.c detok: detok.c tstring.c defs.h cc -o detok $(CFLAGS) detok.c tstring.c detree: detree.c misc.c tree.c misc.h defs.h tree.h cc -o detree $(CFLAGS) -DDETREE detree.c misc.c tree.c l0dirs.h: echo "#define TOPDIR \"$(TOPDIR)\"" > l0dirs.h echo "#define INCQBEDIR \"$(INCQBEDIR)\"" >> l0dirs.h echo "#define INC6809DIR \"$(INC6809DIR)\"" >> l0dirs.h echo "#define BINDIR \"$(BINDIR)\"" >> l0dirs.h echo "#define LIB6809DIR \"$(LIB6809DIR)\"" >> l0dirs.h cp l0dirs.h dirs.h # 
Install the compiler built by the external compiler # install: all @if [ ! -d $(TOPDIR) ]; then echo "$(TOPDIR) doesn't exit, create it writeable by you"; exit 1; fi mkdir -p $(INC6809DIR) mkdir -p $(INCQBEDIR) mkdir -p $(BINDIR) mkdir -p $(LIB6809DIR) rsync -a --exclude RCS include/qbe/. $(INCQBEDIR) rsync -a --exclude RCS include/6809/. $(INC6809DIR) rsync -a --exclude Makefile --exclude RCS --exclude crt0.s \ lib/6809/. $(LIB6809DIR) cp wcc cscan detok detree desym cpeep \ cparse6809 cgen6809 \ cparseqbe cgenqbe $(BINDIR) # These rules are for the compiler to build itself. # Use the L1 directory to hold the binaries. # l1dirs.h: TOPDIR= `pwd` l1dirs.h: BINDIR= `pwd`/L1 l1dirs.h: echo "#define TOPDIR \"$(TOPDIR)\"" > l1dirs.h echo "#define INCQBEDIR \"$(INCQBEDIR)\"" >> l1dirs.h echo "#define INC6809DIR \"$(INC6809DIR)\"" >> l1dirs.h echo "#define BINDIR \"$(BINDIR)\"" >> l1dirs.h echo "#define LIB6809DIR \"$(LIB6809DIR)\"" >> l1dirs.h cp l1dirs.h dirs.h # These executables are compiled by our own compiler # l1bins: install L1/wcc L1/cscan L1/cparseqbe L1/cgenqbe \ L1/desym L1/detok L1/detree L1/wcc: wcc.c wcc.h l1dirs.h mkdir -p L1 wcc -o L1/wcc wcc.c L1/cscan: scan.c defs.h misc.h misc.c wcc -o L1/cscan scan.c misc.c L1/cparseqbe: $(PARSECQBE) $(PARSEH) wcc -o L1/cparseqbe -DWRITESYMS $(PARSECQBE) L1/cgenqbe: $(GENCQBE) $(GENH) wcc -o L1/cgenqbe $(GENCQBE) L1/desym: desym.c defs.h types.h wcc -o L1/desym desym.c L1/detok: detok.c tstring.c defs.h wcc -o L1/detok detok.c tstring.c L1/detree: detree.c misc.c tree.c misc.h defs.h tree.h wcc -o L1/detree -DDETREE detree.c misc.c tree.c # These rules are for the compiler to build itself a second time. # If the binaries match those built the first time, we know that # the compiler can successfully compile itself. # Use the L2 directory to hold the binaries. 
# l2dirs.h: TOPDIR= `pwd` l2dirs.h: BINDIR= `pwd`/L2 l2dirs.h: echo "#define TOPDIR \"$(TOPDIR)\"" > l2dirs.h echo "#define INCQBEDIR \"$(INCQBEDIR)\"" >> l2dirs.h echo "#define INC6809DIR \"$(INC6809DIR)\"" >> l2dirs.h echo "#define BINDIR \"$(BINDIR)\"" >> l2dirs.h echo "#define LIB6809DIR \"$(LIB6809DIR)\"" >> l2dirs.h cp l2dirs.h dirs.h # These executables are compiled by our own compiler # l2bins: l1bins L2/wcc L2/cscan L2/cparseqbe L2/cgenqbe \ L2/desym L2/detok L2/detree L2/wcc: wcc.c wcc.h l2dirs.h mkdir -p L2 L1/wcc -o L2/wcc wcc.c L2/cscan: scan.c defs.h misc.h misc.c L1/wcc -o L2/cscan scan.c misc.c L2/cparseqbe: $(PARSECQBE) $(PARSEH) L1/wcc -o L2/cparseqbe -DWRITESYMS $(PARSECQBE) L2/cgenqbe: $(GENCQBE) $(GENH) L1/wcc -o L2/cgenqbe $(GENCQBE) L2/desym: desym.c defs.h types.h L1/wcc -o L2/desym desym.c L2/detok: detok.c tstring.c defs.h L1/wcc -o L2/detok detok.c tstring.c L2/detree: detree.c misc.c tree.c misc.h defs.h tree.h L1/wcc -o L2/detree -DDETREE detree.c misc.c tree.c # Do the triple test: build the compiler with the external compiler, # build the compiler with itself and then use this compiler to # build the compiler again. The binaries should be identical. Note # that the `wcc` binaries are different because the TOPDIR is different. # triple: l2bins md5sum L1/* L2/* | sort # Clean up all versions of the compiler clean: rm -f wcc cscan detok detree desym cpeep \ cparse6809 cgen6809 \ cparseqbe cgenqbe rm -f *.o *.s out a.out dirs.h l?dirs.h *.gc?? rm -rf L1 L2 # Run the tests with the compiler built with the external compiler # test: install tests/runtests (cd tests; chmod +x runtests; ./runtests qbe) tests: test # Run the 6809 tests 6test: install tests/runtests (cd tests; chmod +x runtests; ./runtests 6809) ================================================ FILE: 64_6809_Target/Readme.md ================================================ # Part 64: Self-compilation on an 8-bit CPU I'm back with another chapter of this compiler writing journey. 
This time, the goal is to get the compiler to compile itself on an 8-bit CPU from the 1980s. It's been an interesting, sometimes fun, sometimes painful task. Here's a summary of all the work that I've had to do. For the CPU, I chose the [Motorola 6809](https://en.wikipedia.org/wiki/Motorola_6809). This is probably one of the most sophisticated 8-bit CPUs from the 1980s, with a bunch of useful addressing modes and, importantly, a useful stack pointer. What makes it difficult to write a compiler for the 6809 is the address space limitation. Like many of the 8-bit CPUs, there is only 64K of memory (yes, 65,536 _bytes_!) and, on most vintage 6809 systems, a significant portion of this is taken up by ROM. I went in this direction as, in 2023, I decided to try and build a single board computer (SBC) using the 6809 as the CPU. In particular, I wanted to have a machine with at least half a megabyte of memory, a disk-like storage device, and a Unix-like operating system. The result is the [MMU09 SBC](https://github.com/DoctorWkt/MMU09). The project is semi-incomplete; it does have a Unix-like system, it does do multitasking, but there is no pre-emptive multitasking. Each process gets 63.5K of usable address space (i.e. RAM). While I was working on MMU09, I needed to find a suitable C compiler to compile the code for the operating system, the libraries and the applications. I started with [CMOC](http://perso.b2b2c.ca/~sarrazip/dev/cmoc.html) but eventually switched over to [vbcc](http://www.compilers.de/vbcc.html). Along the way I found Alan Cox's [Fuzix Compiler Kit](https://github.com/EtchedPixels/Fuzix-Compiler-Kit) which is a work-in-progress C compiler for many 8-bit and 16-bit CPUs. All of this got me to thinking: is it possible to have the C compiler run _on_ the 6809 and not just cross compile from a more powerful system? I thought the Fuzix Compiler Kit might be a contender but, no, it's just too big to fit on the 6809 itself.
So here we are with the question/goal: can the "acwj" compiler be modified to fit and run on a 6809 platform? ## The 6809 CPU Let's start with a look at the 6809 CPU from a compiler writer's perspective. I've already mentioned the 64K address space limitation: that's going to require the "acwj" compiler to be completely restructured to fit. Now let's look at the 6809's architecture. ![](docs/6809_Internal_Registers.png) Creative Commons CC0 license, [Wikipedia](https://commons.wikimedia.org/wiki/File:6809_Internal_Registers.svg) For an 8-bit CPU, the 6809 has quite a few registers. Well, it's not like the x64 or a RISC CPU with a bunch of general-purpose registers. There is a single 16-bit `D` register on which we can do logical and arithmetic operations. It can also be accessed as two 8-bit registers `A` and `B`, of which `B` is the least-significant byte in the `D` register. When doing logical and arithmetic operations, the second operand is either a memory location accessed via some addressing mode, or a literal value. The result of the operation is put back in the `D` register: hence, it _accumulates_ the operation's result. To access memory, there are a bunch of addressing modes to do so. In fact, there are many more available than a compiler needs! We have the index registers `X` and `Y` to, for example, access an element in an array when we know the base address and `X` holds the element's index. We can also access memory by using a signed constant and the stack pointer `S` as the index; this allows us to treat `S` as a [frame pointer](https://en.wikipedia.org/wiki/Call_stack#FRAME-POINTER). We can find the local variables of a function at addresses below the frame pointer and function arguments at addresses above the frame pointer.
Let's have a look at some examples to make the above a bit clearer: ``` ldd #2 # Load D with the constant 2 ldd 2 # Load D from addresses 2 and 3 (16 bits) ldd _x # Load D from the location known as _x ldd 2,s # Load D from an argument on the stack std -20,s # Store D to a local variable on the stack leax 8,s # Get the (effective) address which is S+8 # and store it in the X register ldd 4,x # Now use that as a pointer to an int array # and load the value at index 2 - remember # that D is 16-bits (2 bytes), so 4 bytes # are two 16-bit "words" addd -6,s # Add the int we just fetched to a local # variable and save it in the D register ``` For more details, I'd recommend that you browse through the [6809 datasheet](docs/6809Data.pdf). Pages 5-6 cover the registers, pages 16-18 cover the addressing modes, and pages 25-27 list the available instructions. Back to targetting the "acwj" compiler to the 6809. Well, having a lot of addressing modes is great. We can deal with 8-bit values and 16-bit values, but there are no 32-bit registers. OK, we can sort that out somehow. But the biggest problem, apart from the 64K address space, is that the "acwj" compiler was written for an architecture that has two- or three-operand instructions, and with a lot of available registers, e.g. ``` load R1, _x # Bring _x and _y into registers load R2, _y add R3, R1, R2 # R3= R1 + R2 save R3, _z # Store the result into _z ``` The 6809 usually has the `D` register as one instruction operand, and memory or a literal value as the other operand; the result always ends up in the `D` register. ## Keeping the QBE Backend I also wanted to keep the existing QBE backend in the compiler. I knew that this would be invaluable as I made changes to the compiler - I could run the tests with both the QBE and 6809 backends and compare results. And I could always stress-test the compiler by trying to perform the triple test using the QBE backend. 
So now the full goal is: can I take the abstract syntax tree (AST) generated by the compiler's parser and use it to generate assembly code for two completely different architectures: QBE (RISC-like, three-operand instructions) and the 6809 (only one register, two-operand instructions with implicit source and destination)? And can I get the compiler to self-compile on both architectures? This is going to be an interesting journey! ## The Code Generator Contract Now that we are going to have two different backends, we need a "contract" or API between the architecture-independent part of the code generator ([gen.c](gen.c)) and each architecture-dependent part. This is now the list of functions defined in [gen.h](gen.h). The basic API is the same as before. We pass in one or more "register numbers" and get back a register number that holds the result. One difference this time is that many of the functions receive the architecture-independent `type` of the operands; this is defined in [defs.h](defs.h): ``` // Primitive types. The bottom 4 bits is an integer // value that represents the level of indirection, // e.g. 0= no pointer, 1= pointer, 2= pointer pointer etc. enum { P_NONE, P_VOID = 16, P_CHAR = 32, P_INT = 48, P_LONG = 64, P_STRUCT=80, P_UNION=96 }; ``` If you look at the QBE code generator in [cgqbe.c](cgqbe.c), it is pretty much the same as in the last chapter in this "acwj" journey. One thing to note is that I've abstracted a few of the functions into a separate file, [targqbe.c](targqbe.c), as the parser and code generator now live in different programs. Now let's look at the 6809 code generator. ## 6809-Specific Types and the D Register The big problem is how to have the idea of multiple registers on the 6809. I'll cover that in the next section, but I need to take a short detour first. Each architecture-dependent code generator gets given the type of operands: P_CHAR, P_INT etc. 
For the 6809 code generator, we convert these into 6809-specific types, as defined in [cg6809.c](cg6809.c): ``` #define PR_CHAR 1 // size 1 byte #define PR_INT 2 // size 2 bytes #define PR_POINTER 3 // size 2 bytes #define PR_LONG 4 // size 4 bytes ``` In this file, you will see a lot of this sort of code: ``` int primtype= cgprimtype(type); switch(primtype) { case PR_CHAR: // Code to generate char operations case PR_INT: case PR_POINTER: // Code to generate int operations case PR_LONG: // Code to generate long operations } ``` Even though `PR_INT` and `PR_POINTER` are the same size and generate the same code, I've kept the ideas separate. That's because pointers are really unsigned whereas `int`s are signed. Later on, if I get to adding signed and unsigned types to the compiler, I already have a head start here in the 6809 backend. ## How Registers When No Registers? Now, back to the main problem: if the code generator API uses register numbers, how do we write a 6809 backend when this CPU only has a single accumulator, `D`? When I began writing the 6809 backend, I started with a set of 4-byte memory locations called `R0, R1, R2` etc. You can still see them in [lib/6809/crt0.s](lib/6809/crt0.s): ``` R0: .word 0 .word 0 R1: .word 0 .word 0 ... ``` This helped me get the 6809 backend up and running, but the code generated was awful. For example, this C code: ``` int x, y, z; ... z= x + y; ``` gets translated to: ``` ldd _x std R0 ldd _y std R1 ldd R0 addd R1 std R2 ldd R2 std _z ``` Then I realised: the 6809 is very "address"-oriented: there are a bunch of addressing modes, and most instructions have an address (or a literal) as an operand. So, let's keep a list of "_locations_". 
A location is one of the following, defined in [cg6809.c](cg6809.c): ``` enum { L_FREE, // This location is not used L_SYMBOL, // A global symbol with an optional offset L_LOCAL, // A local variable or parameter L_CONST, // An integer literal value L_LABEL, // A label L_SYMADDR, // The address of a symbol, local or parameter L_TEMP, // A temporarily-stored value: R0, R1, R2 ... L_DREG // The D location, i.e. B, D or Y/D }; ``` and we keep a list of free or in-use locations which have this structure: ``` struct Location { int type; // One of the L_ values char *name; // A symbol's name long intval; // Offset, const value, label-id etc. int primtype; // 6809 primitive type }; ``` Examples: - a global `int x` would be an L_SYMBOL with `name` set to "x" and `primtype` set to PR_INT. - a local `char *ptr` would be an L_LOCAL with no name, but the `intval` would be set to its offset in the stack frame, e.g. -8. `primtype` would be PR_POINTER. If it were a function parameter, the offset would be positive. - if the operand was something like `&x` (the address of `x`), then the location would be an L_SYMADDR with `name` set to "x". - a literal value like 456 would be an L_CONST with the `intval` set to 456 and `primtype` set to PR_INT. - finally, if the operand is already in the `D` register, we would have an L_DREG location with a certain PR_ type. So, locations stand in for registers. We have an array of 16 locations: ``` #define NUMFREELOCNS 16 static struct Location Locn[NUMFREELOCNS]; ``` Let's take a look at the code to generate addition on the 6809. 
``` // Add two locations together and return // the number of the location with the result int cgadd(int l1, int l2, int type) { int primtype= cgprimtype(type); load_d(l1); switch(primtype) { case PR_CHAR: fprintf(Outfile, "\taddb "); printlocation(l2, 0, 'b'); break; case PR_INT: case PR_POINTER: fprintf(Outfile, "\taddd "); printlocation(l2, 0, 'd'); break; break; case PR_LONG: fprintf(Outfile, "\taddd "); printlocation(l2, 2, 'd'); fprintf(Outfile, "\texg y,d\n"); fprintf(Outfile, "\tadcb "); printlocation(l2, 1, 'f'); fprintf(Outfile, "\tadca "); printlocation(l2, 0, 'e'); fprintf(Outfile, "\texg y,d\n"); } cgfreelocn(l2); Locn[l1].type= L_DREG; d_holds= l1; return(l1); } ``` We first determine the 6809 type from the generic operand type. Then we load the value from the first location `l1` into the `D` register. Then, based on the 6809 type, we output the right set of instructions and print the second location `l2` after each instruction. Once the addition is done, we free the second location and mark that the first location `l1` is now the `D` register. We also note that `D` is now in-use before returning. Using the idea of locations, the C code `z= x + y` now gets translated to: ``` ldd _x ; i.e. load_d(l1); addd _y ; i.e. fprintf(Outfile, "\taddd "); printlocation(l2, 0, 'd'); std _z ; performed in another function, cgstorglob() ``` ## Dealing with Longs The 6809 has 8-bit and 16-bit operations, but the compiler needs to synthesize operations on 32-bit longs. Also, there is no 32-bit register. > Aside: the 6809 is big-endian. If the `long` value of 0x12345678 > was stored in a `long` variable named `foo`, then 0x12 would be > at `foo` offset 0, 0x34 at `foo` offset 1, 0x56 at `foo` offset > 2 and 0x78 at `foo` offset 3. I've borrowed the idea for longs that Alan Cox uses in the [Fuzix Compiler Kit](https://github.com/EtchedPixels/Fuzix-Compiler-Kit).
We use the `Y` register to hold the top-half of a 32-bit long with the `D` register holding the lower half: ![](docs/long_regs.png) The 6809 already calls the lower half of the `D` register the `B` register, used for 8-bit operations. And there is the `A` register which is the top half of the `D` register. Looking at the above `cgadd()` code, you can see that, if `x`, `y` and `z` were `long`s not `int`s, we would generate: ``` ldd _x+2 ; Get lower half of _x into D ldy _x+0 ; Get upper half of _x into Y addd _y+2 ; Add lower half of _y to D exg y,d ; Swap Y and D registers adcb _y+1 ; Add _y offset 1 to the B register with carry adca _y+0 ; Add _y offset 0 to the A register with carry exg y,d ; Swap Y and D registers back again std _z+2 ; Finally store D (the lower half) in _z offset 2 sty _z ; and Y (the upper half) in _z offset 0 ``` It's a bit of a pain: there is a 16-bit `addd` operation with no carry but there is no 16-bit addition operation with carry. Instead, we have to perform two 8-bit additions with carry to get the same result. This inconsistency with the available 6809 operations makes the 6809 code generator code annoyingly ugly in places. # printlocation() A lot of the work in handling locations is performed by the `printlocation()` function. Let's break it down into a few stages. ``` // Print a location out. For memory locations // use the offset. For constants, use the // register letter to determine which part to use. static void printlocation(int l, int offset, char rletter) { int intval; if (Locn[l].type == L_FREE) fatald("Error trying to print location", l); switch(Locn[l].type) { case L_SYMBOL: fprintf(Outfile, "_%s+%d\n", Locn[l].name, offset); break; case L_LOCAL: fprintf(Outfile, "%ld,s\n", Locn[l].intval + offset + sp_adjust); break; case L_LABEL: fprintf(Outfile, "#L%ld\n", Locn[l].intval); break; case L_SYMADDR: fprintf(Outfile, "#_%s\n", Locn[l].name); break; case L_TEMP: fprintf(Outfile, "R%ld+%d\n", Locn[l].intval, offset); break; ... 
``` If the location is L_FREE, then there is no point in trying to print it! For symbols, we print out the symbol's name followed by the offset. That way, for `int`s and `long`s, we can get access to all 2 or 4 bytes that make up the symbol: `_x+0`, `_x+1`, `_x+2`, `_x+3`. For locals and function parameters, we print out the position in the stack frame (i.e. `intval` with the offset added on). So if a local `long` variable `fred` is on the stack at position -12, we can get access to all four bytes with `-12,s`, `-11,s`, `-10,s`, `-9,s`. Yes, there is something called `sp_adjust` here. I'll talk about that soon! Now, L_TEMP locations. As with all previous versions of the compiler, sometimes we have to store intermediate results somewhere, e.g. ``` int z= (a + b) * (c - d) / (e + f) * (g + h - i) * (q - 3); ``` We have five intermediate results in parentheses which we need before we can do the multiplies and divides. Well, those original pretend registers R0, R1, R2 ... become useful now! When I need temporary storage for intermediate results, I just allocate these locations and store the intermediate results here. There are functions `cgalloctemp()` and `cgfreealltemps()` in [cg6809.c](cg6809.c) to do this. # printlocation() and Literal Values For most locations, we can simply print out the location's name or position on the stack, plus the offset we need. The code generator has already printed out the instruction to run, so: ``` ldb _x+0 ; Will load one byte from _x into B ldd _x+0 ; Will load two bytes from _x into D ``` But for literal values, e.g. 0x12345678, do we need to print out the 0x78 on the end, or perhaps the 0x5678? Or do we need (in the addition code), access to the 0x34 and also the 0x12? That is why there is the `rletter` parameter to `printlocation()`: ``` static void printlocation(int l, int offset, char rletter); ``` When we are printing out literals, we use this to choose which part and how much of the literal value. 
I've chosen values that reflect the 6809's register names, but I also made a few up. For literal 0x12345678: - 'b' prints out the 0x78 part - 'a' prints out the 0x56 part - 'd' prints out the 0x5678 part - 'y' prints out the 0x1234 part - 'f' prints out the 0x34 part - 'e' prints out the 0x12 part ## Helper Functions There are several operations which the compiler needs to perform but which the 6809 has no instruction: multiplication, division, shifts by multiple bits etc. To solve this problem, I've borrowed several of the helper functions from the [Fuzix Compiler Kit](https://github.com/EtchedPixels/Fuzix-Compiler-Kit). They are in the archive file `lib/6809/lib6809.a`. The function `cgbinhelper()` in [cg6809.c](cg6809.c): ``` // Run a helper subroutine on two locations // and return the number of the location with the result static int cgbinhelper(int l1, int l2, int type, char *cop, char *iop, char *lop); ``` gets the value from the two locations `l1` and `l2`, pushes them on the stack and then calls one of the three char/int/long helper functions with names in `cop`, `iop` and `lop`. Thus, the function in the code generator to do multiplication is simply: ``` // Multiply two locations together and return // the number of the location with the result int cgmul(int r1, int r2, int type) { return(cgbinhelper(r1, r2, type, "__mul", "__mul", "__mull")); } ``` # Tracking Positions of Locals and Parameters A function's local variables or parameters are kept on the stack, and we access them by using their offset relative to the stack pointer, e.g. ``` ldd -12,s ; Load the local integer variable which is 12 bytes ; below the stack pointer ``` But there's a problem. What if the stack pointer moves? Consider the code: ``` int main() { int x; x= 2; printf("%d %d %d\n", x, x, x); return(0); } ``` `x` might be at offset 0 relative to the stack pointer. But when we call `printf()`, we push a copy of `x` on the stack. Now the real `x` is at position 2 etc. 
So we actually have to generate the code: ``` ldd 0,s ; Get x's value pshs d ; Push it on the stack ldd 2,s ; Get x's value, note new offset pshs d ; Push it on the stack ldd 4,s ; Get x's value, note new offset pshs d ; Push it on the stack ldd #L2 ; Get the address of the string "%d %d %d\n" pshs d ; Push it on the stack lbsr _printf ; Call printf() leas 8,s ; Pull the 8 bytes of arguments off the stack ``` How do we track what the current offset of locals and parameters are? The answer is the `sp_adjust` variable in [cg6809.c](cg6809.c). Each time we push something on the stack, we add the number of bytes pushed to `sp_adjust`. Similarly, when we pull from the stack or move the stack pointer up, we subtract that amount from `sp_adjust`. Example: ``` // Push a location on the stack static void pushlocn(int l) { load_d(l); switch(Locn[l].primtype) { ... case PR_INT: fprintf(Outfile, "\tpshs d\n"); sp_adjust += 2; break; ... } ... } ``` And in `printlocation()` when we print out locals and parameters: ``` case L_LOCAL: fprintf(Outfile, "%ld,s\n", Locn[l].intval + offset + sp_adjust); ``` There is also a bit of error checking when we get to the end of generating a function's assembly code: ``` // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { ... if (sp_adjust !=0 ) { fprintf(Outfile, "; DANGER sp_adjust is %d not 0\n", sp_adjust); fatald("sp_adjust is not zero", sp_adjust); } } ``` That's about all I want to cover in terms of 6809 assembly code generation. Yes, the code in [cg6809.c](cg6809.c) has to deal with the vagaries of the 6809 instruction set, which is why [cg6809.c](cg6809.c) is so much bigger than [cgqbe.c](cgqbe.c). But I (hope I) have put enough comments in [cg6809.c](cg6809.c) so that you can follow along and understand what it is doing. There are a few tricky things like tracking when the `D` register is in-use or free, and I'm sure I still haven't quite got the synthesis of all the `long` operations right. 
Now we need to cover a much bigger topic, that of the 6809's 64K address limitation. ## Fitting a Compiler into 65,536 Bytes The original "acwj" compiler was a single executable. It read from the C pre-processor's output, did the scanning, parsing and code generation, outputting assembly code. It kept the symbol table and the AST tree for each function in memory, and never bothered to free data structures once they were used. None of this is going to help fit the compiler into 64K of memory! So, my approach for 6809 self-compilation was to: 1. Break the compiler up into a number of phases. Each phase does one part of the overall compilation task, and the phases use intermediate files to communicate. 2. Keep as little of the symbol table and AST trees in memory as we can get away with. Instead, these are kept in files and we have functions to read/write them as required. 3. Try to garbage collect unused data structures with `free()` wherever we can. Let's look at all three in turn. ## The Seven Compiler Phases The compiler is now arranged to have seven phases, each one with its own executable: 1. An external C pre-processor interprets #include, #ifdef and the pre-processor macros. 2. The lexer reads the pre-processor output and produces a token stream. 3. The parser reads the token stream and creates a symbol table plus a set of AST trees. 4. The code generator uses the AST trees and the symbol table and generates assembly code. 5. An external peephole optimiser improves the assembly code. 6. An external assembler produces object files. 7. An external linker takes `crt0.o`, the object files and several libraries and produces a final executable. We now have a frontend program [wcc.c](wcc.c) that co-ordinates all the phases. The lexer is the program called `cscan`. The parser is `cparse6809` or `cparseqbe`. The code generator is `cgen6809` or `cgenqbe`, and the peephole optimiser is `cpeep`. All of these (via `make install`) get installed in `/opt/wcc/bin`. 
It's understandable that there are two code generators, but why are there two parsers? The answer is that `sizeof(int)`, `sizeof(long)` etc. are different on each architecture, so the parser needs to have this information as well as the code generator. Hence the files [targ6809.c](targ6809.c) and [targqbe.c](targqbe.c) which get compiled into the parsers and the code generators. > Aside: the 6809 has a peephole optimiser. The QBE backend uses > the `qbe` program to convert QBE code to x64 code. I guess that's > also a form of optimisation :-) ## Intermediate Files Between all of these seven phases, we need intermediate files to hold the phases' outputs. Normally they get deleted at the end of compilation, but you can keep them if you use the `-X` command-line flag with `wcc`. The C pre-processor's output is stored in a temporary file ending with `_cpp`, e.g. `fred.c_cpp` if we are compiling `fred.c`. The tokeniser's output is stored in a temporary file ending with `_tok`. We have a program called [detok.c](detok.c) which you can use to dump a token file into readable format. The parser produces a symbol table file ending with `_sym` and a set of AST trees that get stored in a file ending with `_ast`. We have programs [desym.c](desym.c) and [detree.c](detree.c) to dump the symbol table and AST tree files. Regardless of the CPU, the code generator always outputs unoptimised assembly code in a file ending with `_qbe`. This gets read by either `qbe` or `cpeep` to produce the optimised assembly code in a temporary file that ends in `_s`. The assembler then assembles this file to produce object files ending in `.o`, which are then linked by the linker to produce the final executable file. Like other compilers, `wcc` has the `-S` flag to output assembly to a file ending with `.s` (and then stop), and the `-c` flag to output object files and then stop. ## Format of the Symbol Table and AST files I took a simple approach for these files which I'm sure could be improved.
I simply write each `struct symtable` and `struct ASTnode` nodes (see [defs.h](defs.h)) directly to the files using `fwrite()`. Many of these have an associated string: symbol names, for example, and AST nodes that hold string literals. For these I just `fwrite()` out the string including the NUL byte at the end. Reading the nodes back in is simple: I just `fread()` the size of each struct. But then I have to read back in the NUL-terminated string if there is one. There isn't a good C library function to do this, so in [misc.c](misc.c) there is a function called `fgetstr()` to do this. One problem with dumping in-memory structures out to disk is that the pointers in the structures lose their meaning: when the structure is reloaded, it's going to end up in another part of memory. Any pointer value becomes invalid. To solve this, both the symbol table structure and the ASTnode structure now have numeric ids, both for the node itself and the nodes it points to. ``` // Symbol table structure struct symtable { char *name; // Name of a symbol int id; // Numeric id of the symbol ... struct symtable *ctype; // If struct/union, ptr to that type int ctypeid; // Numeric id of that type }; // Abstract Syntax Tree structure struct ASTnode { ... struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; int nodeid; // Node id when tree is serialised int leftid; // Numeric ids when serialised int midid; int rightid; ... }; ``` The reading-in code for both is tricky as we have to find and reattach nodes. The bigger question is: how much of each file do we bring in and keep in memory? ## Structures In-Memory vs. On-Disk The tension here is that, if we keep too many symbol table and AST nodes in memory, we will run out of memory. But if we put them out into files then we might have to do a lot of file read/write operations when we need access to the nodes. 
As with most problems of this type, we just choose one heuristic that does a good enough job. One extra constraint here is that we might choose a heuristic which does a great job, but it requires a lot of code which itself puts pressure on available memory. So, here is what I've chosen. It can be replaced, but it's what I've got for now. ## Writing Symbol Table Nodes The parse phase finds symbols and determines their type etc. So it is responsible for writing the symbols to a file. One big change in the compiler is that there is now only a single symbol table, not a set of tables. Each symbol in the unified table now has a structural type and a visibility (in [defs.h](defs.h)): ``` // A symbol in the symbol table is // one of these structural types. enum { S_VARIABLE, S_FUNCTION, S_ARRAY, S_ENUMVAL, S_STRLIT, S_STRUCT, S_UNION, S_ENUMTYPE, S_TYPEDEF, S_NOTATYPE }; // Visibility class for symbols enum { V_GLOBAL, // Globally visible symbol V_EXTERN, // External globally visible symbol V_STATIC, // Static symbol, visible in one file V_LOCAL, // Locally visible symbol V_PARAM, // Locally visible function parameter V_MEMBER // Member of a struct or union }; ``` OK, so I lied a little bit :-) There are actually three symbol tables: one for generic symbols, one for types (structs, unions, enums, typedefs) and a temporary one which is used to build the member list for structs, unions and functions. In [sym.c](sym.c), the `serialiseSym()` function writes a symbol table node and any associated string out to the file. One optimisation is that, as nodes are given monotonically increasing ids, we can record the highest symbol id we have already written out, and not (re)write symbols at or below this id. The function `flushSymtable()` in the same file walks the type list and the generic symbol list and calls `serialiseSym()` to write each node out. In the same file, `freeSym()` frees the memory that a symbol entry occupies.
This is the node itself, any associated name and also any initialisation list (i.e. for global symbols, e.g. `int x= 27;`). Symbols like structs, unions and functions also have a list of member symbols - the fields in structs and unions, and the locals and parameters of a function. These also get freed. The function `freeSymtable()` in [sym.c](sym.c) walks these lists and calls `freeSym()` to free each node. Now, the question is: when is it safe to flush and free the symbol table in the parser? The answer is: we can flush the symbol table out after each function. But we can't free the symbol table, as the parser needs to look up pre-defined types and pre-defined symbols, e.g. ``` z= x + y; ``` What types do these have, and are they compatible? Are they locals, arguments or globals? Have they even been declared? We need the full symbol table for this. So in [decl.c](decl.c) at the end of `function_declaration()`: ``` ... flushSymtable(); Functionid= NULL; return (oldfuncsym); } ``` ## Reading Symbol Table Nodes The 6809 code generator, code-wise, is pretty big. It takes about 30K of RAM, so we have to work hard to not waste the remaining RAM. In the code generator, we only load symbols if we need them. And, each symbol might require knowledge of one or more symbols, e.g. a variable might be of type `struct foo`, so now we need to load the `struct foo` symbol and all of the symbols which are the fields of that structure. An issue is that the symbols are written out in order of when they are parsed, but we need to find symbols by their name or by their id. Example: ``` struct foo x; ``` We have to search for the `x` symbol by name. That node has the `ctypeid` for the `foo` symbol, so we need to search for that symbol by id. The majority of the work here is done by `loadSym()` in [sym.c](sym.c): ``` // Given a pointer to a symtable node, read in the next entry // in the on-disk symbol table. Do this always if loadit is true. // Only read one node if recurse is zero. 
// If loadit is false, load the data and return true if the symbol // a) matches the given name and stype or b) matches the id. // Return -1 when there is nothing left to read. static int loadSym(struct symtable *sym, char *name, int stype, int id, int loadit, int recurse) { ... } ``` I won't go through the code, but there are a few things to note. We can search by `stype` and `name`, e.g. an S_FUNCTION called `printf()`. We can search by numeric id. Sometimes we want to recursively fetch nodes: this happens because a symbol with members (e.g. a struct) gets written out immediately followed by the members. Finally, we can just read in the next symbol always if `loadit` is set, e.g. when reading in members. The `findSyminfile()` function simply goes back to the start of the symbol file each time, and loops calling `loadSym()` until either the symbol required is found or we reach the end of the file. Not very efficient, is it? The old compiler code had functions ``` struct symtable *findlocl(char *name, int id); struct symtable *findSymbol(char *name, int stype, int id); struct symtable *findmember(char *s); struct symtable *findstruct(char *s); struct symtable *findunion(char *s); struct symtable *findenumtype(char *s); struct symtable *findenumval(char *s); struct symtable *findtypedef(char *s); ``` They are still here, but different. We first search in memory for the required symbol, then call `findSyminfile()` if the symbol isn't in memory. When a symbol is loaded from the file, it gets linked into the in-memory symbol table. Thus, we build up a cache of symbols as the code generator needs them. To ease memory, we should flush and free the symbol table periodically in the code generator.
In [cgen.c](cgen.c) which has the main loop for the code generator: ``` while (1) { // Read the next function's top node in from file node= loadASTnode(0, 1); if (node==NULL) break; // Generate the assembly code for the tree genAST(node, NOLABEL, NOLABEL, NOLABEL, 0); // Free the symbols in the in-memory symbol tables. freeSymtable(); } ``` One minor issue that bit me when rewriting the compiler was that there are global symbols which are initialised and need to have assembly instructions generated for them. So, just above this loop there is a call to a function called `allocateGlobals()`. This, in turn, calls a function in [sym.c](sym.c) called `loadGlobals()` which reads in any global symbols. Now we can call the appropriate code generator function as we walk the list of global symbols. At the end of `allocateGlobals()` we can `freeSymtable()`. And I've got one last comment. All of this works because there are not that many symbols in any C program, also taking into account the header files that get included. But if this were a real, production, compiler on a real Unix-like system, argh!! A typical program will pull in a dozen or so header files, each with dozens of typedefs, structs, enum values etc. We would run out of memory in no time. So this all works but it's not scalable. ## Writing AST Nodes Now on to the AST nodes. The first point I need to make is that there simply isn't enough memory to build the AST tree for a function, then write it out (or read it in). The bigger functions that we need to deal with have 3,000 or more AST nodes. They simply won't fit into 64K of RAM by themselves. We can only keep a limited number of AST nodes in memory, but how? After all it's a tree. For any node, when do we need the sub-trees below it and when can we prune the tree? In the top-level parser file [parse.c](parse.c) there is a function called `serialiseAST()` which writes the given node and its children out to disk. This function gets called in a few places.
In `compound_statement()` in [stmt.c](stmt.c): ``` while (1) { ... // Parse a single statement tree = single_statement(); ... left = mkastnode(A_GLUE, P_NONE, NULL, left, NULL, tree, NULL, 0); // To conserve memory, we try to optimise the single statement tree. // Then we serialise the tree and free it. We set the right pointer // in left NULL; this will stop the serialiser from descending into // the tree that we already serialised. tree = optimise(tree); serialiseAST(tree); freetree(tree, 0); ... } ``` So, each time there is a single statement, we parse this statement, build up the AST tree for it and then dump it to disk. And at the end of `function_declaration()` in [decl.c](decl.c): ``` // Serialise the tree serialiseAST(tree); freetree(tree, 0); // Flush out the in-memory symbol table. // We are no longer in a function. flushSymtable(); Functionid= NULL; return (oldfuncsym); ``` This writes out the S_FUNCTION node which identifies the top AST node of the function. The code snippets above reference `freetree()`. Here it is in [tree.c](tree.c): ``` // Free the contents of a tree. Possibly // because of tree optimisation, sometimes // left and right are the same sub-nodes. // Free the names if asked to do so. void freetree(struct ASTnode *tree, int freenames) { if (tree==NULL) return; if (tree->left!=NULL) freetree(tree->left, freenames); if (tree->mid!=NULL) freetree(tree->mid, freenames); if (tree->right!=NULL && tree->right!=tree->left) freetree(tree->right, freenames); if (freenames && tree->name != NULL) free(tree->name); free(tree); } ``` ## Reading AST Nodes I fought for quite a while to find a good approach for reading AST nodes back in to the code generator. We have to do two things: 1. Find each function's top node and read it in. 2. Once we have an AST node, read in its children using their ids. My first approach was, like the symbol table, rewind to the start of the file each time I did a search. 
OK, so that made the compilation of a 1,000 line file take about 45 minutes. No, that's not good. I did think of trying to cache the numeric ids, type (S_FUNCTION or not) and file offset in memory. That's not going to work either. For each AST node that would be: - 2 bytes for the id - 1 byte for the S_FUNCTION boolean - 4 bytes for the file offset An AST file with, say, 3,000 nodes now needs a 21,000 byte cache in memory. Ridiculous! Instead, I build a list of node file offsets in a separate temporary file. This is done by the `mkASTidxfile()` function in [tree.c](tree.c). The file is simply a sequence of offset values, each 4 bytes long. Position 0 holds the offset for id 0, position 4 the offset for id 1 etc. As we will need to find each function's top node in turn, and there are usually not many functions in a file, I chose to record all the S_FUNCTION nodes' offsets in an in-memory list: In [tree.c](tree.c), we have: ``` // We keep an array of AST node offsets that // represent the functions in the AST file long *Funcoffset; ``` This gets `malloc()`d and `realloc()`d and grows to contain all the function offsets. The last value is 0 because the id value 0 never gets allocated in the parser. Now, how do we use all of this information? In the same file there is a function called `loadASTnode()`: ``` // Given an AST node id, load that AST node from the AST file. // If nextfunc is set, find the next AST node which is a function. // Allocate and return the node or NULL if it can't be found. struct ASTnode *loadASTnode(int id, int nextfunc) { ... } ``` We can load a node given its id, or we can just load the next S_FUNCTION node. We use the temporary file with the offsets to quickly find where the node we want is positioned in the main AST file. Nice and simple! ## Using loadASTnode() and Freeing AST nodes Unfortunately, there is not a single place where we can call `loadASTnode()`.
Anywhere in the architecture-independent generation code in [gen.c](gen.c), where we previously used the pointers `n->left`, `n->mid` or `n->right`, we now have to call `loadASTnode()`, e.g. ``` // Given an AST, an optional label, and the AST op // of the parent, generate assembly code recursively. // Return the register id with the tree's final value. int genAST(struct ASTnode *n, int iflabel, int looptoplabel, int loopendlabel, int parentASTop) { struct ASTnode *nleft, *nmid, *nright; // Load in the sub-nodes nleft=loadASTnode(n->leftid,0); nmid=loadASTnode(n->midid,0); nright=loadASTnode(n->rightid,0); ... } ``` You will find about fifteen calls to `loadASTnode()` in [gen.c](gen.c). Back in the parser, we could parse a single statement and then call `freetree()` once we have written it out to disk. Here in the code generator, I decided to be more specific. Once I'd definitely finished using an AST node, I call the function `freeASTnode()` defined in [tree.c](tree.c) to free its memory. You will find about twelve calls to this function in the code generator. That's about it for the changes to the symbol table and AST node handling. ## General Memory Freeing Back up when I started talking about trying to fit the compiler into 64K, my third point was: try to garbage collect unused data structures with `free()` wherever we can. Well, C is probably the worst language to try and do garbage collection! For a while I tried to sprinkle `free()`s where I thought they would work but then the compiler would either segfault or, worse, use a node that had been overwritten and go into crazy behaviour mode. Fortunately, I've been able to get down to four main functions that garbage collect: `freeSym()`, `freeSymtable()`, `freeASTnode()` and `freetree()`. That hasn't solved all the garbage collection issues. I've recently resorted to using [Valgrind](https://valgrind.org/) to show me where I have memory leaks. 
I try to find the worst cases and then work out where I can insert a `free()` that helps. This has got the compiler to the point where it _can_ self-compile on the 6809, but there is definitely room for improvement! ## The Peephole Optimiser The peephole optimiser, [cpeep.c](cpeep.c), was originally written by Christian W. Fraser in 1984. Looking at the [documentation](docs/copt.1), it has been worked on by several people since then. I imported it from the [Fuzix Compiler Kit](https://github.com/EtchedPixels/Fuzix-Compiler-Kit) and changed its name. I also changed the rule termination to be `====` instead of a blank line; I find it easier to see where rules end. The 6809 backend can spit out some bad code. The optimiser helps to get rid of some of it. Have a look at the [rules.6809](lib/6809/rules.6809) file to see what the rules are; I think I've documented them well enough. There is a [test file](tests/input.rules.6809) which I use to check that the rules work OK. ## Building and Running the Compiler - QBE To build the compiler on a Linux box so that it outputs x64 code, you first need to download [QBE 1.2](https://c9x.me/compile/releases.html), build it and install the `qbe` binary somewhere on your `$PATH`. Next, you need to make the `/opt/wcc` directory and make it writable by yourself. Now you can `make; make install`, which will build the compiler and put the executables into `/opt/wcc/bin`, the header files into `/opt/wcc/include` and the 6809 libraries into `/opt/wcc/lib/6809`. Now make sure that `/opt/wcc/bin/wcc` (the compiler front-end) is on your `$PATH`. I usually put a symlink to it into my private `bin` folder. From here, you can `make test` which goes into the `tests/` directory and runs all the tests that are in there. ## Building and Running the Compiler - 6809 This is a bit complicated.
Firstly, you need to download the [Fuzix Bintools](https://github.com/EtchedPixels/Fuzix-Bintools), and build at least the assembler `as6809` and the linker `ld6809`. Now install these somewhere on your `$PATH`. Next, download my [Fuzemsys](https://github.com/DoctorWkt/Fuzemsys) project. This has a 6809 emulator which we need to run the 6809 binaries. Go into the `emulators/` directory and `make emu6809`. Once this is built, install the emulator somewhere on your `$PATH`. If you haven't already, make the `/opt/wcc` directory as before, come back to this project and `make; make install` to install it. Make sure that `/opt/wcc/bin/wcc` (the compiler front-end) is on your `$PATH`. From here, you can `make 6test` which goes into the `tests/` directory and runs all the tests that are in there. This time, we build 6809 binaries and use the 6809 emulator to run them. ## Doing The QBE Triple Test Once `qbe` is installed and you have done a `make install; make test` to check that the compiler works, you can now do a `make triple`.
This: - builds the compiler with your native compiler, - builds the compiler with itself into the `L1` directory, - builds the compiler with itself again into the `L2` directory, and - checksums the `L1` and `L2` executables to ensure they are identical: ``` 0f14b990d9a48352c4d883cd550720b3 L1/detok 0f14b990d9a48352c4d883cd550720b3 L2/detok 3cc59102c6a5dcc1661b3ab3dcce5191 L1/cgenqbe 3cc59102c6a5dcc1661b3ab3dcce5191 L2/cgenqbe 3e036c748bdb5e3ffc0e03506ed00243 L2/wcc <-- different 6fa26e506a597c9d9cfde7d168ae4640 L1/detree 6fa26e506a597c9d9cfde7d168ae4640 L2/detree 7f8e55a544400ab799f2357ee9cc4b44 L1/cscan 7f8e55a544400ab799f2357ee9cc4b44 L2/cscan 912ebc765c27a064226e9743eea3dd30 L1/wcc <-- different 9c6a66e8b8bbc2d436266c5a3ca622c7 L1/cparseqbe 9c6a66e8b8bbc2d436266c5a3ca622c7 L2/cparseqbe cb493abe1feed812fb4bb5c958a8cf83 L1/desym cb493abe1feed812fb4bb5c958a8cf83 L2/desym ``` The `wcc` binaries are different as one has `L1` in the path to find the executables for the phases, and the other has `L2` instead. ## Doing The 6809 Triple Test Instead of using the `Makefile` to do this, I have a separate Bash shell script called `6809triple_test`. Run this to: - build the compiler with your native compiler, - build the 6809 compiler with itself into the `L1` directory, and - build the 6809 compiler with itself again into the `L2` directory. This is slow! On my decent laptop it takes about 45 minutes. 
Eventually you can do your own checksums to verify that the executables are identical: ``` $ md5sum L1/_* L2/_* | sort 01c5120e56cb299bf0063a07e38ec2b9 L1/_cgen6809 01c5120e56cb299bf0063a07e38ec2b9 L2/_cgen6809 0caee9118cb7745eaf40970677897dbf L1/_detree 0caee9118cb7745eaf40970677897dbf L2/_detree 2d333482ad8b4a886b5b78a4a49f3bb5 L1/_detok 2d333482ad8b4a886b5b78a4a49f3bb5 L2/_detok d507bd89c0fc1439efe2dffc5d8edfe3 L1/_desym d507bd89c0fc1439efe2dffc5d8edfe3 L2/_desym e78da1f3003d87ca852f682adc4214e8 L1/_cscan e78da1f3003d87ca852f682adc4214e8 L2/_cscan e9c8b2c12ea5bd4f62091fafaae45971 L1/_cparse6809 e9c8b2c12ea5bd4f62091fafaae45971 L2/_cparse6809 ``` At the moment I'm having problems with running `wcc` as a 6809 executable, so I use the x64 `wcc` binary instead. ## Example Command-line Actions Here is a capture of the commands I used to do all the above: ``` # Download the acwj repository cd /usr/local/src git clone https://github.com/DoctorWkt/acwj # Make the destination directory sudo mkdir /opt/wcc sudo chown wkt:wkt /opt/wcc # Install QBE cd /usr/local/src wget https://c9x.me/compile/release/qbe-1.2.tar.xz xz -d qbe-1.2.tar.xz tar vxf qbe-1.2.tar cd qbe-1.2/ make sudo make install # Install the wcc compiler cd /usr/local/src/acwj/64_6809_Target make install # Put wcc on my $PATH cd ~/.bin ln -s /opt/wcc/bin/wcc . # Do the triple test on x64 using QBE cd /usr/local/src/acwj/64_6809_Target make triple # Get the Fuzix-Bintools and build # the 6809 assembler and linker cd /usr/local/src git clone https://github.com/EtchedPixels/Fuzix-Bintools cd Fuzix-Bintools/ make as6809 ld6809 cp as6809 ld6809 ~/.bin # Get Fuzemsys and build the 6809 emulator. # I needed to install the readline library. 
sudo apt-get install libreadline-dev cd /usr/local/src git clone https://github.com/DoctorWkt/Fuzemsys cd Fuzemsys/emulators/ make emu6809 cp emu6809 ~/.bin # Go back to the compiler and do the # triple test using the 6809 emulator cd /usr/local/src/acwj/64_6809_Target ./6809triple_test ``` ## Is This Self-Compiling? We can pass the triple test with the 6809 CPU. But, is this really self-compiling? Well, it is, but it is definitely _not_ self-hosting. The things that this C compiler _doesn't_ build include: - a C pre-processor - the peephole optimiser - the 6809 assembler - the 6809 linker - an `ar` archiver for the 6809 - the compiler helper functions, and the C library. At the moment, I'm using the Fuzix Compiler Kit to build these functions. The Fuzix Compiler speaks "real" C; this compiler only speaks a subset of the C language, so it can't build these functions. So, if I wanted to move all of this over to my [MMU09 SBC](https://github.com/DoctorWkt/MMU09), then I would need to use the Fuzix Compiler to build the assembler, linker, helper functions and the C library. Thus, the "acwj" compiler can definitely take pre-processed C source code and, using a scanner, a parser and a code generator, output 6809 assembly code. And the "acwj" compiler can do the above on its own code. That makes our compiler a self-compiling compiler, but not a self-hosting compiler! ## Future Work Right now, this isn't a production compiler. It's not even a proper C compiler - it only knows a subset of the C language. Some things to do would be: - make it more robust - get on top of the garbage collection - add unsigned types - add floats and doubles - add more of the real C language to become self-hosting - improve the quality of the 6809 code generator - improve the speed of the 6809 compiler - perhaps, take a big step back, use the lessons learned through this whole journey and rewrite a new compiler from scratch! 
## Conclusion I'm pretty burned out after this part - it's taken a few months of work as evidenced by my [notes](docs/NOTES.md). And we are now up to part 64 of the "acwj" journey; that's a good power of two :-) So I won't say definitely not, but I think this is where I'll end the "acwj" journey. If you have followed along through some/most/all of the parts, then thank you for spending the time reading my notes. I hope it's been useful. And, now, if you need a sort-of C compiler for an 8-bit or 16-bit CPU with a limited set of registers, this might be a starting point for you! Cheers, Warren ================================================ FILE: 64_6809_Target/cg.h ================================================ /* cg.c */ void cgtextseg(); void cgdataseg(); void cglitseg(); void cgfreeallregs(int keepreg); int cgallocreg(int type); void cgfreereg(int reg); void cgspillregs(); void cgpreamble(); void cgpostamble(); void cgfuncpreamble(struct symtable *sym); void cgfuncpostamble(struct symtable *sym); int cgloadint(int value, int type); int cgloadvar(struct symtable *sym, int op); int cgloadglobstr(int label); int cgadd(int r1, int r2, int type); int cgsub(int r1, int r2, int type); int cgmul(int r1, int r2, int type); int cgdiv(int r1, int r2, int type); int cgmod(int r1, int r2, int type); int cgand(int r1, int r2, int type); int cgor(int r1, int r2, int type); int cgxor(int r1, int r2, int type); int cginvert(int r, int type); int cgshl(int r1, int r2, int type); int cgshr(int r1, int r2, int type); int cgnegate(int r, int type); int cglognot(int r, int type); int cgloadboolean(int r, int val, int type); int cgboolean(int r, int op, int label, int type); int cgcall(struct symtable *sym, int numargs, int *arglist, int *typelist); int cgshlconst(int r, int val, int type); int cgstorglob(int r, struct symtable *sym); int cgstorlocal(int r, struct symtable *sym); void cgglobsym(struct symtable *node); void cgglobstr(int l, char *strvalue); int cgcompare_and_set(int 
ASTop, int r1, int r2, int type); void cglabel(int l); void cgjump(int l); int cgcompare_and_jump(int ASTop, int parentASTop, int r1, int r2, int label, int type); int cgcast(int t, int oldtype, int newtype); int cgwiden(int r, int oldtype, int newtype); void cgreturn(int r, struct symtable *sym); int cgaddress(struct symtable *sym); int cgderef(int r, int type); int cgstorderef(int r1, int r2, int type); void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel); void cgmove(int r1, int r2, int type); void cglinenum(int line); ================================================ FILE: 64_6809_Target/cg6809.c ================================================ #include "defs.h" #include "data.h" #include "gen.h" #include "misc.h" #include "types.h" #include "target.h" // Code generator for 6809 // Copyright (c) 2024 Warren Toomey, GPL3 // Instead of registers, we keep a list of locations. // They can be one of the following: enum { L_FREE, // This location is not used L_SYMBOL, // A global symbol with an optional offset L_LOCAL, // A local variable or parameter L_CONST, // An integer literal value L_LABEL, // A label L_SYMADDR, // The address of a symbol, local or parameter L_TEMP, // A temporarily-stored value L_DREG // The D location, i.e. B, D or Y/D }; struct Location { int type; // One of the L_ values char *name; // A symbol's name long intval; // Offset, const value, label-id etc. int primtype; // 6809 primitive type, see PR_POINTER below }; // We also track if D holds a copy of a location. // It could be NOREG if it is available. static int d_holds; #define NUMFREELOCNS 16 static struct Location Locn[NUMFREELOCNS]; // We also need a set of temporary locations in memory. // They are defined in crt0.s as R0, R1 etc. // They can be allocated incrementally.
static int next_free_temp; // Allocate a currently free temporary location static int cgalloctemp() { return(next_free_temp++); } // Free all temporary locations static void cgfreealltemps() { next_free_temp=0; } // Parameters and local variables live // on the stack. We need to adjust their offset // each time we push/pop off the stack. sp_adjust // holds the number of extra bytes on the stack. static int sp_adjust; // We convert C types to types on the 6809: // PR_CHAR, PR_INT, PR_LONG, PR_POINTER. #define PR_CHAR 1 #define PR_INT 2 #define PR_POINTER 3 #define PR_LONG 4 // Given a C type, return the matching 6809 type static int cgprimtype(int type) { if (ptrtype(type)) return(PR_POINTER); if (type == P_CHAR) return(PR_CHAR); if (type == P_INT) return(PR_INT); if (type == P_LONG) return(PR_LONG); fatald("Bad type in cgprimtype:", type); return(0); // Keep -Wall happy } // Print a location out. For memory locations // use the offset. For constants, use the // register letter to determine which part to use. static void printlocation(int l, int offset, char rletter) { int intval; if (Locn[l].type == L_FREE) fatald("Error trying to print location", l); switch(Locn[l].type) { case L_SYMBOL: fprintf(Outfile, "_%s+%d\n", Locn[l].name, offset); break; case L_LOCAL: fprintf(Outfile, "%ld,s\n", Locn[l].intval + offset + sp_adjust); break; case L_LABEL: fprintf(Outfile, "#L%ld\n", Locn[l].intval); break; case L_SYMADDR: fprintf(Outfile, "#_%s\n", Locn[l].name); break; case L_TEMP: fprintf(Outfile, "R%ld+%d\n", Locn[l].intval, offset); break; case L_CONST: // We convert Locn[l].intval (a long) to intval (an int). If // we did, for example, Locn[l].intval & 0xffff, on the 6809 // the 0xffff gets widened to 32 bits. But this is a negative // value, so it gets widened to 0xffffffff not 0x0000ffff. 
switch(rletter) { case 'b': fprintf(Outfile, "#%ld\n", Locn[l].intval & 0xff); break; case 'a': fprintf(Outfile, "#%ld\n", (Locn[l].intval >> 8) & 0xff); break; case 'd': intval= (int)Locn[l].intval; fprintf(Outfile, "#%d\n", intval & 0xffff); break; case 'y': intval= (int)(Locn[l].intval >> 16); fprintf(Outfile, "#%d\n", intval & 0xffff); break; // These are the top two bytes of a 32-bit value case 'f': fprintf(Outfile, "#%ld\n", (Locn[l].intval >> 16) & 0xff); break; case 'e': fprintf(Outfile, "#%ld\n", (Locn[l].intval >> 24) & 0xff); break; } break; default: fatald("Unknown type for location", l); } } // Save D (B, D, Y/D) to a location. static void save_d(int l) { // If we are saving to ourself then // there is nothing to do :-) if (Locn[l].type == L_DREG) return; switch (Locn[l].primtype) { case PR_CHAR: fprintf(Outfile, "\tstb "); printlocation(l, 0, 'b'); break; case PR_INT: case PR_POINTER: fprintf(Outfile, "\tstd "); printlocation(l, 0, 'd'); break; case PR_LONG: fprintf(Outfile, "\tstd "); printlocation(l, 2, 'd'); fprintf(Outfile, "\tsty "); printlocation(l, 0, 'y'); } d_holds= l; } // Stash D in a temporary if required static void stash_d() { // If D holds a value, we will need to store it // in a temporary location if (d_holds != NOREG && Locn[d_holds].type == L_DREG) { Locn[d_holds].type= L_TEMP; Locn[d_holds].intval= cgalloctemp(); save_d(d_holds); } } // Load D (B, D, Y/D) with a location. static void load_d(int l) { // If l is already L_DREG, do nothing. if (Locn[l].type== L_DREG) return; // If D holds a value, we will need to store it // in a temporary location stash_d(); // Load the existing location into D and mark it as L_DREG. 
switch(Locn[l].primtype) { case PR_CHAR: fprintf(Outfile, "\tldb "); printlocation(l, 0, 'b'); break; case PR_INT: case PR_POINTER: fprintf(Outfile, "\tldd "); printlocation(l, 0, 'd'); break; case PR_LONG: fprintf(Outfile, "\tldd "); printlocation(l, 2, 'd'); fprintf(Outfile, "\tldy "); printlocation(l, 0, 'y'); } Locn[l].type= L_DREG; d_holds= l; } // Set all locations as available. // If keepl is positive, don't free that one. static void cgfreeall_locns(int keepl) { int l; for (l = 0; l < NUMFREELOCNS; l++) if (l != keepl) { Locn[l].type= L_FREE; } if (keepl == NOREG) cgfreealltemps(); fprintf(Outfile, ";\n"); d_holds= NOREG; } // Allocate a free location. Return the number of // the location. Die if no available locations. static int cgalloclocn(int type, int primtype, char *name, long intval) { int l; for (l = 0; l < NUMFREELOCNS; l++) { if (Locn[l].type== L_FREE) { // If we're asked for a temporary, get one if (type==L_TEMP) intval= cgalloctemp(); if (type==L_DREG) d_holds= l; Locn[l].type= type; Locn[l].primtype= primtype; Locn[l].name= name; Locn[l].intval= intval; return(l); } } fatal("Out of locations in cgalloclocn"); return(0); // Keep -Wall happy } // Free a location. Check to see if it's not already there. 
static void cgfreelocn(int l) { if (Locn[l].type== L_FREE) fatald("Error trying to free location", l); Locn[l].type= L_FREE; if (d_holds ==l) d_holds= NOREG; } // gen.c calls us as if we have registers void cgfreeallregs(int keepl) { cgfreeall_locns(keepl); } int cgallocreg(int type) { return(cgalloclocn(L_TEMP, cgprimtype(type), NULL, 0)); } void cgfreereg(int reg) { cgfreelocn(reg); } // Push a location on the stack static void pushlocn(int l) { load_d(l); switch(Locn[l].primtype) { case PR_CHAR: fprintf(Outfile, "\tpshs b\n"); sp_adjust += 1; break; case PR_INT: case PR_POINTER: fprintf(Outfile, "\tpshs d\n"); sp_adjust += 2; break; case PR_LONG: fprintf(Outfile, "\tpshs d\n"); fprintf(Outfile, "\tpshs y\n"); sp_adjust += 4; } cgfreelocn(l); d_holds= NOREG; } // Flag to say which section were are outputting in to enum { no_seg, text_seg, data_seg, lit_seg } currSeg = no_seg; // Switch to the text segment void cgtextseg() { if (currSeg != text_seg) { fputs("\t.code\n", Outfile); currSeg = text_seg; } } // Switch to the data segment void cgdataseg() { if (currSeg != data_seg) { fputs("\t.data\n", Outfile); currSeg = data_seg; } } // Switch to the literal segment void cglitseg() { if (currSeg != lit_seg) { fputs("\t.literal\n", Outfile); currSeg = lit_seg; } } // Position of next local variable relative to stack base pointer. // We store the offset as positive to make aligning the stack pointer easier static int localOffset; // Create the position of a new local variable. 
static int newlocaloffset(int size) { int o; // Return the current localOffset and // then increment the localOffset o= localOffset; localOffset += size; return (o); } // Print out the assembly preamble // for one output file void cgpreamble() { cgfreeall_locns(NOREG); cgfreealltemps(); cgtextseg(); } // Nothing to do for the end of a file void cgpostamble() { } // Generate a label void cglabel(int l) { fprintf(Outfile, "L%d:\n", l); } // Print out a function preamble void cgfuncpreamble(struct symtable *sym) { char *name = sym->name; struct symtable *parm, *locvar; int paramOffset = 2; // Any pushed params start at this frame offset // Output in the text segment, reset local offset // and the amount of args on the stack cgtextseg(); localOffset = 0; next_free_temp = 0; sp_adjust = 0; // Output the function start if (sym->class == V_GLOBAL) { fprintf(Outfile, "\t.export _%s\n", name); } fprintf(Outfile, "_%s:\n", name); // Make frame positions for the locals. // Skip over the parameters in the member list first for (locvar = sym->member; locvar != NULL; locvar = locvar->next) if (locvar->class==V_LOCAL) break; for (; locvar != NULL; locvar = locvar->next) { locvar->st_posn = newlocaloffset(locvar->size); // fprintf(Oufile, "; placed local %s size %d at offset %d\n", // locvar->name, locvar->size, locvar->st_posn); } // Work out the frame offset for the parameters. // Do this once we know the total size of the locals. 
// Stop once we hit the locals for (parm = sym->member; parm != NULL; parm = parm->next) { if (parm->class==V_LOCAL) break; parm->st_posn = paramOffset + localOffset; paramOffset += parm->size; // fprintf(Outfile, "; placed param %s size %d at offset %d\n", // parm->name, parm->size, parm->st_posn); } // Bring the stack down to below the locals if (localOffset!=0) fprintf(Outfile, "\tleas -%d,s\n", localOffset); } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->st_endlabel); if (localOffset!=0) fprintf(Outfile, "\tleas %d,s\n", localOffset); fputs("\trts\n", Outfile); cgfreeall_locns(NOREG); cgfreealltemps(); if (sp_adjust !=0 ) { fprintf(Outfile, "; DANGER sp_adjust is %d not 0\n", sp_adjust); fatald("sp_adjust is not zero", sp_adjust); } } // Load an integer literal value into a location. // Return the number of the location. int cgloadint(int value, int type) { int primtype= cgprimtype(type); return(cgalloclocn(L_CONST, primtype, NULL, value)); } // Increment the value at a symbol by offset // which could be positive or negative static void incdecsym(struct symtable *sym, int offset) { // Load the symbol's address if (sym->class == V_LOCAL || sym->class == V_PARAM) fprintf(Outfile, "\tleax %d,s\n", sym->st_posn + sp_adjust); else fprintf(Outfile, "\tldx #_%s\n", sym->name); // Now change the value at that address switch (sym->size) { case 1: fprintf(Outfile, "\tldb #%d\n", offset & 0xff); fprintf(Outfile, "\taddb 0,x\n"); fprintf(Outfile, "\tstb 0,x\n"); break; case 2: fprintf(Outfile, "\tldd #%d\n", offset & 0xffff); fprintf(Outfile, "\taddd 0,x\n"); fprintf(Outfile, "\tstd 0,x\n"); break; case 4: fprintf(Outfile, "\tldd #%d\n", offset); fprintf(Outfile, "\taddd 2,x\n"); fprintf(Outfile, "\tstd 2,x\n"); fprintf(Outfile, "\tldd 0,x\n"); fprintf(Outfile, "\tadcb #0\n"); fprintf(Outfile, "\tadca #0\n"); fprintf(Outfile, "\tstb 0,x\n"); } } // Load a value from a variable into a location. 
// Return the number of the location. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadvar(struct symtable *sym, int op) { int l, offset = 1; int primtype= cgprimtype(sym->type); // If the symbol is a pointer, use the size // of the type that it points to as any // increment or decrement. If not, it's one. if (ptrtype(sym->type)) offset = typesize(value_at(sym->type), sym->ctype); // Negate the offset for decrements if (op == A_PREDEC || op == A_POSTDEC) offset = -offset; // If we have a pre-operation, do it if (op == A_PREINC || op == A_PREDEC) incdecsym(sym, offset); // Get a new location and set it up if (sym->class == V_LOCAL || sym->class == V_PARAM) l= cgalloclocn(L_LOCAL, primtype, NULL, sym->st_posn + sp_adjust); else l= cgalloclocn(L_SYMBOL, primtype, sym->name, 0); // If we have a post-operation, do it // but get the current value into a temporary. if (op == A_POSTINC || op == A_POSTDEC) { load_d(l); stash_d(); incdecsym(sym, offset); load_d(l); } // Return the location with the value return (l); } // Given the label number of a global string, // load its address into a new location int cgloadglobstr(int label) { // Get a new location int l = cgalloclocn(L_LABEL, PR_INT, NULL, label); return (l); } // Add two locations together and return // the number of the location with the result int cgadd(int l1, int l2, int type) { int primtype= cgprimtype(type); load_d(l1); switch(primtype) { case PR_CHAR: fprintf(Outfile, "\taddb "); printlocation(l2, 0, 'b'); break; case PR_INT: case PR_POINTER: fprintf(Outfile, "\taddd "); printlocation(l2, 0, 'd'); break; break; case PR_LONG: fprintf(Outfile, "\taddd "); printlocation(l2, 2, 'd'); fprintf(Outfile, "\texg y,d\n"); fprintf(Outfile, "\tadcb "); printlocation(l2, 1, 'f'); fprintf(Outfile, "\tadca "); printlocation(l2, 0, 'e'); fprintf(Outfile, "\texg y,d\n"); } cgfreelocn(l2); Locn[l1].type= L_DREG; d_holds= l1; return(l1); } // Subtract the second location from the 
first and // return the number of the location with the result int cgsub(int l1, int l2, int type) { int primtype= cgprimtype(type); load_d(l1); switch(primtype) { case PR_CHAR: fprintf(Outfile, "\tsubb "); printlocation(l2, 0, 'b'); break; break; case PR_INT: case PR_POINTER: fprintf(Outfile, "\tsubd "); printlocation(l2, 0, 'd'); break; break; case PR_LONG: fprintf(Outfile, "\tsubd "); printlocation(l2, 2, 'd'); fprintf(Outfile, "\texg y,d\n"); fprintf(Outfile, "\tsbcb "); printlocation(l2, 1, 'f'); fprintf(Outfile, "\tsbca "); printlocation(l2, 0, 'e'); fprintf(Outfile, "\texg y,d\n"); } cgfreelocn(l2); Locn[l1].type= L_DREG; d_holds= l1; return (l1); } // Run a helper subroutine on two locations // and return the number of the location with the result static int cgbinhelper(int l1, int l2, int type, char *cop, char *iop, char *lop) { int primtype= cgprimtype(type); load_d(l1); switch(primtype) { case PR_CHAR: fprintf(Outfile, "\tclra\n"); fprintf(Outfile, "\tpshs d\n"); sp_adjust += 2; fprintf(Outfile, "\tldb "); printlocation(l2, 0, 'b'); fprintf(Outfile, "\tlbsr %s\n", cop); sp_adjust -= 2; break; case PR_INT: case PR_POINTER: fprintf(Outfile, "\tpshs d\n"); sp_adjust += 2; fprintf(Outfile, "\tldd "); printlocation(l2, 0, 'd'); fprintf(Outfile, "\tlbsr %s\n", iop); sp_adjust -= 2; break; case PR_LONG: fprintf(Outfile, "\tpshs d\n"); fprintf(Outfile, "\tpshs y\n"); sp_adjust += 4; fprintf(Outfile, "\tldy "); printlocation(l2, 0, 'd'); fprintf(Outfile, "\tldd "); printlocation(l2, 2, 'y'); fprintf(Outfile, "\tlbsr %s\n", lop); sp_adjust -= 4; } cgfreelocn(l2); Locn[l1].type= L_DREG; d_holds= l1; return (l1); } // Multiply two locations together and return // the number of the location with the result int cgmul(int r1, int r2, int type) { return(cgbinhelper(r1, r2, type, "__mul", "__mul", "__mull")); } // Divide the first location by the second and // return the number of the location with the result int cgdiv(int r1, int r2, int type) { return(cgbinhelper(r1, 
r2, type, "__div", "__div", "__divl")); } // Divide the first location by the second to get // the remainder. Return the number of the location with the result int cgmod(int r1, int r2, int type) { return(cgbinhelper(r1, r2, type, "__rem", "__rem", "__reml")); } // Generic binary operation on two locations static int cgbinop(int l1, int l2, int type, char *op) { int primtype= cgprimtype(type); load_d(l1); switch(primtype) { case PR_CHAR: fprintf(Outfile, "\t%sb ", op); printlocation(l2, 0, 'b'); break; case PR_INT: case PR_POINTER: fprintf(Outfile, "\t%sa ", op); printlocation(l2, 0, 'a'); fprintf(Outfile, "\t%sb ", op); printlocation(l2, 1, 'b'); break; case PR_LONG: fprintf(Outfile, "\t%sa ", op); printlocation(l2, 2, 'a'); fprintf(Outfile, "\t%sb ", op); printlocation(l2, 3, 'b'); fprintf(Outfile, "\texg y,d\n"); fprintf(Outfile, "\t%sa ", op); printlocation(l2, 0, 'e'); fprintf(Outfile, "\t%sb ", op); printlocation(l2, 1, 'f'); fprintf(Outfile, "\texg y,d\n"); break; } cgfreelocn(l2); Locn[l1].type= L_DREG; d_holds= l1; return (l1); } // Bitwise AND two locations int cgand(int r1, int r2, int type) { return(cgbinop(r1, r2, type, "and")); } // Bitwise OR two locations int cgor(int r1, int r2, int type) { return(cgbinop(r1, r2, type, "or")); } // Bitwise XOR two locations int cgxor(int r1, int r2, int type) { return(cgbinop(r1, r2, type, "eor")); } // Invert a location's value int cginvert(int l, int type) { int primtype= cgprimtype(type); load_d(l); switch(primtype) { case PR_CHAR: fprintf(Outfile, "\tcomb\n"); case PR_INT: case PR_POINTER: fprintf(Outfile, "\tcoma\n"); fprintf(Outfile, "\tcomb\n"); break; case PR_LONG: fprintf(Outfile, "\tcoma\n"); fprintf(Outfile, "\tcomb\n"); fprintf(Outfile, "\texg y,d\n"); fprintf(Outfile, "\tcoma\n"); fprintf(Outfile, "\tcomb\n"); fprintf(Outfile, "\texg y,d\n"); } Locn[l].type= L_DREG; d_holds= l; return(l); } // Shift left r1 by r2 bits int cgshl(int r1, int r2, int type) { return(cgbinhelper(r1, r2, type, "__shl", 
"__shl", "__shll")); } // Shift r1 right by 8, 16 or 24 bits int cgshrconst(int r1, int amount, int type) { int primtype= cgprimtype(type); int temp; load_d(r1); switch(primtype) { // Any shift on B clears it case PR_CHAR: fprintf(Outfile, "\tclrb\n"); return(r1); case PR_INT: case PR_POINTER: switch(amount) { case 8: fprintf(Outfile, "\ttfr a,b\n"); fprintf(Outfile, "\tclra\n"); return(r1); case 16: case 24: fprintf(Outfile, "\tclra\n"); fprintf(Outfile, "\tclrb\n"); return(r1); } case PR_LONG: switch(amount) { case 8: temp= cgalloctemp(); fprintf(Outfile, "\tclr R%d ; long >> 8\n", temp); fprintf(Outfile, "\tsty R%d+1\n", temp); fprintf(Outfile, "\tsta R%d+3\n", temp); fprintf(Outfile, "\tldy R%d\n", temp); fprintf(Outfile, "\tldd R%d+2\n", temp); return(r1); case 16: fprintf(Outfile, "\ttfr y,d ; long >> 16\n"); fprintf(Outfile, "\tldy #0\n"); return(r1); case 24: fprintf(Outfile, "\ttfr y,d ; long >> 24\n"); fprintf(Outfile, "\ttfr a,b\n"); fprintf(Outfile, "\tclra\n"); fprintf(Outfile, "\tldy #0\n"); return(r1); } } return(0); // Keep -Wall happy } // Shift right r1 by r2 bits int cgshr(int r1, int r2, int type) { int val; // If r2 is the constant 8, 16 or 24 // we can do it with just a few instructions if (Locn[r2].type== L_CONST) { val= (int)Locn[r2].intval; if (val==8 || val==16 || val==24) return(cgshrconst(r1, val, type)); } return(cgbinhelper(r1, r2, type, "__shr", "__shr", "__shrl")); } // Negate a location's value int cgnegate(int l, int type) { int primtype= cgprimtype(type); load_d(l); switch(primtype) { case PR_CHAR: fprintf(Outfile, "\tnegb\n"); break; case PR_INT: case PR_POINTER: fprintf(Outfile, "\tcoma\n"); fprintf(Outfile, "\tcomb\n"); fprintf(Outfile, "\taddd #1\n"); break; case PR_LONG: fprintf(Outfile, "\tlbsr __negatel\n"); } Locn[l].type= L_DREG; d_holds= l; return (l); } // Logically negate a location's value int cglognot(int l, int type) { // Get two labels int label1 = genlabel(); int label2 = genlabel(); int primtype= 
cgprimtype(type); load_d(l); switch(primtype) { case PR_CHAR: fprintf(Outfile, "\tcmpb #0\n"); fprintf(Outfile, "\tbne L%d\n", label1); fprintf(Outfile, "\tldd #1\n"); break; case PR_INT: case PR_POINTER: fprintf(Outfile, "\tcmpd #0\n"); fprintf(Outfile, "\tbne L%d\n", label1); fprintf(Outfile, "\tldd #1\n"); break; case PR_LONG: fprintf(Outfile, "\tcmpd #0\n"); fprintf(Outfile, "\tbne L%d\n", label1); fprintf(Outfile, "\tcmpy #0\n"); fprintf(Outfile, "\tbne L%d\n", label1); fprintf(Outfile, "\tldd #1\n"); } fprintf(Outfile, "\tbra L%d\n", label2); cglabel(label1); fprintf(Outfile, "\tldd #0\n"); cglabel(label2); Locn[l].type= L_DREG; d_holds= l; return (l); } // Load a boolean value (only 0 or 1) // into the given location. Allocate // a location if l is NOREG. int cgloadboolean(int l, int val, int type) { int primtype= cgprimtype(type); int templ; // Put the value into a literal location. // Load it into D. templ= cgalloclocn(L_CONST, primtype, NULL, val); load_d(templ); // Return the literal location or // save the value and return that location if (l==NOREG) { return(templ); } else { save_d(l); return(l); } return(NOREG); // Keep -Wall happy } // Set the Z flag if D is already loaded. Otherwise, // load the D register which will set the Z flag static void load_d_z(int l, int type) { int primtype= cgprimtype(type); int label = genlabel(); if (Locn[l].type== L_DREG) { switch(primtype) { case PR_CHAR: fprintf(Outfile, "\tcmpb #0\n"); break; case PR_INT: case PR_POINTER: fprintf(Outfile, "\tcmpd #0\n"); break; case PR_LONG: fprintf(Outfile, "\tcmpd #0\n"); fprintf(Outfile, "\tbne L%d\n", label); fprintf(Outfile, "\tcmpy #0\n"); cglabel(label); } } else load_d(l); } // Convert an integer value to a boolean value for // a TOBOOL operation. Jump if true if it's an IF, // WHILE operation. Jump if false if it's // a LOGOR operation. 
int cgboolean(int l, int op, int label, int type) { int primtype= cgprimtype(type); char *jmpop= "beq"; if (op== A_LOGOR) jmpop= "bne"; load_d_z(l, type); switch(primtype) { case PR_CHAR: fprintf(Outfile, "\t%s L%d\n", jmpop, label); break; case PR_INT: case PR_POINTER: fprintf(Outfile, "\t%s L%d\n", jmpop, label); break; case PR_LONG: fprintf(Outfile, "\tpshs y\n"); fprintf(Outfile, "\tora 0,s\n"); fprintf(Outfile, "\torb 1,s\n"); fprintf(Outfile, "\tleas 2,s\n"); fprintf(Outfile, "\t%s L%d\n", jmpop, label); } // If the op is A_TOBOOL, set the location to 1 if (op == A_TOBOOL) { cgloadboolean(l, 1, type); return(l); } return(NOREG); } // Call a function with the given symbol id. // Beforehand, push the arguments on the stack. // Afterwards, pop off any arguments pushed on the stack. // Return the location with the result. int cgcall(struct symtable *sym, int numargs, int *arglist, int *typelist) { int i, l, argamount; int gentype= sym->type; int primtype= 0; // If it's not a void function, get its primtype. // Also stash any D value in a temporary. 
if (gentype!=P_VOID) { stash_d(); primtype= cgprimtype(sym->type); } // Push the function arguments on the stack argamount=0; for (i= 0; i< numargs; i++) { pushlocn(arglist[i]); argamount += cgprimsize(typelist[i]); } // Call the function, adjust the stack fprintf(Outfile, "\tlbsr _%s\n", sym->name); fprintf(Outfile, "\tleas %d,s\n", argamount); sp_adjust -= argamount; // If it's not a void function, mark the result in D if (gentype!=P_VOID) { // Get a location and say that D is in use l = cgalloclocn(L_DREG, primtype, NULL, 0); return (l); } return(NOREG); } // Shift a location left by a constant, only 1 or 2 int cgshlconst(int l, int val, int type) { int primtype= cgprimtype(type); load_d(l); switch(primtype) { case PR_CHAR: if (val==2) { fprintf(Outfile, "\taslb\n"); } fprintf(Outfile, "\taslb\n"); break; case PR_INT: case PR_POINTER: if (val==2) { fprintf(Outfile, "\taslb\n"); fprintf(Outfile, "\trola\n"); } fprintf(Outfile, "\taslb\n"); fprintf(Outfile, "\trola\n"); break; case PR_LONG: if (val==2) { fprintf(Outfile, "\taslb\n"); fprintf(Outfile, "\trola\n"); } fprintf(Outfile, "\texg y,d\n"); fprintf(Outfile, "\trolb\n"); fprintf(Outfile, "\trola\n"); fprintf(Outfile, "\texg y,d\n"); } Locn[l].type= L_DREG; d_holds= l; return (l); } // Store a location's value into a variable int cgstorglob(int l, struct symtable *sym) { int size= cgprimsize(sym->type); load_d(l); switch (size) { case 1: fprintf(Outfile, "\tstb _%s\n", sym->name); break; case 2: fprintf(Outfile, "\tstd _%s\n", sym->name); break; case 4: fprintf(Outfile, "\tstd _%s+2\n", sym->name); fprintf(Outfile, "\tsty _%s\n", sym->name); } return (l); } // Store a location's value into a local variable int cgstorlocal(int l, struct symtable *sym) { int primtype= cgprimtype(sym->type); load_d(l); switch (primtype) { case PR_CHAR: fprintf(Outfile, "\tstb %d,s\n", sym->st_posn + sp_adjust); break; case PR_INT: case PR_POINTER: fprintf(Outfile, "\tstd %d,s\n", sym->st_posn + sp_adjust); break; case PR_LONG: 
fprintf(Outfile, "\tsty %d,s\n", sym->st_posn + sp_adjust); fprintf(Outfile, "\tstd %d,s\n", 2+sym->st_posn + sp_adjust); } return (l); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size, type; int initvalue; int i,j; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the variable (or its elements if an array) // and the type of the variable if (node->stype == S_ARRAY) { size = typesize(value_at(node->type), node->ctype); type = value_at(node->type); } else { size = node->size; type = node->type; } // Generate the global identity and the label cgdataseg(); if (node->class == V_GLOBAL) fprintf(Outfile, "\t.export _%s\n", node->name); fprintf(Outfile, "_%s:\n", node->name); // Output space for one or more elements for (i = 0; i < node->nelems; i++) { // Get any initial value initvalue = 0; if (node->initlist != NULL) initvalue = node->initlist[i]; // Generate the space for this type switch (size) { case 1: fprintf(Outfile, "\t.byte\t%d\n", initvalue & 0xff); break; case 2: // Generate the pointer to a string literal. Treat a zero value // as actually zero, not the label L0 if (node->initlist != NULL && type == pointer_to(P_CHAR) && initvalue != 0) fprintf(Outfile, "\t.word\tL%d\n", initvalue); else fprintf(Outfile, "\t.word\t%d\n", initvalue & 0xffff); break; case 4: fprintf(Outfile, "\t.word\t%d\n", (initvalue >> 16) & 0xffff); fprintf(Outfile, "\t.word\t%d\n", initvalue & 0xffff); break; default: for (j = 0; j < size; j++) fprintf(Outfile, "\t.byte\t0\n"); } } } // Generate a global string and its start label. void cgglobstr(int l, char *strvalue) { char *cptr; cglabel(l); for (cptr = strvalue; *cptr; cptr++) { fprintf(Outfile, "\t.byte\t%d\n", *cptr); } fprintf(Outfile, "\t.byte\t0\n"); } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "beq", "bne", "blt", "bgt", "ble", "bge" }; // Compare two locations and set if true. 
int cgcompare_and_set(int ASTop, int l1, int l2, int type) { int label1, label2; int primtype = cgprimtype(type); // Get two labels label1= genlabel(); label2= genlabel(); // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); load_d(l1); switch (primtype) { case PR_CHAR: fprintf(Outfile, "\tcmpb "); printlocation(l2, 0, 'b'); break; case PR_INT: case PR_POINTER: case PR_LONG: fprintf(Outfile, "\tcmpd "); printlocation(l2, 0, 'd'); break; } fprintf(Outfile, "\t%s L%d\n", cmplist[ASTop - A_EQ], label1); // XXX This isn't right and I need to fix it if (primtype==PR_LONG) { fprintf(Outfile, "\tbne L%d\n", label1); fprintf(Outfile, "\tcmpd "); printlocation(l2, 2, 'd'); } fprintf(Outfile, "\tldd #0\n"); fprintf(Outfile, "\tbra L%d\n", label2); cglabel(label1); fprintf(Outfile, "\tldd #1\n"); cglabel(label2); cgfreelocn(l2); // Mark the location as the D register Locn[l1].type= L_DREG; d_holds= l1; return (l1); } // Generate a jump to a label void cgjump(int l) { fprintf(Outfile, "\tbra L%d\n", l); d_holds= NOREG; } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "bne", "beq", "bge", "ble", "bgt", "blt" }; // Comparisons used by the long function static char *lcmplist1[] = { "bne", "beq", "bge", "ble", "bgt", "blt" }; static char *lcmplist2[] = { "bne", "beq", "bhs", "bls", "bhi", "blo" }; // Compare two long locations and jump if false static void longcmp_and_jump(int ASTop, int parentASTop, int l1, int l2, int label) { int truelabel; // Generate a new label truelabel=genlabel(); fprintf(Outfile, "\t%s L%d\n", lcmplist1[ASTop - A_EQ], label); switch(ASTop) { case A_EQ: fprintf(Outfile, "\tbne L%d\n", label); break; case A_NE: fprintf(Outfile, "\tbne L%d\n", truelabel); break; case A_LT: fprintf(Outfile, "\tblt L%d\n", truelabel); fprintf(Outfile, "\tbne L%d\n", label); break; case A_GT: fprintf(Outfile, "\tbgt L%d\n", truelabel); 
fprintf(Outfile, "\tbne L%d\n", label); break; case A_LE: fprintf(Outfile, "\tbgt L%d\n", label); fprintf(Outfile, "\tbne L%d\n", truelabel); break; case A_GE: fprintf(Outfile, "\tblt L%d\n", label); fprintf(Outfile, "\tbne L%d\n", truelabel); } fprintf(Outfile, "\tcmpd "); printlocation(l2, 2, 'd'); fprintf(Outfile, "\t%s L%d\n", lcmplist2[ASTop - A_EQ], label); cglabel(truelabel); } // Compare two locations and jump if false. // Jump if true if the parent op is A_LOGOR int cgcompare_and_jump(int ASTop, int parentASTop, int l1, int l2, int label, int type) { int primtype = cgprimtype(type); char *jmpop; // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); load_d_z(l1, type); jmpop= invcmplist[ASTop - A_EQ]; if (parentASTop==A_LOGOR) jmpop= cmplist[ASTop - A_EQ]; switch (primtype) { case PR_CHAR: fprintf(Outfile, "\tcmpb "); printlocation(l2, 0, 'b'); break; case PR_INT: case PR_POINTER: fprintf(Outfile, "\tcmpd "); printlocation(l2, 0, 'd'); break; case PR_LONG: fprintf(Outfile, "\tcmpy "); printlocation(l2, 0, 'y'); break; } if (primtype==PR_LONG) longcmp_and_jump(ASTop, parentASTop, l1, l2, label); fprintf(Outfile, "\t%s L%d\n", jmpop, label); cgfreelocn(l1); cgfreelocn(l2); return (NOREG); } // Widen the value in the location from the old // to the new type, and return a location // with this new value int cgwiden(int l, int oldtype, int newtype) { int how= cgprimsize(newtype) - cgprimsize(oldtype); int label1, label2; int l2; // If the sizes are the same do nothing if (how==0) return(l); load_d(l); // Get a location which is a L_DREG l2= cgalloclocn(L_DREG, cgprimtype(newtype), NULL, 0); // Three possibilities: size 1 to 2, size 2 to 4 and size 1 to 4. 
// Note that chars are unsigned which makes things easier switch(how) { // 1 to 2 case 1: fprintf(Outfile, "\tclra\n"); break; // 2 to 4 case 2: // Get two labels label1 = genlabel(); label2 = genlabel(); fprintf(Outfile, "\tbge L%d\n", label1); fprintf(Outfile, "\tldy #65535\n"); fprintf(Outfile, "\tbra L%d\n", label2); cglabel(label1); fprintf(Outfile, "\tldy #0\n"); cglabel(label2); break; // 1 to 4 case 3: fprintf(Outfile, "\tclra\n"); fprintf(Outfile, "\tldy #0\n"); } return (l2); } // Change a location from its old type to a new type. int cgcast(int l, int oldtype, int newtype) { return(cgwiden(l,oldtype,newtype)); } // Generate code to return a value from a function void cgreturn(int l, struct symtable *sym) { // Load D is there is a return value if (l != NOREG) load_d(l); cgjump(sym->st_endlabel); } // Generate code to load the address of an identifier. // Return a new location int cgaddress(struct symtable *sym) { int l; // For things not on the stack it's easy if (sym->class == V_GLOBAL || sym->class == V_EXTERN || sym->class == V_STATIC) { l= cgalloclocn(L_SYMADDR, PR_POINTER, sym->name, 0); return(l); } // For things on the stack we need to get the address in // the X register and then move it into D. Stash D in // a temporary if it's already in use. stash_d(); fprintf(Outfile, "\tleax %d,s\n", sym->st_posn + sp_adjust); fprintf(Outfile, "\ttfr x,d\n"); l = cgalloclocn(L_DREG, PR_POINTER, NULL, 0); return(l); } // Dereference a pointer to get the value // it points at into the same location int cgderef(int l, int type) { // Get the type that we are pointing to int newtype = value_at(type); int primtype= cgprimtype(newtype); if (Locn[l].type==L_DREG) fprintf(Outfile, "\ttfr d,x\n"); else { // Stash D in a temporary if it's already in use. 
stash_d(); fprintf(Outfile, "\tldx "); printlocation(l, 0, 'd'); } switch (primtype) { case PR_CHAR: fprintf(Outfile, "\tldb 0,x\n"); break; case PR_INT: case PR_POINTER: fprintf(Outfile, "\tldd 0,x\n"); break; case PR_LONG: fprintf(Outfile, "\tldd 2,x\n"); fprintf(Outfile, "\tldy 0,x\n"); } cgfreelocn(l); l= cgalloclocn(L_DREG, primtype, NULL, 0); return (l); } // Dereference and store through l2, a pointer int cgstorderef(int l1, int l2, int type) { int primtype = cgprimtype(type); // If l2 is in the D register, do a transfer if (d_holds== l2) { fprintf(Outfile, "\ttfr d,x\n"); } else { fprintf(Outfile, "\tldx "); printlocation(l2, 0, 'd'); } d_holds= NOREG; load_d(l1); switch (primtype) { case PR_CHAR: fprintf(Outfile, "\tstb 0,x\n"); break; case PR_INT: case PR_POINTER: fprintf(Outfile, "\tstd 0,x\n"); break; case PR_LONG: fprintf(Outfile, "\tsty 0,x\n"); fprintf(Outfile, "\tstd 2,x\n"); break; } d_holds= l1; return (11); } // Generate a switch jump table and the code to // load the locations and call the switch() code void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; // Get a label for the switch jump table label= genlabel(); // Generate the switch jump table. 
cglitseg(); cglabel(label); fprintf(Outfile, "\t.word %d\n", casecount); for (i = 0; i < casecount; i++) fprintf(Outfile, "\t.word %d\n\t.word L%d\n", caseval[i], caselabel[i]); fprintf(Outfile, "\t.word L%d\n", defaultlabel); // Output the label where we restart actual code cgtextseg(); cglabel(toplabel); // Call the helper routine with the jump table location // after loading D with the value in reg (a location) load_d(reg); fprintf(Outfile, "\tldx #L%d\n", label); fprintf(Outfile, "\tbra __switch\n"); } // Move value between locations void cgmove(int l1, int l2, int type) { load_d(l1); save_d(l2); } // Output a gdb directive to say on which // source code line number the following // assembly code came from void cglinenum(int line) { fprintf(Outfile, ";\t\t\t\t\tline %d\n", line); } ================================================ FILE: 64_6809_Target/cgen.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "gen.h" #include "misc.h" #include "sym.h" #include "tree.h" #include "types.h" // Assembly code generator. // Copyright (c) 2023,2024 Warren Toomey, GPL3 // Allocate space for the variables // then free the symbol table. void allocateGlobals(void) { struct symtable *sym, *litsym; int i; // Load all the types and all the globals loadGlobals(); // We now have all the types and all the globals in memory // Generate the string literals first for (sym=Symhead; sym!=NULL; sym=sym->next) { if (sym->stype== S_STRLIT) sym->st_label= genglobstr(sym->name); } // Now do the non string literals // XXX To fix: sym=sym->next for (sym=Symhead; sym!=NULL; ) { if (sym->stype== S_STRLIT) { sym=sym->next; continue; } // If this is a char pointer or an array of char pointers, // replace any values in the initlist (which are symbol ids) // with the associated string literal labels. // Yes, P_CHAR+2 means array of char pointers. 
if (sym->initlist!=NULL && (sym->type== pointer_to(P_CHAR) || sym->type == P_CHAR+2)) { for (i=0; inelems; i++) if (sym->initlist[i]!=0) { litsym= findSymbol(NULL, 0, sym->initlist[i]); sym->initlist[i]= litsym->st_label; } } genglobsym(sym); sym=sym->next; } freeSymtable(); // Clear the symbol table } // Open the symbol table file and AST file // Loop: // Read in the next AST tree // Generate the assembly code // Free the in-memory symbol tables int main(int argc, char **argv) { struct ASTnode *node; if (argc !=4) { fprintf(stderr, "Usage: %s symfile astfile idxfile\n", argv[0]); exit(1); } // Open the symbol table file Symfile= fopen(argv[1], "r"); if (Symfile == NULL) { fprintf(stderr, "Can't open %s\n", argv[1]); exit(1); } // Open the AST file Infile= fopen(argv[2], "r"); if (Infile == NULL) { fprintf(stderr, "Can't open %s\n", argv[2]); exit(1); } // Open the AST index offset file for read/writing Idxfile= fopen(argv[3], "w+"); if (Idxfile == NULL) { fprintf(stderr, "Can't open %s\n", argv[3]); exit(1); } // We write assembly to stdout Outfile=stdout; mkASTidxfile(); // Build the AST index offset file freeSymtable(); // Clear the symbol table genpreamble(); // Output the preamble allocateGlobals(); // Allocate global variables while (1) { // Read the next function's top node in from file node= loadASTnode(0, 1); if (node==NULL) break; // Generate the assembly code for the tree genAST(node, NOLABEL, NOLABEL, NOLABEL, 0); // Free the symbols in the in-memory symbol tables. 
// Also free the AST node we loaded in freeSymtable(); freeASTnode(node); } genpostamble(); // Output the postamble freeSymtable(); fclose(Infile); fclose(Symfile); exit(0); return(0); } ================================================ FILE: 64_6809_Target/cgqbe.c ================================================ #include "defs.h" #include "data.h" #include "gen.h" #include "misc.h" #include "types.h" #include "target.h" #include "cg.h" // Code generator for x86-64 using the QBE intermediate language. // Copyright (c) 2019 Warren Toomey, GPL3 // We have to keep a list of literal strings as we can't generate // them in the middle of code struct litlist { char *val; int label; struct litlist *next; }; struct litlist *Strlithead; struct litlist *Strlittail; // Switch to the text segment void cgtextseg() { } // Switch to the data segment void cgdataseg() { } // Switch to the literal segment void cglitseg() { } // Free registers/temporaries void cgfreeallregs(int keepreg) { } void cgfreereg(int reg) { } // Given a scalar type value, return the // character that matches the QBE type. // Because chars are stored on the stack, // we can return 'w' for P_CHAR. static int cgprimtype(int type) { if (ptrtype(type)) return ('l'); switch (type) { case P_VOID: return (' '); case P_CHAR: return ('w'); case P_INT: return ('w'); case P_LONG: return ('l'); default: fatald("Bad type in cgprimtype:", type); } return (0); // Keep -Wall happy } // Allocate a QBE temporary static int nexttemp = 0; int cgalloctemp(void) { return (++nexttemp); } int cgallocreg(int type) { return(cgalloctemp()); } // Print out the assembly preamble // for one output file void cgpreamble() { Strlithead= NULL; Strlittail= NULL; } // Print out any global string literals static void cgmakeglobstrs(); void cgpostamble() { cgmakeglobstrs(); } // Boolean flag: has there been a switch statement // in this function yet? 
static int used_switch; // Print out a function preamble void cgfuncpreamble(struct symtable *sym) { char *name = sym->name; struct symtable *parm, *locvar; int size, bigsize; int label; // Output the function's name and return type if (sym->class == V_GLOBAL) fprintf(Outfile, "export "); fprintf(Outfile, "function %c $%s(", cgprimtype(sym->type), name); // Output the parameter names and types. For any parameters which // need addresses, change their name as we copy their value below for (parm = sym->member; parm != NULL; parm = parm->next) { if (parm->class==V_LOCAL) break; // Ugly. Make all params have a address parm->st_hasaddr = 1; if (parm->st_hasaddr == 1) fprintf(Outfile, "%c %%.p%s, ", cgprimtype(parm->type), parm->name); else fprintf(Outfile, "%c %%%s, ", cgprimtype(parm->type), parm->name); } fprintf(Outfile, ") {\n"); // Get a label for the function start label = genlabel(); cglabel(label); // For any parameters which need addresses, allocate memory // on the stack for them. QBE won't let us do alloc1, so // we allocate 4 bytes for chars. Copy the value from the // parameter to the new memory location. // of the parameter for (parm = sym->member; parm != NULL; parm = parm->next) { if (parm->class==V_LOCAL) break; // Ugly. Make all params have a address parm->st_hasaddr = 1; if (parm->st_hasaddr == 1) { size = cgprimsize(parm->type); bigsize = (size == 1) ? 4 : size; fprintf(Outfile, " %%%s =l alloc%d 1\n", parm->name, bigsize); // Copy to the allocated memory switch (size) { case 1: fprintf(Outfile, " storeb %%.p%s, %%%s\n", parm->name, parm->name); break; case 4: fprintf(Outfile, " storew %%.p%s, %%%s\n", parm->name, parm->name); break; case 8: fprintf(Outfile, " storel %%.p%s, %%%s\n", parm->name, parm->name); } } } // Allocate memory for any local variables that need to be on the // stack. There are two reasons for this. The first is for locals // where their address is used. 
The second is for char variables // We need to do this as QBE can only truncate down to 8 bits // for locations in memory. // Note: locals come after parameters in the member list. for (locvar = parm; locvar != NULL; locvar = locvar->next) { if (locvar->st_hasaddr == 1) { // Get the total size for all elements (if an array). // Round up to the nearest multiple of 8, to ensure that // pointers are aligned on 8-byte boundaries size = locvar->size * locvar->nelems; size = (size + 7) >> 3; fprintf(Outfile, " %%%s =l alloc8 %d\n", locvar->name, size); } else if (locvar->type == P_CHAR) { locvar->st_hasaddr = 1; fprintf(Outfile, " %%%s =l alloc4 1\n", locvar->name); } } used_switch = 0; // We haven't output the switch handling code yet } // Print out a function postamble void cgfuncpostamble(struct symtable *sym) { cglabel(sym->st_endlabel); // Return a value if the function's type isn't void if (sym->type != P_VOID) fprintf(Outfile, " ret %%.ret\n}\n"); else fprintf(Outfile, " ret\n}\n"); } // Load an integer literal value into a temporary. // Return the number of the temporary. int cgloadint(int value, int type) { // Get a new temporary int t = cgalloctemp(); fprintf(Outfile, " %%.t%d =%c copy %d\n", t, cgprimtype(type), value); return (t); } // Load a value from a variable into a temporary. // Return the number of the temporary. If the // operation is pre- or post-increment/decrement, // also perform this action. int cgloadvar(struct symtable *sym, int op) { int r, posttemp, offset = 1; char qbeprefix; // Get a new temporary r = cgalloctemp(); // If the symbol is a pointer, use the size // of the type that it points to as any // increment or decrement. If not, it's one. 
if (ptrtype(sym->type)) offset = typesize(value_at(sym->type), sym->ctype); // Negate the offset for decrements if (op == A_PREDEC || op == A_POSTDEC) offset = -offset; // Get the relevant QBE prefix for the symbol qbeprefix = ((sym->class == V_GLOBAL) || (sym->class == V_STATIC) || (sym->class == V_EXTERN)) ? (char)'$' : (char)'%'; // If we have a pre-operation if (op == A_PREINC || op == A_PREDEC) { if (sym->st_hasaddr || qbeprefix == '$') { // Get a new temporary posttemp = cgalloctemp(); switch (sym->size) { case 1: fprintf(Outfile, " %%.t%d =w loadub %c%s\n", posttemp, qbeprefix, sym->name); fprintf(Outfile, " %%.t%d =w add %%.t%d, %d\n", posttemp, posttemp, offset); fprintf(Outfile, " storeb %%.t%d, %c%s\n", posttemp, qbeprefix, sym->name); break; case 4: fprintf(Outfile, " %%.t%d =w loadsw %c%s\n", posttemp, qbeprefix, sym->name); fprintf(Outfile, " %%.t%d =w add %%.t%d, %d\n", posttemp, posttemp, offset); fprintf(Outfile, " storew %%.t%d, %c%s\n", posttemp, qbeprefix, sym->name); break; case 8: fprintf(Outfile, " %%.t%d =l loadl %c%s\n", posttemp, qbeprefix, sym->name); fprintf(Outfile, " %%.t%d =l add %%.t%d, %d\n", posttemp, posttemp, offset); fprintf(Outfile, " storel %%.t%d, %c%s\n", posttemp, qbeprefix, sym->name); } } else fprintf(Outfile, " %c%s =%c add %c%s, %d\n", qbeprefix, sym->name, cgprimtype(sym->type), qbeprefix, sym->name, offset); } // Now load the output temporary with the value if (sym->st_hasaddr || qbeprefix == '$') { switch (sym->size) { case 1: fprintf(Outfile, " %%.t%d =w loadub %c%s\n", r, qbeprefix, sym->name); break; case 4: fprintf(Outfile, " %%.t%d =w loadsw %c%s\n", r, qbeprefix, sym->name); break; case 8: fprintf(Outfile, " %%.t%d =l loadl %c%s\n", r, qbeprefix, sym->name); } } else fprintf(Outfile, " %%.t%d =%c copy %c%s\n", r, cgprimtype(sym->type), qbeprefix, sym->name); // If we have a post-operation if (op == A_POSTINC || op == A_POSTDEC) { if (sym->st_hasaddr || qbeprefix == '$') { // Get a new temporary posttemp = 
cgalloctemp(); switch (sym->size) { case 1: fprintf(Outfile, " %%.t%d =w loadub %c%s\n", posttemp, qbeprefix, sym->name); fprintf(Outfile, " %%.t%d =w add %%.t%d, %d\n", posttemp, posttemp, offset); fprintf(Outfile, " storeb %%.t%d, %c%s\n", posttemp, qbeprefix, sym->name); break; case 4: fprintf(Outfile, " %%.t%d =w loadsw %c%s\n", posttemp, qbeprefix, sym->name); fprintf(Outfile, " %%.t%d =w add %%.t%d, %d\n", posttemp, posttemp, offset); fprintf(Outfile, " storew %%.t%d, %c%s\n", posttemp, qbeprefix, sym->name); break; case 8: fprintf(Outfile, " %%.t%d =l loadl %c%s\n", posttemp, qbeprefix, sym->name); fprintf(Outfile, " %%.t%d =l add %%.t%d, %d\n", posttemp, posttemp, offset); fprintf(Outfile, " storel %%.t%d, %c%s\n", posttemp, qbeprefix, sym->name); } } else fprintf(Outfile, " %c%s =%c add %c%s, %d\n", qbeprefix, sym->name, cgprimtype(sym->type), qbeprefix, sym->name, offset); } // Return the temporary with the value return (r); } // Given the label number of a global string, // load its address into a new temporary int cgloadglobstr(int label) { // Get a new temporary int r = cgalloctemp(); fprintf(Outfile, " %%.t%d =l copy $L%d\n", r, label); return (r); } // Add two temporaries together and return // the number of the temporary with the result int cgadd(int r1, int r2, int type) { fprintf(Outfile, " %%.t%d =%c add %%.t%d, %%.t%d\n", r1, cgprimtype(type), r1, r2); return (r1); } // Subtract the second temporary from the first and // return the number of the temporary with the result int cgsub(int r1, int r2, int type) { fprintf(Outfile, " %%.t%d =%c sub %%.t%d, %%.t%d\n", r1, cgprimtype(type), r1, r2); return (r1); } // Multiply two temporaries together and return // the number of the temporary with the result int cgmul(int r1, int r2, int type) { fprintf(Outfile, " %%.t%d =%c mul %%.t%d, %%.t%d\n", r1, cgprimtype(type), r1, r2); return (r1); } // Divide the first temporary by the second and // return the number of the temporary with the result int 
cgdiv(int r1, int r2, int type) { fprintf(Outfile, " %%.t%d =%c div %%.t%d, %%.t%d\n", r1, cgprimtype(type), r1, r2); return (r1); } // Modulo the first temporary by the second and // return the number of the temporary with the result int cgmod(int r1, int r2, int type) { fprintf(Outfile, " %%.t%d =%c rem %%.t%d, %%.t%d\n", r1, cgprimtype(type), r1, r2); return (r1); } // Bitwise AND two temporaries int cgand(int r1, int r2, int type) { fprintf(Outfile, " %%.t%d =%c and %%.t%d, %%.t%d\n", r1, cgprimtype(type), r1, r2); return (r1); } // Bitwise OR two temporaries int cgor(int r1, int r2, int type) { fprintf(Outfile, " %%.t%d =%c or %%.t%d, %%.t%d\n", r1, cgprimtype(type), r1, r2); return (r1); } // Bitwise XOR two temporaries int cgxor(int r1, int r2, int type) { fprintf(Outfile, " %%.t%d =%c xor %%.t%d, %%.t%d\n", r1, cgprimtype(type), r1, r2); return (r1); } // Shift left r1 by r2 bits int cgshl(int r1, int r2, int type) { fprintf(Outfile, " %%.t%d =%c shl %%.t%d, %%.t%d\n", r1, cgprimtype(type), r1, r2); return (r1); } // Shift right r1 by r2 bits int cgshr(int r1, int r2, int type) { fprintf(Outfile, " %%.t%d =%c shr %%.t%d, %%.t%d\n", r1, cgprimtype(type), r1, r2); return (r1); } // Negate a temporary's value int cgnegate(int r, int type) { fprintf(Outfile, " %%.t%d =%c sub 0, %%.t%d\n", r, cgprimtype(type), r); return (r); } // Invert a temporary's value int cginvert(int r, int type) { fprintf(Outfile, " %%.t%d =%c xor %%.t%d, -1\n", r, cgprimtype(type), r); return (r); } // Logically negate a temporary's value int cglognot(int r, int type) { int q = cgprimtype(type); fprintf(Outfile, " %%.t%d =%c ceq%c %%.t%d, 0\n", r, q, q, r); return (r); } // Load a boolean value (only 0 or 1) // into the given temporary. 
Allocate a // temporary if r is NOREG int cgloadboolean(int r, int val, int type) { if (r==NOREG) r= cgalloctemp(); fprintf(Outfile, " %%.t%d =%c copy %d\n", r, cgprimtype(type), val); return(r); } // Convert an integer value to a boolean value for // a TOBOOL operation. Jump if true if it's an IF, // WHILE operation. Jump if false if it's // a LOGOR operation. int cgboolean(int r, int op, int label, int type) { // Get a label for the next instruction int label2 = genlabel(); // Get a new temporary for the comparison int r2 = cgalloctemp(); // Convert temporary to boolean value fprintf(Outfile, " %%.t%d =l cne%c %%.t%d, 0\n", r2, cgprimtype(type), r); switch (op) { case A_IF: case A_WHILE: case A_TERNARY: case A_LOGAND: fprintf(Outfile, " jnz %%.t%d, @L%d, @L%d\n", r2, label2, label); break; case A_LOGOR: fprintf(Outfile, " jnz %%.t%d, @L%d, @L%d\n", r2, label, label2); break; } // Output the label for the next instruction cglabel(label2); return (r2); } // Call a function with the given symbol id. // Return the temprary with the result int cgcall(struct symtable *sym, int numargs, int *arglist, int *typelist) { int outr; int i; // Get a new temporary for the return result outr = cgalloctemp(); // Call the function if (sym->type == P_VOID) fprintf(Outfile, " call $%s(", sym->name); else fprintf(Outfile, " %%.t%d =%c call $%s(", outr, cgprimtype(sym->type), sym->name); // Output the list of arguments for (i = numargs - 1; i >= 0; i--) { fprintf(Outfile, "%c %%.t%d, ", cgprimtype(typelist[i]), arglist[i]); } fprintf(Outfile, ")\n"); return (outr); } // Shift a temporary left by a constant. 
As we only // use this for address calculations, extend the // type to be a QBE 'l' if required int cgshlconst(int r, int val, int type) { int r2 = cgalloctemp(); int r3 = cgalloctemp(); if (cgprimsize(type) < 8) { fprintf(Outfile, " %%.t%d =l extsw %%.t%d\n", r2, r); fprintf(Outfile, " %%.t%d =l shl %%.t%d, %d\n", r3, r2, val); } else fprintf(Outfile, " %%.t%d =l shl %%.t%d, %d\n", r3, r, val); return (r3); } // Store a temporary's value into a global variable int cgstorglob(int r, struct symtable *sym) { // We can store to bytes in memory int q = cgprimtype(sym->type); if (sym->type == P_CHAR) q = 'b'; fprintf(Outfile, " store%c %%.t%d, $%s\n", q, r, sym->name); return (r); } // Store a temporary's value into a local variable int cgstorlocal(int r, struct symtable *sym) { // If the variable is on the stack, use store instructions if (sym->st_hasaddr) { fprintf(Outfile, " store%c %%.t%d, %%%s\n", cgprimtype(sym->type), r, sym->name); } else { fprintf(Outfile, " %%%s =%c copy %%.t%d\n", sym->name, cgprimtype(sym->type), r); } return (r); } // Generate a global symbol but not functions void cgglobsym(struct symtable *node) { int size, type; int initvalue; int i; if (node == NULL) return; if (node->stype == S_FUNCTION) return; // Get the size of the variable (or its elements if an array) // and the type of the variable if (node->stype == S_ARRAY) { size = typesize(value_at(node->type), node->ctype); type = value_at(node->type); } else { size = node->size; type = node->type; } // Generate the global identity and the label cgdataseg(); if (node->class == V_GLOBAL) fprintf(Outfile, "export "); if (node->ctype==NULL) fprintf(Outfile, "data $%s = align %d { ", node->name, cgprimsize(type)); else fprintf(Outfile, "data $%s = align 8 { ", node->name); // Output space for one or more elements for (i = 0; i < node->nelems; i++) { // Get any initial value initvalue = 0; if (node->initlist != NULL) initvalue = node->initlist[i]; // Generate the space for this type switch (size) 
{ case 1: fprintf(Outfile, "b %d, ", initvalue); break; case 4: fprintf(Outfile, "w %d, ", initvalue); break; case 8: // Generate the pointer to a string literal. Treat a zero value // as actually zero, not the label L0 if (node->initlist != NULL && type == pointer_to(P_CHAR) && initvalue != 0) fprintf(Outfile, "l $L%d, ", initvalue); else fprintf(Outfile, "l %d, ", initvalue); break; default: fprintf(Outfile, "z %d, ", size); } } fprintf(Outfile, "}\n"); } // Stash a global string for later output void cgglobstr(int l, char *strvalue) { struct litlist *this; this= (struct litlist *)malloc(sizeof(struct litlist)); this->val= strdup(strvalue); this->label= l; this->next= NULL; if (Strlithead==NULL) { Strlithead= Strlittail= this; } else { Strlittail->next= this; Strlittail= this; } } // Generate all the global strings and their labels static void cgmakeglobstrs() { struct litlist *this; char *cptr; for (this= Strlithead; this!=NULL; this=this->next) { fprintf(Outfile, "data $L%d = { ", this->label); for (cptr = this->val; *cptr; cptr++) { fprintf(Outfile, "b %d, ", *cptr); } fprintf(Outfile, " b 0 }\n"); } } // NUL terminate a global string void cgglobstrend(void) { } // List of comparison instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *cmplist[] = { "ceq", "cne", "cslt", "csgt", "csle", "csge" }; // Compare two temporaries and set if true. int cgcompare_and_set(int ASTop, int r1, int r2, int type) { int r3; int q = cgprimtype(type); // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); // Get a new temporary for the comparison r3 = cgalloctemp(); fprintf(Outfile, " %%.t%d =%c %s%c %%.t%d, %%.t%d\n", r3, q, cmplist[ASTop - A_EQ], q, r1, r2); return (r3); } // Generate a label void cglabel(int l) { fprintf(Outfile, "@L%d\n", l); } // Generate a jump to a label void cgjump(int l) { int label; fprintf(Outfile, " jmp @L%d\n", l); // Print out a bogus label. 
This prevents the output // having two adjacent jumps which QBE doesn't like. label = genlabel(); cglabel(label); } // List of inverted jump instructions, // in AST order: A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE static char *invcmplist[] = { "cne", "ceq", "csge", "csle", "csgt", "cslt" }; // Compare two temporaries and jump if false. // Jump if true if the parent op is A_LOGOR. int cgcompare_and_jump(int ASTop, int parentASTop, int r1, int r2, int label, int type) { int label2; int r3; int q = cgprimtype(type); char *cmpop; // Check the range of the AST operation if (ASTop < A_EQ || ASTop > A_GE) fatal("Bad ASTop in cgcompare_and_set()"); cmpop= invcmplist[ASTop - A_EQ]; if (parentASTop == A_LOGOR) cmpop= cmplist[ASTop - A_EQ]; // Get a label for the next instruction label2 = genlabel(); // Get a new temporary for the comparison r3 = cgalloctemp(); fprintf(Outfile, " %%.t%d =%c %s%c %%.t%d, %%.t%d\n", r3, q, cmpop, q, r1, r2); fprintf(Outfile, " jnz %%.t%d, @L%d, @L%d\n", r3, label, label2); cglabel(label2); return (NOREG); } // Widen the value in the temporary from the old // to the new type, and return a temporary with // this new value int cgwiden(int r, int oldtype, int newtype) { int oldq = cgprimtype(oldtype); int newq = cgprimtype(newtype); // Get a new temporary int t = cgalloctemp(); switch (oldtype) { case P_CHAR: fprintf(Outfile, " %%.t%d =%c extub %%.t%d\n", t, newq, r); break; default: fprintf(Outfile, " %%.t%d =%c exts%c %%.t%d\n", t, newq, oldq, r); } return (t); } // Generate code to return a value from a function void cgreturn(int r, struct symtable *sym) { // Only return a value if we have a value to return if (r != NOREG) fprintf(Outfile, " %%.ret =%c copy %%.t%d\n", cgprimtype(sym->type), r); cgjump(sym->st_endlabel); } // Generate code to load the address of an // identifier. 
Return a new temporary int cgaddress(struct symtable *sym) { int r = cgalloctemp(); char qbeprefix = ((sym->class == V_GLOBAL) || (sym->class == V_STATIC) || (sym->class == V_EXTERN)) ? (char)'$' : (char)'%'; fprintf(Outfile, " %%.t%d =l copy %c%s\n", r, qbeprefix, sym->name); return (r); } // Dereference a pointer to get the value // it points at into a new temporary int cgderef(int r, int type) { // Get the type that we are pointing to int newtype = value_at(type); // Now get the size of this type int size = cgprimsize(newtype); // Get temporary for the return result int ret = cgalloctemp(); switch (size) { case 1: fprintf(Outfile, " %%.t%d =w loadub %%.t%d\n", ret, r); break; case 4: fprintf(Outfile, " %%.t%d =w loadsw %%.t%d\n", ret, r); break; case 8: fprintf(Outfile, " %%.t%d =l loadl %%.t%d\n", ret, r); break; default: fatald("Can't cgderef on type:", type); } return (ret); } // Store through a dereferenced pointer int cgstorderef(int r1, int r2, int type) { // Get the size of the type int size = cgprimsize(type); switch (size) { case 1: fprintf(Outfile, " storeb %%.t%d, %%.t%d\n", r1, r2); break; case 4: fprintf(Outfile, " storew %%.t%d, %%.t%d\n", r1, r2); break; case 8: fprintf(Outfile, " storel %%.t%d, %%.t%d\n", r1, r2); break; default: fatald("Can't cgstoderef on type:", type); } return (r1); } // Generate code to compare each switch value // and jump to the appropriate case label. 
void cgswitch(int reg, int casecount, int toplabel, int *caselabel, int *caseval, int defaultlabel) { int i, label; int rval, rcmp; // Get two temporaries for the case value and the comparison rval= cgalloctemp(); rcmp= cgalloctemp(); // Output the label at the top of the code cglabel(toplabel); for (i = 0; i < casecount; i++) { // Get a label for the code when we skip this case label= genlabel(); // Load the case value fprintf(Outfile, " %%.t%d =w copy %d\n", rval, caseval[i]); // Compare the temporary against the case value fprintf(Outfile, " %%.t%d =w ceqw %%.t%d, %%.t%d\n", rcmp, reg, rval); // Jump either to the next comparison or the case code fprintf(Outfile, " jnz %%.t%d, @L%d, @L%d\n", rcmp, caselabel[i], label); cglabel(label); } // No case matched, jump to the default label cgjump(defaultlabel); } // Move value between temporaries void cgmove(int r1, int r2, int type) { fprintf(Outfile, " %%.t%d =%c copy %%.t%d\n", r2, cgprimtype(type), r1); } // Output a gdb directive to say on which // source code line number the following // assembly code came from void cglinenum(int line) { // fprintf(Outfile, "\t.loc 1 %d 0\n", line); } // Change a temporary value from its old // type to a new type. 
int cgcast(int t, int oldtype, int newtype) { // Get temporary for the return result int ret = cgalloctemp(); int oldsize, newsize; int qnew; // If the new type is a pointer if (ptrtype(newtype)) { // Nothing to do if the old type is also a pointer if (ptrtype(oldtype)) return (t); // Otherwise, widen from a primitive type to a pointer return (cgwiden(t, oldtype, newtype)); } // New type is not a pointer // Get the new QBE type // and the type sizes in bytes qnew = cgprimtype(newtype); oldsize = cgprimsize(oldtype); newsize = cgprimsize(newtype); // Nothing to do if the two are the same size if (newsize == oldsize) return (t); // If the new size is smaller, we can copy and QBE will truncate it, // otherwise use the QBE cast operation if (newsize < oldsize) fprintf(Outfile, " %%.t%d =%c copy %%.t%d\n", ret, qnew, t); else fprintf(Outfile, " %%.t%d =%c cast %%.t%d\n", ret, qnew, t); return (ret); } ================================================ FILE: 64_6809_Target/cpeep.c ================================================ /* copt version 1.00 (C) Copyright Christopher W. Fraser 1984 */ /* Added out of memory checking and ANSI prototyping. DG 1999 */ /* Added %L - %N variables, %activate, regexp, %check. Zrin Z. 
2002 */ #include #include #include #include #include int rpn_eval(char *expr, char **vars); #define HSIZE 107 #define MAXLINE 128 #define MAXFIRECOUNT 65535L #define MAX_PASS 16 int debug = 0; int global_again = 0; /* signalize rule set has changed */ #define FIRSTLAB 'L' #define LASTLAB 'N' int nextlab = 1; /* unique label counter */ int labnum[LASTLAB - FIRSTLAB + 1]; /* unique label numbers */ struct lnode { char *l_text; struct lnode *l_prev, *l_next; }; struct onode { struct lnode *o_old, *o_new; struct onode *o_next; long firecount; } *opts, *activerule; void printlines(struct lnode *beg, struct lnode *end, FILE * out) { struct lnode *p; for (p = beg; p != end; p = p->l_next) fputs(p->l_text, out); } void printrule(struct onode *o, FILE * out) { struct lnode *p = o->o_old; while (p->l_prev) p = p->l_prev; printlines(p, NULL, out); fputs("=\n", out); printlines(o->o_new, NULL, out); } /* error - report error and quit */ void error(char *s) { fputs(s, stderr); if (activerule) { fputs("active rule:\n", stderr); printrule(activerule, stderr); } exit(1); } /* connect - connect p1 to p2 */ void connect(struct lnode *p1, struct lnode *p2) { if (p1 == NULL || p2 == NULL) error("connect: can't happen\n"); p1->l_next = p2; p2->l_prev = p1; } static struct hnode { char *h_str; struct hnode *h_ptr; } *htab[HSIZE]; /* install - install str in string table */ char *install(char *str) { char *p1, *p2, *s; int i; struct hnode *p; s = str; for (i = 0; *s; i += *s++); i = abs(i) % HSIZE; for (p = htab[i]; p; p = p->h_ptr) for (p1 = str, p2 = p->h_str; *p1++ == *p2++;) if (p1[-1] == '\0') return (p->h_str); p = (struct hnode *) malloc(sizeof(struct hnode)); if (p == NULL) error("install 1: out of memory\n"); p->h_str = (char *) malloc((s - str) + 1); if (p->h_str == NULL) error("install 2: out of memory\n"); strcpy(p->h_str, str); p->h_ptr = htab[i]; htab[i] = p; return (p->h_str); } /* insert - insert a new node with text s before node p */ void insert(char *s, struct lnode 
*p) { struct lnode *n; n = (struct lnode *) malloc(sizeof(struct lnode)); if (n == NULL) error("insert: out of memory\n"); n->l_text = s; connect(p->l_prev, n); connect(n, p); } /* getlst - link lines from fp in between p1 and p2 */ void getlst(FILE * fp, char *quit, struct lnode *p1, struct lnode *p2) { char lin[MAXLINE]; connect(p1, p2); while (fgets(lin, MAXLINE, fp) != NULL && strcmp(lin, quit)) { insert(install(lin), p2); } } /* getlst_1 - link lines from fp in between p1 and p2 */ /* skip blank lines and comments at the start */ void getlst_1(FILE * fp, char *quit, struct lnode *p1, struct lnode *p2) { char lin[MAXLINE]; int firstline = 1; connect(p1, p2); while (fgets(lin, MAXLINE, fp) != NULL && strcmp(lin, quit)) { if (firstline) { char *p = lin; if (lin[0] == '#') continue; while (isspace(*p)) ++p; if (!*p) continue; firstline = 0; } insert(install(lin), p2); } } /* init - read patterns file */ void init(FILE * fp) { struct lnode head, tail; struct onode *p, **next; next = &opts; while (*next) next = &((*next)->o_next); while (!feof(fp)) { p = (struct onode *) malloc((unsigned) sizeof(struct onode)); if (p == NULL) error("init: out of memory\n"); p->firecount = MAXFIRECOUNT; getlst_1(fp, "=\n", &head, &tail); head.l_next->l_prev = NULL; if (tail.l_prev) tail.l_prev->l_next = NULL; p->o_old = tail.l_prev; if (p->o_old == NULL) { /* do not create empty rules */ free(p); continue; } getlst(fp, "====\n", &head, &tail); tail.l_prev->l_next = NULL; if (head.l_next) head.l_next->l_prev = NULL; p->o_new = head.l_next; *next = p; next = &p->o_next; } *next = NULL; } /* match - check conditions in rules */ /* format: %check min <= %n <= max */ int check(char *pat, char **vars) { int low, high, x; char v; x = sscanf(pat, "%d <= %%%c <= %d", &low, &v, &high); if (x != 3 || !('0' <= v && v <= '9')) { fprintf(stderr, "warning: invalid use of '%%check' in \"%s\"\n", pat); fprintf(stderr, "format is '%%check min <= %%n <= max'\n"); return 0; } if (vars[v - '0'] == 0) { 
fprintf(stderr, "error in pattern \"%s\"\n", pat); error("variable is not set\n"); } if (sscanf(vars[v - '0'], "%d", &x) != 1) return 0; return low <= x && x <= high; } int check_eval(char *pat, char **vars) { char expr[1024]; int expected, x; x = sscanf(pat, "%d = %[^\n]s", &expected, expr); if (x != 2) { fprintf(stderr, "warning: invalid use of '%%check_eval' in \"%s\"\n", pat); fprintf(stderr, "format is '%%check_eval result = expr"); return 0; } return expected == rpn_eval(expr, vars); } /* match - match ins against pat and set vars */ int match(char *ins, char *pat, char **vars) { char *p, lin[MAXLINE], *start = pat; while (*ins && *pat) if (pat[0] == '%') { switch (pat[1]) { case '%': if (*pat != *ins++) return 0; pat += 2; break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': if (pat[2] == '%' && pat[3] != '%') { fprintf(stderr, "error in \"%s\": ", start); error("input pattern %n% is not allowed\n"); } for (p = lin; *ins && *ins != pat[2];) *p++ = *ins++; *p = 0; p = install(lin); if (vars[pat[1] - '0'] == 0) vars[pat[1] - '0'] = p; else if (vars[pat[1] - '0'] != p) return 0; pat += 2; continue; default: break; } if (*pat++ != *ins++) return 0; } else if (*pat++ != *ins++) return 0; return *pat == *ins; /* compare end of string */ } /* subst_imp - return result of substituting vars into pat */ char *subst_imp(char *pat, char **vars) { static char errormsg[80]; static char lin[MAXLINE]; char num[30]; char *s, *start = pat; int i; i = 0; for (;;) { if (pat[0] == '%' && pat[1] == '%') { if (i < MAXLINE) { lin[i] = '%'; ++i; } pat += 2; } else if (pat[0] == '%' && pat[1] >= FIRSTLAB && pat[1] <= LASTLAB) { int il = pat[1] - FIRSTLAB; if (!labnum[il]) labnum[il] = nextlab++; sprintf(num, "%d", labnum[il]); for (s = num; i < MAXLINE && (lin[i] = *s++) != 0; ++i); pat += 2; } else if (pat[0] == '%' && strncmp(pat, "%eval(", 6) == 0) { char expr[1024]; int x = 0, r; pat += 6; while (*pat != ')') { expr[x++] = 
*pat++; } expr[x] = 0; pat++; r = rpn_eval(expr, vars); sprintf(expr, "%d", r); for (s = expr; i < MAXLINE && *s; i++) lin[i] = *s++; } else if (pat[0] == '%' && isdigit(pat[1])) { if (vars[pat[1] - '0'] == 0) { sprintf(errormsg, "error: variable %c is not set in \"%s\"", pat[1], start); error(errormsg); } for (s = vars[pat[1] - '0']; i < MAXLINE && (lin[i] = *s++) != 0; i++); pat += 2; } else if (i >= MAXLINE) error("line too long\n"); else if (!(lin[i++] = *pat++)) return &lin[0]; } } /* subst - return install(result of substituting vars into pat) */ char *subst(char *pat, char **vars) { return install(subst_imp(pat, vars)); } /* rep - substitute vars into new and replace lines between p1 and p2 */ struct lnode *rep(struct lnode *p1, struct lnode *p2, struct lnode *new, char **vars) { int i; struct lnode *p, *psav; for (i = 0; i < LASTLAB - FIRSTLAB + 1; ++i) labnum[i] = 0; for (p = p1->l_next; p != p2; p = psav) { psav = p->l_next; if (debug) fputs(p->l_text, stderr); free(p); } connect(p1, p2); if (debug) fputs("=\n", stderr); for (; new; new = new->l_next) { insert(subst(new->l_text, vars), p2); if (debug) fputs(p2->l_prev->l_text, stderr); } if (debug) putc('\n', stderr); return p1->l_next; } /* copylist - copy activated rule; substitute variables */ struct lnode *copylist(struct lnode *source, struct lnode **pat, struct lnode **sub, char **vars) { struct lnode head, tail, *more = NULL; int pattern = 1; /* allow nested rules */ int i; connect(&head, &tail); head.l_prev = tail.l_next = NULL; for (i = 0; i < LASTLAB - FIRSTLAB + 1; ++i) labnum[i] = 0; for (; source; source = source->l_next) { if (pattern && strcmp(source->l_text, "=\n") == 0) { pattern = 0; if (head.l_next == &tail) error("error: empty pattern\n"); *pat = tail.l_prev; head.l_next->l_prev = NULL; tail.l_prev->l_next = NULL; connect(&head, &tail); continue; } if (strcmp(source->l_text, "%activate\n") == 0) { if (pattern) error("error: %activate in pattern (before '=')\n"); more = source->l_next; 
break; } insert(subst(source->l_text, vars), &tail); } if (head.l_next == &tail) *sub = NULL; else { head.l_next->l_prev = NULL; tail.l_prev->l_next = NULL; *sub = head.l_next; } return more; } /* opt - replace instructions ending at r if possible */ struct lnode *opt(struct lnode *r) { char *vars[10]; int i, lines; struct lnode *c, *p; struct onode *o; static char *activated = "%activated "; for (o = opts; o; o = o->o_next) { activerule = o; if (o->firecount < 1) continue; c = r; p = o->o_old; if (debug) { fprintf(stderr, "Trying rule: "); printrule(o, stderr); } if (p == NULL) continue; /* skip empty rules */ for (i = 0; i < 10; i++) vars[i] = 0; lines = 0; while (p && c) { if (strncmp(p->l_text, "%check", 6) == 0) { if (!check(p->l_text + 6, vars)) break; } else if (strncmp(p->l_text, "%eval", 5) == 0) { if (!check_eval(p->l_text + 5, vars)) break; } else { // fprintf(stderr, "Matching '%s', '%s'.\n", // c->l_text, p->l_text); if (!match(c->l_text, p->l_text, vars)) break; c = c->l_prev; ++lines; } p = p->l_prev; } if (p != NULL) continue; /* decrease firecount */ --o->firecount; /* check for %once */ if (o->o_new && strcmp(o->o_new->l_text, "%once\n") == 0) { struct lnode *tmp = o->o_new; /* delete the %once line */ o->o_new = o->o_new->l_next; o->o_new->l_prev = NULL; free(tmp); o->firecount = 0; /* never again */ } /* check for activation rules */ if (o->o_new && strcmp(o->o_new->l_text, "%activate\n") == 0) { /* we have to prevent repeated activation of rules */ char signature[300]; struct lnode *lnp; struct onode *nn, *last; int skip = 0; /* since we 'install()' strings, we can compare pointers */ sprintf(signature, "%s%p%p%p%p%p%p%p%p%p%p\n", activated, vars[0], vars[1], vars[2], vars[3], vars[4], vars[5], vars[6], vars[7], vars[8], vars[9]); lnp = o->o_new->l_next; while (lnp && strncmp(lnp->l_text, activated, strlen(activated)) == 0) { if (strcmp(lnp->l_text, signature) == 0) { skip = 1; break; } lnp = lnp->l_next; } if (!lnp || skip) continue; 
insert(install(signature), lnp); if (debug) { fputs("matched pattern:\n", stderr); for (p = o->o_old; p->l_prev; p = p->l_prev); printlines(p, NULL, stderr); fputs("with:\n", stderr); printlines(c->l_next, r->l_next, stderr); } /* allow creation of several rules */ last = o; while (lnp) { nn = (struct onode *) malloc((unsigned) sizeof(struct onode)); if (nn == NULL) error("activate: out of memory\n"); nn->o_old = 0, nn->o_new = 0; nn->firecount = MAXFIRECOUNT; lnp = copylist(lnp, &nn->o_old, &nn->o_new, vars); nn->o_next = last->o_next; last->o_next = nn; last = nn; if (debug) { fputs("activated rule:\n", stderr); printrule(nn, stderr); } } if (debug) fputs("\n", stderr); /* step back to allow (shorter) activated rules to match in the order they appear */ while (--lines && r->l_prev) r = r->l_prev; global_again = 1; /* signalize changes */ continue; } /* fire the rule */ r = rep(c, r->l_next, o->o_new, vars); activerule = 0; return r; } activerule = 0; return r->l_next; } /* #define _TESTING */ void usage(char *name) { fprintf(stderr, "Usage: %s [-D] [-o output] input rulesfile\n", name); exit(1); } /* main - peephole optimizer */ int main(int argc, char **argv) { FILE *fp, *infile, *outfile = stdout; int pass, option; struct lnode head, *p, tail; opts = NULL; activerule = NULL; htab[0]= NULL; if (argc < 3) usage(argv[0]); while ((option = getopt(argc, argv, "Do:")) != -1) { switch (option) { case 'D': debug = 1; break; case 'o': outfile = fopen(optarg, "w"); if (outfile == NULL) { fprintf(stderr, "Unable to write to %s\n", optarg); exit(1); } break; default: usage(argv[0]); } } // Open the input file if ((infile = fopen(argv[optind], "r")) == NULL) { fprintf(stderr, "Can't open input file %s\n", argv[optind]); exit(1); } // Get the patterns file if ((fp = fopen(argv[optind + 1], "r")) == NULL) { fprintf(stderr, "Can't open patterns file %s\n", argv[optind + 1]); exit(1); } init(fp); getlst(infile, "", &head, &tail); head.l_text = tail.l_text = ""; pass = 0; do { 
++pass; if (debug) fprintf(stderr, "\n--- pass %d ---\n", pass); global_again = 0; for (p = head.l_next; p != &tail; p = opt(p)); } while (global_again && pass < MAX_PASS); if (global_again) { fprintf(stderr, "error: maximum of %d passes exceeded\n", MAX_PASS); error(" check for recursive substitutions"); } printlines(head.l_next, &tail, outfile); exit(0); return 1; /* make compiler happy */ } #define STACKSIZE 20 int sp; int stack[STACKSIZE]; void push(int l) { if (sp < STACKSIZE) stack[sp++] = l; ; } int pop(void) { if (sp > 0) return stack[--sp]; return 0; } int top(void) { if (sp > 0) return stack[sp - 1]; return 0; } int rpn_eval(char *expr, char **vars) { char *ptr = expr; char *endptr; int op2; int n; sp = 0; while (*ptr) { switch (*ptr++) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': n = strtol(ptr - 1, &endptr, 0); if (endptr == ptr - 1) { fprintf(stderr, "Optimiser error, cannot parse number: %s\n", ptr - 1); exit(1); } ptr = endptr; push(n); break; case '+': { int a = pop(); int b = pop(); int c = a + b; push(c); } break; case '*': push(pop() * pop()); break; case '-': op2 = pop(); push(pop() - op2); break; case '|': op2 = pop(); push(pop() | op2); break; case '&': op2 = pop(); push(pop() & op2); break; case '>': op2 = pop(); push(pop() >> op2); break; case '<': op2 = pop(); push(pop() << op2); break; case '/': op2 = pop(); if (op2 != 0) push(pop() / op2); else return 0; // Divide by zero break; case '%': if (isdigit(*ptr)) { // It's a variable char v = *ptr++; char *endpt2; char *val = vars[v - '0']; n = strtol(val, &endpt2, 0); if (endpt2 == val) { fprintf(stderr, "Optimiser error, cannot parse variable: %s\n", val); exit(1); } push(n); } else if (*ptr++ == '%') { op2 = pop(); if (op2 != 0) { push(pop() % op2); } else { return 0; // Divide by zero } } break; } } if (sp != 1) { int i; fprintf(stderr, "Exiting with a stack level of %d\n", sp); for (i = 0; i < sp; i++) { fprintf(stderr, "Stack level 
%d -> %d\n", i, stack[i]); } } return top(); } ================================================ FILE: 64_6809_Target/data.h ================================================ #ifndef extern_ #define extern_ extern #endif // Global variables // Copyright (c) 2019 Warren Toomey, GPL3 extern_ int Line; // Current line number extern_ int Linestart; // True if at start of a line extern_ int Putback; // Character put back by scanner extern_ struct symtable *Functionid; // Symbol ptr of the current function extern_ FILE *Infile; // Input and output files extern_ FILE *Outfile; extern_ FILE *Symfile; // Symbol table file extern_ FILE *Idxfile; // AST offset index file extern_ char *Infilename; // Name of file we are parsing extern_ struct token Token; // Last token scanned extern_ struct token Peektoken; // A look-ahead token extern_ char Text[TEXTLEN + 1]; // Last identifier scanned extern_ int Looplevel; // Depth of nested loops extern_ int Switchlevel; // Depth of nested switches extern char *Tstring[]; // List of token strings ================================================ FILE: 64_6809_Target/decl.c ================================================ #include "defs.h" #include "data.h" #include "expr.h" #include "gen.h" #include "misc.h" #include "opt.h" #include "parse.h" #include "stmt.h" #include "sym.h" #include "target.h" #include "tree.h" #include "types.h" // Parsing of declarations // Copyright (c) 2019 Warren Toomey, GPL3 static struct symtable *composite_declaration(int type); static int typedef_declaration(struct symtable **ctype); static int type_of_typedef(char *name, struct symtable **ctype); static void enum_declaration(void); int declaration_list(struct symtable **ctype, int class, int et1, int et2, struct ASTnode **gluetree); // Parse the current token and return a primitive type enum value, // a pointer to any composite type and possibly modify // the class of the type. 
int parse_type(struct symtable **ctype, int *class) { int type = 0, exstatic = 1; *ctype=NULL; // See if the visibility class has been changed to extern or static while (exstatic) { switch (Token.token) { case T_EXTERN: if (*class == V_STATIC) fatal("Illegal to have extern and static at the same time"); *class = V_EXTERN; scan(&Token); break; case T_STATIC: if (*class == V_LOCAL) fatal("Compiler doesn't support static local declarations"); if (*class == V_EXTERN) fatal("Illegal to have extern and static at the same time"); *class = V_STATIC; scan(&Token); break; default: exstatic = 0; } } // Now work on the actual type keyword switch (Token.token) { case T_VOID: type = P_VOID; scan(&Token); break; case T_CHAR: type = P_CHAR; scan(&Token); break; case T_INT: type = P_INT; scan(&Token); break; case T_LONG: type = P_LONG; scan(&Token); break; // For the following, if we have a ';' after the // parsing then there is no type, so return -1. // Example: struct x {int y; int z}; case T_STRUCT: type = P_STRUCT; *ctype = composite_declaration(P_STRUCT); if (Token.token == T_SEMI) type = -1; break; case T_UNION: type = P_UNION; *ctype = composite_declaration(P_UNION); if (Token.token == T_SEMI) type = -1; break; case T_ENUM: type = P_INT; // Enums are really ints enum_declaration(); if (Token.token == T_SEMI) type = -1; break; case T_TYPEDEF: type = typedef_declaration(ctype); if (Token.token == T_SEMI) type = -1; break; case T_IDENT: type = type_of_typedef(Text, ctype); break; default: fatals("Illegal type, token", Tstring[Token.token]); } return (type); } // Given a type parsed by parse_type(), scan in any following // '*' tokens and return the new type int parse_stars(int type) { while (1) { if (Token.token != T_STAR) break; type = pointer_to(type); scan(&Token); } return (type); } // Parse a type which appears inside a cast int parse_cast(struct symtable **ctype) { int type = 0, class = 0; // Get the type inside the parentheses type = parse_stars(parse_type(ctype, 
&class)); // Do some error checking. I'm sure more can be done if (type == P_STRUCT || type == P_UNION || type == P_VOID) fatal("Cannot cast to a struct, union or void type"); return (type); } // Given a type, parse an expression of literals and ensure // that the type of this expression matches the given type. // Parse any type cast that precedes the expression. // If an integer literal, return this value. // If a string literal, return the label number of the string. int parse_literal(int type) { struct ASTnode *tree; struct symtable *sym; // Parse the expression and optimise the resulting AST tree tree = optimise(binexpr(0)); // If there's a cast, get the child and // mark it as having the type from the cast if (tree->op == A_CAST) { tree->left->type = tree->type; tree = tree->left; } // The tree must now have an integer or string literal if (tree->op != A_INTLIT && tree->op != A_STRLIT) fatal("Cannot initialise globals with a general expression"); // Deal with pointer to literals if (ptrtype(type)) { // If the type is char * and we have a string literal if (type == pointer_to(P_CHAR) && tree->op == A_STRLIT) { // Add it to the string literal symbol // table and return the symbol's id sym= addglob(tree->name, type, NULL, S_STRLIT, V_GLOBAL, 0, 0); return (sym->id); } // We have a zero int literal, so that's a NULL if (tree->op == A_INTLIT && tree->a_intvalue == 0) return (0); } // We only get here with an integer literal. // If the tree is an A_INTLIT and the left type is P_CHAR, // and the INTLIT is in the range 0 to 255, change the trees's // type to PCHAR to ensure we can do the assignment if ((tree->op == A_INTLIT) && (type == P_CHAR) && (tree->a_intvalue >= 0) && (tree->a_intvalue < 256)) tree->type = P_CHAR; // Check that the input type is an integer type // and is wide enough to hold the literal value if (inttype(type) && typesize(type, NULL) >= typesize(tree->type, NULL)) return (tree->a_intvalue); fatal("Type mismatch: literal vs. 
variable"); return (0); // Keep -Wall happy } // Given a pointer to a symbol that may already exist // return true if this symbol doesn't exist. We use // this function to convert externs into globals static int is_new_symbol(struct symtable *sym, int class, int type, struct symtable *ctype) { // There is no existing symbol, thus is new if (sym == NULL) return (1); // global versus extern: if they match that it's not new // and we can convert the class to global if ((sym->class == V_GLOBAL && class == V_EXTERN) || (sym->class == V_EXTERN && class == V_GLOBAL)) { // If the types don't match, there's a problem if (type != sym->type) fatals("Type mismatch between global/extern", sym->name); // Struct/unions, also compare the ctype if (type >= P_STRUCT && ctype != sym->ctype) fatals("Type mismatch between global/extern", sym->name); // If we get to here, the types match, so mark the symbol // as global sym->class = V_GLOBAL; // Return that symbol is not new return (0); } // It must be a duplicate symbol if we get here fatals("Duplicate global variable declaration", sym->name); return (-1); // Keep -Wall happy } // Given the type, name and class of a scalar variable, // parse any initialisation value and allocate storage for it. // Return the variable's symbol table entry. 
static struct symtable *scalar_declaration(char *varname, int type, struct symtable *ctype, int class, struct ASTnode **tree) { struct symtable *sym = NULL; struct ASTnode *varnode, *exprnode; *tree = NULL; // Add this as a known scalar switch (class) { case V_STATIC: case V_EXTERN: case V_GLOBAL: // See if this variable is new or already exists sym = findSymbol(varname, S_NOTATYPE, 0); if (is_new_symbol(sym, class, type, ctype)) sym = addglob(varname, type, ctype, S_VARIABLE, class, 1, 0); break; case V_LOCAL: sym = addmemb(varname, type, ctype, V_LOCAL, S_VARIABLE, 1); break; case V_PARAM: sym = addmemb(varname, type, ctype, V_PARAM, S_VARIABLE, 1); break; case V_MEMBER: sym = addmemb(varname, type, ctype, V_MEMBER, S_VARIABLE, 1); break; } // The variable is being initialised if (Token.token == T_ASSIGN) { // Only possible for a global or local if (class != V_GLOBAL && class != V_LOCAL && class != V_STATIC) fatals("Variable can not be initialised", varname); scan(&Token); // Globals must be assigned a literal value if (class == V_GLOBAL || class == V_STATIC) { // Create one initial value for the variable and // parse this value sym->initlist = (int *) malloc(sizeof(int)); sym->initlist[0] = parse_literal(type); } if (class == V_LOCAL) { // Make an A_IDENT AST node with the variable varnode = mkastleaf(A_IDENT, sym->type, sym->ctype, sym, 0); // Get the expression for the assignment, make into a rvalue exprnode = binexpr(0); exprnode->rvalue = 1; // If the exprnode is an A_INTLIT and the variable type is P_CHAR, // and the INTLIT is in the range 0 to 255, change the exprnode's // type to PCHAR to ensure we can do the assignment if ((exprnode->op == A_INTLIT) && (varnode->type == P_CHAR) && (exprnode->a_intvalue >= 0) && (exprnode->a_intvalue < 256)) exprnode->type = P_CHAR; // Ensure the expression's type matches the variable exprnode = modify_type(exprnode, varnode->type, varnode->ctype, 0); if (exprnode == NULL) fatal("Incompatible expression in assignment"); 
// Make an assignment AST tree *tree = mkastnode(A_ASSIGN, exprnode->type, exprnode->ctype, exprnode, NULL, varnode, NULL, 0); } } return (sym); } // Given the type, name and class of an array variable, parse // the size of the array, if any. Then parse any initialisation // value and allocate storage for it. // Return the variable's symbol table entry. static struct symtable *array_declaration(char *varname, int type, struct symtable *ctype, int class) { struct symtable *sym = NULL; // New symbol table entry int nelems = -1; // Assume the number of elements won't be given int maxelems; // The maximum number of elements in the init list int *initlist; // The list of initial elements int i = 0, j; // Skip past the '[' scan(&Token); // See if we have an array size if (Token.token != T_RBRACKET) { nelems = parse_literal(P_INT); if (nelems <= 0) fatald("Array size is illegal", nelems); } // Ensure we have a following ']' match(T_RBRACKET, "]"); // Add this as a known array. We treat the // array as a pointer to its elements' type switch (class) { case V_STATIC: case V_EXTERN: case V_GLOBAL: // See if this variable is new or already exists sym = findSymbol(varname, S_NOTATYPE, 0); if (is_new_symbol(sym, class, pointer_to(type), ctype)) sym = addglob(varname, pointer_to(type), ctype, S_ARRAY, class, 0, 0); break; case V_LOCAL: // Add the array to the local symbol table. sym = addmemb(varname, pointer_to(type), ctype, V_LOCAL, S_ARRAY, 0); sym->st_hasaddr = 1; break; default: fatal("Declaration of array parameters is not implemented"); } // Array initialisation if (Token.token == T_ASSIGN) { if (class != V_GLOBAL && class != V_STATIC) fatals("Variable can not be initialised", varname); scan(&Token); // Get the following left curly bracket match(T_LBRACE, "{"); #define TABLE_INCREMENT 10 // If the array already has nelems, allocate that many elements // in the list. Otherwise, start with TABLE_INCREMENT. 
if (nelems != -1) maxelems = nelems; else maxelems = TABLE_INCREMENT; initlist = (int *) malloc(maxelems * sizeof(int)); // Loop getting a new literal value from the list while (1) { // Check we can add the next value, then parse and add it if (nelems != -1 && i == maxelems) fatal("Too many values in initialisation list"); initlist[i++] = parse_literal(type); // Increase the list size if the original size was // not set and we have hit the end of the current list if (nelems == -1 && i == maxelems) { maxelems += TABLE_INCREMENT; initlist = (int *) realloc(initlist, maxelems * sizeof(int)); } // Leave when we hit the right curly bracket if (Token.token == T_RBRACE) { scan(&Token); break; } // Next token must be a comma, then comma(); } // Zero any unused elements in the initlist. // Attach the list to the symbol table entry for (j = i; j < sym->nelems; j++) initlist[j] = 0; if (i > nelems) nelems = i; sym->initlist = initlist; } // Set the size of the array and the number of elements // Only externs can have no elements. if (class != V_EXTERN && nelems <= 0) fatals("Array must have non-zero elements", sym->name); sym->nelems = nelems; sym->size = sym->nelems * typesize(type, ctype); return (sym); } // Given a pointer to the new function being declared and // a possibly NULL pointer to the function's previous declaration, // parse a list of parameters and cross-check them against the // previous declaration. Return the count of parameters static int param_declaration_list(struct symtable *oldfuncsym, struct symtable *newfuncsym) { int type, paramcnt = 0; struct symtable *ctype; struct symtable *protoptr = NULL; struct ASTnode *unused; // Get the pointer to the first prototype parameter if (oldfuncsym != NULL) protoptr = oldfuncsym->member; // Loop getting any parameters while (Token.token != T_RPAREN) { // If the first token is 'void' if (Token.token == T_VOID) { // Peek at the next token. If a ')', the function // has no parameters, so leave the loop. 
scan(&Peektoken); if (Peektoken.token == T_RPAREN) { // Move the Peektoken into the Token paramcnt = 0; scan(&Token); break; } } // If an ellipsis (...), mark the function as such if (Token.token == T_ELLIPSIS) { newfuncsym->has_ellipsis= 1; // This must be the last parameter, so expect a ')' scan(&Token); if (Token.token != T_RPAREN) fatal("Expecting right parenthesis after ellipsis"); // Leave the parameter loop break; } // Get the type of the next parameter type = declaration_list(&ctype, V_PARAM, T_COMMA, T_RPAREN, &unused); if (type == -1) fatal("Bad type in parameter list"); if (protoptr != NULL) { // Ensure the type of this parameter matches the prototype if (type != protoptr->type) { fatald("Type doesn't match prototype for parameter", paramcnt + 1); } // Ensure the old/new parameter names also match if (strcmp(Text, protoptr->name)) { fatals("New parameter name doesn't match prototype", Text); } protoptr = protoptr->next; } paramcnt++; // Stop when we hit the right parenthesis if (Token.token == T_RPAREN) break; // We need a comma as separator comma(); } if (oldfuncsym != NULL && paramcnt != oldfuncsym->nelems) fatals("Parameter count mismatch for function", oldfuncsym->name); // Return the count of parameters return (paramcnt); } // // function_declaration: type identifier '(' parameter_list ')' ; // | type identifier '(' parameter_list ')' compound_statement ; // // Parse the declaration of function. static struct symtable *function_declaration(char *funcname, int type, struct symtable *ctype, int class) { struct ASTnode *tree; struct symtable *oldfuncsym, *newfuncsym = NULL; int endlabel = 0, paramcnt; int linenum = Line; // Search for an existing symbol with this name // and point oldfuncsym at it, or NULL. if ((oldfuncsym = findSymbol(funcname, S_NOTATYPE, 0)) != NULL) if (oldfuncsym->stype != S_FUNCTION) oldfuncsym = NULL; // Add the function to the symbol table. 
// Assumption: functions only return scalar types, so NULL below newfuncsym = addglob(funcname, type, NULL, S_FUNCTION, class, 0, 0); newfuncsym->has_ellipsis=0; // Assume no ellipsis for now // NULL the global Functionid so that we don't try to match this // function's parameters against the ones in the previous function Functionid= NULL; // Scan in the '(', any parameters and the ')'. // Pass in any existing function prototype pointer lparen(); paramcnt = param_declaration_list(oldfuncsym, newfuncsym); rparen(); // If this is a new function declaration, update the // function symbol entry with the number of parameters. // Also copy the parameter list into the function's node. if (newfuncsym) { newfuncsym->nelems = paramcnt; oldfuncsym = newfuncsym; } // If the declaration ends in a semicolon, only a prototype. if (Token.token == T_SEMI) { return (oldfuncsym); } // This is not just a prototype. // Set the Functionid global to the function's symbol pointer Functionid = oldfuncsym; // Get the AST tree for the compound statement and mark // that we have parsed no loops or switches yet Looplevel = 0; Switchlevel = 0; lbrace(); tree = compound_statement(0); rbrace(); // If the function type isn't P_VOID ... if (type != P_VOID) { // Error if no statements in the function if (tree == NULL) fatal("No statements in function with non-void type"); // Check that the last AST operation in the // compound statement was a return statement // NOTE! Because we have free'd the tree, // we can't do this any more #if 0 finalstmt = (tree->op == A_GLUE) ? 
tree->right : tree; if (finalstmt == NULL || finalstmt->op != A_RETURN) fatal("No return for function with non-void type"); #endif } // Build the A_FUNCTION node which has the function's symbol pointer // and the compound statement sub-tree tree = mkastunary(A_FUNCTION, type, ctype, tree, oldfuncsym, endlabel); tree->linenum = linenum; // Do optimisations on the AST tree // WAS tree = optimise(tree); // Serialise the tree serialiseAST(tree); freetree(tree, 0); // Flush out the in-memory symbol table. // We are no longer in a function. flushSymtable(); Functionid= NULL; return (oldfuncsym); } // Parse composite type declarations: structs or unions. // Either find an existing struct/union declaration, or build // a struct/union symbol table entry and return its pointer. static struct symtable *composite_declaration(int type) { struct symtable *ctype = NULL; struct symtable *m; struct ASTnode *unused; int offset; int t; // Skip the struct/union keyword scan(&Token); // See if there is a following struct/union name if (Token.token == T_IDENT) { // Find any matching composite type if (type == P_STRUCT) ctype = findstruct(Text); else ctype = findunion(Text); scan(&Token); } // If the next token isn't an LBRACE , this is // the usage of an existing struct/union type. // Return the pointer to the type. if (Token.token != T_LBRACE) { if (ctype == NULL) fatals("unknown struct/union type", Text); return (ctype); } // Ensure this struct/union type hasn't been // previously defined if (ctype) fatals("previously defined struct/union", Text); // Build the composite type and skip the left brace if (type == P_STRUCT) ctype = addtype(Text, P_STRUCT, NULL, S_STRUCT, V_GLOBAL, 0, 0); else ctype = addtype(Text, P_UNION, NULL, S_UNION, V_GLOBAL, 0, 0); scan(&Token); // Scan in the list of members while (1) { // Get the next member. 
m is used as a dummy t = declaration_list(&m, V_MEMBER, T_SEMI, T_RBRACE, &unused); if (t == -1) fatal("Bad type in member list"); if (Token.token == T_SEMI) scan(&Token); if (Token.token == T_RBRACE) break; } // Find the closing parenthesis rbrace(); // Set the offset of the initial member // and find the first free byte after it m = ctype->member; m->st_posn = 0; offset = typesize(m->type, m->ctype); // Set the position of each successive member in the composite type // Unions are easy. For structs, align the member and find the next free byte for (m = m->next; m != NULL; m = m->next) { // Set the offset for this member if (type == P_STRUCT) m->st_posn = genalign(m->type, offset, 1); else m->st_posn = 0; // Get the offset of the next free byte after this member offset += typesize(m->type, m->ctype); } // Set the overall size of the composite type ctype->size = offset; return (ctype); } // Parse an enum declaration static void enum_declaration(void) { struct symtable *etype = NULL; char *name = NULL; int intval = 0; // Skip the enum keyword. scan(&Token); // If there's a following enum type name, get a // pointer to any existing enum type node. if (Token.token == T_IDENT) { etype = findenumtype(Text); name = strdup(Text); // As it gets tromped soon scan(&Token); } // If the next token isn't a LBRACE, check // that we have an enum type name, then return if (Token.token != T_LBRACE) { if (etype == NULL) fatals("undeclared enum type:", name); return; } // We do have an LBRACE. Skip it scan(&Token); // If we have an enum type name, ensure that it // hasn't been declared before. 
if (etype != NULL) fatals("enum type redeclared:", etype->name); // Build an enum type node for this identifier // if there is a name if (name!=NULL) { etype = addtype(name, P_INT, NULL, S_ENUMTYPE, V_GLOBAL, 0, 0); free(name); } // Loop to get all the enum values while (1) { // Ensure we have an identifier // Copy it in case there's an int literal coming up ident(); name = strdup(Text); // Ensure this enum value hasn't been declared before etype = findenumval(name); if (etype != NULL) fatals("enum value redeclared:", name); // If the next token is an '=', skip it and // get the following int literal if (Token.token == T_ASSIGN) { scan(&Token); if ((Token.token != T_INTLIT) && (Token.token != T_CHARLIT)) fatal("Expected int literal after '='"); intval = Token.intvalue; scan(&Token); } // Build an enum value node for this identifier. // Increment the value for the next enum identifier. etype = addglob(name, P_INT, NULL, S_ENUMVAL, V_GLOBAL, 0, intval++); free(name); // Bail out on a right curly bracket, else get a comma if (Token.token == T_RBRACE) break; comma(); } scan(&Token); // Skip over the right curly bracket } // Parse a typedef declaration and return the type // and ctype that it represents static int typedef_declaration(struct symtable **ctype) { int type, class = 0; // Skip the typedef keyword. 
scan(&Token); // Get the actual type following the keyword type = parse_type(ctype, &class); if (class != 0) fatal("Can't have static/extern in a typedef declaration"); // Get any following '*' tokens type = parse_stars(type); // See if the typedef identifier already exists if (findtypedef(Text) != NULL) fatals("redefinition of typedef", Text); // It doesn't exist so add it to the type list addtype(Text, type, *ctype, S_TYPEDEF, class, 0, 0); scan(&Token); return (type); } // Given a typedef name, return the type it represents static int type_of_typedef(char *name, struct symtable **ctype) { struct symtable *t; // Look up the typedef in the list t = findtypedef(name); if (t == NULL) fatals("unknown type", name); scan(&Token); *ctype = t->ctype; return (t->type); } // Parse the declaration of a variable or function. // The type and any following '*'s have been scanned, and we // have the identifier in the Token variable. // The class argument is the symbol's class. // Return a pointer to the symbol's entry in the symbol table static struct symtable *symbol_declaration(int type, struct symtable *ctype, int class, struct ASTnode **tree) { struct symtable *sym = NULL; char *varname = strdup(Text); // Ensure that we have an identifier. // We copied it above so we can scan more tokens in, e.g. // an assignment expression for a local variable. 
ident(); // Deal with function declarations if (Token.token == T_LPAREN) { sym= function_declaration(varname, type, ctype, class); free(varname); return(sym); } // See if this array or scalar variable has already been declared switch (class) { case V_EXTERN: case V_STATIC: case V_GLOBAL: case V_LOCAL: case V_PARAM: if (findlocl(varname, 0) != NULL) fatals("Duplicate local variable declaration", varname); break; case V_MEMBER: if (findmember(varname) != NULL) fatals("Duplicate struct/union member declaration", varname); } // Add the array or scalar variable to the symbol table if (Token.token == T_LBRACKET) { sym = array_declaration(varname, type, ctype, class); *tree = NULL; // Local arrays are not initialised } else sym = scalar_declaration(varname, type, ctype, class, tree); free(varname); return (sym); } // Parse a list of symbols where there is an initial type. // Return the type of the symbols. et1 and et2 are end tokens. int declaration_list(struct symtable **ctype, int class, int et1, int et2, struct ASTnode **gluetree) { int inittype, type; struct symtable *sym; struct ASTnode *tree = NULL; *gluetree = NULL; // Get the initial type. 
If -1, it was // a composite type definition, return this if ((inittype = parse_type(ctype, &class)) == -1) return (inittype); // Now parse the list of symbols while (1) { // See if this symbol is a pointer type = parse_stars(inittype); // Parse this symbol sym = symbol_declaration(type, *ctype, class, &tree); // We parsed a function, there is no list so leave if (sym->stype == S_FUNCTION) { if (class != V_GLOBAL && class != V_STATIC) fatal("Function definition not at global level"); return (type); } // Glue any AST tree from a local declaration // to build a sequence of assignments to perform if (*gluetree == NULL) *gluetree = tree; else *gluetree = mkastnode(A_GLUE, P_NONE, NULL, *gluetree, NULL, tree, NULL, 0); // We are at the end of the list, leave if (Token.token == et1 || Token.token == et2) return (type); // Otherwise, we need a comma as separator comma(); } return (0); // Keep -Wall happy } // Parse one or more global declarations, // either variables, functions or structs void global_declarations(void) { struct symtable *ctype = NULL; struct ASTnode *unused; // Loop parsing one declaration list until the end of file while (Token.token != T_EOF) { declaration_list(&ctype, V_GLOBAL, T_SEMI, T_EOF, &unused); // Skip any separating semicolons if (Token.token == T_SEMI) scan(&Token); } } ================================================ FILE: 64_6809_Target/decl.h ================================================ /* decl.c */ int parse_type(struct symtable **ctype, int *class); int parse_stars(int type); int parse_cast(struct symtable **ctype); int parse_literal(int type); int declaration_list(struct symtable **ctype, int class, int et1, int et2, struct ASTnode **gluetree); void global_declarations(void); ================================================ FILE: 64_6809_Target/defs.h ================================================ #include #include #include #include // Structure and enum definitions // Copyright (c) 2019 Warren Toomey, GPL3 enum { TEXTLEN = 512 // 
Length of identifiers in input }; // Commands and default filenames #define AOUT "a.out" #define ASCMD "as6809 -o " #define LDCMD "ld6809 -o %s /tmp/crt0.o %s /opt/fcc/lib/6809/libc.a /opt/fcc/lib/6809/lib6809.a -m %s.map" #define CPPCMD "cpp -nostdinc -isystem " // Token types enum { T_EOF, // Binary operators T_ASSIGN, T_ASPLUS, T_ASMINUS, // 1 T_ASSTAR, T_ASSLASH, T_ASMOD, // 4 T_QUESTION, T_LOGOR, T_LOGAND, // 7 T_OR, T_XOR, T_AMPER, // 10 T_EQ, T_NE, // 13 T_LT, T_GT, T_LE, T_GE, // 15 T_LSHIFT, T_RSHIFT, // 19 T_PLUS, T_MINUS, T_STAR, T_SLASH, T_MOD, // 21 // Other operators T_INC, T_DEC, T_INVERT, T_LOGNOT, // 26 // Type keywords T_VOID, T_CHAR, T_INT, T_LONG, // 30 // Other keywords T_IF, T_ELSE, T_WHILE, T_FOR, T_RETURN, // 34 T_STRUCT, T_UNION, T_ENUM, T_TYPEDEF, // 39 T_EXTERN, T_BREAK, T_CONTINUE, T_SWITCH, // 43 T_CASE, T_DEFAULT, T_SIZEOF, T_STATIC, // 47 // Structural tokens T_INTLIT, T_STRLIT, T_SEMI, T_IDENT, // 51 T_LBRACE, T_RBRACE, T_LPAREN, T_RPAREN, // 55 T_LBRACKET, T_RBRACKET, T_COMMA, T_DOT, // 59 T_ARROW, T_COLON, T_ELLIPSIS, T_CHARLIT, // 63 // Misc T_FILENAME, T_LINENUM // 67 }; // Token structure struct token { int token; // Token type, from the enum list above char *tokstr; // String version of the token int intvalue; // For T_INTLIT, the integer value }; // AST node types. The first few line up // with the related tokens enum { A_ASSIGN = 1, A_ASPLUS, A_ASMINUS, A_ASSTAR, // 1 A_ASSLASH, A_ASMOD, A_TERNARY, A_LOGOR, // 5 A_LOGAND, A_OR, A_XOR, A_AND, A_EQ, A_NE, A_LT, // 9 A_GT, A_LE, A_GE, A_LSHIFT, A_RSHIFT, // 16 A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE, A_MOD, // 21 A_INTLIT, A_STRLIT, A_IDENT, A_GLUE, // 26 A_IF, A_WHILE, A_FUNCTION, A_WIDEN, A_RETURN, // 30 A_FUNCCALL, A_DEREF, A_ADDR, A_SCALE, // 35 A_PREINC, A_PREDEC, A_POSTINC, A_POSTDEC, // 39 A_NEGATE, A_INVERT, A_LOGNOT, A_TOBOOL, A_BREAK, // 43 A_CONTINUE, A_SWITCH, A_CASE, A_DEFAULT, A_CAST // 48 }; // Primitive types. 
The bottom 4 bits is an integer // value that represents the level of indirection, // e.g. 0= no pointer, 1= pointer, 2= pointer pointer etc. enum { P_NONE, P_VOID = 16, P_CHAR = 32, P_INT = 48, P_LONG = 64, P_STRUCT=80, P_UNION=96 }; // A symbol in the symbol table is // one of these structural types. enum { S_VARIABLE, S_FUNCTION, S_ARRAY, S_ENUMVAL, S_STRLIT, S_STRUCT, S_UNION, S_ENUMTYPE, S_TYPEDEF, S_NOTATYPE }; // Visibilty class for symbols enum { V_GLOBAL, // Globally visible symbol V_EXTERN, // External globally visible symbol V_STATIC, // Static symbol, visible in one file V_LOCAL, // Locally visible symbol V_PARAM, // Locally visible function parameter V_MEMBER // Member of a struct or union }; // Symbol table structure struct symtable { char *name; // Name of a symbol int id; // Numeric id of the symbol int type; // Primitive type for the symbol struct symtable *ctype; // If struct/union, ptr to that type int ctypeid; // Numeric id of the struct/union type int stype; // Structural type for the symbol int class; // Visibility class for the symbol int size; // Total size in bytes of this symbol // For functions: size 1 means ... (ellipsis) #define has_ellipsis size int nelems; // Functions: # params. Arrays: # elements. int st_hasaddr; // For locals, 1 if any A_ADDR operation #define st_endlabel st_posn // For functions, the end label #define st_label st_posn // For string literals, the associated label int st_posn; // For locals, the negative offset // from the stack base pointer. // For struct members, the offset of // the member from the base of the struct int *initlist; // List of initial values struct symtable *next; // Next symbol in the symbol table struct symtable *member; // List of member of struct, union or enum. }; // For functions, list of parameters & locals. 
// Abstract Syntax Tree structure struct ASTnode { int op; // "Operation" to be performed on this tree int type; // Type of any expression this tree generates struct symtable *ctype; // If struct/union, ptr to that type int rvalue; // True if the node is an rvalue struct ASTnode *left; // Left, middle and right child trees struct ASTnode *mid; struct ASTnode *right; int nodeid; // Node id when tree is serialised int leftid; // Numeric ids when serialised int midid; int rightid; struct symtable *sym; // For many AST nodes, the pointer to // the symbol in the symbol table char *name; // The symbol's name (used by serialiser) int symid; // Symbol's unique id (used by serialiser) #define a_intvalue a_size // For A_INTLIT, the integer value int a_size; // For A_SCALE, the size to scale by int linenum; // Line number from where this node comes }; enum { NOREG = -1, // Use NOREG when the AST generation // functions have no register to return NOLABEL = 0 // Use NOLABEL when we have no label to // pass to genAST() }; ================================================ FILE: 64_6809_Target/desym.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ // Symbol table deserialiser // Copyright (c) 2024 Warren Toomey, GPL3 // Read at most count-1 characters from the // f FILE and store them in the s buffer. // Terminate the s buffer with a NUL. // Return NULL if unable to read or an EOF. // Else, return the original s pointer pointer. char *fgetstr(char *s, size_t count, FILE * f) { size_t i = count; size_t err; char ch; char *ret = s; while (i-- != 0) { err= fread(&ch, 1, 1, f); if (err!=1) { if (s == ret) return(NULL); break; } *s++ = ch; if (ch == 0) break; } *s = 0; return(ferror(f) ? (char *) NULL : ret); } // Read one symbol in. Return -1 if none. 
int deserialiseSym(struct symtable *sym, FILE *in) { struct symtable *memb, *last; // Read one symbol struct in from in if (fread(sym, sizeof(struct symtable), 1, in)!=1) return(-1); // If the type is P_NONE, skip this one and read in // another symbol. This marks the division between // AST trees if (sym->type==P_NONE) { // Debug: printf("Skipping a P_NONE symbol\n"); if (fread(sym, sizeof(struct symtable), 1, in)!=1) return(-1); } // Get the symbol name if (sym->name != NULL) { fgetstr(Text, TEXTLEN + 1, in); sym->name= strdup(Text); } // Get any initial values if (sym->initlist != NULL) { sym->initlist= (int *)malloc(sym->nelems* sizeof(int)); fread(sym->initlist, sizeof(int), sym->nelems, in); } // If there are any members, read them in if (sym->member != NULL) { sym->member= last= NULL; while (1) { // Create an empty symbol struct memb= (struct symtable *)malloc(sizeof(struct symtable)); // Read the struct in from the in file // Stop if no symbols in the file if (deserialiseSym(memb, in)== -1) return(0); // Attach this to either the head or the last if (sym->member==NULL) { sym->member= last= memb; } else { last->next= memb; last= memb; } // Stop if there is no next member if (memb->next == NULL) return(0); } } // For now set ctype to NULL sym->ctype=NULL; return(0); } void dumptable(struct symtable *head, int indent); // Dump a single symbol void dumpsym(struct symtable *sym, int indent) { int i; for (i = 0; i < indent; i++) printf(" "); switch (sym->type & (~0xf)) { case P_VOID: printf("void "); break; case P_CHAR: printf("char "); break; case P_INT: printf("int "); break; case P_LONG: printf("long "); break; case P_STRUCT: printf("struct "); break; case P_UNION: printf("union "); break; default: printf("unknown type "); } for (i = 0; i < (sym->type & 0xf); i++) printf("*"); printf("%s", sym->name); switch (sym->stype) { case S_VARIABLE: break; case S_FUNCTION: printf("()"); break; case S_ARRAY: printf("[]"); break; case S_STRUCT: printf(": struct"); break; 
case S_UNION: printf(": union"); break; case S_ENUMTYPE: printf(": enum"); break; case S_ENUMVAL: printf(": enumval"); break; case S_TYPEDEF: printf(": typedef"); break; case S_STRLIT: printf(": strlit"); break; default: printf(" unknown stype"); } printf(" id %d", sym->id); switch (sym->class) { case V_GLOBAL: printf(": global"); break; case V_LOCAL: printf(": local offset %d", sym->st_posn); break; case V_PARAM: printf(": param offset %d", sym->st_posn); break; case V_EXTERN: printf(": extern"); break; case V_STATIC: printf(": static"); break; case V_MEMBER: printf(": member"); break; default: printf(": unknown class"); } if (sym->st_hasaddr!=0) printf(", hasaddr "); switch (sym->stype) { case S_VARIABLE: printf(", size %d", sym->size); break; case S_FUNCTION: printf(", %d params", sym->nelems); break; case S_ARRAY: printf(", %d elems, size %d", sym->nelems, sym->size); break; } printf(", ctypeid %d, nelems %d st_posn %d\n", sym->ctypeid, sym->nelems, sym->st_posn); if (sym->initlist != NULL) { printf(" initlist: "); for (i=0; i< sym->nelems; i++) printf("%d ", sym->initlist[i]); printf("\n"); } if (sym->member != NULL) dumptable(sym->member, 4); } // Dump one symbol table void dumptable(struct symtable *head, int indent) { struct symtable *sym; for (sym = head; sym != NULL; sym = sym->next) dumpsym(sym, indent); } int main(int argc, char **argv) { FILE *in; struct symtable sym; if (argc !=2) { fprintf(stderr, "Usage: %s symbolfile\n", argv[0]); exit(1); } in= fopen(argv[1], "r"); if (in==NULL) { fprintf(stderr, "Unable to open %s\n", argv[1]); exit(1); } while (1) { if (deserialiseSym(&sym, in)== -1) break; dumpsym(&sym, 0); } exit(0); return(0); } ================================================ FILE: 64_6809_Target/detok.c ================================================ #include "defs.h" // C Lexical scanning: detokeniser // Copyright (c) 2023 Warren Toomey, GPL3 char Text[TEXTLEN + 1]; // Last identifier scanned extern char *Tstring[]; // Read at most 
count-1 characters from the // f FILE and store them in the s buffer. // Terminate the s buffer with a NUL. // Return NULL if unable to read or an EOF. // Else, return the original s pointer pointer. char *fgetstr(char *s, size_t count, FILE * f) { size_t i = count; int ch; char *ret = s; while (i-- != 0) { if ((ch = getc(f)) == EOF) { if (s == ret) return(NULL); break; } *s++ = (char) ch; if (ch == 0) break; } *s = 0; return(ferror(f) ? (char *) NULL : ret); } int main(int argc, char **argv) { FILE *in; int token; int intval; if (argc !=2) { fprintf(stderr, "Usage: %s tokenfile\n", argv[0]); exit(1); } in= fopen(argv[1], "r"); if (in==NULL) { fprintf(stderr, "Unable to open %s\n", argv[1]); exit(1); } // Read until no more tokens left while (1) { token = fgetc(in); if (token == EOF) break; switch (token) { case T_INTLIT: case T_CHARLIT: fread(&intval, sizeof(int), 1, in); printf("%02X: %d\n", token, intval); break; case T_STRLIT: fgetstr(Text, TEXTLEN + 1, in); printf("%02X: \"%s\"\n", token, Text); break; case T_FILENAME: fgetstr(Text, TEXTLEN + 1, in); printf("%02X: filename \"%s\"\n", token, Text); break; case T_LINENUM: fread(&intval, sizeof(int), 1, in); printf("%02X: linenum %d\n", token, intval); break; case T_IDENT: fgetstr(Text, TEXTLEN + 1, in); printf("%02X: %s\n", token, Text); break; default: printf("%02X: %s\n", token, Tstring[token]); } } return (0); } ================================================ FILE: 64_6809_Target/detree.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "misc.h" #include "tree.h" // Deserialise an AST // Copyright (c) 2023 Warren Toomey, GPL3 int showglue=0; // Generate and return a new label number // just for AST dumping purposes static int dumpid = 1; static int gendumplabel(void) { return (dumpid++); } // List of AST node names static char *astname[] = { NULL, "ASSIGN", "ASPLUS", "ASMINUS", "ASSTAR", "ASSLASH", "ASMOD", "TERNARY", "LOGOR", 
"LOGAND", "OR", "XOR", "AND", "EQ", "NE", "LT", "GT", "LE", "GE", "LSHIFT", "RSHIFT", "ADD", "SUBTRACT", "MULTIPLY", "DIVIDE", "MOD", "INTLIT", "STRLIT", "IDENT", "GLUE", "IF", "WHILE", "FUNCTION", "WIDEN", "RETURN", "FUNCCALL", "DEREF", "ADDR", "SCALE", "PREINC", "PREDEC", "POSTINC", "POSTDEC", "NEGATE", "INVERT", "LOGNOT", "TOBOOL", "BREAK", "CONTINUE", "SWITCH", "CASE", "DEFAULT", "CAST" }; // Given an AST node, print it out and then // recursively deal with the sub-nodes. void dumpAST(struct ASTnode *n, int label, int level) { int Lfalse, Lstart, Lend; int i; struct ASTnode *nleft=NULL, *nmid=NULL, *nright=NULL; if (n == NULL) fatal("NULL AST node"); if (n->op > A_CAST) fatald("Unknown dumpAST operator", n->op); // Load in the sub-nodes if (n->leftid) nleft=loadASTnode(n->leftid,0); if (n->midid) nmid=loadASTnode(n->midid,0); if (n->rightid) nright=loadASTnode(n->rightid,0); // Deal with IF and WHILE statements specifically switch (n->op) { case A_IF: Lfalse = gendumplabel(); for (i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "IF"); if (nright) { Lend = gendumplabel(); fprintf(stdout, ", end L%d", Lend); } fprintf(stdout, " (id %d)\n", n->nodeid); dumpAST(nleft, Lfalse, level + 2); dumpAST(nmid, NOLABEL, level + 2); if (nright) dumpAST(nright, NOLABEL, level + 2); free(n); return; case A_WHILE: Lstart = gendumplabel(); for (i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "WHILE start L%d (id %d)\n", Lstart, n->nodeid); Lend = gendumplabel(); dumpAST(nleft, Lend, level + 2); if (nright) dumpAST(nright, NOLABEL, level + 2); free(n); return; } // Reset level to -2 for A_GLUE nodes if (n->op == A_GLUE) { if (showglue) fprintf(stdout, "glue %d %d\n", n->leftid, n->rightid); level -= 2; } else { // General AST node handling for (i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "%s", astname[n->op]); if (n->symid != 0) fprintf(stdout, " symid %d", n->symid); switch (n->op) { case A_FUNCTION: case A_FUNCCALL: case A_ADDR: case 
A_PREINC: case A_POSTINC: if (n->name != NULL) fprintf(stdout, " %s", n->name); break; case A_INTLIT: fprintf(stdout, " %d", n->a_intvalue); break; case A_STRLIT: fprintf(stdout, " rval \"%s\"", n->name); break; case A_IDENT: if (n->name != NULL) { if (n->rvalue) fprintf(stdout, " rval %s", n->name); else fprintf(stdout, " %s", n->name); } break; case A_DEREF: if (n->rvalue) fprintf(stdout, " rval"); break; case A_SCALE: fprintf(stdout, " %d", n->a_size); break; case A_CASE: fprintf(stdout, " %d", n->a_intvalue); break; case A_CAST: fprintf(stdout, " %d", n->type); break; } fprintf(stdout, " (id %d)\n", n->nodeid); } // General AST node handling if (nleft) dumpAST(nleft, NOLABEL, level + 2); if (nmid) dumpAST(nmid, NOLABEL, level + 2); if (nright) dumpAST(nright, NOLABEL, level + 2); if (n->name!=NULL) free(n->name); free(n); } int main(int argc, char **argv) { struct ASTnode *node; int fileid= 1; if (argc !=2 && argc!=3) { fprintf(stderr, "Usage: %s [-g] astfile\n", argv[0]); exit(1); } if (!strcmp(argv[1], "-g")) { showglue=1; fileid=2; } Infile= fopen(argv[fileid], "r"); if (Infile==NULL) { fprintf(stderr, "Unable to open %s\n", argv[fileid]); exit(1); } Idxfile= tmpfile(); mkASTidxfile(); // Build the AST index offset file // Loop reading the next function's top node in from file while (1) { node= loadASTnode(0, 1); if (node==NULL) break; // Dump the function's tree dumpAST(node, NOLABEL, 0); printf("\n\n"); } return (0); } ================================================ FILE: 64_6809_Target/docs/NOTES.md ================================================ ## Thu 16 May 2024 10:41:14 AEST A start on cwj for the 6809. I'm using a simple approach: use memory locations as the registers. Later on, I'll improve the code quality. I also need to split the compiler into phases. One to parse and make the AST and the symbol table. The other to output the assembly from the AST and symbol table. Hopefully I can get it all to fit into 64K! 
Right now I can do: ``` int p=3; int q=4; int main() { p= p + q; return(0); } ``` ## Thu 16 May 2024 14:55:39 AEST I added the code to push arguments and fix up the stack after a function call. I can call `printint()` which works. However, my compiler isn't adding the leading underscore. To fix. ## Thu 16 May 2024 15:13:22 AEST Fixed the `_` by adding it to all the `fprintf()`s in `cg.c`. ## Thu 16 May 2024 16:49:54 AEST Hmm. Small int lits are treated as char lits, but I need them to be int sized for `printf()`. Ah. I already did this when I last tried to write a 6809 compiler using `cwj`, see `cloud/Nine_E/Old/Compiler` dated June 2023! I've imported some of that code which has helped. ## Sat 18 May 2024 10:00:44 AEST OK I've finished the conversion of `cg.c` to 6809 but no testing yet. Just about to change over to the `crt0` which has stdio support, because all the tests use printf. I'm using the `libc` compiled by `fcc` for now. ## Sat 18 May 2024 13:44:47 AEST Not quite finished. I'd forgotten the comparisons. I've done them except for longs. Now up to input009.c: OK. ## Sat 18 May 2024 14:33:42 AEST Now up to input022.c: OK. ## Sat 18 May 2024 15:11:34 AEST Now up to input054.c: OK ## Sat 18 May 2024 15:24:25 AEST Now up to input114.c: OK Wow! ## Mon 20 May 2024 10:42:50 AEST Test 115 was a sizeof test which is now different on the 6809 cf. amd64 :-) So now we are up to input135.c: OK. Test 136 is essentially this: ``` result= 3 * add(2,3) - 5 * add(4,6); ``` I've checked the debug output and the `add()` is working fine. However, on the first `add()` return I see: ``` lbsr _add leas 4,s puls d std R0 std R1 ``` which doesn't make sense as `R0` has the 3 from the `3*`. I see the problem. We push any in-use local registers on the stack before a function call. On return, we pop them off and restore them. Then we save the function's return value. But the function uses `Y,D` to hold the return value, and these are getting destroyed by the register popping. 
OK fixed with a slightly ugly fix. Now up to input143.c OK. ## Mon 20 May 2024 12:33:02 AEST Yay. I now pass all the tests :-) I had to import some stuff from the Fuzix include files into my include files. That means I can now start on breaking the compiler up into phases. There are going to be eight phases. 1. The C pre-processor interprets #include, #ifdef and the pre-processor macros. 2. The lexer reads this and produces a token stream. 3. The parser reads the token stream and creates a symbol table plus a set of AST trees. 4. The AST optimiser reads the trees and optimises them. 5. The code generator reads the new AST trees and the symbol table, and generates assembly code. 6. The peephole optimiser improves the assembly code. 7. The assembler produces object files. 8. The linker takes crt0.o, the object files and several libraries and produces an executable. As well, there will be debug tools to: - dump the token stream, - dump the AST trees, and - dump the symbol table For the symbol table, it will be in a file that has the globals, prototypes, struct/union/typedef defines. Then followed by sections that have the per-function symbols. The AST file will have several separate AST trees, one for each function. I already did some of this when I was working on PipeC, so I can borrow this code. Yay! ## Mon 20 May 2024 14:55:17 AEST I've got separate scanner and detok programs with the code borrowed from PipeC. I also added tokens that hold the new filename and new linenumber when these change. The weird thing is that, from this input: ``` # 0 "scan.c" # 0 "" # 0 "" # 1 "scan.c" # 1 "defs.h" 1 # 1 "/tmp/include/stdlib.h" 1 3 4 # 4 "/tmp/include/stdlib.h" 3 4 void exit(int status); ... ``` I get these tokens: ``` 1E: void 43: filename "/tmp/include/stdlib.h" 44: linenum 4 36: exit 39: ( 20: int 36: status 3A: ) 35: ; ``` with the `void` coming before the filename! It's because of the `scan()` recursion. I've got it fixed now. 
The line numbers do seem to be a bit out, though. ## Tue 21 May 2024 09:57:34 AEST So, I've rebuilt the compiler with a front-end which reads the token stream from stdin and calls `global_declarations()` to start the compilation. Right now it outputs mostly the same assembly code, except for: ``` input021.s has changed in the tree input058.s has changed in the tree input084.s has changed in the tree input089.s has changed in the tree input090.s has changed in the tree input134.s has changed in the tree input140.s has changed in the tree input148.s has changed in the tree ``` So now I've got some stuff to look at! Ah, I'd forgotten the CHARLITs in the token decoder. Fixed. Now the compiler produces the same assembly files for all tests. Yay! ## Tue 21 May 2024 11:08:53 AEST I'm now working on the parsing phase. I've got the code now to serialise all the ASTs out to stdout and there is a `detree` to print the trees. There's a small problem that we used to generate assembly for string literals along with labels while parsing. I've commented this out for now but it will need fixing. Now I need to dink `gen.c` out from the parser, and also work out how to dump the symbol table! ## Tue 21 May 2024 11:24:39 AEST I've pulled `gen.c` and `cg.c` out from the parser. I had to abstract out a few functions from these files into a new `target.c` file which can be shared by the parser and code generator. Now to think about the symbol table serialisation. I'm worried that we might do: ``` ``` If we dump out the global symbol table before `function1` then we won't have the second global variable. Can I just serialise symbols as they get declared? No as they get modified e.g. an array with declared elements. Perhaps I keep pointers to the last symbols dumped, and dump from there each time we complete a function? ## Tue 21 May 2024 16:53:06 AEST OK, I've got the start for the serialisation code. Now need to write the deserialiser and see where the bugs are. 
## Wed 22 May 2024 08:28:42 AEST I rewrote the serialising code a bit. It runs and doesn't crash. Now to write the deserialiser! OK, I've got a start so I can see what's in the symbol file. Still not right as yet. Found a couple of bugs and fixed them. Right now I'm only dumping the symbols, I'm not rebuilding the symbol table. That will require a new program, the code generator. This will do the symbol table and AST deserialising, and have: ``` gen.c misc.c sym.c tree.c type.c cg.c ``` ## Wed 22 May 2024 15:26:54 AEST I now have a code generator and it's actually working. I had to fix a bunch of things like storing string lits in the AST so I could generate them later. But I'm now passing the first dozen tests! Yay! We are now up to input025.c: OK. Wow. Test 26 is failing because none of the params or locals have a frame offset. ## Wed 22 May 2024 16:42:47 AEST Actually they do, but I had a bug in my find symbol by id code. We are now up to input057.c OK. ## Thu 23 May 2024 10:17:43 AEST Ah, I was serialising the globals first in the symbol table but they may be of struct type, so I need to output the structs, enums and unions before the globals. Now up to input073.c: OK. That was because I switched to the literal segment from the text segment, and went to the data segment not back to the code segment! Now up to input088.c: OK. So the problem is: ``` char *z= "Hello world"; ``` In the original code, we could generate the string literal, get a label for it and then generate the global `z` with the label. Now the string gets lost and `z` is initialised to zero. We also have to deal with `char *z=NULL;` also. I was thinking of putting the characters of the string in the symbol's `initlist` and setting `nelems` to the string length, but what if the initial value is NULL? Can I set `nelems` to zero? But what if we do `char *z= "";`, so not NULL but no characters? Hmm ... No that's not an answer. 
And we also need to support: ``` char *fred[]= { "Hello", "there", "Warren", "piano", NULL }; ``` which the original `cwj` compiler supported. Maybe I need another symbol table for string literals. Then I can serialise that and generate the strings and labels in the back end? ## Fri 24 May 2024 06:32:06 AEST My solution will be the string literal symbol tables. The initlist for `char *` globals will have the symid dumped in the symfile. In the generator, we load the symbol tables. We generate the asm code for the string literals & make a label. Then when we hit a `char *` global we replace the symids in the initlist with the relevant labels. ## Fri 24 May 2024 09:42:04 AEST OK. Implemented. Seems to work. We are now up to input098.c: OK. Actually, I forgot about arrays of strings. Now fixed and we are up to input129.c: OK. Looks like we are not dealing with this string literal as an expression: ``` "Hello " "world" "\n" ``` ## Sat 25 May 2024 07:49:15 AEST Ah yes, I wasn't incrementing `litlast` correctly in `primary()`, now fixed. And now we pass all the tests! Some of the error reporting isn't exactly right, but that's fine. Yay, we have reached a milestone :-) I was thinking of bringing the QBE backend back in as well. Then we will have two backends which might help reveal more bugs. Hopefully it won't be too hard to adapt the existing backend to suit the rearranged compiler. And I also have to write the frontend which I'll call `wcc.c` to run all the phases correctly. 
So here's a TODO list: - create the QBE backend, get it to work correctly - make it possible to build the compiler with each backend - change the final location to be `/opt/wcc` a la `fcc` - tease out the tree optimisation as another phase, and add some of the `SubC` optimisations - bring in the peephole optimiser from `fcc` and make it a phase - improve the 6809 code generation - add in `register, volatile, unsigned, float, double, const` - Eventually, start compiling the 6809 libc and see what other language features I need to add. This is going to take quite a while! ## Sat 25 May 2024 08:30:19 AEST I've rearranged the filenames and massaged the Makefile so I now have 6809-specific executables for "parse" and "gen". I imported the QBE backend. The old compiler used to pass the type down into the `cg` functions, whereas the new compiler doesn't do this. I think I can go back to the old way, as it would help with other backends apart from 6809 and QBE. OK, I've nearly done everything. The new code pushes the function args with one `cg` function and does the call with another. The existing QBE backend does it in one function. So I'll need to split that up. But that's the only thing left to do, apart from testing it :-) Actually the reason it changed for QBE is that QBE does the call first and the list of args second. I guess I could change the 6809 version to use this. ## Sat 25 May 2024 17:14:01 AEST I've done the change and fixed a bug or two along the way. We are up to test input135.c: OK, so it's very close! All tests now pass, I'd forgotten about spilling registers before calling a function. ## Sun 26 May 2024 08:09:25 AEST I'm working on `cgqbe.c` now. Almost done, I need to write `cgswitch()` in the QBE file; it was in `gen.c` in the QBE version previously. ## Sun 26 May 2024 11:01:30 AEST I've written the `cgswitch()` in `cgqbe.c` and also added code to delay outputting string literals until after the code. We are up to input010.c: OK. 
A small issue, now up to input057.c: OK. ## Sun 26 May 2024 11:42:03 AEST The size of INTLITs used to access a struct member are longs in QBE, ints on the 6809. Another target function added. We are up to input062 which is the endianness test. I'll have to remove it for now. Now up to input074.c which is the first switch test and which is failing at present. ## Sun 26 May 2024 12:19:05 AEST I've fixed up the bug in `cgqbe.c` and also rearranged the switch generation code. Now all tests pass on both the 6809 and the QBE side. Yay, I now have a compiler with two back ends!!! ## Tue 28 May 2024 09:23:11 AEST I've written the front-end `wcc` and it now works for both QBE and 6809. I've moved install to `/opt/wcc`. I've rewritten `runtests` and `onetest`. Right now, for some reason, one of the QBE tests is failing. Sigh! ## Tue 28 May 2024 10:34:14 AEST Ah, there are several things in `include` which are different between 6809 and QBE. So we now have two `include` trees, one for each platform. We now pass the tests again! ## Tue 28 May 2024 11:12:20 AEST I just got the peephole optimiser added to the front-end. My first rule failed! Wonder why. Ah, I'd written the rule wrong, fixed. The 6809 tests pass with the peephole optimiser working. ## Tue 28 May 2024 11:50:54 AEST I teased out the AST optimiser as a standalone program, leaving it still in the parser. It seems to work. However, I then tried to remove the optimiser from the parser with problems: We have this: ``` decl.c: // Parse the expression and optimise the resulting AST tree decl.c: tree = optimise(binexpr(0)); ``` With this line changed to `tree=binexpr(0);` instead, we die on test 112 with `Cannot initialise globals with a general expression`. Why: ``` int x= 10 + 6; ``` These need to be resolved at parse time. Argh! Perhaps I can keep some of the tree optimisation code in the parser? Don't know. 
Just looking at the current amd64 binary sizes: ``` text data bss dec hex filename 9973 1008 4200 15181 3b4d wcc 8902 676 680 10258 2812 cscan 13695 752 1328 15775 3d9f cpeep 38772 1880 936 41588 a274 cparse6809 47917 1456 1064 50437 c505 cgen6809 38796 1880 936 41612 a28c cparseqbe 35088 1264 968 37320 91c8 cgenqbe ``` It looks like the 6809 generator needs some dieting before the parser does. I decided to try compiling the compiler code itself with the 6809 compiler: ``` $ for i in *.c; do wcc -c -m 6809 $i; done Expecting a primary expression, got token:void on line 27 of cgen.c Type mismatch: literal vs. variable on line 20 of cgqbe.c Unrecognised character \ Unrecognised character \ Unrecognised character \ Unrecognised character \ Unrecognised character \ Type mismatch: literal vs. variable on line 35 of cpeep.c Expecting a primary expression, got token:void on line 23 of ctreeopt.c Expecting a primary expression, got token:void on line 25 of desym.c Unrecognised character \ Unrecognised character \ Expecting a primary expression, got token:void on line 22 of detok.c Expecting a primary expression, got token:void on line 25 of detree.c Expecting a primary expression, got token:void on line 27 of parse.c & operator must be followed by an identifier on line 604 of scan.c Unrecognised character \ Unknown variable or function:s on line 143 of tree.c Expecting a primary expression, got token:} on line 17 of wcc.h $ ls *.o 71859 May 28 13:30 cg6809.o 688 May 28 13:26 crt0.o 33009 May 28 13:30 decl.o 29981 May 28 13:30 expr.o 22461 May 28 13:30 gen.o 1342 May 28 13:30 misc.o 3639 May 28 13:30 opt.o 12545 May 28 13:30 stmt.o 22795 May 28 13:30 sym.o 1053 May 28 13:30 targ6809.o 1393 May 28 13:30 targqbe.o 126 May 28 13:30 tstring.o 6732 May 28 13:30 types.o ``` Interesting. I need to find out why the scanner/parser is dieing. Also `cg6809.o` is way too big! 
## Tue 28 May 2024 14:03:18 AEST I fixed the bug where the scanner could not deal with `\"` inside string literals. We now have: ``` Expecting a primary expression, got token:void on line 27 of cgen.c Type mismatch: literal vs. variable on line 20 of cgqbe.c Type mismatch: literal vs. variable on line 35 of cpeep.c Expecting a primary expression, got token:void on line 23 of ctreeopt.c Expecting a primary expression, got token:void on line 25 of desym.c Expecting a primary expression, got token:void on line 22 of detok.c Expecting a primary expression, got token:void on line 25 of detree.c Expecting a primary expression, got token:void on line 27 of parse.c & operator must be followed by an identifier on line 612 of scan.c Expecting a primary expression, got token:} on line 17 of wcc.h ``` The problem is that the line numbers are a bit bogus :-( Ah, the bug is that my compiler doesn't allow `return ;`, it only likes `return();`. ## Tue 28 May 2024 14:26:43 AEST I tried to fix it but it's hard. We have to be able to deal with: ``` return( (void *)0 ); and return (void *)0 ; ``` For now I'll just fix my own code. Done. ## Tue 28 May 2024 14:47:59 AEST The `& operator must be followed by an identifier` is because we can't do this yet: ``` mary= &fred.x; ``` OK, down to these: ``` $ for i in *.c; do wcc -S -m6809 $i; done Type mismatch: literal vs. variable on line 20 of cgqbe.c Type mismatch: literal vs. 
variable on line 35 of cpeep.c ``` ## Tue 28 May 2024 15:14:56 AEST I'm down to one source file now: ``` $ for i in *.c; do wcc -c -m6809 $i; done Incompatible types in binary expression on line 95 of cpeep.c $ ls *.o 71859 May 28 15:14 cg6809.o 12497 May 28 15:14 cgen.o 37028 May 28 15:14 cgqbe.o 688 May 28 15:13 crt0.o 8427 May 28 15:14 ctreeopt.o 33009 May 28 15:14 decl.o 4832 May 28 15:14 desym.o 3093 May 28 15:14 detok.o 3336 May 28 15:14 detree.o 29981 May 28 15:14 expr.o 22461 May 28 15:14 gen.o 1342 May 28 15:14 misc.o 3639 May 28 15:14 opt.o 8853 May 28 15:14 parse.o 23115 May 28 15:14 scan.o 12545 May 28 15:14 stmt.o 22795 May 28 15:14 sym.o 1053 May 28 15:14 targ6809.o 1393 May 28 15:14 targqbe.o 9685 May 28 15:14 tree.o 126 May 28 15:14 tstring.o 6732 May 28 15:14 types.o 19125 May 28 15:14 wcc.o ``` I decided to get `fcc` to compile the code. The results are: ``` $ ls *.o 35320 May 28 15:21 cg6809.o 5587 May 28 15:21 cgen.o 16838 May 28 15:21 cgqbe.o 17642 May 28 15:21 cpeep.o 3965 May 28 15:21 ctreeopt.o 14467 May 28 15:21 decl.o 2308 May 28 15:21 desym.o 1569 May 28 15:21 detok.o 1520 May 28 15:21 detree.o 11534 May 28 15:21 expr.o 8617 May 28 15:21 gen.o 742 May 28 15:21 misc.o 1521 May 28 15:21 opt.o 4228 May 28 15:21 parse.o 11414 May 28 15:21 scan.o 5617 May 28 15:21 stmt.o 10648 May 28 15:21 sym.o 761 May 28 15:21 targ6809.o 903 May 28 15:21 targqbe.o 5152 May 28 15:21 tree.o 2012 May 28 15:21 tstring.o 2515 May 28 15:21 types.o 10225 May 28 15:21 wcc.o ``` or about half the size :-) OK, just for fun I built `cgen6809` using the `fcc` compiler. I get `8A91 B __end` as the size. And for `cparse6809` we have `765A B __end` :-) And the amd64 versions have these size: ``` $ size cparse6809 cgen6809 text data bss dec hex filename 38793 1880 936 41609 a289 cparse6809 47942 1456 1064 50462 c51e cgen6809 ``` So, if we can get the code generator to be around as good as `fcc` then we stand a chance of getting it to cross-compile. Lots of work to do. 
## Tue 28 May 2024 16:34:46 AEST Just brainstorming the code improvement. We sort of have to keep the register idea in `gen.c` so as to support QBE. How about: - cg6809.c keeps an array of Location structs - the index into the array is a "register" - global `d_free` so we know when we can load the D register - the Location holds enough details to use as an operand to the B/D/Y operations. Looking at the current code: ``` ldd #0\n"); ldd 0,x\n"); ldd #1\n"); ldd 2,x\n"); ldd #%d\n", offset); ldd #%d\n", offset & 0xffff); ldd #%d\n", val & 0xff); ldd #%d\n", value & 0xffff); ldd #%d\n", (value>>16) & 0xffff); ldd %d,u\n", 2+sym->st_posn); ldd %d,u\n", sym->st_posn); ldd #L%d\n", label); ldd _%s+2\n", sym->name); ldd #_%s\n", sym->name); ldd _%s\n", sym->name); ``` so we need to record: - symbol names with optional position - offset on the stack frame - constants - label-ids - address of symbol names Something like: ``` enum { L_SYMBOL, L_LOCAL, L_CONST, L_LABEL, L_SYMADDR, L_DREG }; struct Location { int type; char *name; int intval; // Offset, const value, label-id }; ``` and a function which prints out the Location. We keep the register allocation/freeing and we can set `d_free` true when we free all registers. Register spilling should be simpler. The `cg` functions which allocate a register will now allocate a Location element and fill it in. Then, something like: ``` int cgadd(int r1, int r2, int type) { int size= cgprimsize(type); // If r1 is already L_DREG, do nothing. // Otherwise load the existing r1 location // into D and mark it as L_DREG. // This could load B, D or Y,D load_d(r1, size); switch (size) { case 1: fprintf(Outfile, "\taddb"); printlocation(r2,0); break; case 2: fprintf(Outfile, "\taddd"); printlocation(r2,0); break; case 4: fprintf(Outfile, "\taddd"); printlocation(r2,2); // Some code here to update Y :-) } return(r1); } ``` ## Wed 29 May 2024 09:01:58 AEST I got a start last night with good results. 
I've thought of some improvements and will try to get some done now. I can compile this so far: ``` int x, y, z; int main() { int result; x=2; y=3; z=4; result= x + y + z; printf("%d\n", result); return(result); } ``` with the assembly (some bits omitted): ``` _main: pshs u tfr s,u leas -2,s ldd #2 std _x ldd #3 std _y ldd #4 std _z ldd _x+0 addd _y+0 addd _z+0 std -2,u ldd -2,u ; This could be improved! pshs d ldd #L2 pshs d lbsr _printf leas 4,s ldd -2,u leas 2,s puls u rts ``` which is much nicer than going through R0, R1 etc. ## Wed 29 May 2024 12:48:15 AEST It's slow going. We are up to input009.c: OK though. Now input010.c: OK. I can see that dealing with longs isn't going to be fun. ## Thu 30 May 2024 12:51:23 AEST Fait went away with Liz today :-( We are up to input026.c: OK ## Thu 30 May 2024 13:19:48 AEST Now up to input090.c OK. ## Thu 30 May 2024 14:05:55 AEST Now up to input139.c: OK ## Thu 30 May 2024 15:09:40 AEST Yay, all the tests now pass. Wow. For a lark I tried to compile the compiler source with itself: ``` $ for i in *.c; do wcc -c -m6809 $i; done Incompatible argument type in function call on line 58 of cg6809.c Out of locations in cgalloclocn on line 241 of (null) Incompatible types in binary expression on line 95 of cpeep.c Out of locations in cgalloclocn on line 284 of (null) Out of locations in cgalloclocn on line 41 of (null) child phase didn't Exit Out of locations in cgalloclocn on line 69 of (null) child phase didn't Exit Out of locations in cgalloclocn on line 48 of (null) ``` So a few files didn't compile, but for those here are the size changes from the old to new code generator: ``` Old Size New Size Fcc Size ------------------------------------------------ 71859 cg6809.o 12497 cgen.o 7890 5587 37028 cgqbe.o 688 crt0.o 8427 ctreeopt.o 5530 3965 33009 decl.o 4832 desym.o 3464 2308 3093 detok.o 2242 1569 3336 detree.o 2313 1520 29981 expr.o 22461 gen.o 11608 8617 1342 misc.o 834 742 3639 opt.o 2132 1521 8853 parse.o 5897 4228 23115 
scan.o 12545 stmt.o 22795 sym.o 1053 targ6809.o 806 761 1393 targqbe.o 1062 903 9685 tree.o 126 tstring.o 126 2012 6732 types.o 4385 2515 19125 wcc.o 13238 10225 ``` Quite an improvement I think. And there's more work to do as `fcc` is still much better. ## Thu 30 May 2024 17:43:35 AEST Found a bug in `gen.c` because pointers and ints are different sizes in QBE/amd64. This line `Locn[l].type = 23;` failed because `l` was being multiplied by the struct size (as an int) but then was added to the address of `Locn` (long vs. word). Fixed. No, not fixed. It trips up later when it tries to widen a long to be a long, which QBE can't do (for some reason :-). ## Thu 30 May 2024 20:02:32 AEST Fixed and fixed a few others. We now have: ``` $ for i in *.c; do wcc -c $i; done Incompatible types in binary expression on line 95 of cpeep.c qbe:decl.c_qbe:2530: invalid type for first operand %class in copy ``` The `cpeep.c` one is subtracting pointers and assigning to an int: ``` int main() { char *a, *b; int x; x= a - b; return(x); } ``` The other one seems to be that `class` should be marked as having an address but it isn't. Not sure why not. ## Fri 31 May 2024 08:56:58 AEST Ideas for putting the compiler on a diet. - More AST optimisations, e.g. add/sub/mul where one side is int lit 0. But use an #ifdef to keep the gen ones away from the parse ones. Also e.g. switch left/right on commutative ops so that D reg already holds one of the values. - Use free() where possible - When an AST op is the top of the tree, don't load the D register with the result, e.g. `i++;`. - Don't use variables, e.g. `int primtype= ...; switch(primtype)` - What ints can be turned into chars? - We need to keep P_POINTER even though it's the same as P_INT at present. When we have `unsigned` then pointers will be unsigned, but ints might be signed. - Find duplicated string lits and make them globals, so only declared once. - Definitely some 6809 code improvements. 
- More peephole optimisations - Do a code coverage analysis? - Move the temporaries on the stack. Use an assembler constant to `leas` the stack at the start/end of each function. This will save us the necessity of spilling temps on the stack. ## Fri 31 May 2024 09:22:30 AEST So we need to be able to add/subtract pointers. But we have to unscale the result. Example (on amd64 gcc): ``` #include <stdio.h> int main() { int a, b; int *x, *y; long z; a=2; b=3; x=&a; y=&b; printf("x is %lx y is %lx\n", x, y); z= x - y; printf("z is %lx\n", z); z= x - y + 1; printf("z is %lx\n", z); return(0); } ``` produces ``` x is 7ffc7b25b244 y is 7ffc7b25b240 z is 1 z is 2 ``` Note x and y are four bytes apart, but the subtraction gives 1 as a result. But then the `+1` is treated as a long addition. Hmm. Yes I think we will need an A_DESCALE AST operation and a `cgshrconst()` function. ## Fri 31 May 2024 14:43:58 AEST Looking at the QBE `%class` problem in `decl.c`. I've got a new file `d.c` with just the problem function `declaration_list()` in it. This compiles with no problem! I even put in a function prototype just like in `decl.c` with no problems. Hmm. In the `sym` file, this is `class`: ``` {name = 0x555555566860 "class", id = 575, type = 48, ctype = 0x0, ctypeid = 0, stype = 0, class = 3, size = 4, nelems = 1, st_hasaddr = 0, st_posn = 0, initlist = 0x0, next = 0x555555566880, member = 0x0} ``` and `st_hasaddr` is zero. But I ran the parser through `gdb` and saw it being set to 1. So not sure how it got reset to 0. I ran the parser again. No, nothing is resetting it to 0. I modified `desym` to show `hasaddr` and none of the parameters have it set to 1. I added a printf to the parser and I see: ``` In declaration_list set hasaddr 1 on class stype 0 class 3 ``` which is a variable (0) parameter (3). Hmm. So why isn't it being dumped properly? ## Sat 01 Jun 2024 10:03:16 AEST OK I think I can see the problem. The symbol is being dumped before `hasaddr` is set: ``` $ wcc -S -X -v decl.c ...
Serialising class stype 0 class 3 id 575 hasaddr 0 ... In declaration_list set hasaddr 1 on class stype 0 class 3 id 575 ``` I'm guessing that we serialise the `class` mentioned in the prototype when it has `hasaddr 0`. Later on, `hasaddr` gets set but as the symbol was already serialised, the change doesn't make it out. Damn. We have to output prototypes in the symbol table in case a function calls a function which is only represented by the prototype. My solution for now is to mark all parameters as having an address in `cgqbe.c`. I only have to do the parameters as they appear in the prototypes. Sigh. All the tests still pass. Now we are down to: ``` Incompatible types in binary expression on line 95 of cpeep.c ``` But as I don't use this in the QBE version of the compiler, I should be able to try and compile the QBE compiler with itself. ## Sat 01 Jun 2024 13:26:27 AEST The `wcc` front-end doesn't work but I suspect system calls. `cscan` works fine. Damn, the parser isn't right, I'm seeing: ``` $ ls *ast ; ls *sym 235105 Jun 1 13:28 decl.c_ast 216017 Jun 1 13:28 fred_ast 59119 Jun 1 13:28 decl.c_sym 56399 Jun 1 13:28 fred_sym ``` as the outputs, the "fred" ones are from `cparseqbe` compiled with `wcc`. Ah, it looks like the struct sizes are different. I've run `detree` (compiled with wcc and gcc) and I see these differences: ``` < STRLIT rval \"Can't have static/extern in a typedef declaration\" --- > STRLIT rval "Can't have static/extern in a typedef declaration" 1641c1641 < STRLIT rval \"redefinition of typedef\" --- > STRLIT rval "redefinition of typedef" 1669c1669 < STRLIT rval \"unknown type\" --- > STRLIT rval "unknown type" ... ``` Not sure if that's bad or not. For the dumped symbol table, `desymqbe` produces the same output except the `wcc` compiled one does a segfault right at the end. Sigh. Damn, I made the `.s` files for `desymqbe` and did a `cc -o desymqbe -g -Wall *.s` but that didn't tell me where it crashed.
Hmm, well I can actually do this: ``` $ ./wscan < decl.c_cpp > fred.tok $ ./wparseqbe fred_sym fred_ast < fred.tok $ ./wgenqbe fred_sym fred_ast > fred_qbe ``` with no crashes. Doing a diff: ``` $ paste fred_qbe decl.c_qbe @L2 @L2 %ctype =l alloc8 1 %ctype =l alloc8 1 storel %.pctype, %ctype storel %.pctype, %ctype %class =l alloc8 1 %class =l alloc8 1 storel %.pclass, %class storel %.pclass, %class %.t1 =w copy 0 %.t1 =w copy 0 %type =w copy %.t1 %type =w copy %.t1 %.t2 =w copy 1 %.t2 =w copy 1 %exstatic =w copy %.t2 %exstatic =w copy %.t2 %.t3 =w copy 0 %.t3 =w copy 0 %.t5 =l extsw %.t3 %.t5 =l extsw %.t3 %.t6 =l loadl $ctype %.t6 =l loadl %ctype <=== storel %.t5, %.t6 storel %.t5, %.t6 ``` `ctype` is a local and should always be `%ctype`, but our compiler is producing `$ctype` which is a global. Hmm ... ## Sat 01 Jun 2024 15:23:22 AEST Going back to `desymqbe`, I added a few printfs ... ``` int main() { struct symtable sym; while (1) { printf("A\n"); if (deserialiseSym(&sym, stdin)== -1) break; printf("B\n"); dumpsym(&sym, 0); printf("C\n"); } printf("D\n"); return(0); } ``` and when I run the version compiled with our compiler: ``` C A D Segmentation fault ``` So it's segfaulting on the `return(0)`? When I change it to an `exit(0)` it's fine?! I'll do that for now. ## Sat 01 Jun 2024 15:29:20 AEST Summary: we can link and run the passes but not the front-end `wcc`. Doing a compile of `decl.c` to assembly: - the token files are identical - the dumped symbol files are identical from the `desym` perspective - the strlits in the AST file are different. The self-compiled version is putting \ before each " - the `cgenqbe` seems to be treating locals as globals here and there. ## Sun 02 Jun 2024 09:42:19 AEST I moved the symbol dumping code from `sym.c` to `desym.c` and added more code to print out everything. I need a name for the two sets of binaries. The G binaries are compiled with `gcc`, the W binaries with `wcc`. 
The G & W tokeniser produce the same token stream. The G & W parser produce different symbol table files, but when I run the G & W `desym` on them I get identical results. We still have the STRLIT issue with the G & W `detree` outputs. The AST files are different, but doing a `hd` on them I see: ``` G version 00030440 00 00 00 00 44 75 70 6c 69 63 61 74 65 20 73 74 |....Duplicate st| 00030450 72 75 63 74 2f 75 6e 69 6f 6e 20 6d 65 6d 62 65 |ruct/union membe| 00030460 72 20 64 65 63 6c 61 72 61 74 69 6f 6e 00 1c 00 |r declaration...| W version 00034880 00 00 00 00 44 75 70 6c 69 63 61 74 65 20 73 74 |....Duplicate st| 00034890 72 75 63 74 2f 75 6e 69 6f 6e 20 6d 65 6d 62 65 |ruct/union membe| 000348a0 72 20 64 65 63 6c 61 72 61 74 69 6f 6e 00 1c 00 |r declaration...| ``` so the literals are the same. It's just `detree` somehow printing out them differently. ## Sun 02 Jun 2024 10:03:32 AEST Hmm. So I added code in `cgqbe.c` to print out how we decide to use either a local `%` or global `$` character. Yes, we get a wrong answer: ``` < loadvar ctype 3 -> % < loadvar exstatic 2 -> % --- > loadvar ctype 3 -> $ > loadvar exstatic 2 -> $ < loadvar class 3 -> % < loadvar class 3 -> % --- > loadvar class 3 -> $ > loadvar class 3 -> $ ``` The code for this is: ``` // Get the relevant QBE prefix for the symbol qbeprefix = ((sym->class == C_GLOBAL) || (sym->class == C_STATIC) || (sym->class == C_EXTERN)) ? (char)'$' : (char)'%'; ``` So it might be LOGOR or ternaries or a combination of both?! I've got a test program and, for all class values we always get '$' and not '%'. Hmm. I'm comparing the QBE output from the `acwj 63` compiler. The LOGOR code is the same. The ternary code isn't. ## Mon 03 Jun 2024 13:47:00 AEST So it turns out I hadn't thought through the cases where a) I need a boolean value from an arbitrary expression and b) when to compare and jump, or) jump on a boolean value. The answer was as follows ... We already have code in IF and WHILE statements, e.g. 
``` // Parse the following expression // and the ')' following. Force a // non-comparison operation to be boolean. condAST = binexpr(0); if (condAST->op < A_EQ || condAST->op > A_GE) condAST = mkastunary(A_TOBOOL, condAST->type, condAST->ctype, condAST, NULL, 0); rparen(); ``` And in `gen.c` and `cg.c`: ``` case A_TOBOOL: // If the parent AST node is an A_IF or A_WHILE, generate // a compare followed by a jump. Otherwise, set the register // to 0 or 1 based on it's zeroeness or non-zeroeness return (cgboolean(leftreg, parentASTop, iflabel, type)); ... case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: // If the parent AST node is an A_IF, A_WHILE or A_TERNARY, // generate a compare followed by a jump. Otherwise, compare // registers and set one to 1 or 0 based on the comparison. if (parentASTop == A_IF || parentASTop == A_WHILE || parentASTop == A_TERNARY) return (cgcompare_and_jump (n->op, leftreg, rightreg, iflabel, n->left->type)); else return (cgcompare_and_set(n->op, leftreg, rightreg, n->left->type)); ``` So, we can fix the problem by a) adding `mkastunary(A_TOBOOL...` to the ternary expression code, and b) adding `parentASTop == A_TERNARY` to the `case A_TOBOOL`. I've just done this and now all the tests pass including a few extras I made up. Phew! ## Mon 03 Jun 2024 14:00:23 AEST Now, going back to compiling `decl.c` with our own compiler, I now get: ``` $ md5sum decl.c_qbe fred.qbe 0f299257e088b3de96b68430e9d1f123 decl.c_qbe G version 0f299257e088b3de96b68430e9d1f123 fred.qbe W version ``` ## Mon 03 Jun 2024 14:24:00 AEST With a shell script that uses the W binaries to compile the passes down to QBE code, I think I should be able to pass the triple test! 
``` 18fe9843b22b0ad06acfbc2011864619 cg6809.c_qbe G version 18fe9843b22b0ad06acfbc2011864619 fred.qbe W version === f7db53fe1cb35bd6ed19e633a5c618a6 cgen.c_qbe f7db53fe1cb35bd6ed19e633a5c618a6 fred.qbe === 9bfc78b1ef9b08eecf9dde1046bc4ab6 cgqbe.c_qbe 9bfc78b1ef9b08eecf9dde1046bc4ab6 fred.qbe === === 76ec1c53e8362a69ef1935a5efd7a1e3 ctreeopt.c_qbe 76ec1c53e8362a69ef1935a5efd7a1e3 fred.qbe === 0f299257e088b3de96b68430e9d1f123 decl.c_qbe 0f299257e088b3de96b68430e9d1f123 fred.qbe === 62d30bbd1b3efc61a021fffe76fff670 desym.c_qbe 62d30bbd1b3efc61a021fffe76fff670 fred.qbe === 28e5d1e283ee25a6b7e08f1d69816de8 detok.c_qbe 28e5d1e283ee25a6b7e08f1d69816de8 fred.qbe === c1cdef1a287717a429281f6c439475d4 detree.c_qbe c1cdef1a287717a429281f6c439475d4 fred.qbe === 4c72e8a009e3e6730969b7d55a30b9b4 expr.c_qbe 4c72e8a009e3e6730969b7d55a30b9b4 fred.qbe === ea63b78d3fa59c8d540639d83df8cf75 gen.c_qbe ea63b78d3fa59c8d540639d83df8cf75 fred.qbe === ccb643e15e8cc51969ee41aac2a691e7 misc.c_qbe ccb643e15e8cc51969ee41aac2a691e7 fred.qbe === 58fd815735a4ff3586a460884e58e700 opt.c_qbe 58fd815735a4ff3586a460884e58e700 fred.qbe === 784b5c65469c655868761f5c4501739c parse.c_qbe 784b5c65469c655868761f5c4501739c fred.qbe === 453d1ccb4e334b9e2852fac87a87bcff scan.c_qbe 453d1ccb4e334b9e2852fac87a87bcff fred.qbe === 8d66ab6b83506b92ab7e747c9d645fa3 stmt.c_qbe 8d66ab6b83506b92ab7e747c9d645fa3 fred.qbe === d91591705e6e99733269781f4f44bf47 sym.c_qbe d91591705e6e99733269781f4f44bf47 fred.qbe === 30e39b30644aead98e47ccd5ebf6171c targ6809.c_qbe 30e39b30644aead98e47ccd5ebf6171c fred.qbe === 0be7b46eb6add099be91cd3721ec4f09 targqbe.c_qbe 0be7b46eb6add099be91cd3721ec4f09 fred.qbe === 73bd8faa39878c98f40397e0cf103408 tree.c_qbe 73bd8faa39878c98f40397e0cf103408 fred.qbe === 7cf6e7e9ad7f587e31e3163aad1a40f3 tstring.c_qbe 7cf6e7e9ad7f587e31e3163aad1a40f3 fred.qbe === 392531419455d54d333922f37570cb61 types.c_qbe 392531419455d54d333922f37570cb61 fred.qbe === d7a3ddeafccf98d03d2fe594e78f2689 wcc.c_qbe 
d7a3ddeafccf98d03d2fe594e78f2689 fred.qbe ``` All the checksums are identical. ## Tue 04 Jun 2024 08:53:19 AEST I rearranged the `Makefile` so that it is set up to run the triple test with the QBE back-end. The level 0 binaries are built with `gcc`. The level 1 binaries in `L1/` are built with the level 0 binaries. The level 2 binaries in `L2/` will get built with the level 1 binaries. Thus, the files in `L1/` and `L2` should be identical except for `wcc` as BINDIR is different. There's still a problem with `wcc`. I can do: ``` $ L1/wcc -S wcc.c and $ L1/wcc -c wcc.c ``` But when I try to do the link stage it just loops around doing: ``` $ L1/wcc -o wcc -v wcc.c Doing: cpp -nostdinc -isystem /usr/local/src/Cwj6809/include/qbe wcc.c redirecting stdout to wcc.c_cpp Doing: /usr/local/src/Cwj6809/L1/cscan redirecting stdin from wcc.c_cpp redirecting stdout to wcc.c_tok Doing: /usr/local/src/Cwj6809/L1/cparseqbe wcc.c_sym wcc.c_ast redirecting stdin from wcc.c_tok Doing: /usr/local/src/Cwj6809/L1/cgenqbe wcc.c_sym wcc.c_ast redirecting stdout to wcc.c_qbe Doing: qbe -o wcc.c_s wcc.c_qbe Doing: as -o wcc.c_o wcc.c_s Doing: cpp -nostdinc -isystem /usr/local/src/Cwj6809/include/qbe wcc.c redirecting stdout to wcc.c_cpp Doing: /usr/local/src/Cwj6809/L1/cscan redirecting stdin from wcc.c_cpp redirecting stdout to wcc.c_tok ... ``` I have found the problem. Here's the test code: ``` #include int i; int main() { for (i=1; i<= 10; i++) { if (i==4) { printf("I don't like 4 very much\n"); continue; } printf("i is %d\n", i); } return(0); } ``` The `continue` should take us to the code that does the `i++` before we do the `i<=10` test. But the compiler is taking us to the `i<=10` test and we get stuck in an infinite loop with `i` set to 4. Sigh. I'm not sure how to deal with this. When I parse a `for` loop I simply glue the postop tree to the end of the loop body and treat it as a `while` loop. So there's only the label before the condition test and the label at the loop's end. 
There's no label between the body and the postop code. For now I can rewrite `wcc` to avoid the problem. And, yes, we now pass the triple test: ``` md5sum L1/* L2/* | sort 424d006522f88a6c8750888380c48dbe L1/desym 424d006522f88a6c8750888380c48dbe L2/desym 5da0fd17d14f35f19d1e1001c4ffa032 L2/wcc 6459d5698068115890478e4498bad693 L1/wcc 74ce22e789250c3c406980dab1c37df1 L1/detok 74ce22e789250c3c406980dab1c37df1 L2/detok 9cd8c07f0b66df2c775cfa348dfac4f7 L1/cscan 9cd8c07f0b66df2c775cfa348dfac4f7 L2/cscan 9fbe13d2b8120797ace55a37045f2a48 L1/cgenqbe 9fbe13d2b8120797ace55a37045f2a48 L2/cgenqbe a9f109370f44ce15b9245d01b7b03597 L1/cparseqbe a9f109370f44ce15b9245d01b7b03597 L2/cparseqbe ebed2d69321e600bc3f5a634eb1ac1f8 L1/detree ebed2d69321e600bc3f5a634eb1ac1f8 L2/detree ``` Yayy!!! I just merged the `bettercode` branch back in to the `master` branch. ## Tue 04 Jun 2024 10:12:29 AEST Back to the 6809 side now that we pass the triple test. Current object sizes are: ``` 806 Jun 4 10:11 targ6809.o 834 Jun 4 10:11 misc.o 1062 Jun 4 10:11 targqbe.o 2012 Jun 4 10:11 tstring.o 2112 Jun 4 10:11 opt.o 2658 Jun 4 10:11 detok.o 2733 Jun 4 10:11 detree.o 4399 Jun 4 10:11 types.o 5513 Jun 4 10:11 ctreeopt.o 5759 Jun 4 10:11 tree.o 5858 Jun 4 10:11 parse.o 7373 Jun 4 10:11 desym.o 7638 Jun 4 10:11 stmt.o 7868 Jun 4 10:11 cgen.o 8801 Jun 4 10:11 sym.o 11590 Jun 4 10:11 gen.o 13256 Jun 4 10:11 wcc.o 13942 Jun 4 10:11 scan.o 17670 Jun 4 10:11 expr.o 19156 Jun 4 10:11 decl.o 21114 Jun 4 10:11 cgqbe.o 34268 Jun 4 10:11 cg6809.o ``` ## Tue 04 Jun 2024 11:38:09 AEST I brought in some of the SubC tree optimisations. Some didn't work and are commented out. The overall code reduction is minimal. Damn. I didn't check to later but they broke some of the 6809 tests. Sigh. 
## Tue 04 Jun 2024 11:44:46 AEST I'm thinking of this idea, see this existing code: ``` L2: ldd -2,u <-- d_holds "-2,u" cmpd #8 bge L3 ; <- now NOREG due to free all locns ldd -2,u ``` But surely we could keep this one because then we wouldn't have to reload D? We would have to flush all locations on a jump. I might see if I can do this. Urgh, the tests pass but the code size is worse!! I did add a simple peephole optimisation to avoid an `ldd` after `std` to the same location. ## Tue 04 Jun 2024 13:25:58 AEST Just did another fcc vs. wcc size comparison: ``` fcc wcc cg6809.o 28682 33364 1.16 cgen.o 5587 7742 1.39 cgqbe.o 16867 20999 1.24 ctreeopt.o 3965 5504 1.39 decl.o 14499 19058 1.31 desym.o 5881 7340 1.25 detok.o 1894 2609 1.38 detree.o 1848 2700 1.46 expr.o 11646 17527 1.50 gen.o 8745 11482 1.31 misc.o 742 834 1.12 opt.o 1519 2051 1.35 parse.o 4265 5797 1.36 scan.o 11312 13822 1.22 stmt.o 5649 7596 1.34 sym.o 6732 8781 1.30 targ6809.o 761 806 1.06 targqbe.o 903 1046 1.16 tree.o 5152 5745 1.12 tstring.o 2012 2012 1.00 types.o 2516 4397 1.75 wcc.o 10282 12897 1.25 Average 1.29 ``` So 29% bigger than `fcc` at present. I'm just looking at `types.c`. The first function is: ``` int inttype(int type) { return (((type & 0xf) == 0) && (type >= P_CHAR && type <= P_LONG)); } ``` with the AST being: ``` FUNCTION inttype RETURN LOGAND EQ AND IDENT rval type INTLIT 15 INTLIT 0 LOGAND GE IDENT rval type INTLIT 32 LE IDENT rval type INTLIT 64 ``` `wcc` generates awful code! But this is wrong: ``` ldd 4,u anda #15 <- Should be #00 andb #15 ``` It's because `printlocation()` can't tell with literals if we are doing an 8-bit, 16-bit or 32-bit operation. Maybe I need to change one of the arguments to indicate if I'm using A, B, D or Y? Done and it seems to work. ## Thu 06 Jun 2024 08:20:21 AEST I've been struggling with producing better LOGAND and LOGOR code, because the current code makes a lot of #0 and #1s and then tests them. 
We should just be able to jump to labels like `cgcompare_and_jump()`. I split them into two functions, but I think I can merge them. Viz: ``` x || y x && y -------------------------------------------------------------- if (lhs true) goto Ltrue if (lhs false) goto Lfalse if (rhs false) goto Lfalse if (rhs false) goto Lfalse Ltrue: ldd #1; goto Lend Lfalse: ldd #0 Lend: ``` Really only the first line is different. I just need to pass a different AST op down on the first line. ## Thu 06 Jun 2024 11:33:16 AEST After much frustration I think I've finally fixed it. Yes, all the tests still pass as does the triple test. ## Fri 07 Jun 2024 08:40:20 AEST I realised that I could optimise: ``` #11 ldx %1,u ldd 0,x with ldd [%1,u] ``` and ditto D stores, B loads and stores. I finally realised that the peephole optimiser rules are whitespace sensitive. I now have a test input file I can run to check if it's working. I've realised that I can move some of the code generation into the peephole rules file. This will lessen the amount of C code in the code generator. Example, I could generate `a >> 24` as ``` ldd _a ; cgshr 24 ``` and the optimiser can replace it with a few Y/D/A/B exchanges and clears! ## Fri 07 Jun 2024 08:53:54 AEST I wrote a Perl script to compare the `.o` sizes against `fcc`: ``` cg6809.o: 1.07 cgen.o: 1.30 * cgqbe.o: 1.15 ctreeopt.o: 1.33 decl.o: 1.05 desym.o: 1.25 detok.o: 1.38 detree.o: 1.46 expr.o: 1.21 * gen.o: 1.34 * misc.o: 1.12 opt.o: 1.21 parse.o: 1.33 * scan.o: 1.13 stmt.o: 1.22 * sym.o: 1.17 targ6809.o: 1.06 targqbe.o: 1.16 tree.o: 1.12 tstring.o: 1.00 types.o: 0.99 wcc.o: 1.25 Average: 1.19 ``` I've starred the ones that are important. ## Fri 07 Jun 2024 09:53:22 AEST I've been writing a few more peephole rules. 
But now I've seen this code: ``` char *fred() { return(NULL); } int main() { if (fred() != NULL) printf("fred was not NULL\n"); return(0); } --- FUNCTION main IF NE FUNCCALL fred CAST 17 INTLIT 0 --- lbsr _fred ; Call fred std R0+0 ; Store result ldd #0 std R1+0 ; Store 0 in temp ldd R0+0 ; Now do the compare?! cmpd R1+0 beq L3 ``` Why can't we just `cmpd #0, beq L3`? Yes that works. Now have to work out why we are generating this code! Ah it's because of the cast. I added code to `cgwiden()`: if the primary sizes are the same, do nothing. This has brought the `wcc/fcc` ratios down: ``` cgen.o: 1.30 now 1.23 expr.o: 1.21 now 1.12 gen.o: 1.34 now 1.30 parse.o: 1.33 now 1.28 stmt.o: 1.22 now 1.10 Average: 1.19 now 1.15 ``` ## Fri 07 Jun 2024 16:40:13 AEST I've written `objcompare` to compare function sizes. The worst between `fcc` and `wcc` are: ``` enumerateAST: F/parse.o 85 W/parse.o 151 1.78 (ratio) genlabel: F/gen.o 15 W/gen.o 26 1.73 mkastnode: F/tree.o 133 W/tree.o 215 1.62 ``` so I guess I'll check those out first. ## Sat 08 Jun 2024 10:02:01 AEST I got an e-mail from Alan Cox about his recent improvements with `fcc`. I told him about this project and offered to send him the code. While up at the arena I looked through all of `cg6809.c`. There's an awful lot of: ``` int primtype= cgprimtype(type); ... switch(primtype) { case P_CHAR: ... case P_INT: ... case P_POINTER: ... case P_LONG: ... } ``` We could get `gen.c` to send in the `primtype`, but I'm actually thinking of getting in a character that represents the primary type. Then we could do: ``` int cgadd(int l1, int l2, char primchar) { load_d(l1); // Print out a pseudo-op fprintf(Outfile, "\tadd%c ", primchar); printlocation(l2, 0, primchar); cgfreelocn(l2); Locn[l1].type= L_DREG; d_holds= l1; return(l1); } ``` Then the peephole optimiser can expand the pseudo-op with the actual code. This would lose a heap of code from the code generator! There would be an issue with INTLITS.
If we generated: ``` ; Pseudo-op addl #1234567 ``` the peephole optimiser would have to do the `>>8` etc. Could be a pain. Ah, a possible solution. For byte and int literals I can just print them, e.g. `#2345`. For long literals I can print the top byte in decimal, the second byte in decimal and the bottom 16 bits in decimal, e.g. ``` ; Pseudo-op addl #0 #18 #54919 ``` Then the optimiser can match against them. Would mean a lot of rules I guess. ## Sat 08 Jun 2024 10:11:45 AEST I've written some other notes. Time to bring them in. ## Fix For and Continue Currently we do ``` for (PRE; COND; POST) BODY ``` and convert to ``` PRE Lstart: COND (which could jump to Lend) BODY POST Lend: ``` and the AST tree: ``` GLUE / \ PRE WHILE / \ COND \ GLUE / \ BODY POST ``` But we don't use the middle child in the WHILE node. So we could build this tree: ``` GLUE / \ PRE WHILE / | \ COND | BODY POST ``` In `genWHILE()` we can check for the presence of the middle child. If it's there, we produce code: ``` PRE (done elsewhere) jump Lcond Lstart: POST Lcond: COND (which could jump to Lend) BODY Lend: ``` Now, any `break` will go to `Lend` and any `continue` will go to `Lstart` and still do the post operation. ## Other Ideas Would it be useful to have an `x_holds` tracker? It wouldn't add much `cg6809.c` code as we don't use it but it might help shave a few percent off the code output. We need `cgshrconst()`. For shifts, we should check if the right is an INTLIT and use `cgshlconst()` and `cgshrconst()` for certain values. For values 8, 16 and 24, just write a comment in the assembly code: `leftshift_8` for example. Use the peephole optimiser to put the code in. This means we move lines of code out of the generator and into the peephole optimiser. We could even do 1 and 2! For the 8, 16, 24, we can do a bunch of register exchanges and clearing to get the work done.
## Sun 09 Jun 2024 07:44:45 AEST Alan wrote back suggesting it would be better to add macros to the assembler than to push pseudo-op expansion on the peephole optimiser. I guess that's true, especially as the optimiser loops back to see if it can apply more optimisations. It would be slow. Alan did mention tables, and I was wondering if I could use these. For each "operation" there would be several size rules. Each rule would have a string and an offset. If the offset is UNUSED, just print the string, else print the string and the location with the offset. Do I just have a big table and a second table which holds the first entry to use and the number of lines? Anyway, before we even go there I still need to improve the code density and reduce the wcc/fcc size ratio. ## Sun 09 Jun 2024 10:37:21 AEST Some more peephole rules. We now have: ``` cgen.o: 1.30 now 1.21 expr.o: 1.21 now 1.10 gen.o: 1.34 now 1.14 parse.o: 1.33 now 1.27 stmt.o: 1.22 now 1.06 Average: 1.19 now 1.12 ``` ## Sun 09 Jun 2024 14:02:30 AEST Musing on the way to/from Bunnings, I thought about the temps being on the stack. I just checked, they are always allocated in incrementing order and then either completely freed or the last one is freed then all are freed. This means I can change the allocation algorithm to just increment a number. When they go on the stack I'll need to adjust the `localOffset` to account for the temps. However, I won't know what the final value will be until the end of the function. I was thinking, when doing `cgfuncpreamble()`, I make the output file read/write. At the point where I'm going to emit the `leas` to adjust the stack, I'll save the offset using `ftell()` and emit: ``` fprintf(Outfile, ";\tleas XXXXX,s\n"); ``` Later on, if the `localOffset` is not zero, in `cgfuncpostamble()` I can `fseek()` back to this point and overwrite the line with the actual value. Slightly ugly but should work. For now I'm going to change the temp allocation to just an increment.
It works; the new code is: ``` static int next_free_temp; static int cgalloctemp() { return(next_free_temp++); } ``` Later on I'll keep a `highest_free_temp`. If we allocate past it, we can bump `localOffset` up by four. We definitely need to replace `u` as the frame pointer because this is adding a lot of extra code for really simple functions. It's why `fcc` is doing better with `parse.c`. Idea: we keep a `sp_adjust` variable, initially zero. Each time we `pshs` it goes up by the appropriate size. Down for `puls` or `leas`. As with `fcc`, to begin with we can check that it is zero when we get to the function postamble. On Tuesday I'll do the `sp_adjust` and lose the `u` frame pointer first. Once that's working I can move the temporaries on to the stack. ## Sun 09 Jun 2024 14:48:20 AEST I decided to add the `sp_adjust` but keep the `u` frame pointer as an initial step. I checked and `sp_adjust` is always zero in the postamble for all the code I can throw at it. ## Sun 09 Jun 2024 14:57:49 AEST Damnit, I started on the code to lose the `u` frame pointer. Up to input026.c: OK, test 27 failed. ## Sun 09 Jun 2024 15:30:10 AEST And ... done! It was actually easier than I expected. Just a few dumb things I should have changed which got fixed. All tests OK. We now have as a comparison: ``` cgen.o: 1.18 size 6438 expr.o: 1.04 size 12204 gen.o: 1.06 size 10552 parse.o: 1.23 size 5120 stmt.o: 1.03 size 6047 Average: 1.09 ``` and the biggest `.o` files: ``` cg6809.o: 1.03 size 29498 decl.o: 0.97 size 13901 expr.o: 1.04 size 12204 gen.o: 1.06 size 10552 scan.o: 1.09 size 12365 wcc.o: 1.17 size 12055 ``` `wcc` is fine but the code generator is `cg6809.o` and `gen.o` plus a bunch of others, so that's going to be the pain point. ## Mon 10 Jun 2024 07:39:53 AEST I decided to try out the `ftell()`, `fseek()` code to patch in the stack change, just with the existing `leas` in the function preamble. It works! Wow. Now I can get on with trying to put the temporaries on the stack.
Ah, I'm stuck. Somehow we have to have some arrangement of: ``` parameters return address locals temporaries <- sp ``` with known offsets for them all. But not until I've allocated the most temporaries consecutively will I know how much room they require. That means I can't work out the offsets for the locals and parameters. Even if I put the temporaries above the locals, I still can't work out the parameter offsets. Could I, when allocating a temporary, just add its size to `sp_adjust` and return 0 as the temporary's offset? Effectively I've pushed the temporary on the stack. And then lower `sp_adjust` when we free all temporaries. ## Mon 10 Jun 2024 09:45:43 AEST You know what. I just looked in `cg6809.c` and the spill code has been commented out. So there's no need to spill temporaries. That means there is no need to put them on the stack :-) Which means that I can keep the current R0, R1 temporaries, yay! And I can lose the `fseek()` code at the same time. ## Mon 10 Jun 2024 09:59:13 AEST Let's now try to simplify the location allocation code in the same way that I did the temp allocation code. No, because `gen.c` often frees all but one register, so I need to keep track of that one. Damn. I could copy the to-keep register down to R0, but it's a lot of effort. I'd have to rewrite the `gen.c` code to receive the new register's location. I'll park this for now. ## Mon 10 Jun 2024 10:24:44 AEST Now going back to the table of output lines idea. I'll need to encode offset and "register" for printlocation: ``` 1 printlocation(0, 'a'); 8 printlocation(0, 'b'); 10 printlocation(0, 'd'); 3 printlocation(0, 'e'); 3 printlocation(0, 'y'); 1 printlocation(1, 'b'); 3 printlocation(1, 'f'); 1 printlocation(2, 'a'); 6 printlocation(2, 'd'); 1 printlocation(2, 'y'); 1 printlocation(3, 'b'); ``` and if we need to do a printlocation on a line.
## Mon 10 Jun 2024 11:45:59 AEST Taking a step back and looking at the function prototypes in `cg6809.c`, the ones I think I can tabulate have one or two register arguments, a type and a label to jump to. Maybe I can alter `printlocation` to print nothing when it's not needed, and to print out a label. What I do need to do is to change the `type` that `gen.c` sends down to the `cgXXX.c` functions so we don't have to keep converting it. I've made a start but it dies after some tests. ## Tue 11 Jun 2024 10:14:16 AEST All 6809 tests now pass. I haven't rewritten the QBE backend yet. By moving `cgprimtype()` up into `gen.c` I've saved 180 bytes with the 6809 backend. Not much. It's probably not worth it, especially if I have a table based approach where I can do the `cgprimtype()` twice and then use the tables for many operations. I'll park the changes in a side branch. ## Wed 12 Jun 2024 13:47:19 AEST I added some `free()` code to `cgen.c` to clean up the AST trees. Sometimes the left and right nodes are identical. Not sure why. Anyway that works. I tried to free the local and parameter symbol lists but now the QBE backend generates different code. So I've commented this change out for now. 
## Wed 12 Jun 2024 14:12:56 AEST So I decided to go crazy and try this: ``` $ sh -x z wcc -m6809 -o wcc wcc.c wcc -m6809 -o cscan scan.c misc.c wcc -m6809 -o detok detok.c tstring.c wcc -m6809 -o detree detree.c misc.c wcc -m6809 -o desym desym.c wcc -m6809 -o cpeep cpeep.c Incompatible types in binary expression on line 95 of cpeep.c wcc -m6809 -o ctreeopt ctreeopt.c tree.c misc.c wcc -m6809 -o cparse6809 decl.c expr.c misc.c opt.c parse.c stmt.c sym.c tree.c targ6809.c tstring.c types.c wcc -m6809 -o cgen6809 cg6809.c cgen.c gen.c misc.c sym.c targ6809.c tree.c types.c ``` with the result: ``` -rwxr-xr-x 1 wkt wkt 12280 Jun 12 14:16 wcc -rwxr-xr-x 1 wkt wkt 10583 Jun 12 14:16 cscan -rwxr-xr-x 1 wkt wkt 7536 Jun 12 14:16 detok -rwxr-xr-x 1 wkt wkt 8984 Jun 12 14:16 detree -rwxr-xr-x 1 wkt wkt 8434 Jun 12 14:16 desym -rwxr-xr-x 1 wkt wkt 7941 Jun 12 14:16 ctreeopt -rwxr-xr-x 1 wkt wkt 27267 Jun 12 14:16 cparse6809 -rwxr-xr-x 1 wkt wkt 29615 Jun 12 14:16 cgen6809 ``` Interesting. Damn. `emu6809 ./cscan < detok.c_cpp > fred` produces an empty output file. ## Wed 12 Jun 2024 15:34:38 AEST I just had an idea. The problem could be my own assembly output, or an interaction with libc, or the emulator not doing something right. But we do have another 6809 compiler, `fcc`. So I could build the binaries with `fcc` and see what happens. Right, I now have binaries built with `fcc`. This time `cscan` does produce output, but it goes into a loop around line 82 of `detok.c` using the input file `detok.c_cpp`. At least the 6809 `detok` binary works :-) Ah, `detok.c` only has 82 lines, so somehow it's not detecting the end of file. So perhaps my emulator isn't sending EOF correctly? Also checking that `fcc` passes my tests. No, test67 fails but the others are OK. 
## Thu 13 Jun 2024 09:26:47 AEST I've added some debug code to `scan.c` and it looks like I've found an `fcc` bug: ``` switch (c) { case EOF: t->token = T_EOF; fprintf(stderr, "c is EOF, t->token is %d not %d\n", t->token, T_EOF); return (0); ``` produces the code: ``` ldx #Sw67 bra __switch Sw67_1: clr [0,s] ; Supposedly zero t->token, but clr [1,s] ; [0,s] is different to [8,s] below ... clra clrb pshs d ; Push T_EOF which is zero ldd [8,s] pshs d ; Push t->token ldd #T176+0 pshs d ; Push the string pointer ldd #_stderr+0 pshs d ; Push the FILE * lbsr _fprintf+0 ``` And, with my `wcc` compiling and building `cscan`, it looks like `fgetc()` returns EOF on the first call. Very annoying. ## Thu 13 Jun 2024 09:56:36 AEST I'm thinking of bringing `fcc` and the bintools up to date with the Github repo. Before I do that, here are the commits I am currently using: - bintools: bdb0076b5e3d4745aa08289d61e39f646d75805e - compiler: ffda85a94ce900423dc25a020fe62609ddcd46db I've got the latest of both with the compiler at commit 8a4b65b4d18be9528f3e5a6402b8e392e5ecc341. It runs the `wtests` OK but it is spitting out wrong code for some of the Fuzemsys libraries, e.g. ``` $ /opt/fcc/bin/fcc -m6809 -S -O -D__m6809__ clock_gettime.c $ vi +28 clock_gettime.s lbsr __shrul add 2,s adc 1,s adc ,s ``` which should be `addd, adcb, adca`. Now should I bother reporting these to Alan? It means trying to find the commit that caused the problem. ## Fri 14 Jun 2024 11:24:25 AEST The `fgetc()` problem with `wcc` was because I'd defined `stdin` as a pointer not an array of one `FILE` struct. `cscan` is now reading characters but it fails elsewhere. ## Fri 14 Jun 2024 12:01:11 AEST Argh! In the 6809 `cgswitch()` we are given the register (location) that holds the switch value, but it was not being loaded into D. A simple `load_d(reg)` fixed this. I added test 160. ## Fri 14 Jun 2024 12:17:20 AEST We have progress. `cscan` and `detok` work and it looks like I'm making a correct token stream file. 
The only issue is that it looks like the Fuzix `printf()` works differently than the Linux one, as I see these sort of differences: ``` 6464c6375 < 36: struct --- > 27: struct 6469d6379 < 43: filename \"decl.c\" 6472c6382 < 36: void --- > 1e: void 6476d6385 < 43: filename \"decl.c\" 6480c6389 < 36: struct --- > 27: struct ``` The token numbers being printed are different but the Tstring used is correct. And the double quotes in filenames are being quoted! Actually that's not quite the truth. I had to make this change in `detok.c`: ``` < *s++ = (char) ch; --- > *s = (char) ch; s++; ``` so I should investigate why as I can pass the QBE triple test with the original line. OK that was an easy fix, thankfully. ## Fri 14 Jun 2024 12:42:40 AEST Ah, I worked out why the token numbers were wrong. It seems that `cscan` isn't detecting any keywords but converting everything to a STRLIT. That's why I'm getting: ``` $ emu6809 cparse6809 decl.c_sym decl.c_ast < decl.c_tok unknown type:void on line 4 of /opt/wcc/include/6809/stdlib.h ``` as that's "void" not 30 (T_VOID). ## Fri 14 Jun 2024 15:19:25 AEST It's because `if (!strcmp(...))` isn't working. Fixed. ## Sat 15 Jun 2024 09:32:15 AEST Not fixed. It's because we are returning from a switch case. I have a new test: ``` int keyword(char *s) { switch (*s) { case 'b': return(1); case 'c': if (!strcmp(s, "case")) return(2); return(3); case 'v': if (!strcmp(s, "void")) return(4); return(5); default: return(6); } return(0); } ``` which works for QBE but always returns 6 with the 6809 backend. Fixed now. The assembly code handling switch cases expects the argument to be `int` but we were sending it a `char` with garbage in the A register. I've changed `stmt.c` to widen a P_CHAR tree to be P_INT if required. It looks like the token streams are now good with T_VOID for "void" etc. 
Moving on to the next phase: ``` $ emu6809 cparse6809 decl.c_sym decl.c_ast < decl.c_tok unknown struct/union type:FILE on line 35 of /opt/wcc/include/6809/stdio.h ``` Looks like somehow the symbol table is getting mangled: ``` Searching for struct __stdio_file: missing! Adding struct __stdio_file Searching for struct __stdio_file: found it Searching for struct __stdio_file: missing! ``` With more debug prints: ``` Searching for __stdio_file in list 778e class 0 Comparing against __stdio_file (and found it) ... Searching for __stdio_file in list 778e class 0 Did not find __stdio_file ``` where 778e is the head's value in hex and also where the name pointer lives (it's the first member of the struct). My debug code is: ``` fprintf(stderr, "Searching for %s in list %lx class %d\n", s, (long)list, class); for (; list != NULL; list = list->next) { if (list->name != NULL) fprintf(stderr, "Comparing against %s\n", list->name); if ((list->name != NULL) && !strcmp(s, list->name)) if (class == 0 || class == list->class) return (list); } fprintf(stderr, " Did not find %s\n", s); return (NULL); ``` So does this mean that `list->name` is being set to NULL somehow? ## Sat 15 Jun 2024 11:14:39 AEST Using the emulator and a write break, it looks like we are in `scalar_declaration()` at the top, doing `*tree = NULL;`. Which raises the question: are we not doing a double dereference, or how are we getting a pointer into the struct table? Even worse, that `tree` is a `struct ASTnode **` not even a symbol table pointer! Argh! ## Sat 15 Jun 2024 11:42:43 AEST I've got a `debug` file and I'm searching for 778E. I can see a `newsym()` being made. I can see we go into `composite_declaration()`, find the `rbrace()` and add a member to the struct. ## Sat 15 Jun 2024 12:24:59 AEST Stepping back a bit, I can compile this program with the 6809-binary phases: ``` void printint(int x); int main() { printint(5); return(0); } ``` so maybe I should just try compiling the test programs? 
## Sat 15 Jun 2024 12:45:54 AEST I have a test script now for this. Tests 1 and 2 are OK, 3 fails. The 6809 `cparse6809` runs and creates outputs. So does the native version of the compiler. The latter produces this symbol table: ``` int printf() id 1: global, 1 params, ctypeid 0, nelems 1 st_posn 0 char *fmt id 2: param offset 0, size 2, ctypeid 0, nelems 1 st_posn 0 void main() id 3: global, 0 params, ctypeid 0, nelems 0 st_posn 0 int x id 4: local offset 0, size 2, ctypeid 0, nelems 1 st_posn 0 unknown type x id 0: unknown class, size 0, ctypeid 0, nelems 0 st_posn 0 ``` The last line is the empty symbol to mark the end of one AST tree. The tree looks like: ``` FUNCTION main ASSIGN INTLIT 1 IDENT x FUNCCALL printf STRLIT rval "%d " IDENT rval x ASSIGN ADD ... ``` Now, doing the same with the 6809 tools: ``` int printf() id 1: global, 1 params, ctypeid 0, nelems 1 st_posn 0 char *fmt id 2: param offset 0, size 2, ctypeid 512, nelems 1 st_posn 0 void main() id 3: global, 0 params, ctypeid 0, nelems 0 st_posn 0 int x id 4: local offset 0, size 2, ctypeid 4, nelems 1 st_posn 0 unknown type id 0: unknown class, size 0, ctypeid 0, nelems 0 st_posn 0 ``` and ``` FUNCTION main Unknown dumpAST operator:8745 on line 1 of ``` With some more debug code, the two `detree`s do: ``` Native 6809 binary Next ASTnode op 32 Next ASTnode op 32 About to read in a name About to read in a name We got main We got main Next ASTnode op 29 Next ASTnode op 29 Next ASTnode op 29 Next ASTnode op 29 Next ASTnode op 29 Next ASTnode op 29 Next ASTnode op 29 Next ASTnode op 29 Next ASTnode op 29 Next ASTnode op 29 Next ASTnode op 29 Next ASTnode op 29 Next ASTnode op 29 Next ASTnode op 29 Next ASTnode op 29 Next ASTnode op 29 Next ASTnode op 29 Next ASTnode op -31959 ``` Just checked, there are nine `op 29` nodes in sequence. Why are we not reading this? Perhaps its a stdio problem. 
That's the ninth 32-byte record we read in, and it seems we read in: ``` 2B3E: D7 29 D2 28 D2 28 D2 29 D2 00 03 00 5D 00 00 00 .).(.(.)....]... 2B4E: 00 00 00 00 00 00 00 24 85 00 53 00 00 00 00 00 .......$..S..... whereas before we were getting 2B62: 00 1D 00 00 00 00 00 00 74 A8 00 00 73 F4 00 09 ........t...s... 2B72: 00 0A 00 00 00 13 00 00 00 00 00 00 00 00 00 00 ................ ``` and 0x001D is op 29. ## Sat 15 Jun 2024 14:09:01 AEST OK it's an input issue. Here's the two records using `hd`: ``` 00000100 1d 00 00 00 00 00 00 74 a8 00 00000110 00 73 f4 00 09 00 0a 00 00 00 13 00 00 00 00 00 00000120 00 00 00 00 00 00 1d 00 00 00 00 00 00 6f 90 00 00000130 00 74 cc 00 0a 00 0b 00 00 00 0e 00 00 00 00 00 00000140 00 00 00 00 00 00 ``` and a dumb dump of the file: ``` op 1d type 0 ctype 0 rvalue 0 left 74a8 mid 0 right 73f4 nodeid 9 leftid a midid 0 righid 13 sym 0 name NULL symid 0 size 0 linenm 0 op 5823 type 5322 ... ``` ## Sun 16 Jun 2024 10:24:16 AEST Back again today with a cold. I've changed the code to just dump the buffer in hex. Using both `fcc` and `wcc` I get the same behaviour of getting gibberish. Interestingly, if I remove the code that calls my `fgetstr()` function, it works fine and doesn't print any gibberish. Hah. I rewrote the code from scratch and it still fails. Then I decided to remove the `fgetc()` and replace with `fread()` and now it works. So either `fgetc()` itself is bad or the code (compiled by `fcc`) is bad. Or, it's an interaction between `fgetc()` and `fread()`. ## Sun 16 Jun 2024 10:59:40 AEST SEE BELOW... That helped a lot! I can now pass lots more tests. Tests 1 to 10 OK, 11 fails, 12 to 21 OK, 22 fails, 23 to 25 OK. I guess I can compare the assembly files and see how they differ. Actually I can also do the AST and symbol files. The AST files are fine. 
But for the symbol files I see this: ``` $ diff sym 6sym 2c2 < char *fmt id 2: param offset 0, size 2, ctypeid 0, nelems 1 st_posn 0 --- > char *fmt id 2: param offset 0, size 2, ctypeid 512, nelems 1 st_posn 0 4,7c4,7 < int i id 4: local offset 0, size 2, ctypeid 0, nelems 1 st_posn 0 < char j id 5: local offset 0, size 1, ctypeid 0, nelems 1 st_posn 0 < long k id 6: local offset 0, size 4, ctypeid 0, nelems 1 st_posn 0 < unknown type k id 0: unknown class, size 0, ctypeid 0, nelems 0 st_posn 0 --- > int i id 4: local offset 0, size 2, ctypeid 4, nelems 1 st_posn 0 > char j id 5: local offset 0, size 1, ctypeid 5, nelems 1 st_posn 0 > long k id 6: local offset 0, size 4, ctypeid 6, nelems 1 st_posn 0 > unknown type id 0: unknown class, size 0, ctypeid 0, nelems 0 st_posn 0 ``` and the ctypeids are all wrong. Yes, they are in the actual sym file. I added some debug code to `newsym()` to see what the `ctype` pointer is set to. With the native compiler: ``` newsym printf ctype (nil) ctypeid 0 newsym fmt ctype (nil) ctypeid 0 newsym main ctype (nil) ctypeid 0 newsym i ctype (nil) ctypeid 0 newsym j ctype (nil) ctypeid 0 newsym k ctype (nil) ctypeid 0 ``` and the 6809 version: ``` newsym printf ctype 00000 ctypeid 0 newsym fmt ctype 00003 ctypeid 512 newsym main ctype 00000 ctypeid 0 newsym i ctype 00002 ctypeid 4 newsym j ctype 00002 ctypeid 5 newsym k ctype 00002 ctypeid 6 ``` and I doubt 2 and 3 are real pointers in memory! ## Sun 16 Jun 2024 11:52:03 AEST Hmm. Some printfs later: ``` d_l B2 ctype 00000 s_d ctype 00002 newsym i ctype 00002 ctypeid 4 ``` It looks like I'm not correctly passing the `ctype` to `symbol_declaration()`. Yes, this doesn't look right: ``` leax 6,s tfr x,d ldd [10,s] pshs d ldd 14,s pshs d pshs d <-- Push twice with no D change?! ldd 8,s pshs d lbsr _symbol_declaration leas 8,s ``` and the C code is ``` sym = symbol_declaration(type, *ctype, class, &tree); ``` It seems I'm not loading the actual `*ctype` value before pushing it. 
The AST tree has: ``` FUNCCALL symbol_declaration IDENT rval type DEREF rval IDENT rval ctype IDENT rval class ADDR tree ``` and there's a bunch of other DEREF IDENTs in the tree elsewhere, but this is the only one where we are not doing the deref. No, it looks like we get into `cgcall()` with two locations marked as D_REGS: ``` (gdb) p Locn[0] $4 = {type = 7, name = 0x0, intval = 0, primtype = 3} <=== (gdb) p Locn[1] $5 = {type = 2, name = 0x0, intval = 12, primtype = 2} (gdb) p Locn[2] $6 = {type = 7, name = 0x0, intval = 0, primtype = 3} <=== (gdb) p Locn[3] $7 = {type = 2, name = 0x0, intval = 2, primtype = 2} ``` I think I can see the problem. In `gen_funccall()` we generate the code to get all the argument values: ``` for (i = 0, gluetree = n->left; gluetree != NULL; gluetree = gluetree->left) { // Calculate the expression's value arglist[i] = genAST(gluetree->right, NOLABEL, NOLABEL, NOLABEL, gluetree->op); typelist[i++] = gluetree->right->type; } ``` but several `cg6809` functions allocate a L_DREG location: `cgwiden()`, `cgaddress()`, `cgderef()`. And on this line: ``` sym = symbol_declaration(type, *ctype, class, &tree); ``` I am getting the address of `tree` and dereferencing `ctype`! Thus we have two locations which think they are L_DREG. ## Sun 16 Jun 2024 13:23:23 AEST SEE ABOVE... That `fgetc()` -> `fread()` change broke the tests so I've reverted it for now and kept a copy of the new code in RCS. 
I've added a `stash_d()` in `cgderef()` and now the assembly is: ``` leax 6,s tfr x,d std R0+0 &tree into R0 ldx 10,s ldd 0,x std R1+0 *ctype in R1 ldd R0+0 &tree pushed pshs d ldd 14,s class pushed pshs d ldd R1+0 *ctype pushed pshs d ldd 8,s type pushed pshs d lbsr _symbol_declaration which is optimised to leax 6,s stx R0+0 &tree into R0 ldd [10,s] std R1+0 *ctype in R1 ldd R0+0 pshs d &tree pushed ldd 14,s pshs d class pushed ldd R1+0 pshs d *ctype pushed ldd 8,s pshs d type pushed lbsr _symbol_declaration ``` ## Sun 16 Jun 2024 13:54:28 AEST That fixed the `ctype` bug. But it doesn't fix the `input011` problem. At least now the trees and symbol tables are the same. It looks like the compiler isn't emitting the right literal sub-values for longs: ``` $ diff goodqbe badqbe 49c49 < ldy #0 --- > ldy #30 150c150 < ldy #0 --- > ldy #1 158c158 < cmpy #0 --- > cmpy #5 189,190c189,190 < adcb #0 < adca #0 --- > adcb #1 > adca #1 ``` In particular, we seem to be using the low half of the value when we should be using the upper half. That's the const handling in `printlocation()` and it's the only place where we do right shifts. I'll try and replace it with some byte handling. Done (I'd written it on paper before) and now we get up to test 31 FAIL. Actually that's error testing. We are now up to test 58 fail :-) ## Sun 16 Jun 2024 15:09:58 AEST The error is `Bad type in cgprimtype::0 on line 15 of main`. The symbol table is fine but the tree shows INTLIT differences: ``` good bad FUNCTION main FUNCTION main ASSIGN ASSIGN INTLIT 12 INTLIT 12 DEREF DEREF ADD ADD ADDR var2 ADDR var2 INTLIT 0 INTLIT 48 <== FUNCCALL printf FUNCCALL printf STRLIT rval "%d" STRLIT rval "%d" DEREF rval DEREF rval ADD ADD ADDR var2 ADDR var2 INTLIT 0 INTLIT 48 <== ``` which are the member offsets in a struct. 
Yes they are in the file, I added some printfs to detree: ``` INTLIT node value 12 INTLIT node value 12 INTLIT node value 0 INTLIT node value 48 INTLIT node value 0 INTLIT node value 48 INTLIT node value 99 INTLIT node value 99 INTLIT node value 2 INTLIT node value 48 INTLIT node value 2 INTLIT node value 48 INTLIT node value 4005 INTLIT node value 4005 INTLIT node value 3 INTLIT node value 48 INTLIT node value 3 INTLIT node value 48 INTLIT node value 0 INTLIT node value 48 INTLIT node value 2 INTLIT node value 48 INTLIT node value 3 INTLIT node value 48 INTLIT node value 0 INTLIT node value 48 INTLIT node value 2 INTLIT node value 48 INTLIT node value 3 INTLIT node value 48 INTLIT node value 0 INTLIT node value 0 ``` I checked in `expr.c` where we build the INTLIT node: ``` right = mkastleaf(A_INTLIT, cgaddrint(), NULL, NULL, m->st_posn); ``` and both native and 6809 versions use the right position. So how does it get corrupted to be 48? Ooh, I print the value after the `mkastleaf()` and it's 48!! `mkastleaf()` is getting the 48! We have another double push: ``` ldx 4,s ldd 20,x <- but not storing m->st_posn lbsr _cgaddrint pshs d <- push cgaddrint() result ldd #0 pshs d <- push NULL ldd #0 pshs d <- push NULL pshs d ldd #26 pshs d <- push A_INTLIT lbsr _mkastleaf ``` Question: why is the `cgaddrint()` result being pushed before the NULLs? Ah, I'd missed a D stash in `cgcall()`. Yay, fixed. ## Mon 17 Jun 2024 07:49:35 AEST This gets us up to input130 which fails!! This is doing ``` printf("Hello " "world" "\n"); ``` The AST tree is: ``` FUNCTION FUNCCALL printf STRLIT rval \"Hello \" RETURN INTLIT 0 ``` which isn't right. The three strings are in the token stream. Side note: `printf("My name is \"Warren\" you know!\n");`. `gcc` gives `My name is "Warren" you know!`. But my compiler gives `My name is \"Warren\" you know!`. That means my compiler is interpreting the literal incorrectly, so I should fix it. Fixed that, didn't fix test 130. 
## Mon 17 Jun 2024 08:26:02 AEST So the native `wcc` concatenates strings fine: ``` First strlit >foo< totalsize 3 >foo< litend 0x56267086dfe3 First strlit >Hello < totalsize 6 >Hello < litend 0x56267086e196 Next strlit >world< totalsize 11 >Hello world< litend 0x56267086e19b Next strlit > < totalsize 12 >Hello world < litend 0x56267086e19c ``` but the 6809 one doesn't: ``` First strlit >Hello < totalsize 6 litval 08574 >Hello < litend 0857a Next strlit >world< totalsize 11 litval 08564 >Hello < litend 0857f Next strlit > < totalsize 12 litval 08090 >Hello < litend 08580 ``` OK, fixed. Now test 135 fails. Also, the only other test that fails is 162. The symbol table and AST tree look fine. It seems like the code generator is crashing. Looking at the debug trail, we are in `cgcall()`. `ptrtype()` has returned 1 and `cgprimsize()` has returned 2. We then do a `fprintf()`. So we must be doing this line: ``` // Call the function, adjust the stack fprintf(Outfile, "\tlbsr _%s\n", sym->name); ``` Single-stepping the emulator, `sym->name` is fine, it points to "printf". The string literal seems fine. And then we push $765E which the map file says is Outfile. So it doesn't look like our code is wrong. We fall into `vfnprintf()` and the switch statement for '%' with B holding 0x73 i.e. 's'. We get down to: ``` __switchc+0005: CMPB ,X+ | -FHI-Z-- 00:73 7572 0001 0000 FC89 __switchc+0007: BEQ $6410 | -FHI-Z-- 00:73 7572 0001 0000 FC89 __switchc+000F: LDX ,X | -FHI---- 00:73 0007 0001 0000 FC89 __switchc+0011: JMP ,X | -FHI---- 00:73 0007 0001 0000 FC89 0007: LSR <$00 | -FHI---C 00:73 0007 0001 0000 FC89 ``` and that jump to $0007 is definitely wrong! 
Here's a dump of the switch table: ``` 7536: 00 14 00 55 5D 2D 55 6B 20 55 73 2B 55 73 23 55 7546: 7C 2A 55 84 2E 55 C2 6C 55 CA 68 55 D2 64 55 D9 7556: 69 55 D9 62 56 27 6F 56 2E 70 56 35 58 56 4E 78 7566: 56 55 75 56 5A 21 57 61 63 57 7F 73 00 07 58 06 7576: FD 96 00 05 FF FF 00 00 00 00 00 00 00 04 00 02 ``` Let's rewrite that a bit: ``` 00 14 $14 (20 entries) 00 555D 0: '\0' 2D 556B 1: '-' 20 5573 2: ' ' 2B 5573 3: '+' 23 557C 4: '#' 2A 5584 5: '*' 2E 55C2 6: '.' 6C 55CA 7: 'l' 68 55D2 8: 'h' 64 55D9 9: 'd' 69 55D9 10: 'i' 62 5627 11: 'b' 6F 562E 12: 'o' 70 5635 13: 'p' 58 564E 14: 'X' 78 5655 15: 'x' 75 565A 16: 'u' 21 5761 17: '!' 63 577F 18: 'c' 73 0007 19: 's' <== must have been tromped on 5806 default ``` Yes, the jump should be to $57AC. Putting in some write breaks... It looks like the offending code is at the end of `load_d()`: ``` _load_d+$00C8: LBSR _mul _load_d+$00CB: ADDD #$757C base of Locn[] _load_d+$00CE: TFR D,X _load_d+$00D0: LDD #$0007 _load_d+$00D3: STD ,X which is Locn[l].type= L_DREG; ``` The debug trace shows that `load_d()` is being called with -1 (NOREG)! Leading up to this, we are doing a `switch` on 0x22 in `genAST()` which is an A_RETURN. `genAST()` loads NOREG and calls `cgreturn()`. And in the native version: ``` Breakpoint 1, cgreturn (l=-1, sym=0x555555566200) at cg6809.c:1211 (gdb) s load_d (l=-1) at cg6809.c:154 ``` Hah! So `load_d()` is getting a NOREG! OK I added code in `cgreturn()` to not load D when NOREG. Test 135 passes now. And so does test 162. So all the tests now pass. Yay!!! ## Mon 17 Jun 2024 10:13:33 AEST Now we are down to these issues when building the compiler with itself: - Incompatible types in binary expression on line 95 of cpeep.c - Can't A_ASSIGN in genAST(), op:1 on line 180 of run_command The first is pointer subtraction. The second is, I think, because `stdin` on Fuzix is defined as an array not a pointer, so we can't assign a pointer to an array. 
Ah, `freopen()` already resets the incoming file handle, no need to do the assign. That's fixed. Now I can put in a band-aid fix for just char pointer subtraction. I've done this, and I've added a test for it. All tests pass for: triple test, QBE, 6809 and 6809 compiled compiler. Now we just need to pass the triple test on the 6809 side! ## Mon 17 Jun 2024 14:47:53 AEST So there is this line in `cpeep.c`: ``` next = &((*next)->o_next); ``` which the compiler doesn't like: & operator must be followed by an identifier on line 155 of cpeep.c. I guess I get to rewrite that line! ## Mon 17 Jun 2024 14:52:55 AEST Damn. I'm going from the smallest file upwards with a script called `smake`. I hit: ``` $ ./smake types.c Doing types.c Unable to malloc in mkastnode() on line 103 of types.c ``` But the assembly files for these ones are OK: tstring.c, targ6809.c, tree.c, detok.c, opt.c. The only minor thing is: ``` $ diff detok.s S/detok.s 536c536 < ldd #65535 --- > ldd #-1 ``` which I think is OK. It is: I made the `.o` file from both versions and they have the same checksum. Damn. So now we need to get on to saving memory! ## Mon 17 Jun 2024 15:29:42 AEST I had this idea. Can we serialise (and then free) subtrees when we know that they are whole statements with no useful rvalue? I just wrote a `dumptree.c` which simply reads in and dumps AST nodes. At line 103 of `types.c` we are up to node 231, with the function `modify_type()` starting at node 76. On 6809 AST nodes are size 32, so that's 4,992 bytes. I also thought about how to `free()` AST nodes. I've tried in a few places with no luck, e.g. in `serialiseAST()` and in the `opt.c` functions. Sigh. ## Tue 18 Jun 2024 08:52:10 AEST I added some `brk()/sbrk()` debug code in the 6809 emulator to see how quickly the heap grows. 
Looking at who calls `malloc()`, here is how many times it is called when compiling `types.c` before we run out of heap: ``` 241 mkastnode 460 newsym 5 serialiseSymtab 928 strdup ``` I added some `free()`s in `decl.c` to help with the `strdup()`s in there. Just checked with the native compiler and, yes, there are 460 symbols in the symbol table. Yikes! ## Tue 18 Jun 2024 09:27:49 AEST Hah! I didn't realise/remember that the compiler can have unions in structs: ``` typedef union { int x; char c; } fred; typedef struct { int foo; char *name; fred combo; } jim; jim mynode; int main() { mynode.foo= 12; mynode.name="Dennis"; mynode.combo.x=17; printf("%d %s %d\n", mynode.foo, mynode.name, mynode.combo.x); return(0); } ``` Maybe I can put some data structures on a diet! I tried with the symbol table with no success, sigh. ## Wed 19 Jun 2024 10:13:08 AEST I've been trying to come up with a good idea for the symbol table. I think I have one. Firstly, when parsing we write out any non-local/param symbols to the symbol table file as soon as possible and then free them. We keep a single in-memory symbol table. The only things in this table are symbols from the symbol table file that have been brought in by a `findXXX()` operation. In global context, this in-memory table will hold, e.g. typedefs that are used by other things, e.g. ``` extern FILE *stdin; // We need to know about FILE's size ``` In a function context, the table will hold all the symbols that the function uses, including parameters and locals. At the beginning of each function, we can free the list to get rid of anything built-up during global parsing. At the end of each function we can flush all the locals and parameters to the file and then free the list. This should keep the in-memory symbol table relatively small, but help to minimise the number of times we go back to the file to get stuff. I think we can keep most/all of the existing `sym.c` API, with a few new functions to do the free/flush actions. 
I've made a new git branch: symtable_revamp, for this. I've installed the existing compiler binaries in `/opt/wcc`. Because this is going to break a heap of things, I'll make a new install area `/opt/nwcc`. That way I can compare the old symbol table and tree vs. what the new code is creating. ## Wed 19 Jun 2024 18:00:54 AEST I've made a start at the new code. It isn't finished and it doesn't compile yet. However, I feel that it's a good approach. I've got the code to add symbols done, now I'm writing the code to find symbols. It will be interesting to debug it and find out what mistakes I've made and what situations I didn't foresee. ## Fri 21 Jun 2024 07:39:23 AEST It's at the point where the code compiles and runs. It doesn't work yet, of course. I've just got a known-good symbol table with: ``` $ wcc -m6809 -S -X gen.c $ /opt/wcc/bin/desym gen.c_sym > goodsym ``` so when I do the same with the new code I can compare. I've added a slew of `fprintf()`s to the code. The code at present doesn't properly attach function params as members of the function symbol. I can fix that. I was thinking, perhaps, of adding the locals to the function symbol as well. There are places in the gen code which walk both lists, so it would make sense to have them available attached to the function symbol. ## Sat 22 Jun 2024 12:58:09 AEST Some progress but I think I've hit another problem. We now have two in-memory symbol lists, one for types (struct, union, enum, typedef) and the other for variables and functions. Things that have members (struct, union, enum, function) get their associated members (locals, params for functions) added to a temp list and attached to the symbol. Now the problem. 
In the original compiler I defined these lists of "classes": ``` // Storage classes enum { C_GLOBAL = 1, // Globally visible symbol C_LOCAL, // Locally visible symbol C_PARAM, // Locally visible function parameter C_EXTERN, // External globally visible symbol C_STATIC, // Static symbol, visible in one file C_STRUCT, // A struct C_UNION, // A union C_MEMBER, // Member of a struct or union C_ENUMTYPE, // A named enumeration type C_ENUMVAL, // A named enumeration value C_TYPEDEF, // A named typedef C_STRLIT // Not a class: used to denote string literals }; ``` but this is conflating two ideas. For example, I can have a `static struct foo { ... };`, so that the `foo` type is visible only in this file. What I need are two values in each symbol table. One holds that the symbol is a struct/union/enum/typedef (or not), and the other holds the symbol's visibility (global, extern, static). We already have structural types: ``` enum { S_VARIABLE, S_FUNCTION, S_ARRAY }; ``` so maybe I add the struct/union/enum/typedef to this. Also the strlit? Then keep global/extern/static/local/member/param/enumval as the storage classes? And rename it as visibility. The last four will always be in the `member` list and their actual visibility will be determined by their parent symbol. Also note for enums: enum values don't belong to any specific enum type. Also, enum type names don't really do anything, but we do have to prevent redefinitions of them. So both can be stored in the main symbol table: enum types as types and enum values as global symbols. ## Sun 23 Jun 2024 10:50:15 AEST I've done the above and I'm slowly working my way there. Right now we can read in all the global prototypes, enums, structs etc. fine. It dies when we hit the first function with parameters. Hopefully not too hard to fix. ## Mon 24 Jun 2024 09:41:38 AEST Well ... I think there's a problem. 
Example: ``` int fred(int a, int b, int c); int fred(int x, int y, int z) { return(x+y+x); } ``` The prototype gets written to the symbol table file with `a,b,c`. We now get the actual function with different parameter names. We could remove the old member list and add the new parameters, but these won't get written out to disk. Or, if we did write this out to disk, then now we have two entries in the symbol table for the same function. And we can't go and patch in the new variable names as the new names might be bigger than the old ones. Damn! I'm thinking of adding a function to invalidate an on-disk symbol. It would use the existing `findSyminfile()` to re-find the symbol, then write a -1 `id` to mark it invalid. Then we can write out the new symbol. It's an ugly solution but I can't think of a better one at the moment. ## Mon 24 Jun 2024 15:14:55 AEST I've added the invalidate code. Seems to work. Right now the code appears to write out symbols but not NULLing the Member pointers, as I'm getting a function with a struct's members as variables :-) ## Tue 25 Jun 2024 09:12:08 AEST I was loading symbols + their members from disk but I'd forgotten to reset the Memb pointers to NULL once done. I've also noticed that I'm writing symbols back out to disk multiple times. They get loaded in as needed, then flushed (written) back at the end of a function. Damn! We are now up to this line in `gen.c`: ``` type = n->left->type; ``` and the error message: ``` No member found in struct/union: :type on line 200 of gen.c ``` Up to here, the old/new parse trees are identical which makes me feel good! ## Tue 25 Jun 2024 09:35:21 AEST I think I have to add some symbol searching by id. I've added some dump code for the in-memory symbol tables. I see: ``` struct ASTnode: struct id 302: global, ctypeid 0, nelems 0 st_posn 0 int op id 303: member, size 2, ctypeid 0, nelems 1 st_posn 0 int type id 304: member, size 2, ctypeid 0, nelems 1 st_posn 2 !!! 
struct exit *ctype id 305: member, size 2, ctypeid 287, nelems 1 st_posn 4 int rvalue id 306: member, size 2, ctypeid 0, nelems 1 st_posn 6 !!! struct nodeid *left id 307: member, size 2, ctypeid 302, nelems 1 st_posn 8 !!! struct nodeid *mid id 308: member, size 2, ctypeid 302, nelems 1 st_posn 10 !!! struct nodeid *right id 309: member, size 2, ctypeid 302, nelems 1 st_posn 1 ``` The `ctype` pointers are pointing to the wrong types as indicated by the `!!!`. When I load symbols in from disk, I'm loading the symbol and its members. What I should do is, if the `ctype` is not NULL, find the matching `ctypeid` symbol and link it back in. Damn!!! ## Tue 25 Jun 2024 11:27:27 AEST I've added more code to load in symbols by id, and to link `ctype` fields to the matching symbol. We can now get down to the last function in `gen.c` and, up to that point, the trees are identical. I still haven't dealt with the repeated same symbol writing to the symbol file yet. OK found the problem with the last function in `gen.c`. I needed to NULL the Functionid before looking for duplicate parameters. Without this, we were checking against the parameters in the previous function! We now generate the same AST tree as compared to the compiler before the on-disk symbol table. Yay! ## Tue 25 Jun 2024 12:21:45 AEST I fixed the duplicate on-disk symbols. As we are allocating ids incrementally, simply record the highest one that we wrote out and don't write ones at or below this id on the next round. Now I'm trying to parse all the other C files and I'm having troubles. Sigh. I've gone to the tests to see if I can parse them. Just a couple are causing problems. They were when a function call had no arguments, the function had no parameters but it did have locals. That's because all the locals are on the member list after the parameters. I had to fix up the logic to stop when we hit the first local. 
## Wed 26 Jun 2024 10:02:50 AEST I've decided, now that we can parse all the tests, to try and get the generator to run as well. I've got the generator code to compile, and now comparing the assembly output from old/new compiler. ## Wed 26 Jun 2024 12:33:58 AEST Ah, I wasn't generating the data for global variables. I've got the code nearly working, but there are 10 tests still failing. It's sooo frustrating! ## Wed 26 Jun 2024 14:29:00 AEST Now I'm up to trying to deal with global string literals which appear in the symbol table after the variable they get assigned to. Tricky. I'll find a way I'm sure. Yes, output the strlits first then the non-strlits. Sigh. All the 6809 tests now pass. Some of the QBE tests fail, and they look like strlits. So I'll need to fix that up. ## Fri 28 Jun 2024 14:06:37 AEST I needed to `strdup()` the string value in `cgqbe.c`. All the tests now pass on QBE and 6809. But the current parser is still balking at the compiler's own code. So here is the code in `wcc.c` that we are failing on: ``` void addtmpname(char *name) { struct filelist *this; ... } ``` In the parser, we have added the prototype for `addtmpname()` to the in-memory symbol table (i.e. with the parameter) and we are in the function's body. Now we are searching for the `struct filelist` type. It's been flushed out of memory, so we need to load it back in. In doing so, we have NULL'd `Membhead` and `Membtail`. Why? Because we loaded the `struct filelist` in and then had to read its members. But we needed the member list to append the local `this` variable after the `name` parameter. Damn!! ## Sat 29 Jun 2024 08:15:25 AEST I fixed the above by adding a private list just for `loadSym()` to use when reading symbols in from the file. Now onto the next problem. There's a line in `scan.c` which is `c = next();`. We seem to be finding the `next` symbol as a variable. 
We are calling: ``` res= loadSym(sym, name, stype, id, 0, 1); // stype NOTATYPE, id 0 ``` and getting back a member of a struct called `next`, not the function. So there is some logic bug in `loadSym()` that needs fixing. Yes. When we use `loadSym()` and there's no match, we skip the initlist. But if it's a struct/union type, we don't skip the members. So, we leave `loadSym()`, then come back to `loadSym()` and it then reads the members in and compares against the name. So, somehow we need to load the struct/union type and load but skip the members. OK that wasn't too easy. If we are searching for a name, then it's not a local, param or member. The first two are found by `findlocl()` and the members are loaded recursively by `loadSym()` itself. I just added another condition in the IF statement. We can now parse `scan.c` but the generator dies. Here is the current list of errors: ``` cg6809.c 6809:Duplicate local variable declaration:cmplist on line 1028 of cg6809.c cgqbe.c: Can't find symbol with id:549 on line 71 of cgpreamble cpeep.c: & operator must be followed by an identifier on line 155 of cpeep.c decl.c: Can't find symbol with id:503 on line 0 of (null) desym.c: Unknown variable or function:dumpsym on line 221 of desym.c expr.c: Unknown variable or function:binexpr on line 156 of expr.c gen.c: Can't find symbol with id:511 on line 30 of Lfalse scan.c: Can't find symbol with id:350 on line 0 of (null) stmt.c: Unknown variable or function:if_statement on line 355 of stmt.c sym.c: Type mismatch: literal vs. variable on line 26 of sym.c ``` The first is only a 6809-side problem. The others (apart from the `cpeep.c` one) seem to be a symbol which was invalidated but not replaced with a new definition. ## Tue 02 Jul 2024 10:47:44 AEST Argh, getting frustrated. I think the parser is now working fine, now a problem in the code generator. The AST tree stores the id of many symbols. 
However, if we invalidate a function's symbol, we set the id to -1 and store a new version of it in the symbol table, with a new id. But the AST might still have the old id. Example: ``` cgqbe.c:static void cgmakeglobstrs(); cgqbe.c: cgmakeglobstrs(); cgqbe.c:static void cgmakeglobstrs() { ... } ``` The first line will get a symbol put into the table. The second line will use this id in the AST. The third line invalidates the symbol's id and replaces it with a new one, but the AST remains the same. I tried in `decl.c` to get the old id, invalidate that symbol, make the new symbol and insert the old id. Fine, except that the `sym.c` code remembers the highest id we wrote out before, and it won't write out the new symbol because the id "has been already written". Damn! And all this because we need to make sure that the function's variable names are stored and they could be different to the prototype ones. Is there another way? I think I'm going to enforce that the prototype parameter names must match the actual function parameter names. That way I can avoid invalidating anything. ## Tue 02 Jul 2024 11:28:58 AEST Done and I've removed all the invalidation code. Apart from the `cpeep.c` problem, all the other C files compile and we still pass the tests. Now I need to add `-D` pre-processor handling to `wcc.c`. Done. ## Tue 02 Jul 2024 11:47:40 AEST Now a problem with `FILE *Symfile = NULL;` in `sym.c` which doesn't seem to get added to the symbol table. Solved by adding it to `data.h` like the other symbols that need to be extern sometimes and sometimes not. Now I'm trying the triple test. We get into an infinite loop but only on some inputs: ``` $ L1/nwcc -S gen.c $ L1/nwcc -S stmt.c $ L1/nwcc -S cgqbe.c <--- ^C $ L1/nwcc -S wcc.c <--- ^C $ L1/nwcc -S cg6809.c <--- ^C $ L1/nwcc -S expr.c $ L1/nwcc -S decl.c ``` For `wcc.c` the symbol tree and AST files are fine, `nwcc` versus `L1/nwcc`. So it must be the code generator getting stuck. 
We seem to get into an infinite loop while `Searching for symbol id 143 in memory`. ## Tue 02 Jul 2024 13:11:14 AEST No I think I've been bitten by this bug again. In `cgen.c`: ``` // Now do the non string literals for (sym=Symhead; sym!=NULL; sym=sym->next) { if (sym->stype== S_STRLIT) continue; ``` Can we continue and still get the `sym=sym->next` to work? Nope. I rewrote the code and, yay!!!! we now pass the triple test again. Phew! ## Tue 02 Jul 2024 13:21:31 AEST I can build the 6809 compiler binaries again using `build6809bins`. That means I'm now back to where I was at Mon 17 Jun 2024 trying to `smake types.c`. The scanner works but I'm getting: ``` emu6809 6/cparse6809 misc.c_sym misc.c_ast New parameter name doesn't match prototype:stream on line 51 of /opt/wcc/include/6809/stdio.h ``` Damn! The executable sizes are: ``` 6/cgen6809.map: 7BA9 B __end 6/cparse6809.map: 77D6 B __end 6/cscan.map: 2E70 B __end 6/ctreeopt.map: 2258 B __end 6/desym.map: 24AA B __end 6/detok.map: 218E B __end 6/detree.map: 26D2 B __end 6/nwcc.map: 3951 B __end ``` ## Wed 03 Jul 2024 08:06:08 AEST OK, it's the 6809 code which is wrong. In a new test, these lines: ``` if (strcmp(c, c)) { printf("%s and %s are different\n", c, c); } if (!strcmp(c, c)) { printf("%s and %s are the same\n", c, c); } ``` produce: ``` Fisherman and Fisherman are different Fisherman and Fisherman are the same ``` and the problem goes back to before the symbol table rewrite. I think I can see the problem. The second one has this debug run: ``` _strcmp+003E: LEAS 4,S | -FHI---- 00:00 01E3 0001 1082 FDAC _strcmp+0040: PULS PC,U | -FHI---- 00:00 01E3 0001 0000 FDB0 _main+0032: LEAS 4,S | -FHI---- 00:00 01E3 0001 0000 FDB4 _main+0034: CMPD #$0000 | -FHI-Z-- 00:00 01E3 0001 0000 FDB4 _main+0038: BNE $016A | -FHI-Z-- 00:00 01E3 0001 0000 FDB4 _main+003A: LDD #$0001 | -FHI---- 00:01 01E3 0001 0000 FDB4 _main+003D: BRA $016D | -FHI---- 00:01 01E3 0001 0000 FDB4 ``` `strcmp()` returns. 
We compare against zero and load a one if it was zero (the negation). However, for the first call: ``` _strcmp+003E: LEAS 4,S | -FHI---- 00:00 01E3 0000 1082 FDAC _strcmp+0040: PULS PC,U | -FHI---- 00:00 01E3 0000 0000 FDB0 _main+000D: LEAS 4,S | -FHI---- 00:00 01E3 0000 0000 FDB4 _main+000F: BEQ $0150 | -FHI---- 00:00 01E3 0000 0000 FDB4 _main+0011: LDD $1105 | -FHI---- 10:78 01E3 0000 0000 FDB4 ``` `strcmp()` returns zero but the Z flag isn't set. Thus the `BEQ` operation isn't taken. So I need to do a `CMPD #$0000` before the `BEQ`, as per the second call. OK fixed now. ## Wed 03 Jul 2024 08:43:20 AEST Damn. We are running out of memory when compiling a small file like `misc.c`: ``` Doing misc.c Unable to malloc member in loadSym() on line 27 of misc.c cparse6809 failed ``` That's worse than before we revamped the symbol table. Why?!! ## Wed 03 Jul 2024 10:30:58 AEST I've spent a bit of time with valgrind. I had forgotten some `free()`s in `sym.c`. When I put them in, I failed the triple test. Argh! It turned out that I was not initialising the `rvalue` field in the symbols and this, not the `free()`s was the problem. I can now pass the triple test again, and valgrind now shows that most of the mem leaks are with the AST. Back to using `smake` to get the 6809 compiler to compile itself. Much slower now that we have the symbol table in a file. This time we got up to `detree.c` before we ran out of memory. That's better than before. But it now means that I need to try and free the AST nodes. I tried this before and failed. Damn. Ah. I'd added it to the code generator which worked/works, but not the parser. Just added it now and still pass the triple test. Wow. For the parser, valgrind tells me that the mem leak has fallen from 100K to about 4K. Let's see if that helps with `smake`. Well, we got a bit further. This time we fail in the generator with `detree.c` but we can compile `desym.c`. We can parse `stmt.c` but fail in the generator. 
Interestingly, the assembly files that do get generated only differ with this: ``` desym.s 582c582 < ldd #65535 --- > ldd #-1 ``` which is good. So now I think I need to do a valgrind on the code generator. Ah, I was loading the globals but not freeing the unused ones properly. Fixed and still pass the triple test. ## Wed 03 Jul 2024 11:54:43 AEST So with these improvements I can now `smake stmt.c` and `wcc.c`. After that: ``` Unable to malloc in mkastnode() on line 539 of scan.c, cparse6809 failed Unable to malloc in mkastnode() on line 519 of gen.c, cparse6809 failed Unknown variable or function:ftell on line 366 of sym.c, cparse6809 failed Unknown AST operator:14336 on line -1279 of binexpr, cgen6809 failed Unable to malloc in mkastnode() on line 86 of decl.c, cparse6809 failed ``` The `malloc()` ones are when we have really big functions, so the AST tree is huge! And I guess there are a lot of symbols too. Do we try to put the structs on a diet? I don't think I can easily serialise the partial AST tree? ## Wed 03 Jul 2024 13:10:42 AEST Struct dieting. symtable: some things could be chars not ints: - type, stype, class, st_hasaddr We could also unionise ctype and ctypeid. For the AST nodes: - these could be chars: type, rvalue - we could unionise the left/mid/right pointers and ids - we could unionise the sym pointer and the symid - do we need name if we can use sym->name? ## Wed 03 Jul 2024 14:48:23 AEST Musing on partial AST trees ... In the generator we have `genfreeregs()` which signals that there's no result needed. This gets called when generating IF, WHILE, SWITCH, logical OR, logical AND and glue operations. Maybe in the parser we could somehow serialise trees at these points. We also have ids in each AST node. We could do something similar with the current symbol table tree. We could have a `loadASTnode()` function. Each time we need to go left/mid/right, we could `loadASTnode()` with the id. 
Maybe something like the following: ``` // Given an AST node id, load that AST node from the AST file. // If nextfunc is set, find the next AST node which is a function. // Allocate and return the node. struct ASTnode *loadASTnode(int id, int nextfunc); ``` ## Wed 03 Jul 2024 16:27:09 AEST I'm now thinking that I can ditch the whole in-memory AST tree concept. Instead, keep the tree on-disk as the AST file and left/mid/right ids, which provide an implicit tree. On the generator side, we simply use `loadASTnode()` each time we need to traverse left/mid/right. On the parser side, we still make nodes in memory, but once they are filled in (that means: left/mid/right, hasaddr etc.) then we just write the node to disk and free it. I think it also means that I can use the existing AST file and start work on the generator side. Once that's working, I can go back and change the parser side of things. ## Thu 04 Jul 2024 16:37:19 AEST I've written but not tested the `loadASTnode()` code. I made a start changing `gen.c` over to use it, but there are fiddly spots. I feel that I need to step back a bit and do some thinking first. There is some list walking code in `gen.c`; in a few functions we iterate the same list multiple times. I don't really want to `loadASTnode()` the same nodes multiple times in the same function. I also need to think about the best places to `free()` AST nodes. So I might put in some `free()`s first, get them working before I do the `loadASTnode()` conversion. ## Thu 04 Jul 2024 17:36:11 AEST I rewrote the `genAST()` code so that there was one return only at the end. I added code in `tree.c` to free a single AST node. At the end of `genAST()` I can free the left and middle AST node and pass the triple test. I can't do that when I free the right-hand one. Need to investigate. ## Fri 05 Jul 2024 15:38:36 AEST That is fixed. Somehow right can get set to left, so we must check it isn't before freeing it. 
I have spent some time setting new variables `nleft, nmid, nright` to hold the sub-nodes. At present I'm just copying pointers but eventually I will use `loadASTnode()`. In CASE and function calls we do have to traverse the node linked list twice; can't get out of it. So I'll just have to suck up the multiple disk reads for now. Maybe later build a cache of some sort? ## Tue 09 Jul 2024 14:00:37 AEST Back from the 3-day comedy cruise. I've rewritten `detree.c` to use the `loadASTnode()` function. After fixing a few minor issues, it prints out the same tree as the old version. That gives me confidence that the `loadASTnode()` function works. I just added `free()`s in `detree.c` and every single AST node and associated name string gets free'd. That makes me very happy!! ## Tue 09 Jul 2024 15:38:05 AEST I've added the `loadASTnode()` code to `gen.c` and it works up to the first SWITCH test. That's pretty good :-) Once I get all the tests to pass I will go back and work on freeing all the nodes that get allocated. ## Tue 09 Jul 2024 15:58:16 AEST I had missed one left node pointer reference which needed to be loaded with `loadASTnode()`. Now all the tests pass, yay!!! ## Wed 10 Jul 2024 07:39:53 AEST I spent some time using valgrind last night and I've added a bunch of `freeASTnode()` and friends to the code. We are down to about 3K of un-free'd memory. That's excellent. I think I'll stop on the code generation side. Now I need to work out, on the parsing side, how to build AST nodes, write them to disk once I have all the child node ids, and then free them. The hard bit is to find one (or a few) places where I can do this. Right now I'm doing it at the end of each function, but that requires too much memory on the 6809 for large functions. ## Wed 10 Jul 2024 10:33:05 AEST I'm trying to move the enumeration of the AST nodes into `tree.c`, so that a node gets an id when it's created. The problem is that I change the tree structure in several places e.g. 
`optimise()` and I have to find all the relinking and fix the child ids at the same time. Right now for test 009 I have a set of AST nodes where a child node's id is in several parent nodes! Not a tree! Fixed. I wasn't zeroing the node ids when the child pointer was NULL. ## Thu 11 Jul 2024 14:13:17 AEST With the new AST enumeration code, the tests pass for QBE and 6809. Now trying to do the triple test and it fails. Comparing the `gcc` output (Good) vs. the L1 output (Bad). The `cpp` file is fine. The symbol tables are the same. The AST files are the same except that the string literals seem different. Looks like the newlines are getting corrupted, e.g.: ``` Good ---- 00003d70 00 00 00 00 00 00 00 00 00 4f 75 74 20 6f 66 20 |.........Out of | 00003d80 73 70 61 63 65 20 69 6e 20 63 6d 64 61 72 67 73 |space in cmdargs| 00003d90 0a 00 23 00 00 00 10 00 00 00 00 00 00 00 00 00 |..#.............| Bad --- 00003870 00 00 00 00 00 00 00 00 00 4f 75 74 20 6f 66 20 |.........Out of | 00003880 73 70 61 63 65 20 69 6e 20 63 6d 64 61 72 67 73 |space in cmdargs| 00003890 6e 00 23 00 00 00 10 00 00 00 00 00 00 00 00 00 |n.#.............| ``` Note the first byte on the last line: `0a` versus `6e`: `6e` is ASCII 'n'. And the compiler before the symbol table fix has: ``` 00003870 00 00 00 00 00 00 00 00 00 4f 75 74 20 6f 66 20 |.........Out of | 00003880 73 70 61 63 65 20 69 6e 20 63 6d 64 61 72 67 73 |space in cmdargs| 00003890 0a 00 23 00 00 00 10 00 00 00 00 00 00 00 00 00 |..#.............| ``` which is fine. And commit 6b870ee2d... Date: Wed Jul 10 10:20:32 is also fine. So at least I can narrow down the problem (I hope :-). ## Thu 11 Jul 2024 14:59:05 AEST Looking at the diffs between the two sets of files, there is nothing that sticks out as being a problem. Sigh. ## Thu 11 Jul 2024 15:10:40 AEST It's weird. 
Here are some debug lines for reading/writing string literals for both the `gcc` and `nwcc` passes in the triple test: ``` 1 Read in string >Out of space in cmdargs < 1 Dumping literal string/id >Out of space in cmdargs < ``` This is the `gcc` parser. Newlines are OK. Now the code generator: ``` 1509 Read in string >Out of space in cmdargs < ``` Again, fine. Now the `nwcc` compiled parser and generator: ``` 1 Read in string >Out of space in cmdargsn< 1 Dumping literal string/id >Out of space in cmdargsn< 1509 Read in string >Out of space in cmdargsn< ``` which is wrong. ## Thu 11 Jul 2024 15:34:54 AEST Ah, the problem is back in the scanner! The Good scanner shows: ``` 34: "Out of space in cmdargs " ``` The Bad scanner shows: ``` 34: "Out of space in cmdargsn" ``` ## Fri 12 Jul 2024 07:41:16 AEST I have been able to build a test case which abstracts some of the scanner code and which exhibits the problem. Using the compiler from before the on-disk symbol/AST changes, compared to the current one, I see this difference in the generated AST tree: ``` 28,30d27 < CASE 110 () < RETURN () < INTLIT 10 () ``` The case statement in the switch is completely missing! ``` switch (c) { case 'n': return ('\n'); } ``` Using my new `astdump` program, I can see that the right-hand AST child id is missing in the SWITCH node in the file: ``` 31 SWITCH type 0 rval 0 line 14 symid 0 intval 1 left 30 mid 0 right 0 ``` where the good version has right 37. Ah, I found it. In `stmt.c` I link the case sub-tree to the SWITCH node, but I'd forgotten to set up the right-hand id. Fixed. ## Fri 12 Jul 2024 11:40:05 AEST We are still not passing the triple test. In another directory I've checked out the last commit which does pass the triple test: 50b82937c5da569b. I've just compiled (to asm) the compiler files with both (Good/Bad), and now I can compare the intermediate file. The token files are identical. Now looking at the `detree` outputs. These are different: scan.c, cg6809.c, cgqbe.c. 
Hmm, it looks like there are still missing case statements. Weird! Found it, I was building a linked list of case trees and I had forgotten to add in the child's nodeid along the way. We now pass the triple test again! Now I can get back to trying to serialise and free the AST nodes in the parser. ## Fri 12 Jul 2024 12:12:08 AEST I've made a start, but I'm not serialising some of the AST nodes in input001.c. Damn! Ah, fixed. I can now pass all the tests except for the one with the error "No return for function with non-void type". I've had to comment out this test in the compiler, as I'm now freeing the tree before I can check if there is a return! Wow. We pass all the QBE tests, all the 6809 tests and the triple test!!! I wonder if `valgrind` can tell me the maximum amount of memory allocated at any point in time? Yes: https://valgrind.org/docs/manual/ms-manual.html ## Fri 12 Jul 2024 16:44:40 AEST I just did a compile of the biggest compiler C file with the compiler. Looking at the AST file with a script: ``` Seen 165 vs. unseen 2264 ``` which means, when we read in an AST node that has children, it is more likely that we haven't seen the children than we have. And that means we should NOT seek back to the start of the file. Instead, we should start from where we currently are, for the next search, and exit if we hit the same seek point. I think. What I should do is add some counters in the `loadASTnode()` function, change the search algorithm and see which is better. ## Sat 13 Jul 2024 09:56:58 AEST OK, so I've built the 6809 compiler binaries with `build6809bins`. Now I'm running `smake` on all the compiler C files to generate assembly using the 6809 compiler binaries. It's as slow as hell! It's taken about 10 minutes and we are only up to line 145 in `cg6809.c`. I tried with a couple of small files yesterday and the code generator died. 
This time I'm going to try to compile everything, move the `*_*` files into a subdir, then do the same with the native compiler. Then I can compare the intermediate files to see if/where there are differences. I'll definitely have to do an execution profile on the code generator to see where the main bottlenecks are. After about 30-40 minutes, we are still only halfway through the code generation of `cg6809.c`! Compared to the native compiler, the only difference I see so far is: ``` < anda #0 < andb #0 --- > anda #255 > andb #255 ``` That's excellent as that is a very small difference and should not be too hard to track down. We are off to see the kids for lunch, so I can leave this running for several hours :-) ## Sat 13 Jul 2024 16:02:42 AEST Back from the lunch. Damn. `smake` removes all the temp files at the start, so everything is pretty much gone. But it also looked like all the code generation phases failed. So I guess I can do them all again... Looks like the parser ran out of memory on this one: `Unable to malloc in mkastnode() on line 684 of gen.c` but none else so far. ## Sat 13 Jul 2024 16:51:56 AEST Interesting. It looks like most of the code generation was successful, it's just that the code generator doesn't stop nicely. Maybe I should `exit(0)` at the end of `main()` instead of `return(0)`? I'm also seeing a few: ``` < ldd #65535 --- > ldd #-1 ``` which I think I've mentioned before. So we have to check/fix these things: - 65535 / -1 - and 0 / 255 - segfault at end of code generation Once they are fixed, we need to improve the speed of the code generation. Right now I'm thinking of building a temp file which just consists of a list of AST file offsets, one per node. To find AST node X, we multiply by 4 (or shift left 2), `fseek()` to that point in the file and read in the long offset at that point. Then we can `fseek()` into the AST file to get the node. The biggest AST file has 4549 nodes, so that means the file of offsets would be 18K long. 
Too big to keep as an in-memory array. ## Sun 14 Jul 2024 12:40:56 AEST I just did a debug run of the 6809 `cgen` on `parse.c` as it's a decent sized file. Yes, `main()` returns and we go off into never never land. I'm also trying to see which functions have the most instructions run. The results are: ``` 242540320 div16x16 87457045 _fread 34695064 _fgetstr 26953031 boolne 20521224 __mul 10534645 _loadASTnode 7960435 __minus 6814620 __divu 6150026 booleq 4794607 boolgt 4735523 boollt 4630436 __syscall 4594719 __not 3479025 boolult 2271540 _read ``` so, yes, I need to optimise `loadASTnode()`. ## Sun 14 Jul 2024 13:15:23 AEST Looking at the reason for the crash, it seems like something is touching the code in `crt0.s`. It should call `exit()` after `main()` returns, but I see: ``` _main+0156: RTS | -FHI-Z-- 00:00 8160 0000 0000 FD91 __code+0028: NEG <$54 | -FHI-Z-- 00:00 8160 0000 0000 FD91 ``` and `crt0.s` starts at $100. So I think I need to run the emulator with a write break around $128. Hmm, it seems to be one of the `fclose()` operations just before the `return(0)` at the end of `main()`. Weird! ## Mon 15 Jul 2024 07:47:03 AEST I just checked that `#65535` and `#-1` produce the same machine code, so that's something I can ignore. For now, I'll remove the two `fclose()`s and put in an `exit(0)` at the end of cgen.c. Done. I will defer the `and #0` / `and #255` thing as the compiler is too slow! I'll start writing the code to build the AST offset index file. ## Mon 15 Jul 2024 11:03:18 AEST It's done, and we pass the normal tests but not the triple test. However that's a good result for 45 minutes of work, and it seems to run faster. ## Mon 15 Jul 2024 12:09:33 AEST I think maybe my compiler has a bug with `sizeof()`. 
Part of the index building code is: ``` void mkASTidxfile(void) { struct ASTnode node; long offset, idxoff; while (1) { // Get the current offset offset = ftell(Infile); fprintf(stderr, "A offset %ld\n", offset); // Read in the next node, stop if none if (fread(&node, sizeof(struct ASTnode), 1, Infile)!=1) { break; } fprintf(stderr, "B offset %ld\n", offset); ... ``` and what I'm seeing when I run the L1 code generator: ``` A offset 133032 B offset -7491291578409943040 A offset 133120 B offset -7491311919375056895 A offset 133208 B offset 1 A offset 133308 B offset 0 ``` I think I'm reading in more than I should be when I'm reading in the ASTnode. No, actually it seems like it might be QBE. Looking at the output, we have: ``` C code ------ struct ASTnode node; long offset, idxoff; QBE code -------- %node =l alloc8 11 88 bytes in size %offset =l alloc8 1 ASM code -------- subq $40, %rsp ??? should be 96 at least movq %rdx, -8(%rbp) -8 is the offset position on the stack leaq -24(%rbp), %rdi -24 is the node position on the stack ``` This seems to mean that `node` is only 16 bytes below `offset` on the stack, even though it is 88 bytes in size. I've emailed the QBE list for ideas. Hmm. I made `node` a pointer and `malloc()`d it at the top of the function, then `free()`d it at the end. Now I pass the triple test. So yes it seems to be a QBE bug. No, it's my bug. I got this reply on the QBE list: > I believe alloc8 is alloc aligned on 8 byte boundary, not alloc 8*arg > bytes. So alloc8 11 allocates 11, not 88 bytes, etc. So I need to fix the QBE output to deal with this. TO FIX. 
## Mon 15 Jul 2024 15:13:00 AEST On the 6809 side, in `tree.c`, if I do this: ``` n->leftid= 0; n->midid= 0; n->rightid= 0; ``` then we get: ``` ldx 0,s ldd 12,s std 10,x ldx 0,s ldd 14,s std 12,x ldx 0,s ldd #0 std 16,x ``` But if the code is this: ``` n->leftid= n->midid= n->rightid= 0; ``` then we get: ``` ldx 0,s ldd #0 std 20,x std R0+0 <== Save the rvalue #0 ldd 0,s addd #18 tfr d,x std 0,x <== Should reload R0 ldd 0,s addd #16 tfr d,x std 0,x <== Should reload R0 ``` which is incorrect. TO FIX!!! Now I'm doing an `smake` on all the compiler files. It's definitely a lot faster! No crashes yet ... And it only took 16 minutes to compile all the files. Now to compare them. ## Mon 15 Jul 2024 15:37:11 AEST Wow. The only file that didn't compile was `gen.c` as we ran out of memory in the parser: `Unable to malloc in mkastnode() on line 684 of gen.c`. The only assembly file differences are the `#-1 / #65535` which is ignorable, and the `anda #0 / #255` problem which I must fix. So, if I can fix the latter issue and find a way to not run out of memory in the parser with `gen.c`, then I should be able to pass the triple test on the 6809 :-) ## Mon 15 Jul 2024 15:50:17 AEST I think the `and` problem is a bug in `printlocation()` with an int literal and 'e' or 'f' as the third argument. ## Tue 16 Jul 2024 09:21:25 AEST I think I found the problem. We are doing `Locn[l].intval & 0xffff` but `intval` is a long. That forces the compiler to widen the `0xffff`. But, on the 6809, this is a negative value, so it gets widened to `0xffffffff` not `0x0000ffff`. I'm trying a solution where I cast/save to an `int` variable first before I do the AND. Yes, that seems to work. I'm now running `smake` again, this time I'm converting the assembly files to object files. Then I can checksum them to see if they are identical. 
The results are excellent with `gen.c` the only one that didn't compile (parser runs out of memory): ``` 143856ed08f470c9bc5f4b842dcc27bd New/opt.o 143856ed08f470c9bc5f4b842dcc27bd opt.o 18e04acd5b8e0302e95fc3c9cddcdac5 New/tree.o 18e04acd5b8e0302e95fc3c9cddcdac5 tree.o 1d42a151ccf415e102ece78257297cd9 New/tstring.o 1d42a151ccf415e102ece78257297cd9 tstring.o 43b84fc5d30ea22ceac9a21795518fc3 decl.o 43b84fc5d30ea22ceac9a21795518fc3 New/decl.o 45a18eb804fdc0c75f3207482ad8678a detok.o 45a18eb804fdc0c75f3207482ad8678a New/detok.o 57d10f0978232603854a6e18bf386cba New/wcc.o 57d10f0978232603854a6e18bf386cba wcc.o 76ed2bc3c553568d16880dfdd02053e7 New/parse.o 76ed2bc3c553568d16880dfdd02053e7 parse.o 88bfe3920d8f08d527447fef2c24dc3b New/scan.o 88bfe3920d8f08d527447fef2c24dc3b scan.o 8c24c919c06532977a68472c709c5e22 cg6809.o 8c24c919c06532977a68472c709c5e22 New/cg6809.o 8e4fd9f9e9923c20432ec7dae85965c5 expr.o 8e4fd9f9e9923c20432ec7dae85965c5 New/expr.o 8ed5104a4b18a1eb8dea33c5faf6c8bd New/sym.o 8ed5104a4b18a1eb8dea33c5faf6c8bd sym.o 9d11b1336597eaa6bcdac3ade6eb13ab misc.o 9d11b1336597eaa6bcdac3ade6eb13ab New/misc.o a5fba53af6d4ca348554336db9455675 New/types.o a5fba53af6d4ca348554336db9455675 types.o beb8414b95be6f0de7494c21b16e1c53 New/stmt.o beb8414b95be6f0de7494c21b16e1c53 stmt.o c1e56e66055f7868ab20d13585a76eb0 cgen.o c1e56e66055f7868ab20d13585a76eb0 New/cgen.o ca8699919c901ed658c0ce5e0eb1d8e8 detree.o ca8699919c901ed658c0ce5e0eb1d8e8 New/detree.o d25a34d8dc2bb895b8c279d8946733c3 New/targ6809.o d25a34d8dc2bb895b8c279d8946733c3 targ6809.o deca10b552285f2de5c10e70547fd2a6 desym.o deca10b552285f2de5c10e70547fd2a6 New/desym.o ``` So if I can write a suitable script, I should be able to pass the triple test on the 6809 side :-) ## Tue 16 Jul 2024 10:47:01 AEST I've rewritten `build6809bins` to make the 6809 binaries and also make some front-end scripts which run the emulator on the respective binary, so we have `native` executables. 
We need this because the 6809 `wcc` will just run `cscan ...` not `emu6809 cscan ...`. But it's weird. `wcc` runs some of the phases but not all of them: ``` $ L1/wcc -m6809 -S -X -v targ6809.c Doing: cpp -nostdinc -isystem /usr/local/src/Cwj6809/include/6809 targ6809.c redirecting stdout to targ6809.c_cpp Doing: /usr/local/src/Cwj6809/L1/cscan redirecting stdin from targ6809.c_cpp redirecting stdout to targ6809.c_tok Doing: /usr/local/src/Cwj6809/L1/cparse6809 targ6809.c_sym targ6809.c_ast redirecting stdin from targ6809.c_tok ``` and no code generation phase. I think for now I'll write a Perl version of `wcc` so that I can get the triple test done. ## Tue 16 Jul 2024 11:35:05 AEST I've broken one of the long SWITCH statements in `gen.c` into two; hopefully this will allow the 6809 compiler to parse this without running out of memory. I've checked and we pass all the tests. `gen.c` now does compile using the L1 6809 compiler, and the resulting object file is identical to that made by the native compiler. I've had to put the SWITCH split in an `#ifdef` as the change stopped the QBE triple test from passing. Weird! ## Tue 16 Jul 2024 14:09:06 AEST I've written a Perl version of the `wcc` front-end, basically by transliterating it. It now goes into the `L1` directory. I've just modified `build6809bins` to build the `L2` binaries using the `L1` 6809 compiler binaries. So far the `L2` files that have been built have the same checksum as the ones in `L1`, but it's still going ... ## Tue 16 Jul 2024 14:25:05 AEST Oh, we came _sooo_ close! 
``` $ md5sum L?/_* | sort 0778e984e25d407d2067ac43d151d664 L2/_cgen6809 # Different e47a9ab1ed9095f1c4784247c72cb1f8 L1/_cgen6809 0caee9118cb7745eaf40970677897dbf L1/_detree 0caee9118cb7745eaf40970677897dbf L2/_detree 2d333482ad8b4a886b5b78a4a49f3bb5 L1/_detok 2d333482ad8b4a886b5b78a4a49f3bb5 L2/_detok d507bd89c0fc1439efe2dffc5d8edfe3 L1/_desym d507bd89c0fc1439efe2dffc5d8edfe3 L2/_desym e78da1f3003d87ca852f682adc4214e8 L1/_cscan e78da1f3003d87ca852f682adc4214e8 L2/_cscan e9c8b2c12ea5bd4f62091fafaae45971 L1/_cparse6809 e9c8b2c12ea5bd4f62091fafaae45971 L2/_cparse6809 ``` and that's because, at the linker phase: ``` cgen.c_o: Unknown symbol '_genglobstr'. cgen.c_o: Unknown symbol '_genglobsym'. cgen.c_o: Unknown symbol '_genpreamble'. cgen.c_o: Unknown symbol '_genAST'. cgen.c_o: Unknown symbol '_genpostamble'. gen.c_o: Unknown symbol '_genAST'. ``` Damn!! I'll build the asm files for the C files that make the code generator, using the native and the 6809 L1 compilers, and compare. ## Wed 17 Jul 2024 13:30:26 AEST I've spent the last day writing the Readme.md for the next part of the 'acwj' journey. So far about 7,000 words and a couple thousand more to go. I think the 6809 `gen.c` problem was that I forgot to do `-DSPLITSWITCH` when I compiled `gen.c`. Yes, now I have the same assembly except for the -1 / 65535 change. Let's try the triple test again! 
## Wed 17 Jul 2024 13:59:56 AEST OK, I think I've passed the 6809 triple test: ``` $ md5sum L1/_* L2/_* | sort 01c5120e56cb299bf0063a07e38ec2b9 L1/_cgen6809 01c5120e56cb299bf0063a07e38ec2b9 L2/_cgen6809 0caee9118cb7745eaf40970677897dbf L1/_detree 0caee9118cb7745eaf40970677897dbf L2/_detree 2d333482ad8b4a886b5b78a4a49f3bb5 L1/_detok 2d333482ad8b4a886b5b78a4a49f3bb5 L2/_detok d507bd89c0fc1439efe2dffc5d8edfe3 L1/_desym d507bd89c0fc1439efe2dffc5d8edfe3 L2/_desym e78da1f3003d87ca852f682adc4214e8 L1/_cscan e78da1f3003d87ca852f682adc4214e8 L2/_cscan e9c8b2c12ea5bd4f62091fafaae45971 L1/_cparse6809 e9c8b2c12ea5bd4f62091fafaae45971 L2/_cparse6809 ``` All the binaries' checksums match!! I still don't have the 6809 `wcc` binary working, so I'm relying on the Perl version. But I've been able to compile the rest of the compiler's code with itself. Yayy!!! ## Thu 18 Jul 2024 09:52:53 AEST I'm trying to work out why the 6809 `wcc` binary is failing. It runs the C preprocessor (a native x64 binary) OK. Then it forks and runs the 6809 `cscan` fine. Then it forks and runs the 6809 `cparse`. This runs and completes; then `wcc` crashes with an unknown page-0 op. Ah, I added an `exit(0)` before the final return which helps. Now it crashes running the peephole optimiser. ================================================ FILE: 64_6809_Target/docs/copt.1 ================================================ .de DS .nf .in +3 .sp .. .de DE .sp .in -3 .fi .. .TH copt 1 .SH NAME copt \- peephole optimizer .SH SYNOPSIS \fBcopt\fP [-d] \fIfile\fP ... .SH OPTIONS .TP .B \-\^d Turn on debug modus. Replacements of original patterns will be sent to stderr in the order of execution. .SH DESCRIPTION \fIcopt\fP is a general-purpose peephole optimizer. It reads code from its standard input and writes an improved version to its standard output. \fIcopt\fP reads the named files for its optimizations, which are encoded as follows: .DS ... = ... 
.DE Pattern matching uses literal string comparison, with these exceptions: ``%%'' matches the ``%'' character, and ``%'' followed by a digit matches everything up to the next occurrence of the next pattern character, though all occurrences of %\fIn\fP must denote the same string. For example, the pattern ``%1=%1.'' matches exactly those strings that begin with a string X, followed by a ``='' (the first), followed by a second occurrence of X, followed by a period. In this way, the input/output pattern .DS mov $%1,r%2 mov *r%2,r%2 = mov %1,r%2 .DE commands \fIcopt\fP to replace runs like .DS mov $_a,r3 mov *r3,r3 .DE with .DS mov _a,r3 .DE Note that a tab or newline can terminate a %\fIn\fP variable. .LP In the input pattern, you can use \fIregular expressions\fP to match input patterns and generate values for variables. The syntax is .DS %"\fIregexp\fP"\fIn\fP .DE where \fIregexp\fP is the (extended) POSIX regular expression and \fIn\fP is an \fIoptional\fP variable ident (as above). If the expression matches input, the matched pattern will be assigned to the variable. If there is at least one subexpression in \fIregexp\fP and a variable is specified, then the match of the \fIfirst\fP subexpression will be taken. Prefix and suffix (if any) will be skipped. Example: .DS \fIj%"."0 %"(.),"1%2\fP matches \fBjr c,l_label\fP and assigns '\fIr\fP' to %0, '\fIc\fP' to %1 and '\fIl_label\fP' to %2. It won't match jr nc,l_label. This can be used to output inverted jumps: \fIj%0 n%1,%2\fP will yield \fIjr nc,l_label\fP. .DE \fBImportant\fP: If setting a variable, you \fImust\fP use the \fIlast\fP occurrence of the variable in the input pattern, because copt matches input patterns in reverse order. .LP Occurrences of %L, %M or %N in the output pattern will be substituted by unique integers to allow creation of up to 3 labels. 
For example, the output pattern .DS sbc hl,de jr c,_unique_%L jr z,_unique_%M inc a ._unique_%L inc a ._unique_%M .DE could produce .DS sbc hl,de jr c,_unique_1 jr z,_unique_2 inc a ._unique_1 inc a ._unique_2 .DE .LP If the second part of a rule starts with the line \fI%once\fP, this rule will be "fired" only once. Example: .DS ---- rules ---- ---- source ---- .%0 .l_10 j%"r|p"2 %1 jp l_label = ... %activate jr l_10 .%0 ... j%2 %1 .l_label = ... %activate .%1 ---- result ---- = ... %%once jp l_10 .%0 ... .%1 .l10 %activate .l_label jr %0 ... = jp %0 .DE .LP If the output pattern starts with \fI%activate\fP it has to contain a valid rule that will be "activated" upon first match. \fIcopt\fP first evaluates the contained rule (i.e. replaces %\fIn\fP variables as usual) and then replaces the current rule with the contained one. At the same time, a flag is set that will cause a new pass through the source after the current pass finishes. Nested activations are allowed. Note that you have to duplicate all \fI%\fP characters with each nesting level. .LP Example rule Source Output --------------- ------------------ ----------------- .%0 jp z,l_label jp z,l_other jp %1 ... ... = ... ... %activate .l_label .l_label .%1 jp l_other jp l_other = ... ... %%activate ... ... jp%%%%0%0 .l_other .l_other = ... ... jp%%%%0%1 .LP You can activate several rules at once, simply by appending further rules, separated by an '%activate' line. .LP Blank lines and lines starting with \fI;;\fP in the first column that occur in front of a rule will be ignored. This allows you to comment the rule file and add blank lines between rules for better readability. ;; this is a comment .LP \fIcopt\fP compares each run of input patterns with the current input instruction and its predecessors. If no match is found, it advances to the next input instruction and tries again. 
Otherwise, it replaces the input instructions with the corresponding output patterns, pattern variables instantiated, and resumes its search with the \fIfirst\fP instruction of the replacement. \fIcopt\fP matches input patterns in reverse order to cascade optimizations without backing up. .SH BUGS Errors in optimization files are always possible. .SH SEE ALSO regex(7) .SH AUTHORS .TP .B \^Christian W. Fraser 1984 copt version 1.00 .TP .B \^DG 1999 Added out of memory checking and ANSI prototyping .TP .B \^Zrin Ziborski 2002 Added comment lines, %L-%N variables, %activate, regexp capability and %check ================================================ FILE: 64_6809_Target/expr.c ================================================
#include "defs.h"
#include "data.h"
#include "decl.h"
#include "expr.h"
#include "gen.h"
#include "misc.h"
#include "parse.h"
#include "sym.h"
#include "target.h"
#include "tree.h"
#include "types.h"

// Parsing of expressions
// Copyright (c) 2019 Warren Toomey, GPL3

// expression_list:
//        | expression
//        | expression ',' expression_list
//        ;

// Parse a list of zero or more comma-separated expressions and
// return an AST composed of A_GLUE nodes with the left-hand child
// being the sub-tree of previous expressions (or NULL) and the right-hand
// child being the next expression. Each A_GLUE node will have size field
// set to the number of expressions in the tree at this point. If no
// expressions are parsed, NULL is returned
struct ASTnode *expression_list(int endtoken) {
  struct ASTnode *tree = NULL;
  struct ASTnode *child = NULL;
  int exprcount = 0;

  // Loop until the end token
  while (Token.token != endtoken) {

    // Parse the next expression and increment the expression count
    child = binexpr(0);
    exprcount++;

    // Build an A_GLUE AST node with the previous tree as the left child
    // and the new expression as the right child. Store the expression count.
    tree = mkastnode(A_GLUE, P_NONE, NULL, tree, NULL, child, NULL,
		     exprcount);

    // Stop when we reach the end token
    if (Token.token == endtoken)
      break;

    // Must have a ',' at this point
    match(T_COMMA, ",");
  }

  // Return the tree of expressions
  return (tree);
}

// Recursively check a function call's arguments
// against the function's parameters. We take
// an AST subtree with the arguments and the pointer
// to the function's first parameter or local. We walk
// the AST tree and return a pointer to the next
// parameter to process.
struct symtable *check_arg_vs_param(struct ASTnode *tree,
				    struct symtable *param,
				    struct symtable *funcptr) {

  // No tree but there's a parameter, not enough args.
  // Otherwise, nothing to do.
  if (tree == NULL) {
    if (param != NULL && param->class == V_PARAM)
      fatal("Not enough arguments in function call A");
    return (NULL);
  }

  // If there's a left AST child, recurse to process it
  if (tree->left != NULL)
    param = check_arg_vs_param(tree->left, param, funcptr);

  // We've bottomed out of the recursion
  if (tree->right == NULL)
    fatal("Not enough arguments in function call B");

  if (param == NULL) {
    // If the function allows arbitrary number of arguments,
    // we can process this argument. Otherwise it's an error.
    if (funcptr->has_ellipsis) {
      // If the tree's type is P_CHAR, widen it to P_INT.
      // This is mainly for doing printf("%d", 'x');
      if (tree->right->type == P_CHAR) {
	tree->right =
	  mkastunary(A_WIDEN, P_INT, NULL, tree->right, NULL, 0);
	tree->rightid = tree->right->nodeid;
      }
      return (NULL);
    }
    fatal("Too many arguments in function call");
  }

  // Slightly dirty hack: change any INTLIT type to be the same
  // type as the function's parameter before we evaluate it.
  if (tree->right->op == A_INTLIT)
    tree->right->type = param->type;

  // Ensure the arg/param types are compatible.
  // Widen the argument if necessary. modify_type() returns
  // NULL on incompatible types, so test the result before
  // we read its nodeid.
  tree->right = modify_type(tree->right, param->type, param->ctype, 0);
  if (tree->right == NULL)
    fatal("Incompatible argument type in function call");
  tree->rightid = tree->right->nodeid;

  // Now return the next parameter for our caller to process.
  // Return NULL when we hit the first local, as they come
  // after all the parameters.
  if (param->next != NULL && param->next->class == V_LOCAL)
    return (NULL);
  return (param->next);
}

// Parse a function call and return its AST.
// The function's name is in Text and the current
// token is the '(' that follows it.
static struct ASTnode *funccall(void) {
  struct ASTnode *tree;
  struct symtable *funcptr;

  // Check that the identifier has been defined as a function,
  // then make a leaf node for it.
  if ((funcptr = findSymbol(Text, S_NOTATYPE, 0)) == NULL
      || funcptr->stype != S_FUNCTION) {
    fatals("Undeclared function", Text);
  }

  // Get the '('
  lparen();

  // Parse the argument expression list
  tree = expression_list(T_RPAREN);

  // Check type of each argument against the function's prototype
  check_arg_vs_param(tree, funcptr->member, funcptr);

  // Build the function call AST node. Store the
  // function's return type as this node's type.
  // Also record the function's symbol-id
  tree =
    mkastunary(A_FUNCCALL, funcptr->type, funcptr->ctype, tree, funcptr, 0);

  // Get the ')'
  rparen();
  return (tree);
}

// Parse the index into an array and return an AST tree for it.
// left is the already-parsed array or pointer expression.
static struct ASTnode *array_access(struct ASTnode *left) {
  struct ASTnode *right;

  // Check that the sub-tree is a pointer
  if (!ptrtype(left->type))
    fatal("Not an array or pointer");

  // Get the '['
  scan(&Token);

  // Parse the following expression
  right = binexpr(0);

  // Get the ']'
  match(T_RBRACKET, "]");

  // Ensure that this is of int type
  if (!inttype(right->type))
    fatal("Array index is not of integer type");

  // Make the left tree an rvalue
  left->rvalue = 1;

  // Scale the index by the size of the element's type
  right = modify_type(right, left->type, left->ctype, A_ADD);

  // Return an AST tree where the array's base has the offset added to it,
  // and dereference the element. Still an lvalue at this point.
  left =
    mkastnode(A_ADD, left->type, left->ctype, left, NULL, right, NULL, 0);
  left =
    mkastunary(A_DEREF, value_at(left->type), left->ctype, left, NULL, 0);
  return (left);
}

// Parse the member reference of a struct or union
// and return an AST tree for it. If withpointer is true,
// the access is through a pointer to the member.
static struct ASTnode *member_access(struct ASTnode *left, int withpointer) {
  struct ASTnode *right;
  struct symtable *typeptr;
  struct symtable *m;

  // Check that the left AST tree is a pointer to struct or union
  if (withpointer && left->type != pointer_to(P_STRUCT)
      && left->type != pointer_to(P_UNION))
    fatal("Expression is not a pointer to a struct/union");

  // Or, check that the left AST tree is a struct or union.
  // If so, change it from an A_IDENT to an A_ADDR so that
  // we get the base address, not the value at this address.
  if (!withpointer) {
    if (left->type == P_STRUCT || left->type == P_UNION)
      left->op = A_ADDR;
    else
      fatal("Expression is not a struct/union");
  }

  // Get the details of the composite type
  typeptr = left->ctype;

  // Skip the '.' or '->' token and get the member's name
  scan(&Token);
  ident();

  // Find the matching member's name in the type
  // Die if we can't find it
  for (m = typeptr->member; m != NULL; m = m->next)
    if (!strcmp(m->name, Text))
      break;
  if (m == NULL)
    fatals("No member found in struct/union: ", Text);

  // Make the left tree an rvalue
  left->rvalue = 1;

  // Build an A_INTLIT node with the offset. Use the
  // right int size that can be added to the address.
  right = mkastleaf(A_INTLIT, cgaddrint(), NULL, NULL, m->st_posn);

  // Add the member's offset to the base of the struct/union
  // and dereference it. Still an lvalue at this point
  left =
    mkastnode(A_ADD, pointer_to(m->type), m->ctype, left, NULL, right, NULL,
	      0);
  left = mkastunary(A_DEREF, m->type, m->ctype, left, NULL, 0);
  return (left);
}

// Parse a parenthesised expression and
// return an AST node representing it.
// ptp is the precedence of the token before the '('.
static struct ASTnode *paren_expression(int ptp) {
  struct ASTnode *n;
  int type = 0;
  struct symtable *ctype = NULL;

  // Beginning of a parenthesised expression, skip the '('.
  scan(&Token);

  // If the token after is a type identifier, this is a cast expression
  switch (Token.token) {
  case T_IDENT:
    // We have to see if the identifier matches a typedef.
    // If not, treat it as an expression.
    if (findtypedef(Text) == NULL) {
      n = binexpr(0);		// ptp is zero as expression inside ( )
      break;
    }
    // A typedef name: fall through and parse it as a cast
  case T_VOID:
  case T_CHAR:
  case T_INT:
  case T_LONG:
  case T_STRUCT:
  case T_UNION:
  case T_ENUM:
    // Get the type inside the parentheses
    type = parse_cast(&ctype);

    // Skip the closing ')' and then parse the following expression
    rparen();
    // Fall through to parse the expression being cast
  default:
    // If we parsed a cast, scan in the expression after it. We pass
    // in ptp as the cast doesn't change the expression's precedence.
    // If there was no cast, we are still inside the parentheses, so
    // the whole expression up to the ')' must be parsed: use zero,
    // exactly as the T_IDENT case above does.
    if (type == 0)
      n = binexpr(0);
    else
      n = binexpr(ptp);
  }

  // We now have at least an expression in n, and possibly a non-zero type
  // in type if there was a cast. Skip the closing ')' if there was no cast.
  if (type == 0)
    rparen();
  else
    // Otherwise, make a unary AST node for the cast
    n = mkastunary(A_CAST, type, ctype, n, NULL, 0);
  return (n);
}

// Parse a primary factor and return an
// AST node representing it.
static struct ASTnode *primary(int ptp) {
  struct ASTnode *n;
  struct symtable *varptr;
  int type = 0;
  int size, class, totalsize, prevsize;
  struct symtable *ctype;
  char *litval, *litend;

  switch (Token.token) {
  case T_STATIC:
  case T_EXTERN:
    fatal("Compiler doesn't support static or extern local declarations");
  case T_SIZEOF:
    // Skip the T_SIZEOF and ensure we have a left parenthesis
    scan(&Token);
    if (Token.token != T_LPAREN)
      fatal("Left parenthesis expected after sizeof");
    scan(&Token);

    // Get the type inside the parentheses
    type = parse_stars(parse_type(&ctype, &class));

    // Get the type's size
    size = typesize(type, ctype);
    rparen();

    // Make a leaf node int literal with the size
    return (mkastleaf(A_INTLIT, P_INT, NULL, NULL, size));

  case T_CHARLIT:
    // For a CHARLIT token, make a leaf AST node for it.
    // The break is required: without it we would fall into
    // the T_INTLIT case and replace this P_CHAR node by a P_INT one.
    n = mkastleaf(A_INTLIT, P_CHAR, NULL, NULL, Token.intvalue);
    break;

  case T_INTLIT:
    // For an INTLIT token, make a leaf AST node for it.
    n = mkastleaf(A_INTLIT, P_INT, NULL, NULL, Token.intvalue);
    break;

  case T_STRLIT:
    // For a STRLIT token, build the literal string and store in name
    totalsize = strlen(Text);
    litval = (char *) malloc(totalsize + 1);
    if (litval == NULL)
      fatal("Unable to malloc a string literal in primary()");
    strcpy(litval, Text);

    // For successive STRLIT tokens,
    // append their contents to litval
    while (1) {
      scan(&Peektoken);
      if (Peektoken.token != T_STRLIT)
	break;

      // Increment the total string size
      // while saving the previous size
      size = strlen(Text);
      prevsize = totalsize;
      totalsize += size;

      // Allocate new memory with this total size
      litval = (char *) realloc(litval, totalsize + 1);
      if (litval == NULL)
	fatal("Unable to realloc a string literal in primary()");

      // Find the current string's end
      litend = litval + prevsize;

      // and copy the new literal to the end
      strcpy(litend, Text);
      scan(&Token);		// To skip it properly
    }

    // Now make a leaf AST node for it. id is the string's label.
    n = mkastleaf(A_STRLIT, pointer_to(P_CHAR), NULL, NULL, 0);
    n->name = litval;
    break;

  case T_IDENT:
    // See if this identifier exists as a symbol. For arrays, set rvalue to 1.
    if ((varptr = findSymbol(Text, S_NOTATYPE, 0)) == NULL) {
      fatals("Unknown variable or function", Text);
    }

    switch (varptr->stype) {
    case S_ENUMVAL:
      // If the identifier matches an enum value,
      // return an A_INTLIT node with the value
      n = mkastleaf(A_INTLIT, P_INT, NULL, NULL, varptr->st_posn);
      break;
    case S_VARIABLE:
      n = mkastleaf(A_IDENT, varptr->type, varptr->ctype, varptr, 0);
      break;
    case S_ARRAY:
      n = mkastleaf(A_ADDR, varptr->type, varptr->ctype, varptr, 0);
      n->rvalue = 1;
      break;
    case S_FUNCTION:
      // Function call, see if the next token is a left parenthesis
      scan(&Token);
      if (Token.token != T_LPAREN)
	fatals("Function name used without parentheses", Text);
      return (funccall());
    default:
      fatals("Identifier not a scalar or array variable", Text);
    }

    break;

  case T_LPAREN:
    return (paren_expression(ptp));

  default:
    fatals("Expecting a primary expression, got token",
	   Tstring[Token.token]);
  }

  // Scan in the next token and return the leaf node
  scan(&Token);
  return (n);
}

// Parse a postfix expression and
return // an AST node representing it. The // identifier is already in Text. static struct ASTnode *postfix(int ptp) { struct ASTnode *n; // Get the primary expression n = primary(ptp); // Loop until there are no more postfix operators while (1) { switch (Token.token) { case T_LBRACKET: // An array reference n = array_access(n); break; case T_DOT: // Access into a struct or union n = member_access(n, 0); break; case T_ARROW: // Pointer access into a struct or union n = member_access(n, 1); break; case T_INC: // Post-increment: skip over the token if (n->rvalue == 1) fatal("Cannot ++ on rvalue"); scan(&Token); // Can't do it twice if (n->op == A_POSTINC || n->op == A_POSTDEC) fatal("Cannot ++ and/or -- more than once"); // and change the AST operation n->op = A_POSTINC; break; case T_DEC: // Post-decrement: skip over the token if (n->rvalue == 1) fatal("Cannot -- on rvalue"); scan(&Token); // Can't do it twice if (n->op == A_POSTINC || n->op == A_POSTDEC) fatal("Cannot ++ and/or -- more than once"); // and change the AST operation n->op = A_POSTDEC; break; default: return (n); } } return (NULL); // Keep -Wall happy } // Convert a binary operator token into a binary AST operation. // We rely on a 1:1 mapping from token to AST operation static int binastop(int tokentype) { if (tokentype > T_EOF && tokentype <= T_MOD) return (tokentype); fatals("Syntax error, token", Tstring[tokentype]); return (0); // Keep -Wall happy } // Return true if a token is right-associative, // false otherwise. static int rightassoc(int tokentype) { if (tokentype >= T_ASSIGN && tokentype <= T_ASSLASH) return (1); return (0); } // Operator precedence for each token. 
Must // match up with the order of tokens in defs.h static int OpPrec[] = { 0, 10, 10, // T_EOF, T_ASSIGN, T_ASPLUS, 10, 10, // T_ASMINUS, T_ASSTAR, 10, 10, // T_ASSLASH, T_ASMOD, 15, // T_QUESTION, 20, 30, // T_LOGOR, T_LOGAND 40, 50, 60, // T_OR, T_XOR, T_AMPER 70, 70, // T_EQ, T_NE 80, 80, 80, 80, // T_LT, T_GT, T_LE, T_GE 90, 90, // T_LSHIFT, T_RSHIFT 100, 100, // T_PLUS, T_MINUS 110, 110, 110 // T_STAR, T_SLASH, T_MOD }; // Check that we have a binary operator and // return its precedence. static int op_precedence(int tokentype) { int prec; if (tokentype > T_MOD) fatals("Token with no precedence in op_precedence:", Tstring[tokentype]); prec = OpPrec[tokentype]; if (prec == 0) fatals("Syntax error, token", Tstring[tokentype]); return (prec); } // prefix_expression: postfix_expression // | '*' prefix_expression // | '&' prefix_expression // | '-' prefix_expression // | '++' prefix_expression // | '--' prefix_expression // ; // Parse a prefix expression and return // a sub-tree representing it. static struct ASTnode *prefix(int ptp) { struct ASTnode *tree = NULL; switch (Token.token) { case T_AMPER: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(ptp); // Ensure that it's an identifier if (tree->op != A_IDENT) fatal("& operator must be followed by an identifier"); // Prevent '&' being performed on an array if (tree->sym->stype == S_ARRAY) fatal("& operator cannot be performed on an array"); // Now change the operator to A_ADDR and the type to // a pointer to the original type. Mark the identifier // as needing a real memory address tree->op = A_ADDR; tree->type = pointer_to(tree->type); tree->sym->st_hasaddr = 1; break; case T_STAR: // Get the next token and parse it // recursively as a prefix expression. 
// Make it an rvalue scan(&Token); tree = prefix(ptp); tree->rvalue = 1; // Ensure the tree's type is a pointer if (!ptrtype(tree->type)) fatal("* operator must be followed by an expression of pointer type"); // Prepend an A_DEREF operation to the tree tree = mkastunary(A_DEREF, value_at(tree->type), tree->ctype, tree, NULL, 0); break; case T_MINUS: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(ptp); // Prepend a A_NEGATE operation to the tree and // make the child an rvalue. Because chars are unsigned, // also widen this if needed to int so that it's signed tree->rvalue = 1; if (tree->type == P_CHAR) tree->type = P_INT; tree = mkastunary(A_NEGATE, tree->type, tree->ctype, tree, NULL, 0); break; case T_INVERT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(ptp); // Prepend a A_INVERT operation to the tree and // make the child an rvalue. tree->rvalue = 1; tree = mkastunary(A_INVERT, tree->type, tree->ctype, tree, NULL, 0); break; case T_LOGNOT: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(ptp); // Prepend a A_LOGNOT operation to the tree and // make the child an rvalue. 
tree->rvalue = 1; tree = mkastunary(A_LOGNOT, tree->type, tree->ctype, tree, NULL, 0); break; case T_INC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(ptp); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("++ operator must be followed by an identifier"); // Prepend an A_PREINC operation to the tree tree = mkastunary(A_PREINC, tree->type, tree->ctype, tree, NULL, 0); break; case T_DEC: // Get the next token and parse it // recursively as a prefix expression scan(&Token); tree = prefix(ptp); // For now, ensure it's an identifier if (tree->op != A_IDENT) fatal("-- operator must be followed by an identifier"); // Prepend an A_PREDEC operation to the tree tree = mkastunary(A_PREDEC, tree->type, tree->ctype, tree, NULL, 0); break; default: tree = postfix(ptp); } return (tree); } // Return an AST tree whose root is a binary operator. // Parameter ptp is the previous token's precedence. struct ASTnode *binexpr(int ptp) { struct ASTnode *left, *right; struct ASTnode *ltemp, *rtemp; int ASTop; int tokentype; // Get the tree on the left. // Fetch the next token at the same time. 
left = prefix(ptp); // If we hit one of several terminating tokens, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON || tokentype == T_RBRACE) { left->rvalue = 1; return (left); } // While the precedence of this token is more than that of the // previous token precedence, or it's right associative and // equal to the previous token's precedence while ((op_precedence(tokentype) > ptp) || (rightassoc(tokentype) && op_precedence(tokentype) == ptp)) { // Fetch in the next integer literal scan(&Token); // Recursively call binexpr() with the // precedence of our token to build a sub-tree right = binexpr(OpPrec[tokentype]); // Determine the operation to be performed on the sub-trees ASTop = binastop(tokentype); switch (ASTop) { case A_TERNARY: // Ensure we have a ':' token, scan in the expression after it match(T_COLON, ":"); ltemp = binexpr(0); // Force the ternary condition to be boolean if // it wasn't a boolean operation if (left->op != A_LOGOR && left->op != A_LOGAND && (left->op < A_EQ || left->op > A_GE)) left = mkastunary(A_TOBOOL, left->type, left->ctype, left, NULL, 0); // Build and return the AST for this statement. Use the middle // expression's type as the return type. We should also // consider the third expression's type. 
return (mkastnode (A_TERNARY, right->type, right->ctype, left, right, ltemp, NULL, 0)); case A_ASSIGN: // Assignment // Make the right tree into an rvalue right->rvalue = 1; // If the right tree is an A_INTLIT and the left type is P_CHAR, // and the INTLIT is in the range 0 to 255, change the right's // type to PCHAR to ensure we can do the assignment if ((right->op == A_INTLIT) && (left->type == P_CHAR) && (right->a_intvalue >= 0) && (right->a_intvalue < 256)) right->type = P_CHAR; // Ensure the right's type matches the left right = modify_type(right, left->type, left->ctype, 0); if (right == NULL) fatal("Incompatible expression in assignment"); // Make an assignment AST tree. However, switch // left and right around, so that the right expression's // code will be generated before the left expression ltemp = left; left = right; right = ltemp; break; default: // We are not doing a ternary or assignment, so both trees should // be rvalues. Convert both trees into rvalue if they are lvalue trees left->rvalue = 1; right->rvalue = 1; // If the right tree is an A_INTLIT and the left type is P_CHAR, // and the INTLIT is in the range 0 to 255, change the right's // type to PCHAR to ensure we can do the assignment if ((right->op == A_INTLIT) && (left->type == P_CHAR) && (right->a_intvalue >= 0) && (right->a_intvalue < 256)) right->type = P_CHAR; // Ensure the two types are compatible by trying // to modify each tree to match the other's type. ltemp = modify_type(left, right->type, right->ctype, ASTop); rtemp = modify_type(right, left->type, left->ctype, ASTop); if (ltemp == NULL && rtemp == NULL) fatal("Incompatible types in binary expression"); if (ltemp != NULL) left = ltemp; if (rtemp != NULL) right = rtemp; } // Join that sub-tree with ours. Convert the token // into an AST operation at the same time. 
left = mkastnode(binastop(tokentype), left->type, left->ctype, left, NULL, right, NULL, 0); // Some operators produce an int result regardless of their operands switch (binastop(tokentype)) { case A_LOGOR: case A_LOGAND: case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: left->type = P_INT; } // Update the details of the current token. // If we hit a terminating token, return just the left node tokentype = Token.token; if (tokentype == T_SEMI || tokentype == T_RPAREN || tokentype == T_RBRACKET || tokentype == T_COMMA || tokentype == T_COLON || tokentype == T_RBRACE) { left->rvalue = 1; return (left); } } // Return the tree we have when the precedence // is the same or lower left->rvalue = 1; return (left); } ================================================ FILE: 64_6809_Target/expr.h ================================================ /* expr.c */ struct ASTnode *expression_list(int endtoken); struct symtable *check_arg_vs_param(struct ASTnode *tree, struct symtable *param, struct symtable *funcptr); struct ASTnode *binexpr(int ptp); ================================================ FILE: 64_6809_Target/gen.c ================================================ #include "defs.h" #include "data.h" #include "cg.h" #include "decl.h" #include "gen.h" #include "misc.h" #include "target.h" #include "tree.h" #include "types.h" // Generic code generator // Copyright (c) 2019 Warren Toomey, GPL3 int genAST(struct ASTnode *n, int iflabel, int looptoplabel, int loopendlabel, int parentASTop); // Generate and return a new label number static int labelid = 1; int genlabel(void) { return (labelid++); } void genfreeregs(int keepreg) { cgfreeallregs(keepreg); } static void update_line(struct ASTnode *n) { // Output the line into the assembly if we've // changed the line number in the AST node if (n->linenum != 0 && Line != n->linenum) { Line = n->linenum; cglinenum(Line); } } // Generate the code for an IF statement // and an optional ELSE clause. 
static int genIF(struct ASTnode *n, struct ASTnode *nleft, struct ASTnode *nmid, struct ASTnode *nright, int looptoplabel, int loopendlabel) { int Lfalse, Lend; // Generate two labels: one for the // false compound statement, and one // for the end of the overall IF statement. // When there is no ELSE clause, Lfalse _is_ // the ending label! Lfalse = genlabel(); if (nright) Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. genAST(nleft, Lfalse, looptoplabel, loopendlabel, n->op); genfreeregs(NOREG); // Generate the true compound statement genAST(nmid, NOLABEL, looptoplabel, loopendlabel, n->op); genfreeregs(NOREG); // If there is an optional ELSE clause, // generate the jump to skip to the end if (nright) cgjump(Lend); // Now the false label cglabel(Lfalse); // Optional ELSE clause: generate the // false compound statement and the // end label if (nright) { genAST(nright, NOLABEL, looptoplabel, loopendlabel, n->op); genfreeregs(NOREG); cglabel(Lend); } return (NOREG); } // Generate the code for a WHILE statement static int genWHILE(struct ASTnode *n, struct ASTnode *nleft, struct ASTnode *nright) { int Lstart, Lend; // Generate the start and end labels // and output the start label Lstart = genlabel(); Lend = genlabel(); cglabel(Lstart); // Generate the condition code followed // by a jump to the end label. 
genAST(nleft, Lend, Lstart, Lend, n->op); genfreeregs(NOREG); // Generate the compound statement for the body genAST(nright, NOLABEL, Lstart, Lend, n->op); genfreeregs(NOREG); // Finally output the jump back to the condition, // and the end label cgjump(Lstart); cglabel(Lend); return (NOREG); } // Generate the code for a SWITCH statement static int genSWITCH(struct ASTnode *n, int looptoplabel) { int *caseval, *caselabel; int Ljumptop, Lend; int i, reg, defaultlabel = 0, casecount = 0; int rightid; struct ASTnode *nleft; struct ASTnode *nright; struct ASTnode *c, *cleft; // Load in the sub-nodes nleft=loadASTnode(n->leftid,0); nright=loadASTnode(n->rightid,0); // Create arrays for the case values and associated labels. // Ensure that we have at least one position in each array. caseval = (int *) malloc((n->a_intvalue + 1) * sizeof(int)); caselabel = (int *) malloc((n->a_intvalue + 1) * sizeof(int)); // Generate labels for the top of the jump table, and the // end of the switch statement. Set a default label for // the end of the switch, in case we don't have a default. Ljumptop = genlabel(); Lend = genlabel(); defaultlabel = Lend; // Build the case value and label arrays for (i = 0, c = nright; c != NULL;) { // Get a label for this case. Store it // and the case value in the arrays. // Record if it is the default case. caselabel[i] = genlabel(); caseval[i] = c->a_intvalue; if (c->op == A_DEFAULT) defaultlabel = caselabel[i]; else casecount++; i++; rightid= c->rightid; // Don't free c if it is nright, // as genAST() will do this if (c!=nright) freeASTnode(c); c = loadASTnode(rightid,0); } // Output the code to calculate the switch condition reg = genAST(nleft, NOLABEL, NOLABEL, NOLABEL, 0); cgjump(Ljumptop); genfreeregs(reg); // Output the switch code or switch table cgswitch(reg, casecount, Ljumptop, caselabel, caseval, defaultlabel); // Generate the code for each case for (i = 0, c = nright; c != NULL; ) { // Generate the case code. 
Pass in the end label for the breaks. // If case has no body, we will fall into the following body. cglabel(caselabel[i]); if (c->leftid) { // Looptoplabel is here so we can 'continue', e.g. // while (...) { // switch(...) { // case ...: ... // continue; // } // } cleft= loadASTnode(c->leftid,0); genAST(cleft, NOLABEL, looptoplabel, Lend, 0); freeASTnode(cleft); } genfreeregs(NOREG); i++; rightid= c->rightid; freeASTnode(c); c= loadASTnode(rightid,0); } // Output the end label cglabel(Lend); free(caseval); free(caselabel); freeASTnode(nleft); return (NOREG); } // Generate the code for an A_LOGOR operation. // If the parent AST node is an A_IF, A_WHILE, A_TERNARY // or A_LOGAND, jump to the label if false. // If A_LOGOR, jump to the label if true. // Otherwise set a register to 1 or 0 and return it. static int gen_logor(struct ASTnode *n, struct ASTnode *nleft, struct ASTnode *nright, int parentASTop, int label) { int Ltrue, Lfalse, Lend; int reg; int type; int makebool = 0; // Generate labels if (parentASTop == A_LOGOR) { Ltrue = label; Lfalse = genlabel(); } else { Ltrue = genlabel(); Lfalse = label; } Lend = genlabel(); // Mark if we need to generate a boolean value if (parentASTop != A_IF && parentASTop != A_WHILE && parentASTop != A_TERNARY && parentASTop != A_LOGAND && parentASTop != A_LOGOR) { makebool = 1; Ltrue = genlabel(); Lfalse = genlabel(); } // Generate the code for the left expression. // The genAST() could do the jump and return NOREG. // But if we get a register back, do our own jump. reg = genAST(nleft, Ltrue, NOLABEL, NOLABEL, A_LOGOR); if (reg != NOREG) { type = nleft->type; cgboolean(reg, A_LOGOR, Ltrue, type); genfreeregs(NOREG); } // Generate the code for the right expression // with the same logic as for the left expression. reg = genAST(nright, Ltrue, NOLABEL, NOLABEL, A_LOGOR); if (reg != NOREG) { type = nright->type; cgboolean(reg, A_LOGOR, Ltrue, type); genfreeregs(reg); } // The result is false. 
// If there is no need to make a boolean, stop now if (makebool == 0) { // Jump to the false label if it was provided if (label == Lfalse) { cgjump(Lfalse); cglabel(Ltrue); } return (NOREG); } // We do need to make a boolean and we didn't jump type = n->type; cglabel(Lfalse); reg = cgloadboolean(reg, 0, type); cgjump(Lend); cglabel(Ltrue); reg = cgloadboolean(reg, 1, type); cglabel(Lend); return (reg); } // Generate the code for an A_LOGAND operation. // If the parent AST node is an A_IF, A_WHILE, A_TERNARY // or A_LOGAND, jump to the label if false. // If A_LOGOR, jump to the label if true. // Otherwise set a register to 1 or 0 and return it. static int gen_logand(struct ASTnode *n, struct ASTnode *nleft, struct ASTnode *nright, int parentASTop, int label) { int Ltrue, Lfalse, Lend; int reg; int type; int makebool = 0; // Generate labels if (parentASTop == A_LOGOR) { Ltrue = label; Lfalse = genlabel(); } else { Ltrue = genlabel(); Lfalse = label; } Lend = genlabel(); // Mark if we need to generate a boolean value if (parentASTop != A_IF && parentASTop != A_WHILE && parentASTop != A_TERNARY && parentASTop != A_LOGAND && parentASTop != A_LOGOR) { makebool = 1; Ltrue = genlabel(); Lfalse = genlabel(); } // Generate the code for the left expression. // The genAST() could do the jump and return NOREG. // But if we get a register back, do our own jump. reg = genAST(nleft, Lfalse, NOLABEL, NOLABEL, A_LOGAND); if (reg != NOREG) { type = nleft->type; cgboolean(reg, A_LOGAND, Lfalse, type); genfreeregs(NOREG); } // Generate the code for the right expression // with the same logic as for the left expression. reg = genAST(nright, Lfalse, NOLABEL, NOLABEL, A_LOGAND); if (reg != NOREG) { type = nright->type; cgboolean(reg, A_LOGAND, Lfalse, type); genfreeregs(reg); } // The result is true. 
// If there is no need to make a boolean, stop now if (makebool == 0) { // Jump to the label if we were given it if (label == Ltrue) { cgjump(Ltrue); cglabel(Lfalse); } return (NOREG); } // We do need to make a boolean and we didn't jump type = n->type; cglabel(Ltrue); reg = cgloadboolean(reg, 1, type); cgjump(Lend); cglabel(Lfalse); reg = cgloadboolean(reg, 0, type); cglabel(Lend); return (reg); } // Generate the code to calculate the arguments of a // function call, then call the function with these // arguments. Return the register that holds // the function's return value. static int gen_funccall(struct ASTnode *n) { struct ASTnode *gluetree; int i = 0, numargs = 0; int reg; int leftid; int *arglist = NULL; int *typelist = NULL; struct ASTnode *nleft; struct ASTnode *glueright; // Load in the sub-nodes nleft=loadASTnode(n->leftid,0); // Determine the actual number of arguments // Allocate memory to hold the list of argument temporaries. // We need to walk the list of arguments to determine the size // XXX We need to free here for (i = 0, gluetree = nleft; gluetree != NULL; ) { numargs++; i++; leftid= gluetree->leftid; // Don't free gluetree if it is nleft, // as genAST() will do this for us if (gluetree != nleft) freeASTnode(gluetree); gluetree = loadASTnode(leftid,0); } if (i != 0) { arglist = (int *) malloc(i * sizeof(int)); if (arglist == NULL) fatal("malloc failed in gen_funccall"); typelist = (int *) malloc(i * sizeof(int)); if (typelist == NULL) fatal("malloc failed in gen_funccall"); } // If there is a list of arguments, walk this list // from the last argument (right-hand child) to the first. 
// Also cache the type of each expression for (i = 0, gluetree = nleft; gluetree != NULL; gluetree = loadASTnode(leftid,0)) { // Calculate the expression's value glueright= loadASTnode(gluetree->rightid,0); arglist[i] = genAST(glueright, NOLABEL, NOLABEL, NOLABEL, gluetree->op); typelist[i++] = glueright->type; freeASTnode(glueright); leftid= gluetree->leftid; freeASTnode(gluetree); } // Call the function and return its result reg= cgcall(n->sym, numargs, arglist, typelist); free(arglist); free(typelist); return(reg); } // Generate code for a ternary expression static int gen_ternary(struct ASTnode *n, struct ASTnode *nleft, struct ASTnode *nmid, struct ASTnode *nright) { int Lfalse, Lend; int reg, expreg; // Load in the sub-nodes nleft=loadASTnode(n->leftid,0); nmid=loadASTnode(n->midid,0); nright=loadASTnode(n->rightid,0); // Generate two labels: one for the // false expression, and one for the // end of the overall expression Lfalse = genlabel(); Lend = genlabel(); // Generate the condition code followed // by a jump to the false label. genAST(nleft, Lfalse, NOLABEL, NOLABEL, n->op); // genfreeregs(NOREG); // Get a register to hold the result of the two expressions reg = cgallocreg(nleft->type); // Generate the true expression and the false label. // Move the expression result into the known register. expreg = genAST(nmid, NOLABEL, NOLABEL, NOLABEL, n->op); cgmove(expreg, reg, nmid->type); cgfreereg(expreg); cgjump(Lend); cglabel(Lfalse); // Generate the false expression and the end label. // Move the expression result into the known register. expreg = genAST(nright, NOLABEL, NOLABEL, NOLABEL, n->op); cgmove(expreg, reg, nright->type); cgfreereg(expreg); cglabel(Lend); return (reg); } // Given an AST, an optional label, and the AST op // of the parent, generate assembly code recursively. // Return the register id with the tree's final value. 
int genAST(struct ASTnode *n, int iflabel, int looptoplabel, int loopendlabel, int parentASTop) { int leftreg = NOREG, rightreg = NOREG; int type = P_VOID; int id; int special = 0; struct ASTnode *nleft, *nmid, *nright; // Empty tree, do nothing if (n == NULL) return (NOREG); // Load in the sub-nodes nleft=loadASTnode(n->leftid,0); nmid=loadASTnode(n->midid,0); nright=loadASTnode(n->rightid,0); // Update the line number in the output update_line(n); // We have some specific AST node handling at the top // so that we don't evaluate the child sub-trees immediately switch (n->op) { case A_IF: special = 1; leftreg = genIF(n, nleft, nmid, nright, looptoplabel, loopendlabel); break; case A_WHILE: special = 1; leftreg = genWHILE(n, nleft, nright); break; case A_SWITCH: special = 1; leftreg = genSWITCH(n, looptoplabel); break; case A_FUNCCALL: special = 1; leftreg = gen_funccall(n); break; case A_TERNARY: special = 1; leftreg = gen_ternary(n, nleft, nmid, nright); break; case A_LOGOR: special = 1; leftreg = gen_logor(n, nleft, nright, parentASTop, iflabel); break; case A_LOGAND: special = 1; leftreg = gen_logand(n, nleft, nright, parentASTop, iflabel); break; case A_GLUE: // Do each child statement, and free the // registers after each child special = 1; if (nleft != NULL) genAST(nleft, iflabel, looptoplabel, loopendlabel, n->op); genfreeregs(NOREG); if (nright != NULL) genAST(nright, iflabel, looptoplabel, loopendlabel, n->op); genfreeregs(NOREG); leftreg = NOREG; break; case A_FUNCTION: // Generate the function's preamble before the code // in the child sub-tree. Ugly: use function's name // as the Infilename for fatal messages. 
special = 1; Infilename = n->sym->name; cgfuncpreamble(n->sym); genAST(nleft, NOLABEL, NOLABEL, NOLABEL, n->op); cgfuncpostamble(n->sym); leftreg = NOREG; } if (!special) { // General AST node handling below // Get the left and right sub-tree values if (nleft) { type = nleft->type; leftreg = genAST(nleft, NOLABEL, looptoplabel, loopendlabel, n->op); } if (nright) { type = nright->type; rightreg = genAST(nright, NOLABEL, looptoplabel, loopendlabel, n->op); } switch (n->op) { case A_ADD: leftreg = cgadd(leftreg, rightreg, type); break; case A_SUBTRACT: leftreg = cgsub(leftreg, rightreg, type); break; case A_MULTIPLY: leftreg = cgmul(leftreg, rightreg, type); break; case A_DIVIDE: leftreg = cgdiv(leftreg, rightreg, type); break; case A_MOD: leftreg = cgmod(leftreg, rightreg, type); break; case A_AND: leftreg = cgand(leftreg, rightreg, type); break; case A_OR: leftreg = cgor(leftreg, rightreg, type); break; case A_XOR: leftreg = cgxor(leftreg, rightreg, type); break; case A_LSHIFT: leftreg = cgshl(leftreg, rightreg, type); break; case A_RSHIFT: leftreg = cgshr(leftreg, rightreg, type); break; case A_EQ: case A_NE: case A_LT: case A_GT: case A_LE: case A_GE: // If the parent AST node is an A_IF, A_WHILE, A_TERNARY, // A_LOGAND, generate a compare followed by a jump if the // comparison is false. If A_LOGOR, jump if true. Otherwise, // compare registers and set one to 1 or 0 based on the comparison. 
if (parentASTop == A_IF || parentASTop == A_WHILE || parentASTop == A_TERNARY || parentASTop == A_LOGAND || parentASTop == A_LOGOR) { leftreg = cgcompare_and_jump (n->op, parentASTop, leftreg, rightreg, iflabel, nleft->type); } else { leftreg = cgcompare_and_set(n->op, leftreg, rightreg, nleft->type); } break; case A_INTLIT: leftreg = cgloadint(n->a_intvalue, n->type); break; case A_STRLIT: // Output the actual literal id = genglobstr(n->name); leftreg = cgloadglobstr(id); break; case A_IDENT: // Load our value if we are an rvalue // or we are being dereferenced if (n->rvalue || parentASTop == A_DEREF) { leftreg = cgloadvar(n->sym, n->op); } else leftreg = NOREG; break; case A_ASPLUS: case A_ASMINUS: case A_ASSTAR: case A_ASSLASH: case A_ASMOD: case A_ASSIGN: // For the '+=' and friends operators, generate suitable code // and get the register with the result. Then take the left child, // make it the right child so that we can fall into the assignment code. switch (n->op) { case A_ASPLUS: leftreg = cgadd(leftreg, rightreg, type); nright = nleft; break; case A_ASMINUS: leftreg = cgsub(leftreg, rightreg, type); nright = nleft; break; case A_ASSTAR: leftreg = cgmul(leftreg, rightreg, type); nright = nleft; break; case A_ASSLASH: leftreg = cgdiv(leftreg, rightreg, type); nright = nleft; break; case A_ASMOD: leftreg = cgmod(leftreg, rightreg, type); nright = nleft; break; } // Now into the assignment code // Are we assigning to an identifier or through a pointer? 
switch (nright->op) { case A_IDENT: if (nright->sym->class == V_GLOBAL || nright->sym->class == V_EXTERN || nright->sym->class == V_STATIC) { leftreg = cgstorglob(leftreg, nright->sym); } else { leftreg = cgstorlocal(leftreg, nright->sym); } break; case A_DEREF: leftreg = cgstorderef(leftreg, rightreg, nright->type); break; default: fatald("Can't A_ASSIGN in genAST(), op", n->op); } break; case A_WIDEN: // Widen the child's type to the parent's type leftreg = cgwiden(leftreg, nleft->type, n->type); break; case A_RETURN: cgreturn(leftreg, Functionid); leftreg = NOREG; break; case A_ADDR: // If we have a symbol, get its address. Otherwise, // the left register already has the address because // it's a member access if (n->sym != NULL) leftreg = cgaddress(n->sym); break; #ifdef SPLITSWITCH } // I've broken the switch statement into two, so that // the 6809 version of the compiler can parse this file // without running out of room. switch (n->op) { #endif case A_DEREF: // If we are an rvalue, dereference to get the value we point at, // otherwise leave it for A_ASSIGN to store through the pointer if (n->rvalue) leftreg = cgderef(leftreg, nleft->type); break; case A_SCALE: // Small optimisation: use shift if the // scale value is a known power of two switch (n->a_size) { case 2: leftreg = cgshlconst(leftreg, 1, type); break; case 4: leftreg = cgshlconst(leftreg, 2, type); break; case 8: leftreg = cgshlconst(leftreg, 3, type); break; default: // Load a register with the size and // multiply the leftreg by this size rightreg = cgloadint(n->a_size, P_INT); leftreg = cgmul(leftreg, rightreg, type); } // On some architectures the pointer type is // different to the int type. 
Widen the result // if we are scaling what will become an address offset if (cgprimsize(n->type) > cgprimsize(type)) leftreg = cgwiden(leftreg, type, n->type); break; case A_POSTINC: case A_POSTDEC: // Load and decrement the variable's value into a register // and post increment/decrement it leftreg = cgloadvar(n->sym, n->op); break; case A_PREINC: case A_PREDEC: // Load and decrement the variable's value into a register // and pre increment/decrement it leftreg = cgloadvar(nleft->sym, n->op); break; case A_NEGATE: leftreg = cgnegate(leftreg, type); break; case A_INVERT: leftreg = cginvert(leftreg, type); break; case A_LOGNOT: leftreg = cglognot(leftreg, type); break; case A_TOBOOL: // If the parent AST node is an IF, WHILE, TERNARY, // LOGAND or LOGOR operation, generate a compare // followed by a jump. Otherwise, set the register // to 0 or 1 based on it's zeroeness or non-zeroeness leftreg = cgboolean(leftreg, parentASTop, iflabel, type); break; case A_BREAK: cgjump(loopendlabel); leftreg = NOREG; break; case A_CONTINUE: cgjump(looptoplabel); leftreg = NOREG; break; case A_CAST: leftreg = cgcast(leftreg, nleft->type, n->type); break; #ifndef SPLITSWITCH default: fatald("Unknown AST operator", n->op); #endif } } // End of if (!special) // Free the AST sub trees before returning // Sometimes n->right has been set to n->left // e.g. by the +=, -= etc. operations. if (nright != nleft) freeASTnode(nright); freeASTnode(nleft); freeASTnode(nmid); return (leftreg); } void genpreamble() { cgpreamble(); } void genpostamble() { cgpostamble(); } void genglobsym(struct symtable *node) { cgglobsym(node); } // Generate a global string. 
int genglobstr(char *strvalue) { int l = genlabel(); cglitseg(); cgglobstr(l, strvalue); cgtextseg(); return (l); } ================================================ FILE: 64_6809_Target/gen.h ================================================ /* gen.c */ int genlabel(void); int genAST(struct ASTnode *n, int iflabel, int looptoplabel, int loopendlabel, int parentASTop); void genpreamble(); void genpostamble(); void genfreeregs(int keepreg); void genglobsym(struct symtable *node); int genglobstr(char *strvalue); ================================================ FILE: 64_6809_Target/include/6809/ctype.h ================================================ #ifndef _CTYPE_H_ # define _CTYPE_H_ int isalnum(int c); int isalpha(int c); int iscntrl(int c); int isdigit(int c); int isgraph(int c); int islower(int c); int isprint(int c); int ispunct(int c); int isspace(int c); int isupper(int c); int isxdigit(int c); int isascii(int c); int isblank(int c); int toupper(int c); int tolower(int c); #endif // _CTYPE_H_ ================================================ FILE: 64_6809_Target/include/6809/errno.h ================================================ #ifndef __ERRNO_H #define __ERRNO_H /* * Error codes */ #define EPERM 1 /* Not owner */ #define ENOENT 2 /* No such file or directory */ #define ESRCH 3 /* No such process */ #define EINTR 4 /* Interrupted System Call */ #define EIO 5 /* I/O Error */ #define ENXIO 6 /* No such device or address */ #define E2BIG 7 /* Arg list too long */ #define ENOEXEC 8 /* Exec format error */ #define EBADF 9 /* Bad file number */ #define ECHILD 10 /* No children */ #define EAGAIN 11 /* No more processes */ #define ENOMEM 12 /* Not enough core */ #define EACCES 13 /* Permission denied */ #define EFAULT 14 /* Bad address */ #define ENOTBLK 15 /* Block device required */ #define EBUSY 16 /* Mount device busy */ #define EEXIST 17 /* File exists */ #define EXDEV 18 /* Cross-device link */ #define ENODEV 19 /* No such device */ #define ENOTDIR 20 /* Not a 
directory */ #define EISDIR 21 /* Is a directory */ #define EINVAL 22 /* Invalid argument */ #define ENFILE 23 /* File table overflow */ #define EMFILE 24 /* Too many open files */ #define ENOTTY 25 /* Not a typewriter */ #define ETXTBSY 26 /* Text file busy */ #define EFBIG 27 /* File too large */ #define ENOSPC 28 /* No space left on device */ #define ESPIPE 29 /* Illegal seek */ #define EROFS 30 /* Read-only file system */ #define EMLINK 31 /* Too many links */ #define EPIPE 32 /* Broken pipe */ /* math software */ #define EDOM 33 /* Argument too large */ #define ERANGE 34 /* Result too large */ #define EWOULDBLOCK EAGAIN /* Operation would block */ #define ENOLOCK 35 /* Lock table full */ #define ENOTEMPTY 36 /* Directory is not empty */ #define ENAMETOOLONG 37 /* File name too long */ #define EAFNOSUPPORT 38 /* Address family not supported */ #define EALREADY 39 /* Operation already in progress */ #define EADDRINUSE 40 /* Address already in use */ #define EADDRNOTAVAIL 41 /* Address not available */ #define ENOSYS 42 /* No such system call */ #define EPFNOSUPPORT 43 /* Protocol not supported */ #define EOPNOTSUPP 44 /* Operation not supported on transport endpoint */ #define ECONNRESET 45 /* Connection reset by peer */ #define ENETDOWN 46 /* Network is down */ #define EMSGSIZE 47 /* Message too long */ #define ETIMEDOUT 48 /* Connection timed out */ #define ECONNREFUSED 49 /* Connection refused */ #define EHOSTUNREACH 50 /* No route to host */ #define EHOSTDOWN 51 /* Host is down */ #define ENETUNREACH 52 /* Network is unreachable */ #define ENOTCONN 53 /* Transport endpoint is not connected */ #define EINPROGRESS 54 /* Operation now in progress */ #define ESHUTDOWN 55 /* Cannot send after transport endpoint shutdown */ #define EISCONN 56 /* Socket is already connected */ #define EDESTADDRREQ 57 /* No destination address specified */ #define ENOBUFS 58 /* No buffer space available */ #define EPROTONOSUPPORT 59 /* Protocol not supported */ #define __ERRORS 56 
extern int sys_nerr; extern char *sys_errlist[]; extern int errno; #endif ================================================ FILE: 64_6809_Target/include/6809/fcntl.h ================================================ #ifndef _FCNTL_H_ # define _FCNTL_H_ #define O_RDONLY 00 #define O_WRONLY 01 #define O_RDWR 02 #define O_CREAT 256 int open(char *pathname, int flags, ...); #endif // _FCNTL_H_ ================================================ FILE: 64_6809_Target/include/6809/stddef.h ================================================ #ifndef _STDDEF_H_ # define _STDDEF_H_ #ifndef NULL # define NULL (void *)0 #endif typedef int size_t; #endif //_STDDEF_H_ ================================================ FILE: 64_6809_Target/include/6809/stdint.h ================================================ #ifndef __STDINT_H #define __STDINT_H /* C types */ typedef char uint8_t; typedef short int16_t; typedef long int32_t; #endif ================================================ FILE: 64_6809_Target/include/6809/stdio.h ================================================ #ifndef _STDIO_H_ # define _STDIO_H_ #include #ifndef NULL # define NULL (void *)0 #endif #ifndef EOF # define EOF (-1) #endif #define __MODE_ERR 0x200 /* Error status */ struct __stdio_file { char *bufpos; /* the next byte to write to or read from */ char *bufread; /* the end of data returned by last read() */ char *bufwrite; /* highest address writable by macro */ char *bufstart; /* the start of the buffer */ char *bufend; /* the end of the buffer; ie the byte after the last malloc()ed byte */ int fd; /* the file descriptor associated with the stream */ int mode; char unbuf ; /* The buffer for 'unbuffered' streams */ char unbuf1 ; char unbuf2 ; char unbuf3 ; char unbuf4 ; char unbuf5 ; char unbuf6 ; char unbuf7 ; struct __stdio_file * next; }; typedef struct __stdio_file FILE; #define ferror(fp) (((fp)->mode&__MODE_ERR) != 0) #define getc(stream) fgetc(stream) FILE *__fopen(char *__path, int __fd, FILE * __stream, char 
*__mode); #define fopen(__file, __mode) __fopen((__file), -1, (FILE*)0, (__mode)) #define freopen(__file, __mode, __fp) __fopen((__file), -1, (__fp), (__mode)) size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream); size_t fwrite(void *ptr, size_t size, size_t nmemb, FILE *stream); int fclose(FILE *stream); int printf(char *format, ...); int fprintf(FILE *stream, char *format, ...); int sprintf(char *str, char *format, ...); int snprintf(char *str, size_t size, char *format, ...); int fgetc(FILE *stream); int getc(FILE *stream); int getchar(void); int fputc(int c, FILE *stream); int fputs(char *s, FILE *stream); int putc(int c, FILE *stream); int putchar(int c); int puts(char *s); FILE *popen(char *command, char *type); int pclose(FILE *stream); char *fgets(char *s, int size, FILE *stream); int rename(char *path, char *newpath); int fseek(FILE *stream, long offset, int whence); long ftell(FILE *stream); FILE *tmpfile(void); #ifndef SEEK_SET #define SEEK_SET 0 #define SEEK_CUR 1 #define SEEK_END 2 #endif extern FILE stdin[1]; extern FILE stdout[1]; extern FILE stderr[1]; #endif // _STDIO_H_ ================================================ FILE: 64_6809_Target/include/6809/stdlib.h ================================================ #ifndef _STDLIB_H_ # define _STDLIB_H_ void exit(int status); void _Exit(int status); void *malloc(int size); void free(void *ptr); void *calloc(int nmemb, int size); void *realloc(void *ptr, int size); int system(char *command); int abs(int j); #endif // _STDLIB_H_ ================================================ FILE: 64_6809_Target/include/6809/string.h ================================================ #ifndef _STRING_H_ # define _STRING_H_ char *strdup(char *s); char *strchr(char *s, int c); char *strrchr(char *s, int c); int strcmp(char *s1, char *s2); int strncmp(char *s1, char *s2, size_t n); char *strcpy(char *dst, char *src); char *strerror(int errnum); int strlen(char *s); char *strcat(char *dst, char *src); #endif // 
_STRING_H_ ================================================ FILE: 64_6809_Target/include/6809/sys/stat.h ================================================ #ifndef _SYS_STAT #define _SYS_STAT int mkdir(char *pathname, int mode); #endif ================================================ FILE: 64_6809_Target/include/6809/sys/types.h ================================================ #ifndef _SYS_TYPES #define _SYS_TYPES #include typedef int32_t off_t; #endif ================================================ FILE: 64_6809_Target/include/6809/sys/wait.h ================================================ #ifndef _SYS_WAIT.H #define _SYS_WAIT.H int waitpid(int pid, int * wstatus, int options); #define WEXITSTATUS(status) (((status) & 0xff00) >> 8) #define WIFEXITED(status) (((status) & 0xff) == 0) #endif ================================================ FILE: 64_6809_Target/include/6809/unistd.h ================================================ #ifndef _UNISTD_H_ # define _UNISTD_H_ void _exit(int status); int unlink(char *pathname); int fork(void); int execvp(char *file, char **argv); int getopt(int argc, char **argv, char *optstring); int close(int fd); int read(int fd, void *buf, int len); int write(int fd, void *buf, int len); int link(char *path, char *path2); int chdir(char *path); int fchdir(int fd); int rmdir(char *pathname); int access(char *pathname, int mode); extern char *optarg; extern int optind, opterr, optopt; #define F_OK 0 /* Test for existence. 
*/ #define F_ULOCK 0 #define F_LOCK 1 #define F_TLOCK 2 #define F_TEST 3 #endif // _UNISTD_H_ ================================================ FILE: 64_6809_Target/include/qbe/ctype.h ================================================ #ifndef _CTYPE_H_ # define _CTYPE_H_ int isalnum(int c); int isalpha(int c); int iscntrl(int c); int isdigit(int c); int isgraph(int c); int islower(int c); int isprint(int c); int ispunct(int c); int isspace(int c); int isupper(int c); int isxdigit(int c); int isascii(int c); int isblank(int c); int toupper(int c); int tolower(int c); #endif // _CTYPE_H_ ================================================ FILE: 64_6809_Target/include/qbe/errno.h ================================================ #ifndef _ERRNO_H_ # define _ERRNO_H_ int * __errno_location(void); #define errno (* __errno_location()) #endif // _ERRNO_H_ ================================================ FILE: 64_6809_Target/include/qbe/fcntl.h ================================================ #ifndef _FCNTL_H_ # define _FCNTL_H_ #define O_RDONLY 00 #define O_WRONLY 01 #define O_RDWR 02 int open(char *pathname, int flags, ...); #endif // _FCNTL_H_ ================================================ FILE: 64_6809_Target/include/qbe/stddef.h ================================================ #ifndef _STDDEF_H_ # define _STDDEF_H_ #ifndef NULL # define NULL (void *)0 #endif typedef long size_t; #endif //_STDDEF_H_ ================================================ FILE: 64_6809_Target/include/qbe/stdio.h ================================================ #ifndef _STDIO_H_ # define _STDIO_H_ #include #ifndef NULL # define NULL (void *)0 #endif #ifndef EOF # define EOF (-1) #endif // This FILE definition will do for now typedef char * FILE; FILE *fopen(char *pathname, char *mode); size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream); size_t fwrite(void *ptr, size_t size, size_t nmemb, FILE *stream); int fclose(FILE *stream); int printf(char *format, ...); int fprintf(FILE *stream, 
char *format, ...); int sprintf(char *str, char *format, ...); int snprintf(char *str, size_t size, char *format, ...); int fgetc(FILE *stream); int getc(FILE *stream); int getchar(void); int fputc(int c, FILE *stream); int fputs(char *s, FILE *stream); int putc(int c, FILE *stream); int putchar(int c); int puts(char *s); FILE *popen(char *command, char *type); int pclose(FILE *stream); int ferror(FILE *stream); FILE *freopen(char *pathname, char *mode, FILE *stream); char *fgets(char *s, int size, FILE *stream); long ftell(FILE *stream); int fseek(FILE *stream, long offset, int whence); FILE *tmpfile(void); #ifndef SEEK_SET #define SEEK_SET 0 #define SEEK_CUR 1 #define SEEK_END 2 #endif extern FILE *stdin; extern FILE *stdout; extern FILE *stderr; #endif // _STDIO_H_ ================================================ FILE: 64_6809_Target/include/qbe/stdlib.h ================================================ #ifndef _STDLIB_H_ # define _STDLIB_H_ void exit(int status); void _Exit(int status); void *malloc(int size); void free(void *ptr); void *calloc(int nmemb, int size); void *realloc(void *ptr, int size); int system(char *command); int abs(int j); #endif // _STDLIB_H_ ================================================ FILE: 64_6809_Target/include/qbe/string.h ================================================ #ifndef _STRING_H_ # define _STRING_H_ char *strdup(char *s); char *strchr(char *s, int c); char *strrchr(char *s, int c); int strcmp(char *s1, char *s2); int strncmp(char *s1, char *s2, size_t n); char *strcpy(char *dst, char *src); char *strerror(int errnum); int strlen(char *s); char *strcat(char *dst, char *src); #endif // _STRING_H_ ================================================ FILE: 64_6809_Target/include/qbe/sys/wait.h ================================================ #ifndef _SYS_WAIT.H #define _SYS_WAIT.H int waitpid(int pid, int * wstatus, int options); #define __WEXITSTATUS(status) (((status) & 0xff00) >> 8) #define __WTERMSIG(status) ((status) & 0x7f) 
#define __WSTOPSIG(status) __WEXITSTATUS(status) #define __WIFEXITED(status) (__WTERMSIG(status) == 0) #define WEXITSTATUS(status) __WEXITSTATUS (status) #define WTERMSIG(status) __WTERMSIG (status) #define WIFEXITED(status) __WIFEXITED (status) #endif ================================================ FILE: 64_6809_Target/include/qbe/unistd.h ================================================ #ifndef _UNISTD_H_ # define _UNISTD_H_ void _exit(int status); int unlink(char *pathname); int fork(void); int execvp(char *file, char **argv); int getopt(int argc, char **argv, char *optstring); extern char *optarg; extern int optind, opterr, optopt; #endif // _UNISTD_H_ ================================================ FILE: 64_6809_Target/lib/6809/Makefile ================================================ crt0.o: crt0.s as6809 crt0.s ================================================ FILE: 64_6809_Target/lib/6809/crt0.s ================================================ .dp .export R0 .export R1 .export R2 .export R3 .export R4 .export R5 .export R6 .export R7 R0: .word 0 .word 0 R1: .word 0 .word 0 R2: .word 0 .word 0 R3: .word 0 .word 0 R4: .word 0 .word 0 R5: .word 0 .word 0 R6: .word 0 .word 0 R7: .word 0 .word 0 .code start: .word 0x80A8 .byte 0x04 ; 6809 .byte 0x00 ; 6309 not needed .byte >__code ; page to load at .byte 0 ; no hints .word __code_size ; text size info .word __data_size ; data size info .word __bss_size ; bss size info .byte 16 ; entry relative to start .word __end ; to help the emulator :-) .byte 0 ; ZP not used on 6809 jmp start2 .code start2: ldd #0 std @zero ldd #1 std @one ; Set up _environ ldd 4,s std _environ jsr ___stdio_init_vars jsr _main ; return and exit jsr _exit .data .export _environ _environ: .word 0 ================================================ FILE: 64_6809_Target/lib/6809/rules.6809 ================================================ # No need to add 0 to D #1 addd #0 = ==== # No need to in/decrease S by 0 #2 leas -0,s = ==== #3 leas 0,s = 
==== # Avoid D reloads #4 std %1 ldd %1 = std %1 ==== #5 std %1 ; ldd %1 = std %1 ==== #6 std %1 ; ; ldd %1 = std %1 ==== # Avoid B reloads #7 stb %1 ldb %1 = stb %1 ==== #8 stb %1 ; ldb %1 = stb %1 ==== #9 stb %1 ; ; ldb %1 = stb %1 ==== # Avoid using a temporary #10 std R%1 ldd %2 addd R%1 = addd %2 ==== # Use indexed addressing #11 ldx %1,s ldd 0,x = ldd [%1,s] ==== #12 ldx %1,s std 0,x = std [%1,s] ==== #13 ldx %1,s ldb 0,x = ldb [%1,s] ==== #14 ldx %1,s stb 0,x = stb [%1,s] ==== # Some more indexed addressing #15 ldx %1,s ldd %2 std 0,x = ldd %2 std [%1,s] ==== # Skip some D to X transfers #16 ldd %1 tfr d,x = ldx %1 ==== # Get rid of silly jumps #17 bra L%1 ; L%1: = ; L%1: ==== #18 bra L%1 L%1: = ; L%1: ==== # Skip some X to D transfers #19 tfr x,d std %1 = stx %1 ==== # Lose some silly X index operations #20 ldx #%1 ldd %2 std 0,x = ldd %2 std %1 ==== #21 ldd #%1 addd #%2 tfr d,x ldd %3 std 0,x = ldd %3 std %1+%2 ==== #22 ldx #%1 ldd 0,x = ldd %1 ==== #23 ldd #%1 addd #%2 tfr d,x ldd 0,x = ldd %1+%2 ==== # Simplify some X offsets #24 ldd %1 addd #%2 tfr d,x ldd %3 std 0,x = ldx %1 ldd %3 std %2,x ==== #25 ldd %1 addd #%2 tfr d,x ldd 0,x = ldx %1 ldd %2,x ==== # Optmise << 1 #26 pshs d ldd #1 lbsr __shl = aslb rola ==== ================================================ FILE: 64_6809_Target/misc.c ================================================ #include #include #include "defs.h" #include "data.h" #include "parse.h" // Miscellaneous functions // Copyright (c) 2019 Warren Toomey, GPL3 // Print out fatal messages void fatal(char *s) { fprintf(stderr, "%s on line %d of %s\n", s, Line, Infilename); exit(1); } void fatals(char *s1, char *s2) { fprintf(stderr, "%s:%s on line %d of %s\n", s1, s2, Line, Infilename); exit(1); } void fatald(char *s, int d) { fprintf(stderr, "%s:%d on line %d of %s\n", s, d, Line, Infilename); exit(1); } void fatalc(char *s, int c) { fprintf(stderr, "%s:%c on line %d of %s\n", s, c, Line, Infilename); exit(1); } // Read at most count-1 
characters from the // f FILE and store them in the s buffer. // Terminate the s buffer with a NUL. // Return NULL if unable to read or an EOF. // Else, return the original s pointer pointer. char *fgetstr(char *s, size_t count, FILE * f) { size_t i = count; size_t err; char ch; char *ret = s; while (i-- != 0) { err= fread(&ch, 1, 1, f); if (err!=1) { if (s == ret) return(NULL); break; } *s++ = ch; if (ch == 0) break; } *s = 0; return(ferror(f) ? (char *) NULL : ret); } ================================================ FILE: 64_6809_Target/misc.h ================================================ /* misc.c */ void fatal(char *s); void fatals(char *s1, char *s2); void fatald(char *s, int d); void fatalc(char *s, int c); char *fgetstr(char *s, size_t count, FILE * f); ================================================ FILE: 64_6809_Target/opt.c ================================================ #include "defs.h" #include "data.h" #include "tree.h" // AST Tree Optimisation Code // Copyright (c) 2019 Warren Toomey, GPL3 // Fold an AST tree with a unary operator // and one INTLIT children. Return either // the original tree or a new leaf node. static struct ASTnode *fold1(struct ASTnode *n) { int val; // Get the child value. Do the // operation if recognised. // Return the new leaf node. val = n->left->a_intvalue; switch (n->op) { case A_WIDEN: break; case A_INVERT: val = ~val; break; case A_LOGNOT: val = !val; break; case A_SCALE: val = val * n->a_intvalue; break; default: return (n); } // Return a leaf node with the new value return (mkastleaf(A_INTLIT, n->type, NULL, NULL, val)); } // Fold an AST tree with a binary operator // and two A_INTLIT children. Return either // the original tree or a new leaf node. static struct ASTnode *fold2(struct ASTnode *n) { int val, leftval, rightval; // Get the values from each child leftval = n->left->a_intvalue; rightval = n->right->a_intvalue; // Perform some of the binary operations. 
// For any AST op we can't do, return // the original tree. switch (n->op) { case A_ADD: val = leftval + rightval; break; case A_SUBTRACT: val = leftval - rightval; break; case A_MULTIPLY: val = leftval * rightval; break; case A_DIVIDE: // Don't try to divide by zero. if (rightval == 0) return (n); val = leftval / rightval; break; case A_AND: val = leftval & rightval; break; case A_OR: val = leftval | rightval; break; case A_XOR: val = leftval ^ rightval; break; case A_LSHIFT: val = leftval << rightval; break; case A_RSHIFT: val = leftval >> rightval; break; default: return (n); } // Return a leaf node with the new value return (mkastleaf(A_INTLIT, n->type, NULL, NULL, val)); } // Optimise an AST tree with // a depth-first node traversal struct ASTnode *optimise(struct ASTnode *n) { if (n == NULL) return (NULL); // Optimise the left child then the right n->left = optimise(n->left); if (n->left!=NULL) n->leftid= n->left->nodeid; n->right = optimise(n->right); if (n->right!=NULL) n->rightid= n->right->nodeid; // Fold literal constants: // If both children are A_INTLITs, do a fold2() if (n->left && n->left->op == A_INTLIT) { if (n->right && n->right->op == A_INTLIT) n = fold2(n); else // If only the left is A_INTLIT, do a fold1() n = fold1(n); } // Return the possibly modified tree return (n); } ================================================ FILE: 64_6809_Target/opt.h ================================================ /* opt.c */ struct ASTnode *optimise(struct ASTnode *n); ================================================ FILE: 64_6809_Target/parse.c ================================================ #include "defs.h" #define extern_ #include "data.h" #undef extern_ #include "decl.h" #include "gen.h" #include "misc.h" #include "sym.h" #include "tree.h" // C parser front-end. 
// Copyright (c) 2023 Warren Toomey, GPL3 #ifdef DEBUG void print_token(struct token *t) { switch (t->token) { case T_INTLIT: case T_CHARLIT: printf("%02X: %d\n", t->token, t->intvalue); break; case T_STRLIT: printf("%02X: \"%s\"\n", t->token, Text); break; case T_FILENAME: printf("%02X: filename \"%s\"\n", t->token, Text); break; case T_LINENUM: printf("%02X: linenum %d\n", t->token, t->intvalue); break; case T_IDENT: printf("%02X: %s\n", t->token, Text); break; default: printf("%02X: %s\n", t->token, Tstring[t->token]); } } #endif // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. int scan(struct token *t) { int intvalue; // If we have a lookahead token, return this token if (Peektoken.token != 0) { t->token = Peektoken.token; t->tokstr = Peektoken.tokstr; t->intvalue = Peektoken.intvalue; Peektoken.token = 0; #ifdef DEBUG print_token(t); #endif return (1); } // We loop because we don't want to return // T_FILENAME or T_LINENUM tokens while (1) { t->token = fgetc(stdin); if (t->token == EOF) { t->token = T_EOF; break; } switch (t->token) { case T_LINENUM: fread(&Line, sizeof(int), 1, stdin); continue; case T_FILENAME: if (Infilename!=NULL) free(Infilename); fgetstr(Text, TEXTLEN + 1, stdin); Infilename= strdup(Text); continue; case T_INTLIT: case T_CHARLIT: fread(&intvalue, sizeof(int), 1, stdin); t->intvalue = intvalue; break; case T_STRLIT: case T_IDENT: fgetstr(Text, TEXTLEN + 1, stdin); break; } #ifdef DEBUG print_token(t); #endif return (1); } return(0); } // Ensure that the current token is t, // and fetch the next token. 
Otherwise // throw an error void match(int t, char *what) { if (Token.token == t) { scan(&Token); } else { fatals("Expected", what); } } // Match a semicolon and fetch the next token void semi(void) { match(T_SEMI, ";"); } // Match a left brace and fetch the next token void lbrace(void) { match(T_LBRACE, "{"); } // Match a right brace and fetch the next token void rbrace(void) { match(T_RBRACE, "}"); } // Match a left parenthesis and fetch the next token void lparen(void) { match(T_LPAREN, "("); } // Match a right parenthesis and fetch the next token void rparen(void) { match(T_RPAREN, ")"); } // Match an identifier and fetch the next token void ident(void) { match(T_IDENT, "identifier"); } // Match a comma and fetch the next token void comma(void) { match(T_COMMA, "comma"); } // Seralise an AST to Outfile void serialiseAST(struct ASTnode *tree) { if (tree==NULL) return; // Dump this node fwrite(tree, sizeof(struct ASTnode), 1, Outfile); // Dump any literal string/identifier if (tree->name!=NULL) { fputs(tree->name, Outfile); fputc(0, Outfile); } // Dump all the children serialiseAST(tree->left); serialiseAST(tree->mid); serialiseAST(tree->right); } // Parse the token stream on stdin // and output serialised ASTs and // a symbol table. 
int main(int argc, char **argv) { if (argc <2 || argc >3) { fprintf(stderr, "Usage: %s symfile \n", argv[0]); fprintf(stderr, " ASTs on stdout if astfile not specified\n"); exit(1); } if (argc==3) { Outfile= fopen(argv[2], "w"); if (Outfile == NULL) { fprintf(stderr, "Can't create %s\n", argv[2]); exit(1); } } else Outfile= stdout; Symfile= fopen(argv[1], "w+"); if (Symfile == NULL) { fprintf(stderr, "Can't create %s\n", argv[1]); exit(1); } freeSymtable(); // Clear the symbol table scan(&Token); // Get the first token from the input Peektoken.token = 0; // and set there is no lookahead token global_declarations(); // Parse the global declarations flushSymtable(); // Flush any residual symbols fclose(Symfile); exit(0); return(0); } ================================================ FILE: 64_6809_Target/parse.h ================================================ /* parse.c */ int scan(struct token *t); void match(int t, char *what); void semi(void); void lbrace(void); void rbrace(void); void lparen(void); void rparen(void); void ident(void); void comma(void); void serialiseAST(struct ASTnode *tree); ================================================ FILE: 64_6809_Target/scan.c ================================================ #include "defs.h" #include "misc.h" // Lexical scanning // Copyright (c) 2019 Warren Toomey, GPL3 int Line = 1; // Current line number int Newlinenum=0; // Flag: has line number changed int Linestart = 1; // True if at start of a line int Putback = '\n'; // Character put back by scanner char *Infilename; // Name of file we are parsing int Newfilename=0; // Flag: has filename changed FILE *Infile; // Input file struct struct token Token; // Last token scanned struct token Peektoken; // A look-ahead token char Text[TEXTLEN + 1]; // Last identifier scanned int scan(struct token *t, int nocpp); // Return the position of character c // in string s, or -1 if c not found static int chrpos(char *s, int c) { int i; for (i = 0; s[i] != '\0'; i++) if (s[i] == 
(char) c) return (i); return (-1); } // Get the next character from the input file. static int next(void) { int c, l; if (Putback) { // Use the character put c = Putback; // back if there is one Putback = 0; return (c); } c = fgetc(Infile); // Read from input file while (Linestart && c == '#') { // We've hit a pre-processor statement Linestart = 0; // No longer at the start of the line scan(&Token, 1); // Get the line number into l if (Token.token != T_INTLIT) fatals("Expecting pre-processor line number, got:", Text); l = Token.intvalue; scan(&Token, 1); // Get the filename in Text if (Token.token != T_STRLIT) fatals("Expecting pre-processor file name, got:", Text); if (Text[0] != '<') { // If this is a real filename if (strcmp(Text, Infilename)) { // and not the one we have now free(Infilename); Infilename = strdup(Text); // save it. Then update the line num Newfilename=1; } Line = l; Newlinenum=1; } while ((c = fgetc(Infile)) != '\n'); // Skip to the end of the line c = fgetc(Infile); // and get the next character Linestart = 1; // Now back at the start of the line } Linestart = 0; // No longer at the start of the line if ('\n' == c) { Line++; // Increment line count Newlinenum=1; Linestart = 1; // Now back at the start of the line } return (c); } // Put back an unwanted character static void putback(int c) { Putback = c; } // Skip past input that we don't need to deal with, // i.e. whitespace, newlines. Return the first // character we do need to deal with. 
static int skip(void) {
  int c;

  // Keep reading while we see a space, tab,
  // newline, carriage return or form feed
  for (c = next();
       ' ' == c || '\t' == c || '\n' == c || '\r' == c || '\f' == c;
       c = next());
  return (c);
}

// Read in a hexadecimal constant from the input.
// It is a fatal error if there are no hex digits
// or if the value doesn't fit into eight bits.
// Return the value, 0 to 255.
static int hexchar(void) {
  int c, h, n = 0, f = 0;

  // Loop getting characters
  while (isxdigit(c = next())) {
    // Convert from char to int value
    h = chrpos("0123456789abcdef", tolower(c));

    // Add to running hex value
    n = n * 16 + h;

    // Check the range as we go. A long run of digits would
    // otherwise overflow n, and a wrapped-around value could
    // get past a check made only at the end.
    if (n > 255)
      fatal("value out of range after '\\x'");
    f = 1;
  }

  // We hit a non-hex character, put it back
  putback(c);

  // Flag tells us we never saw any hex characters
  if (!f)
    fatal("missing digits after '\\x'");
  return (n);
}

// Return the next character from a character
// or string literal. Also return, through slash,
// if this character was quoted with a preceding
// backslash: 1 if it was, 0 if not.
static int scanch(int *slash) {
  int i, c, c2;

  // Get the next input character. If it isn't
  // a backslash it's just an ordinary old character!
  *slash=0;
  c = next();
  if (c != '\\')
    return (c);

  // Interpret the metacharacter that follows the backslash
  *slash=1;
  switch (c = next()) {
    case 'a':
      return ('\a');
    case 'b':
      return ('\b');
    case 'f':
      return ('\f');
    case 'n':
      return ('\n');
    case 'r':
      return ('\r');
    case 't':
      return ('\t');
    case 'v':
      return ('\v');
    case '\\':
      return ('\\');
    case '"':
      return ('"');
    case '\'':
      return ('\'');

      // Deal with octal constants by reading in
      // characters until we hit a non-octal digit.
      // Build up the octal value in c2 and count
      // # digits in i. Permit only 3 octal digits.
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
      for (i = c2 = 0; isdigit(c) && c < '8'; c = next()) {
	if (++i > 3)
	  break;
	c2 = c2 * 8 + (c - '0');
      }

      putback(c);		// Put back the first non-octal char
      return (c2);
    case 'x':
      return (hexchar());
    default:
      fatalc("unknown escape sequence", c);
  }
  return (c);			// Not reached if fatalc() exits
}

// Scan and return an integer literal
// value from the input file.
static int scanint(int c) { int k, val = 0, radix = 10; // Assume the radix is 10, but if it starts with 0 if (c == '0') { // and the next character is 'x', it's radix 16 if ((c = next()) == 'x') { radix = 16; c = next(); } else // Otherwise, it's radix 8 radix = 8; } // Convert each character into an int value while ((k = chrpos("0123456789abcdef", tolower(c))) >= 0) { if (k >= radix) fatalc("invalid digit in integer literal", c); val = val * radix + k; c = next(); } // We hit a non-integer character, put it back. putback(c); return (val); } // Scan in a string literal from the input file, // and store it in buf[]. Return the length of // the string. static int scanstr(char *buf) { int i, c; int slash; // Loop while we have enough buffer space for (i = 0; i < TEXTLEN - 1; i++) { // Get the next char and append to buf // Return when we hit the ending double quote // (which wasn't quoted with a backslash) c = scanch(&slash); if (c == '"' && slash==0) { buf[i] = 0; return (i); } buf[i] = (char) c; } // Ran out of buf[] space fatal("String literal too long"); return (0); } // Scan an identifier from the input file and // store it in buf[]. Return the identifier's length static int scanident(int c, char *buf, int lim) { int i = 0; // Allow digits, alpha and underscores while (isalpha(c) || isdigit(c) || '_' == c) { // Error if we hit the identifier length limit, // else append to buf[] and get next character if (lim - 1 == i) { fatal("Identifier too long"); } else if (i < lim - 1) { buf[i++] = (char) c; } c = next(); } // We hit a non-valid character, put it back. // NUL-terminate the buf[] and return the length putback(c); buf[i] = '\0'; return (i); } // Given a word from the input, return the matching // keyword token number or 0 if it's not a keyword. // Switch on the first letter so that we don't have // to waste time strcmp()ing against all the keywords. 
static int keyword(char *s) { switch (*s) { case 'b': if (!strcmp(s, "break")) return (T_BREAK); break; case 'c': if (!strcmp(s, "case")) return (T_CASE); if (!strcmp(s, "char")) return (T_CHAR); if (!strcmp(s, "continue")) return (T_CONTINUE); break; case 'd': if (!strcmp(s, "default")) return (T_DEFAULT); break; case 'e': if (!strcmp(s, "else")) return (T_ELSE); if (!strcmp(s, "enum")) return (T_ENUM); if (!strcmp(s, "extern")) return (T_EXTERN); break; case 'f': if (!strcmp(s, "for")) return (T_FOR); break; case 'i': if (!strcmp(s, "if")) return (T_IF); if (!strcmp(s, "int")) return (T_INT); break; case 'l': if (!strcmp(s, "long")) return (T_LONG); break; case 'r': if (!strcmp(s, "return")) return (T_RETURN); break; case 's': if (!strcmp(s, "sizeof")) return (T_SIZEOF); if (!strcmp(s, "static")) return (T_STATIC); if (!strcmp(s, "struct")) return (T_STRUCT); if (!strcmp(s, "switch")) return (T_SWITCH); break; case 't': if (!strcmp(s, "typedef")) return (T_TYPEDEF); break; case 'u': if (!strcmp(s, "union")) return (T_UNION); break; case 'v': if (!strcmp(s, "void")) { return (T_VOID); } break; case 'w': if (!strcmp(s, "while")) return (T_WHILE); break; } return (0); } #ifdef DEBUG // List of token strings, for debugging purposes char *Tstring[] = { "EOF", "=", "+=", "-=", "*=", "/=", "%=", "?", "||", "&&", "|", "^", "&", "==", "!=", "<", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "%", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", "default", "sizeof", "static", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":", "...", "charlit", "filename", "linenum" }; #endif // Scan and return the next token found in the input. // Return 1 if token valid, 0 if no tokens left. // If nocpp, don't return on new filenames or line numbers. 
// This is because we use scan() when parsing new filenames // and line numbers :-) int scan(struct token *t, int nocpp) { int c, tokentype; int slash; // Skip whitespace c = skip(); if (nocpp==0) { // If the filename changed, return the filename if (Newfilename) { t->token = T_FILENAME; Newfilename=0; putback(c); return (1); } // If the line number changed, return the line number if (Newlinenum) { t->intvalue = Line; t->token = T_LINENUM; Newlinenum=0; putback(c); return (1); } } // If we have a lookahead token, return this token if (Peektoken.token != 0) { t->token = Peektoken.token; t->intvalue = Peektoken.intvalue; Peektoken.token = 0; return (1); } // Determine the token based on // the input character switch (c) { case EOF: t->token = T_EOF; return (0); case '+': if ((c = next()) == '+') { t->token = T_INC; } else if (c == '=') { t->token = T_ASPLUS; } else { putback(c); t->token = T_PLUS; } break; case '-': if ((c = next()) == '-') { t->token = T_DEC; } else if (c == '>') { t->token = T_ARROW; } else if (c == '=') { t->token = T_ASMINUS; } else if (isdigit(c)) { // Negative int literal t->intvalue = -scanint(c); t->token = T_INTLIT; } else { putback(c); t->token = T_MINUS; } break; case '*': if ((c = next()) == '=') { t->token = T_ASSTAR; } else { putback(c); t->token = T_STAR; } break; case '/': if ((c = next()) == '=') { t->token = T_ASSLASH; } else { putback(c); t->token = T_SLASH; } break; case '%': if ((c = next()) == '=') { t->token = T_ASMOD; } else { putback(c); t->token = T_MOD; } break; case ';': t->token = T_SEMI; break; case '{': t->token = T_LBRACE; break; case '}': t->token = T_RBRACE; break; case '(': t->token = T_LPAREN; break; case ')': t->token = T_RPAREN; break; case '[': t->token = T_LBRACKET; break; case ']': t->token = T_RBRACKET; break; case '~': t->token = T_INVERT; break; case '^': t->token = T_XOR; break; case ',': t->token = T_COMMA; break; case '.': if ((c = next()) == '.') { t->token = T_ELLIPSIS; if ((c = next()) != '.') 
fatal("Expected '...', only got '..'\n"); } else { putback(c); t->token = T_DOT; } break; case ':': t->token = T_COLON; break; case '?': t->token = T_QUESTION; break; case '=': if ((c = next()) == '=') { t->token = T_EQ; } else { putback(c); t->token = T_ASSIGN; } break; case '!': if ((c = next()) == '=') { t->token = T_NE; } else { putback(c); t->token = T_LOGNOT; } break; case '<': if ((c = next()) == '=') { t->token = T_LE; } else if (c == '<') { t->token = T_LSHIFT; } else { putback(c); t->token = T_LT; } break; case '>': if ((c = next()) == '=') { t->token = T_GE; } else if (c == '>') { t->token = T_RSHIFT; } else { putback(c); t->token = T_GT; } break; case '&': if ((c = next()) == '&') { t->token = T_LOGAND; } else { putback(c); t->token = T_AMPER; } break; case '|': if ((c = next()) == '|') { t->token = T_LOGOR; } else { putback(c); t->token = T_OR; } break; case '\'': // If it's a quote, scan in the // literal character value and // the trailing quote t->intvalue = scanch(&slash); t->token = T_CHARLIT; if (next() != '\'') fatal("Expected '\\'' at end of char literal"); break; case '"': // Scan in a literal string scanstr(Text); t->token = T_STRLIT; break; default: // If it's a digit, scan the // literal integer value in if (isdigit(c)) { t->intvalue = scanint(c); t->token = T_INTLIT; break; } else if (isalpha(c) || '_' == c) { // Read in a keyword or identifier scanident(c, Text, TEXTLEN); // If it's a recognised keyword, return that token if ((tokentype = keyword(Text)) != 0) { t->token = tokentype; break; } // Not a recognised keyword, so it must be an identifier t->token = T_IDENT; break; } // The character isn't part of any recognised token, error fatalc("Unrecognised character", c); } // We found a token return (1); } // Read lines of code from stdin and output // a token stream int main() { int i; Infile= stdin; Infilename = strdup(""); // Cpp hasn't told us the filename yet Peektoken.token = 0; // Set there is no lookahead token scan(&Token, 0); // 
Get the first token from the input // Loop getting more tokens while (Token.token != T_EOF) { // Output a binary stream of tokens to standard output. // T_INTLIT tokens are followed by the n-byte literal value. // T_STRLIT and T_IDENT tokens are followed by a NUL-terminated string. fputc(Token.token, stdout); switch (Token.token) { case T_INTLIT: case T_CHARLIT: i= Token.intvalue; fwrite(&i, sizeof(int), 1, stdout); // fprintf(stderr, "%02X: %d\n", Token.token, Token.intvalue); break; case T_STRLIT: fputs(Text, stdout); fputc(0, stdout); // fprintf(stderr, "%02X: \"%s\"\n", Token.token, Text); break; case T_IDENT: fputs(Text, stdout); fputc(0, stdout); // fprintf(stderr, "%02X: %s\n", Token.token, Text); break; case T_FILENAME: fputs(Infilename, stdout); fputc(0, stdout); // fprintf(stderr, "%02X: %s\n", Token.token, Infilename); break; case T_LINENUM: fwrite(&Line, sizeof(int), 1, stdout); // fprintf(stderr, "%02X: %d\n", Token.token, Line); break; default: // fprintf(stderr, "%02X: %s\n", Token.token, Tstring[Token.token]); } scan(&Token, 0); } exit(0); return(0); } ================================================ FILE: 64_6809_Target/stmt.c ================================================ #include "defs.h" #include "data.h" #include "decl.h" #include "expr.h" #include "misc.h" #include "opt.h" #include "parse.h" #include "stmt.h" #include "sym.h" #include "tree.h" #include "types.h" // Parsing of statements // Copyright (c) 2019 Warren Toomey, GPL3 // Prototypes static struct ASTnode *single_statement(void); // compound_statement: // empty, i.e. 
no statement // | statement // | statement statements // ; // // statement: declaration // | expression_statement // | function_call // | if_statement // | while_statement // | for_statement // | return_statement // ; // if_statement: if_head // | if_head 'else' statement // ; // // if_head: 'if' '(' true_false_expression ')' statement ; // // Parse an IF statement including any // optional ELSE clause and return its AST static struct ASTnode *if_statement(void) { struct ASTnode *condAST, *trueAST, *falseAST = NULL; // Ensure we have 'if' '(' match(T_IF, "if"); lparen(); // Parse the following expression // and the ')' following. Force a // non-boolean operation to be boolean. condAST = binexpr(0); if (condAST->op != A_LOGOR && condAST->op != A_LOGAND && (condAST->op < A_EQ || condAST->op > A_GE)) condAST = mkastunary(A_TOBOOL, condAST->type, condAST->ctype, condAST, NULL, 0); rparen(); // Get the AST for the statement trueAST = single_statement(); // If we have an 'else', skip it // and get the AST for the statement if (Token.token == T_ELSE) { scan(&Token); falseAST = single_statement(); } // Build and return the AST for this statement return (mkastnode(A_IF, P_NONE, NULL, condAST, trueAST, falseAST, NULL, 0)); } // while_statement: 'while' '(' true_false_expression ')' statement ; // // Parse a WHILE statement and return its AST static struct ASTnode *while_statement(void) { struct ASTnode *condAST, *bodyAST; // Ensure we have 'while' '(' match(T_WHILE, "while"); lparen(); // Parse the following expression // and the ')' following. Force a // non-boolean operation to be boolean. condAST = binexpr(0); if (condAST->op != A_LOGOR && condAST->op != A_LOGAND && (condAST->op < A_EQ || condAST->op > A_GE)) condAST = mkastunary(A_TOBOOL, condAST->type, condAST->ctype, condAST, NULL, 0); rparen(); // Get the AST for the statement. 
// Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Build and return the AST for this statement return (mkastnode(A_WHILE, P_NONE, NULL, condAST, NULL, bodyAST, NULL, 0)); } // for_statement: 'for' '(' expression_list ';' // true_false_expression ';' // expression_list ')' statement ; // // Parse a FOR statement and return its AST static struct ASTnode *for_statement(void) { struct ASTnode *condAST, *bodyAST; struct ASTnode *preopAST, *postopAST; struct ASTnode *tree; // Ensure we have 'for' '(' match(T_FOR, "for"); lparen(); // Get the pre_op expression and the ';' preopAST = expression_list(T_SEMI); semi(); // Get the condition and the ';'. Force a // non-boolean operation to be boolean. condAST = binexpr(0); if (condAST->op != A_LOGOR && condAST->op != A_LOGAND && (condAST->op < A_EQ || condAST->op > A_GE)) condAST = mkastunary(A_TOBOOL, condAST->type, condAST->ctype, condAST, NULL, 0); semi(); // Get the post_op expression and the ')' postopAST = expression_list(T_RPAREN); rparen(); // Get the statement which is the body // Update the loop depth in the process Looplevel++; bodyAST = single_statement(); Looplevel--; // Glue the statement and the postop tree tree = mkastnode(A_GLUE, P_NONE, NULL, bodyAST, NULL, postopAST, NULL, 0); // Make a WHILE loop with the condition and this new body tree = mkastnode(A_WHILE, P_NONE, NULL, condAST, NULL, tree, NULL, 0); // And glue the preop tree to the A_WHILE tree return (mkastnode(A_GLUE, P_NONE, NULL, preopAST, NULL, tree, NULL, 0)); } // return_statement: 'return' '(' expression ')' ; // // Parse a return statement and return its AST static struct ASTnode *return_statement(void) { struct ASTnode *tree= NULL; // Ensure we have 'return' match(T_RETURN, "return"); // See if we have a return value if (Token.token == T_LPAREN) { // Can't return a value if function returns P_VOID if (Functionid->type == P_VOID) fatal("Can't return from a void function"); // Skip the left 
parenthesis lparen(); // Parse the following expression tree = binexpr(0); // Ensure this is compatible with the function's type tree = modify_type(tree, Functionid->type, Functionid->ctype, 0); if (tree == NULL) fatal("Incompatible type to return"); // Get the ')' rparen(); } // Add on the A_RETURN node tree = mkastunary(A_RETURN, P_NONE, NULL, tree, NULL, 0); // Get the ';' semi(); return (tree); } // break_statement: 'break' ; // // Parse a break statement and return its AST static struct ASTnode *break_statement(void) { if (Looplevel == 0 && Switchlevel == 0) fatal("no loop or switch to break out from"); scan(&Token); semi(); return (mkastleaf(A_BREAK, P_NONE, NULL, NULL, 0)); } // continue_statement: 'continue' ; // // Parse a continue statement and return its AST static struct ASTnode *continue_statement(void) { if (Looplevel == 0) fatal("no loop to continue to"); scan(&Token); semi(); return (mkastleaf(A_CONTINUE, P_NONE, NULL, NULL, 0)); } // Parse a switch statement and return its AST static struct ASTnode *switch_statement(void) { struct ASTnode *left, *body, *n, *c; struct ASTnode *casetree = NULL, *casetail; int inloop = 1, casecount = 0; int seendefault = 0; int ASTop, casevalue; // Skip the 'switch' and '(' scan(&Token); lparen(); // Get the switch expression, the ')' and the '{' left = binexpr(0); rparen(); lbrace(); // Ensure that this is of int type if (!inttype(left->type)) fatal("Switch expression is not of integer type"); // If its type P_CHAR, widen it to P_INT if (left->type == P_CHAR) left= mkastunary(A_WIDEN, P_INT, NULL, left, NULL, 0); // Build an A_SWITCH subtree with the expression as the child n = mkastunary(A_SWITCH, P_NONE, NULL, left, NULL, 0); // Now parse the cases Switchlevel++; while (inloop) { switch (Token.token) { // Leave the loop when we hit a '}' case T_RBRACE: if (casecount == 0) fatal("No cases in switch"); inloop = 0; break; case T_CASE: case T_DEFAULT: // Ensure this isn't after a previous 'default' if (seendefault) 
fatal("case or default after existing default"); // Set the AST operation. Scan the case value if required if (Token.token == T_DEFAULT) { ASTop = A_DEFAULT; seendefault = 1; scan(&Token); } else { ASTop = A_CASE; scan(&Token); left = binexpr(0); // Ensure the case value is an integer literal if (left->op != A_INTLIT) fatal("Expecting integer literal for case value"); casevalue = left->a_intvalue; // Walk the list of existing case values to ensure // that there isn't a duplicate case value for (c = casetree; c != NULL; c = c->right) if (casevalue == c->a_intvalue) fatal("Duplicate case value"); } // Scan the ':' and increment the casecount match(T_COLON, ":"); casecount++; // If the next token is a T_CASE, the existing case will fall // into the next case. Otherwise, parse the case body. if (Token.token == T_CASE) body = NULL; else body = compound_statement(1); // Build a sub-tree with any compound statement as the left child // and link it in to the growing A_CASE tree if (casetree == NULL) { casetree = casetail = mkastunary(ASTop, P_NONE, NULL, body, NULL, casevalue); } else { casetail->right = mkastunary(ASTop, P_NONE, NULL, body, NULL, casevalue); casetail->rightid= casetail->right->nodeid; casetail = casetail->right; } break; default: fatals("Unexpected token in switch", Tstring[Token.token]); } } Switchlevel--; // We have a sub-tree with the cases and any default. Put the // case count into the A_SWITCH node and attach the case tree. n->a_intvalue = casecount; n->right = casetree; n->rightid = casetree->nodeid; rbrace(); return (n); } // Parse a single statement and return its AST. 
static struct ASTnode *single_statement(void) { struct ASTnode *stmt; struct symtable *ctype; int linenum= Line; switch (Token.token) { case T_SEMI: // An empty statement semi(); break; case T_LBRACE: // We have a '{', so this is a compound statement lbrace(); stmt = compound_statement(0); stmt->linenum= linenum; rbrace(); return (stmt); case T_IDENT: // We have to see if the identifier matches a typedef. // If not, treat it as an expression. // Otherwise, fall down to the parse_type() call. if (findtypedef(Text) == NULL) { stmt = binexpr(0); stmt->linenum= linenum; semi(); return (stmt); } case T_CHAR: case T_INT: case T_LONG: case T_STRUCT: case T_UNION: case T_ENUM: case T_TYPEDEF: // The beginning of a variable declaration list. declaration_list(&ctype, V_LOCAL, T_SEMI, T_EOF, &stmt); semi(); return (stmt); // Any assignments from the declarations case T_IF: stmt= if_statement(); stmt->linenum= linenum; return(stmt); case T_WHILE: stmt= while_statement(); stmt->linenum= linenum; return(stmt); case T_FOR: stmt= for_statement(); stmt->linenum= linenum; return(stmt); case T_RETURN: stmt= return_statement(); stmt->linenum= linenum; return(stmt); case T_BREAK: stmt= break_statement(); stmt->linenum= linenum; return(stmt); case T_CONTINUE: stmt= continue_statement(); stmt->linenum= linenum; return(stmt); case T_SWITCH: stmt= switch_statement(); stmt->linenum= linenum; return(stmt); default: // For now, see if this is an expression. // This catches assignment statements. stmt = binexpr(0); stmt->linenum= linenum; semi(); return (stmt); } return (NULL); // Keep -Wall happy } // Parse a compound statement // and return its AST. If inswitch is true, // we look for a '}', 'case' or 'default' token // to end the parsing. Otherwise, look for // just a '}' to end the parsing. struct ASTnode *compound_statement(int inswitch) { struct ASTnode *left = NULL; struct ASTnode *tree; while (1) { // Leave if we've hit the end token. 
We do this first to allow // an empty compound statement if (Token.token == T_RBRACE) return (left); if (inswitch && (Token.token == T_CASE || Token.token == T_DEFAULT)) return (left); // Parse a single statement tree = single_statement(); // For each new tree, either save it in left // if left is empty, or glue the left and the // new tree together if (tree != NULL) { if (left == NULL) left = tree; else { left = mkastnode(A_GLUE, P_NONE, NULL, left, NULL, tree, NULL, 0); // To conserve memory, we try to optimise the single statement tree. // Then we serialise the tree and free it. We set the right pointer // in left NULL; this will stop the serialiser from descending into // the tree that we already serialised. tree = optimise(tree); serialiseAST(tree); freetree(tree, 0); left->right=NULL; } } } return (NULL); // Keep -Wall happy } ================================================ FILE: 64_6809_Target/stmt.h ================================================ /* stmt.c */ struct ASTnode *compound_statement(int inswitch); ================================================ FILE: 64_6809_Target/sym.c ================================================ #include "defs.h" #include "data.h" #include "misc.h" #include "tree.h" #include "types.h" #include "sym.h" #undef MEMBDEBUG #undef DEBUG // Symbol table functions // Copyright (c) 2024 Warren Toomey, GPL3 // We have two in-memory symbol tables. One is for // types (structs, unions, enums, typedefs); the // other is for variables and functions. These // cache symbols from the symbol file which have // been recently used. // There is also a temporary list which we build // before attaching to a symbol's member field. // This is used for structs, unions and enums. // It also holds the parameters and locals of // the function we are currently parsing. 
// // Symhead needs to be visible for cgen.c struct symtable *Symhead = NULL; static struct symtable *Symtail = NULL; static struct symtable *Typehead = NULL; static struct symtable *Typetail = NULL; static struct symtable *Membhead = NULL; static struct symtable *Membtail = NULL; #ifdef DEBUG static void dumptable(struct symtable *head, int indent); // List of structural type strings char *Sstring[] = { "variable", "function", "array", "enumval", "strlit", "struct", "union", "enumtype", "typedef", "notype" }; // Dump a single symbol static void dumpsym(struct symtable *sym, int indent) { int i; for (i = 0; i < indent; i++) printf(" "); switch (sym->type & (~0xf)) { case P_VOID: printf("void "); break; case P_CHAR: printf("char "); break; case P_INT: printf("int "); break; case P_LONG: printf("long "); break; case P_STRUCT: printf("struct "); if (sym->ctype && sym->ctype->name) printf("%s ", sym->ctype->name); break; case P_UNION: printf("union "); if (sym->ctype && sym->ctype->name) printf("%s ", sym->ctype->name); break; default: printf("unknown type "); } for (i = 0; i < (sym->type & 0xf); i++) printf("*"); printf("%s", sym->name); switch (sym->stype) { case S_VARIABLE: break; case S_FUNCTION: printf("()"); break; case S_ARRAY: printf("[]"); break; case S_STRUCT: printf(": struct"); break; case S_UNION: printf(": union"); break; case S_ENUMTYPE: printf(": enum"); break; case S_ENUMVAL: printf(": enumval"); break; case S_TYPEDEF: printf(": typedef"); break; case S_STRLIT: printf(": strlit"); break; default: printf(" unknown stype"); } printf(" id %d", sym->id); switch (sym->class) { case V_GLOBAL: printf(": global"); break; case V_LOCAL: printf(": local offset %d", sym->st_posn); break; case V_PARAM: printf(": param offset %d", sym->st_posn); break; case V_EXTERN: printf(": extern"); break; case V_STATIC: printf(": static"); break; case V_MEMBER: printf(": member"); break; default: printf(": unknown class"); } if (sym->st_hasaddr != 0) printf(", hasaddr "); 
switch (sym->stype) { case S_VARIABLE: printf(", size %d", sym->size); break; case S_FUNCTION: printf(", %d params", sym->nelems); break; case S_ARRAY: printf(", %d elems, size %d", sym->nelems, sym->size); break; } printf(", ctypeid %d, nelems %d st_posn %d\n", sym->ctypeid, sym->nelems, sym->st_posn); if (sym->member != NULL) dumptable(sym->member, 4); } // Dump one symbol table static void dumptable(struct symtable *head, int indent) { struct symtable *sym; for (sym = head; sym != NULL; sym = sym->next) dumpsym(sym, indent); } void dumpSymlists(void) { fprintf(stderr, "Typelist\n"); fprintf(stderr, "--------\n"); dumptable(Typehead, 0); fprintf(stderr, "\nSymlist\n"); fprintf(stderr, "-------\n"); dumptable(Symhead, 0); fprintf(stderr, "\nFunctionid\n"); fprintf(stderr, "----------\n"); dumptable(Functionid, 0); fprintf(stderr, "\nMemblist\n"); fprintf(stderr, "--------\n"); dumptable(Membhead, 0); } #endif // Append a node to the singly-linked list pointed to by head or tail static void appendSym(struct symtable **head, struct symtable **tail, struct symtable *node) { // Check for valid pointers if (head == NULL || tail == NULL || node == NULL) fatal("Either head, tail or node is NULL in appendSym"); // Append to the list if (*tail) { (*tail)->next = node; *tail = node; } else *head = *tail = node; node->next = NULL; } // The last name we loaded from the symbol file static char SymText[TEXTLEN + 1]; #ifdef WRITESYMS // Unique id for each symbol. We need this when serialising // so that the composite type of a variable can be found. static int Symid = 1; // For symbols which are not locals or parameters, // we point this at the symbol once created. This // allows us to e.g. add initial values or members. // When we come to make another non-local/param, // this gets flushed to disk. struct symtable *thisSym = NULL; // When we are serialising symbols to the symbol table file, // track the one with the highest id. 
After each flushing // of the in-memory lists, record the highest id into skipSymid. // Then, on the next flush, don't write out symbols at or below // the skipSymid. static int highestSymid = 0; static int skipSymid = 0; // Serialise one symbol to the symbol table file static void serialiseSym(struct symtable *sym) { struct symtable *memb; if (sym->id > highestSymid) highestSymid = sym->id; if (sym->id <= skipSymid) { #ifdef DEBUG fprintf(stderr, "NOT Writing %s %s id %d to disk\n", Sstring[sym->stype], sym->name, sym->id); #endif return; } // Output the symbol struct and the name // once we are at the end of the file fseek(Symfile, 0, SEEK_END); #ifdef DEBUG fprintf(stderr, "Writing %s %s id %d to disk offset %ld\n", Sstring[sym->stype], sym->name, sym->id, ftell(Symfile)); #endif fwrite(sym, sizeof(struct symtable), 1, Symfile); if (sym->name != NULL) { fputs(sym->name, Symfile); fputc(0, Symfile); } // Output the initial values, if any if (sym->initlist != NULL) fwrite(sym->initlist, sizeof(int), sym->nelems, Symfile); // Output the member symbols #ifdef DEBUG if (sym->member != NULL) { fprintf(stderr, "%s has members\n", sym->name); } #endif for (memb = sym->member; memb != NULL; memb = memb->next) serialiseSym(memb); } // Create a symbol table node. Set up the node's: // + type: char, int etc. // + ctype: composite type pointer for struct/union // + structural type: var, function, array etc. // + size: number of elements, or endlabel: end label for a function // + posn: Position information for local symbols // Return a pointer to the new node. 
static struct symtable *newsym(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn) { // Get a new node struct symtable *node = (struct symtable *) malloc(sizeof(struct symtable)); if (node == NULL) fatal("Unable to malloc a symbol table node in newsym"); // Fill in the values node->id = Symid++; #ifdef DEBUG fprintf(stderr, "Newsym %s %s id %d\n", Sstring[stype], name, node->id); #endif if (name == NULL) node->name = NULL; else node->name = strdup(name); node->type = type; node->ctype = ctype; if (ctype != NULL) node->ctypeid = ctype->id; else node->ctypeid = 0; node->stype = stype; node->class = class; node->nelems = nelems; node->st_hasaddr = 0; // For pointers and integer types, set the size // of the symbol. structs and union declarations // manually set this up themselves. if (ptrtype(type) || inttype(type)) node->size = nelems * typesize(type, ctype); node->st_posn = posn; node->next = NULL; node->member = NULL; node->initlist = NULL; return (node); } // Add a new type to the list of types. Return a pointer to the symbol. struct symtable *addtype(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn) { struct symtable *sym; thisSym = sym = newsym(name, type, ctype, stype, class, nelems, posn); #ifdef DEBUG fprintf(stderr, "Added %s %s to Typelist\n", Sstring[sym->stype], sym->name); #endif appendSym(&Typehead, &Typetail, sym); Membhead = Membtail = NULL; return (sym); } // Add a new symbol to the global table. Return a pointer to the symbol. 
struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn) { struct symtable *sym; thisSym = sym = newsym(name, type, ctype, stype, class, nelems, posn); // For structs and unions, copy the size from the type node if (type == P_STRUCT || type == P_UNION) sym->size = ctype->size; #ifdef DEBUG fprintf(stderr, "Added %s %s to Symlist\n", Sstring[sym->stype], sym->name); #endif appendSym(&Symhead, &Symtail, sym); Membhead = Membtail = NULL; return (sym); } // Add a symbol to the member list in thisSym struct symtable *addmemb(char *name, int type, struct symtable *ctype, int class, int stype, int nelems) { struct symtable *sym = newsym(name, type, ctype, stype, class, nelems, 0); // For structs and unions, copy the size from the type node if (type == P_STRUCT || type == P_UNION) sym->size = ctype->size; // Add this to the member list and link into thisSym if needed appendSym(&Membhead, &Membtail, sym); #ifdef DEBUG fprintf(stderr, "Added %s %s to Memblist\n", Sstring[sym->stype], sym->name); #endif if (thisSym->member == NULL) { thisSym->member = Membhead; #ifdef DEBUG fprintf(stderr, "Added %s to start of %s\n", name, thisSym->name); #endif } return (sym); } // Flush the contents of the in-memory symbol tables // to the file. void flushSymtable() { struct symtable *this; // Write out types for (this = Typehead; this != NULL; this = this->next) { serialiseSym(this); } // Write out variables and functions. // Skip invalid symbols for (this = Symhead; this != NULL; this = this->next) { serialiseSym(this); } skipSymid = highestSymid; freeSymtable(); } #endif // WRITESYMS // When reading in globals that have members (structs, unions, // functions), we stop once we hit a non-member. We need to // record that symbol's offset so we can fseek() back to that. // Otherwise we would not be able to load it if it also was a global. 
static long lastSymOffset; // We need a linked list when loadSym() loads in members of a symbol // from the disk. We can't use Membhead/tail as this might be in use // when parsing the body of a function. So we keep a private list. static struct symtable *Mhead, *Mtail; // Given a pointer to a symtable node, read in the next entry // in the on-disk symbol table. Do this always if loadit is true. // Only read one node if recurse is zero. // If loadit is false, load the data and return true if the symbol // a) matches the given name and stype or b) matches the id. // Return -1 when there is nothing left to read. static int loadSym(struct symtable *sym, char *name, int stype, int id, int loadit, int recurse) { struct symtable *memb; #ifdef DEBUGTOOMUCH if (name!=NULL) fprintf(stderr, "loadSym: name %s stype %d loadit %d recurse %d\n", name, stype, loadit, recurse); else fprintf(stderr, "loadSym: id %d stype %d loadit %d recurse %d\n", id, stype, loadit, recurse); #endif // Read in the next node. Get a copy of the offset beforehand lastSymOffset = ftell(Symfile); if (fread(sym, sizeof(struct symtable), 1, Symfile) != 1) return (-1); // Get the symbol name into a separate buffer for now if (sym->name != NULL) { fgetstr(SymText, TEXTLEN + 1, Symfile); } #ifdef DEBUG if (sym->name != NULL) fprintf(stderr, "symoff %ld name %s stype %d\n", lastSymOffset, SymText, sym->stype); else fprintf(stderr, "symoff %ld id %d\n", lastSymOffset, sym->id); #endif // If loadit is off, see if the ids match. Or, // see if the names are a match and the stype matches. // For the latter, if NOTATYPE match anything which isn't // a type and which isn't a member, local or param: we are // trying to find a variable, enumval or function. findlocl() // will find it if it's a local or parameter. We only get // here when we are trying to find a global variable, // enumval or function. 
if (loadit == 0) { if (id != 0 && sym->id == id) loadit = 1; if (name != NULL && !strcmp(name, SymText)) { if (stype == S_NOTATYPE && sym->stype < S_STRUCT && sym->class < V_LOCAL) loadit = 1; if (stype >= S_STRUCT && stype == sym->stype) loadit = 1; } } // Yes, we need to load the rest of the symbol if (loadit) { // Copy the name over. sym->name = strdup(SymText); if (sym->name == NULL) fatal("Unable to malloc name in loadSym()"); #ifdef DEBUG if (sym->name == NULL) { fprintf(stderr, "loadSym found %s NONAME id %d loadit %d\n", Sstring[sym->stype], sym->id, loadit); } else { fprintf(stderr, "loadSym found %s %s id %d loadit %d\n", Sstring[sym->stype], sym->name, sym->id, loadit); } #endif // Get the initialisation list. if (sym->initlist != NULL) { sym->initlist = (int *) malloc(sym->nelems * sizeof(int)); if (sym->initlist == NULL) fatal("Unable to malloc initlist in loadSym()"); fread(sym->initlist, sizeof(int), sym->nelems, Symfile); } // Stop now if we must not recursively load more nodes if (!recurse) { #ifdef DEBUG fprintf(stderr, "loadSym found it - no recursion\n"); #endif return (1); } // For structs, unions and functions load and add // the members (or params/locals) to the member list if (sym->stype == S_STRUCT || sym->stype == S_UNION || sym->stype == S_FUNCTION) { Mhead = Mtail = NULL; while (1) { #ifdef DEBUG fprintf(stderr, "loadSym: about to try loading members\n"); #endif memb = (struct symtable *) malloc(sizeof(struct symtable)); if (memb == NULL) fatal("Unable to malloc member in loadSym()"); #ifdef MEMBDEBUG fprintf(stderr, "%p allocated\n", memb); #endif // Get the next symbol. 
Stop when there are no symbols // or when the symbol isn't a member, enumval, param or local if (loadSym(memb, NULL, 0, 0, 1, 0) != 1) break; if (memb->class != V_LOCAL && memb->class != V_PARAM && memb->class != V_MEMBER) break; #ifdef DEBUG fprintf(stderr, "loadSym: appending %s to member list\n", memb->name); #endif appendSym(&Mhead, &Mtail, memb); } // We found a non-member symbol. Seek back // to where it was and free the unused struct. // Attach the member list to the original symbol. fseek(Symfile, lastSymOffset, SEEK_SET); #ifdef DEBUG fprintf(stderr, "Seeked to lastSymOffset %ld as non-member id %d\n", lastSymOffset, memb->id); #endif #ifdef MEMBDEBUG fprintf(stderr, "%p freed, unused memb\n", memb); #endif free(memb); sym->member = Mhead; Mhead = Mtail = NULL; } return (1); } else { // No match and loadit was 0. Skip over any initialisation list. if (sym->initlist != NULL) fseek(Symfile, sizeof(int) * sym->nelems, SEEK_CUR); } return (0); } // Given a name or an id, search the symbol table file for the next // symbol that matches. Fill in the node and return true on a match. // Otherwise, return false. static int findSyminfile(struct symtable *sym, char *name, int id, int stype) { int res; #ifdef DEBUG if (name!=NULL) fprintf(stderr, "findSyminfile: searching name %s stype %d\n", name, stype); else fprintf(stderr, "findSyminfile: search id %d\n", id); #endif // Loop over the file starting at the beginning fseek(Symfile, 0, SEEK_SET); while (1) { // Does the next symbol match? Yes, return it res = loadSym(sym, name, stype, id, 0, 1); if (res == 1) return (1); if (res == -1) break; } #ifdef DEBUG fprintf(stderr, "findSyminfile: not found\n"); #endif return (0); } // Determine if the symbol name, or id if not zero, is a local // or parameter. Return a pointer to the found node or NULL if not found. 
struct symtable *findlocl(char *name, int id) { struct symtable *this; // We must be in a function if (Functionid == NULL) return (NULL); #ifdef DEBUG if (id!=0) fprintf(stderr, "findlocl() searching for id %d\n", id); if (name!=NULL) fprintf(stderr, "findlocl() searching for name %s\n", name); #endif for (this = Functionid->member; this != NULL; this = this->next) { if (id && this->id == id) return (this); if (name && !strcmp(this->name, name)) return (this); } return (NULL); } // Given a name and a stype, search for a matching symbol. // Or, if the id is non-zero, search for the symbol with that id. // Bring the symbol in to one of the in-memory lists if necessary. // Return a pointer to the found node or NULL if not found. struct symtable *findSymbol(char *name, int stype, int id) { struct symtable *this; struct symtable *sym; int notatype; // Set a flag if we are not looking for a type notatype = (stype == S_NOTATYPE || stype == S_ENUMVAL); #ifdef DEBUG if (id != 0) fprintf(stderr, "Searching for symbol id %d in memory\n", id); else fprintf(stderr, "Searching for symbol %s %s in memory\n", Sstring[stype], name); #endif // If it's not a type, see if // it's a local or parameter if (id || notatype) { this = findlocl(name, id); if (this != NULL) return (this); #ifdef DEBUG fprintf(stderr, "Not in local, try the global Symlist\n"); #endif // Not a local, so search the global symbol list. for (this = Symhead; this != NULL; this = this->next) { if (id && this->id == id) return (this); if (name && !strcmp(this->name, name)) return (this); } } #ifdef DEBUG fprintf(stderr, "Not in , try the global Typelist\n"); #endif // We have an id or it is a type, // Search the global type list. 
// Sorry for the double negative :-) if (id || !notatype) { for (this = Typehead; this != NULL; this = this->next) { if (id && this->id == id) return (this); if (name && !strcmp(this->name, name) && this->stype == stype) return (this); } } #ifdef DEBUG fprintf(stderr, " not in memory, try the file\n"); #endif // Not in memory. Try the on-disk symbol table sym = (struct symtable *) malloc(sizeof(struct symtable)); if (sym == NULL) { fatal("Unable to malloc sym in findSyminlist()"); } // If we found a match in the file if (findSyminfile(sym, name, id, stype)) { // Add it to one of the in-memory lists and return it if (sym->stype < S_STRUCT) appendSym(&Symhead, &Symtail, sym); else appendSym(&Typehead, &Typetail, sym); // If the symbol points at a composite type, find and link it if (sym->ctype != NULL) { #ifdef DEBUG fprintf(stderr, "About to findSymid on id %d for %s\n", sym->ctypeid, sym->name); #endif sym->ctype = findSymbol(NULL, 0, sym->ctypeid); } // If any member symbols point at a composite type, ditto for (this = sym->member; this != NULL; this = this->next) if (this->ctype != NULL) { #ifdef DEBUG fprintf(stderr, "About to member findSymid on id %d for %s\n", this->ctypeid, this->name); #endif this->ctype = findSymbol(NULL, 0, this->ctypeid); } return (sym); } free(sym); return (NULL); } // Find a member in the member list. Return a pointer // to the found node or NULL if not found. struct symtable *findmember(char *s) { struct symtable *node; for (node = Membhead; node != NULL; node = node->next) if (!strcmp(s, node->name)) return (node); return (NULL); } // Find a node in the struct list // Return a pointer to the found node or NULL if not found. struct symtable *findstruct(char *s) { return (findSymbol(s, S_STRUCT, 0)); } // Find a node in the union list // Return a pointer to the found node or NULL if not found. 
struct symtable *findunion(char *s) { return (findSymbol(s, S_UNION, 0)); } // Find an enum type in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumtype(char *s) { return (findSymbol(s, S_ENUMTYPE, 0)); } // Find an enum value in the enum list // Return a pointer to the found node or NULL if not found. struct symtable *findenumval(char *s) { return (findSymbol(s, S_ENUMVAL, 0)); } // Find a type in the tyedef list // Return a pointer to the found node or NULL if not found. struct symtable *findtypedef(char *s) { return (findSymbol(s, S_TYPEDEF, 0)); } // Free a symbol's memory, returning the symbol's next pointer struct symtable *freeSym(struct symtable *sym) { struct symtable *next, *memb; if (sym == NULL) return (NULL); next = sym->next; #ifdef MEMBDEBUG fprintf(stderr, "%p freeing\n", sym); #endif #ifdef DEBUG fprintf(stderr, "Freeing %s %s\n", Sstring[sym->stype], sym->name); #endif // Free any members for (memb = sym->member; memb != NULL;) memb = freeSym(memb); // Free the initlist and the name if (sym->initlist != NULL) free(sym->initlist); if (sym->name != NULL) free(sym->name); free(sym); return (next); } // Free the contents of the in-memory symbol tables void freeSymtable() { struct symtable *this; for (this = Symhead; this != NULL;) this = freeSym(this); for (this = Typehead; this != NULL;) this = freeSym(this); Symhead = Symtail = Typehead = Typetail = NULL; Membhead = Membtail = Functionid = NULL; } // Loop over the symbol table file. // Load in all the types and // global/static variables. void loadGlobals(void) { struct symtable *sym; int i; // Start at the file's beginning. 
Load all symbols fseek(Symfile, 0, SEEK_SET); while (1) { // Load the next symbol + members + initlist sym = (struct symtable *) malloc(sizeof(struct symtable)); if (sym == NULL) fatal("Unable to malloc in allocateGlobals()"); i = loadSym(sym, NULL, 0, 0, 1, 1); if (i == -1) { free(sym); break; } // Add any type to the type list if (sym->stype >= S_STRUCT) { appendSym(&Typehead, &Typetail, sym); continue; } // Add any global/static variable/array/strlit to the sym list if ((sym->class == V_GLOBAL || sym->class == V_STATIC) && (sym->stype == S_VARIABLE || sym->stype == S_ARRAY || sym->stype == S_STRLIT)) { appendSym(&Symhead, &Symtail, sym); // If the symbol points at a composite type, find and link it if (sym->ctype != NULL) { sym->ctype = findSymbol(NULL, 0, sym->ctypeid); } continue; } // Didn't add it to any list freeSym(sym); } } ================================================ FILE: 64_6809_Target/sym.h ================================================ // sym.c struct symtable *addtype(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn); struct symtable *addglob(char *name, int type, struct symtable *ctype, int stype, int class, int nelems, int posn); struct symtable *addmemb(char *name, int type, struct symtable *ctype, int class, int stype, int nelems); struct symtable *findlocl(char *name, int id); struct symtable *findSymbol(char *name, int stype, int id); struct symtable *findmember(char *s); struct symtable *findstruct(char *s); struct symtable *findunion(char *s); struct symtable *findenumtype(char *s); struct symtable *findenumval(char *s); struct symtable *findtypedef(char *s); void loadGlobals(void); struct symtable *freeSym(struct symtable *sym); void freeSymtable(void); void flushSymtable(void); void dumpSymlists(void); extern struct symtable *Symhead; ================================================ FILE: 64_6809_Target/targ6809.c ================================================ #include "defs.h" #include 
"misc.h" #include "types.h" // Target-specific functions which get used // by the parser as well as the code generator. // Copyright (c) 2024 Warren Toomey, GPL3 // Given a scalar type value, return the // size of the type in bytes. int cgprimsize(int type) { if (ptrtype(type)) return (2); switch (type) { case P_VOID: return (0); case P_CHAR: return (1); case P_INT: return (2); case P_LONG: return (4); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } int cgalign(int type, int offset, int direction) { return (offset); } int genprimsize(int type) { return (cgprimsize(type)); } int genalign(int type, int offset, int direction) { return (cgalign(type, offset, direction)); } // Return the primitive type that can hold an address. // This is used when we need to add a INTLIT to a // pointer. int cgaddrint(void) { return(P_INT); } ================================================ FILE: 64_6809_Target/target.h ================================================ /* target.c */ int cgprimsize(int type); int cgalign(int type, int offset, int direction); int genprimsize(int type); int genalign(int type, int offset, int direction); int cgaddrint(void); ================================================ FILE: 64_6809_Target/targqbe.c ================================================ #include "defs.h" #include "misc.h" #include "types.h" // Target-specific functions which get used // by the parser as well as the code generator. // Copyright (c) 2024 Warren Toomey, GPL3 // Given a scalar type value, return the // size of the QBE type in bytes. 
int cgprimsize(int type) { if (ptrtype(type)) return (8); switch (type) { case P_CHAR: return (1); case P_INT: return (4); case P_LONG: return (8); default: fatald("Bad type in cgprimsize:", type); } return (0); // Keep -Wall happy } // Given a scalar type, an existing memory offset // (which hasn't been allocated to anything yet) // and a direction (1 is up, -1 is down), calculate // and return a suitably aligned memory offset // for this scalar type. This could be the original // offset, or it could be above/below the original int cgalign(int type, int offset, int direction) { int alignment; // We don't need to do this on x86-64, but let's // align chars on any offset and align ints/pointers // on a 4-byte alignment switch (type) { case P_CHAR: break; default: // Align whatever we have now on a 4-byte alignment. // I put the generic code here so it can be reused elsewhere. alignment = 4; offset = (offset + direction * (alignment - 1)) & ~(alignment - 1); } return (offset); } int genprimsize(int type) { return (cgprimsize(type)); } int genalign(int type, int offset, int direction) { return (cgalign(type, offset, direction)); } // Return the primitive type that can hold an address. // This is used when we need to add a INTLIT to a // pointer. int cgaddrint(void) { return(P_LONG); } ================================================ FILE: 64_6809_Target/tests/Makefile ================================================ clean: rm -f *_* input??? 
trial.input058.c rm -rf *.o *.s *.map *_* ================================================ FILE: 64_6809_Target/tests/err.input031.c ================================================ Expecting a primary expression, got token:+ on line 5 of input031.c ================================================ FILE: 64_6809_Target/tests/err.input032.c ================================================ Unknown variable or function:pizza on line 4 of input032.c ================================================ FILE: 64_6809_Target/tests/err.input033.c ================================================ Incompatible type to return on line 4 of input033.c ================================================ FILE: 64_6809_Target/tests/err.input034.c ================================================ For now, declaration of non-global arrays is not implemented on line 4 of input034.c ================================================ FILE: 64_6809_Target/tests/err.input035.c ================================================ Duplicate local variable declaration:a on line 4 of input035.c ================================================ FILE: 64_6809_Target/tests/err.input036.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input036.c ================================================ FILE: 64_6809_Target/tests/err.input037.c ================================================ Expected:comma on line 3 of input037.c ================================================ FILE: 64_6809_Target/tests/err.input038.c ================================================ Type doesn't match prototype for parameter:2 on line 4 of input038.c ================================================ FILE: 64_6809_Target/tests/err.input039.c ================================================ No statements in function with non-void type on line 4 of input039.c ================================================ FILE: 64_6809_Target/tests/err.input041.c 
================================================ Can't return from a void function on line 3 of input041.c ================================================ FILE: 64_6809_Target/tests/err.input042.c ================================================ Unknown variable or function:fred on line 3 of input042.c ================================================ FILE: 64_6809_Target/tests/err.input043.c ================================================ Unknown variable or function:b on line 3 of input043.c ================================================ FILE: 64_6809_Target/tests/err.input044.c ================================================ Unknown variable or function:z on line 3 of input044.c ================================================ FILE: 64_6809_Target/tests/err.input045.c ================================================ & operator must be followed by an identifier on line 3 of input045.c ================================================ FILE: 64_6809_Target/tests/err.input046.c ================================================ * operator must be followed by an expression of pointer type on line 3 of input046.c ================================================ FILE: 64_6809_Target/tests/err.input047.c ================================================ ++ operator must be followed by an identifier on line 3 of input047.c ================================================ FILE: 64_6809_Target/tests/err.input048.c ================================================ -- operator must be followed by an identifier on line 3 of input048.c ================================================ FILE: 64_6809_Target/tests/err.input049.c ================================================ Incompatible expression in assignment on line 6 of input049.c ================================================ FILE: 64_6809_Target/tests/err.input050.c ================================================ Incompatible types in binary expression on line 6 of input050.c 
================================================ FILE: 64_6809_Target/tests/err.input051.c ================================================ Expected '\'' at end of char literal on line 4 of input051.c ================================================ FILE: 64_6809_Target/tests/err.input052.c ================================================ Unrecognised character:$ on line 5 of input052.c ================================================ FILE: 64_6809_Target/tests/err.input056.c ================================================ unknown struct/union type:var1 on line 2 of input056.c ================================================ FILE: 64_6809_Target/tests/err.input057.c ================================================ previously defined struct/union:fred on line 2 of input057.c ================================================ FILE: 64_6809_Target/tests/err.input059.c ================================================ Unknown variable or function:y on line 3 of input059.c ================================================ FILE: 64_6809_Target/tests/err.input060.c ================================================ Expression is not a struct/union on line 3 of input060.c ================================================ FILE: 64_6809_Target/tests/err.input061.c ================================================ Expression is not a pointer to a struct/union on line 3 of input061.c ================================================ FILE: 64_6809_Target/tests/err.input064.c ================================================ undeclared enum type::fred on line 1 of input064.c ================================================ FILE: 64_6809_Target/tests/err.input065.c ================================================ enum type redeclared::fred on line 2 of input065.c ================================================ FILE: 64_6809_Target/tests/err.input066.c ================================================ enum value redeclared::z on line 2 of input066.c 
================================================ FILE: 64_6809_Target/tests/err.input068.c ================================================ redefinition of typedef:FOO on line 2 of input068.c ================================================ FILE: 64_6809_Target/tests/err.input069.c ================================================ unknown type:FLOO on line 2 of input069.c ================================================ FILE: 64_6809_Target/tests/err.input072.c ================================================ no loop or switch to break out from on line 1 of input072.c ================================================ FILE: 64_6809_Target/tests/err.input073.c ================================================ no loop to continue to on line 1 of input073.c ================================================ FILE: 64_6809_Target/tests/err.input075.c ================================================ Unexpected token in switch:if on line 4 of input075.c ================================================ FILE: 64_6809_Target/tests/err.input076.c ================================================ No cases in switch on line 3 of input076.c ================================================ FILE: 64_6809_Target/tests/err.input077.c ================================================ case or default after existing default on line 6 of input077.c ================================================ FILE: 64_6809_Target/tests/err.input078.c ================================================ case or default after existing default on line 6 of input078.c ================================================ FILE: 64_6809_Target/tests/err.input079.c ================================================ Duplicate case value on line 6 of input079.c ================================================ FILE: 64_6809_Target/tests/err.input085.c ================================================ Bad type in parameter list on line 1 of input085.c ================================================ FILE: 
64_6809_Target/tests/err.input086.c ================================================ Function definition not at global level on line 3 of input086.c ================================================ FILE: 64_6809_Target/tests/err.input087.c ================================================ Bad type in member list on line 4 of input087.c ================================================ FILE: 64_6809_Target/tests/err.input092.c ================================================ Type mismatch: literal vs. variable on line 1 of input092.c ================================================ FILE: 64_6809_Target/tests/err.input093.c ================================================ Unknown variable or function:fred on line 1 of input093.c ================================================ FILE: 64_6809_Target/tests/err.input094.c ================================================ Type mismatch: literal vs. variable on line 1 of input094.c ================================================ FILE: 64_6809_Target/tests/err.input095.c ================================================ Variable can not be initialised:x on line 1 of input095.c ================================================ FILE: 64_6809_Target/tests/err.input096.c ================================================ Array size is illegal:0 on line 1 of input096.c ================================================ FILE: 64_6809_Target/tests/err.input097.c ================================================ For now, declaration of non-global arrays is not implemented on line 2 of input097.c ================================================ FILE: 64_6809_Target/tests/err.input098.c ================================================ Too many values in initialisation list on line 1 of input098.c ================================================ FILE: 64_6809_Target/tests/err.input102.c ================================================ Cannot cast to a struct, union or void type on line 3 of input102.c 
================================================ FILE: 64_6809_Target/tests/err.input103.c ================================================ Cannot cast to a struct, union or void type on line 3 of input103.c ================================================ FILE: 64_6809_Target/tests/err.input104.c ================================================ Cannot cast to a struct, union or void type on line 2 of input104.c ================================================ FILE: 64_6809_Target/tests/err.input105.c ================================================ Incompatible expression in assignment on line 4 of input105.c ================================================ FILE: 64_6809_Target/tests/err.input118.c ================================================ Compiler doesn't support static or extern local declarations on line 2 of input118.c ================================================ FILE: 64_6809_Target/tests/err.input124.c ================================================ Cannot ++ on rvalue on line 6 of input124.c ================================================ FILE: 64_6809_Target/tests/err.input126.c ================================================ Unknown variable or function:ptr on line 7 of input126.c ================================================ FILE: 64_6809_Target/tests/err.input129.c ================================================ Cannot ++ and/or -- more than once on line 6 of input129.c ================================================ FILE: 64_6809_Target/tests/err.input141.c ================================================ Declaration of array parameters is not implemented on line 4 of input141.c ================================================ FILE: 64_6809_Target/tests/err.input142.c ================================================ Array must have non-zero elements:fred on line 1 of input142.c ================================================ FILE: 64_6809_Target/tests/input.rules.6809 ================================================ # Test file for 
the peephole optimiser #1 addd #0 #2 leas -0,s #3 leas 0,s #4 std _a ldd _a #5 std _a ; ldd _a #6 std _a ; ; ldd _a #7 stb _a ldb _a #8 stb _a ; ldb _a #9 stb _a ; ; ldb _a #10 std R0 ldd _a addd R0 #11 ldx 6,s ldd 0,x #12 ldx 7,s stb 0,x #13 ldx 6,s ldb 0,x #14 ldx 7,s std 0,x #15 ldx 3,s ldd #15 std 0,x #16 ldd _a tfr d,x #17 bra L5 ; L5: #18 bra L9 L9: #19 tfr x,d std _b #20 ldx #_fred ldd _jim std 0,x #21 ldd #_fred addd #4 tfr d,x ldd _b std 0,x #22 ldx #_fred ldd 0,x #23 ldd #_fred addd #4 tfr d,x ldd 0,x #24 ldd 4,s addd #14 tfr d,x ldd R0+0 std 0,x #25 ldd 4,s addd #8 tfr d,x ldd 0,x #26 ldd 0,s pshs d ldd #1 lbsr __shl std 0,s ================================================ FILE: 64_6809_Target/tests/input001.c ================================================ int printf(char *fmt, ...); void main() { printf("%d\n", 12 * 3); printf("%d\n", 18 - 2 * 4); printf("%d\n", 1 + 2 + 9 - 5/2 + 3*5); } ================================================ FILE: 64_6809_Target/tests/input002.c ================================================ int printf(char *fmt, ...); void main() { int fred; int jim; fred= 5; jim= 12; printf("%d\n", fred + jim); } ================================================ FILE: 64_6809_Target/tests/input003.c ================================================ int printf(char *fmt, ...); void main() { int x; x= 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); x= x + 1; printf("%d\n", x); } ================================================ FILE: 64_6809_Target/tests/input004.c ================================================ int printf(char *fmt, ...); void main() { int x; x= 7 < 9; printf("%d\n", x); x= 7 <= 9; printf("%d\n", x); x= 7 != 9; printf("%d\n", x); x= 7 == 7; printf("%d\n", x); x= 7 >= 7; printf("%d\n", x); x= 7 <= 7; printf("%d\n", x); x= 9 > 7; printf("%d\n", x); x= 9 >= 7; printf("%d\n", x); x= 9 != 7; printf("%d\n", x); } ================================================ FILE: 
64_6809_Target/tests/input005.c ================================================ int printf(char *fmt, ...); void main() { int i; int j; i=6; j=12; if (i < j) { printf("%d\n", i); } else { printf("%d\n", j); } } ================================================ FILE: 64_6809_Target/tests/input006.c ================================================ int printf(char *fmt, ...); void main() { int i; i=1; while (i <= 10) { printf("%d\n", i); i= i + 1; } } ================================================ FILE: 64_6809_Target/tests/input007.c ================================================ int printf(char *fmt, ...); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 64_6809_Target/tests/input008.c ================================================ int printf(char *fmt, ...); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } ================================================ FILE: 64_6809_Target/tests/input009.c ================================================ int printf(char *fmt, ...); void main() { int i; for (i= 1; i <= 10; i= i + 1) { printf("%d\n", i); } } void fred() { int a; int b; a= 12; b= 3 * a; if (a >= b) { printf("%d\n", 2 * b - a); } } ================================================ FILE: 64_6809_Target/tests/input010.c ================================================ int printf(char *fmt, ...); void main() { int i; char j; j= 20; printf("%d\n", j); i= 10; printf("%d\n", i); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 2; j= j + 1) { printf("%d\n", j); } } ================================================ FILE: 64_6809_Target/tests/input011.c ================================================ int printf(char *fmt, ...); int main() { int i; char j; long k; i= 10; printf("%d\n", i); j= 20; printf("%d\n", j); k= 30; printf("%ld\n", k); for (i= 1; i <= 5; i= i + 1) { printf("%d\n", i); } for (j= 253; j != 4; j= j + 1) { printf("%d\n", j); 
} for (k= 1; k <= 5; k= k + 1) { printf("%ld\n", k); } return(i); } ================================================ FILE: 64_6809_Target/tests/input012.c ================================================ int printf(char *fmt, ...); int fred(int x) { return(5); } void main() { int x; x= fred(2); printf("%d\n", x); } ================================================ FILE: 64_6809_Target/tests/input013.c ================================================ int printf(char *fmt, ...); int fred(int x) { return(56); } void main() { int dummy; int result; dummy= printf("%d\n", 23); result= fred(10); dummy= printf("%d\n", result); } ================================================ FILE: 64_6809_Target/tests/input014.c ================================================ int printf(char *fmt, ...); int fred(int x) { return(20); } int main() { int result; printf("%d\n", 10); result= fred(15); printf("%d\n", result); printf("%d\n", fred(15)+10); return(0); } ================================================ FILE: 64_6809_Target/tests/input015.c ================================================ int printf(char *fmt, ...); int main() { char a; char *b; char c; int d; int *e; int f; a= 18; printf("%d\n", a); b= &a; c= *b; printf("%d\n", c); d= 12; printf("%d\n", d); e= &d; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 64_6809_Target/tests/input016.c ================================================ int printf(char *fmt, ...); int c; int d; int *e; int f; int main() { c= 12; d=18; printf("%d\n", c); e= &c + 1; f= *e; printf("%d\n", f); return(0); } ================================================ FILE: 64_6809_Target/tests/input017.c ================================================ int printf(char *fmt, ...); int main() { char a; char *b; int d; int *e; b= &a; *b= 19; printf("%d\n", a); e= &d; *e= 12; printf("%d\n", d); return(0); } ================================================ FILE: 64_6809_Target/tests/input018.c 
================================================ int printf(char *fmt, ...); int main() { int a; int b; a= b= 34; printf("%d\n", a); printf("%d\n", b); return(0); } ================================================ FILE: 64_6809_Target/tests/input018a.c ================================================ int printf(char *fmt, ...); int a; int *b; char c; char *d; int main() { b= &a; *b= 15; printf("%d\n", a); d= &c; *d= 16; printf("%d\n", c); return(0); } ================================================ FILE: 64_6809_Target/tests/input019.c ================================================ int printf(char *fmt, ...); int a; int b; int c; int d; int e; int main() { a= 2; b= 4; c= 3; d= 2; e= (a+b) * (c+d); printf("%d\n", e); return(0); } ================================================ FILE: 64_6809_Target/tests/input020.c ================================================ int printf(char *fmt, ...); int a; int b[25]; int main() { b[3]= 12; a= b[3]; printf("%d\n", a); return(0); } ================================================ FILE: 64_6809_Target/tests/input021.c ================================================ int printf(char *fmt, ...); char c; char *str; int main() { c= '\n'; printf("%d\n", c); for (str= "Hello world\n"; *str != 0; str= str + 1) { printf("%c", *str); } return(0); } ================================================ FILE: 64_6809_Target/tests/input022.c ================================================ int printf(char *fmt, ...); char a; char b; char c; int d; int e; int f; long g; long h; long i; int main() { b= 5; c= 7; a= b + c++; printf("%d\n", a); e= 5; f= 7; d= e + f++; printf("%d\n", d); h= 5; i= 7; g= h + i++; printf("%ld\n", g); a= b-- + c; printf("%d\n", a); d= e-- + f; printf("%d\n", d); g= h-- + i; printf("%ld\n", g); a= ++b + c; printf("%d\n", a); d= ++e + f; printf("%d\n", d); g= ++h + i; printf("%ld\n", g); a= b * --c; printf("%d\n", a); d= e * --f; printf("%d\n", d); g= h * --i; printf("%ld\n", g); return(0); } 
================================================ FILE: 64_6809_Target/tests/input023.c ================================================ int printf(char *fmt, ...); char *str; int x; int main() { x= -23; printf("%d\n", x); printf("%d\n", -10 * -10); x= 1; x= ~x; printf("%d\n", x); x= 2 > 5; printf("%d\n", x); x= !x; printf("%d\n", x); x= !x; printf("%d\n", x); x= 13; if (x) { printf("%d\n", 13); } x= 0; if (!x) { printf("%d\n", 14); } for (str= "Hello world\n"; *str; str++) { printf("%c", *str); } return(0); } ================================================ FILE: 64_6809_Target/tests/input024.c ================================================ int printf(char *fmt, ...); int a; int b; int c; int main() { a= 42; b= 19; printf("%d\n", a & b); printf("%d\n", a | b); printf("%d\n", a ^ b); printf("%d\n", 1 << 3); printf("%d\n", 63 >> 3); return(0); } ================================================ FILE: 64_6809_Target/tests/input025.c ================================================ int printf(char *fmt, ...); int a; int b; int c; int main() { char z; int y; int x; x= 10; y= 20; z= 30; printf("%d\n", x); printf("%d\n", y); printf("%d\n", z); a= 5; b= 15; c= 25; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); return(0); } ================================================ FILE: 64_6809_Target/tests/input026.c ================================================ int printf(char *fmt, ...); int main(int a, char b, long c, int d, int e, int f, int g, int h) { int i; int j; int k; a= 13; printf("%d\n", a); b= 23; printf("%d\n", b); c= 34; printf("%ld\n", c); d= 44; printf("%d\n", d); e= 54; printf("%d\n", e); f= 64; printf("%d\n", f); g= 74; printf("%d\n", g); h= 84; printf("%d\n", h); i= 94; printf("%d\n", i); j= 95; printf("%d\n", j); k= 96; printf("%d\n", k); return(0); } ================================================ FILE: 64_6809_Target/tests/input027.c ================================================ int printf(char *fmt, ...); int param8(int a, int b, int c, int 
d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int param5(int a, int b, int c, int d, int e) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param2(int a, int b) { int c; int d; int e; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int param0() { int a; int b; int c; int d; int e; a= 1; b= 2; c= 3; d= 4; e= 5; printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); return(0); } int main() { param8(1,2,3,4,5,6,7,8); param5(1,2,3,4,5); param2(1,2); param0(); return(0); } ================================================ FILE: 64_6809_Target/tests/input028.c ================================================ int printf(char *fmt, ...); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); printf("%d\n", x); return(0); } ================================================ FILE: 64_6809_Target/tests/input029.c ================================================ int printf(char *fmt, ...); int param8(int a, int b, int c, int d, int e, int f, int g, int h); int fred(int a, int b, int c); int main(); int param8(int a, int b, int c, int d, int e, int f, int g, int h) { printf("%d\n", a); printf("%d\n", b); printf("%d\n", c); printf("%d\n", d); printf("%d\n", e); printf("%d\n", f); printf("%d\n", g); printf("%d\n", h); return(0); } int fred(int a, int b, int c) { return(a+b+c); } int main() { int x; param8(1, 2, 3, 5, 8, 13, 21, 34); x= fred(2, 3, 4); 
printf("%d\n", x); return(0); } ================================================ FILE: 64_6809_Target/tests/input030.c ================================================ int printf(char *fmt, ...); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input030.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 64_6809_Target/tests/input031.c ================================================ int printf(char *fmt, ...); int main() { int x; x= 2 + + 3 - * / ; } ================================================ FILE: 64_6809_Target/tests/input032.c ================================================ int printf(char *fmt, ...); int main() { pizza cow llama sausage; } ================================================ FILE: 64_6809_Target/tests/input033.c ================================================ int printf(char *fmt, ...); int main() { char *z; return(z); } ================================================ FILE: 64_6809_Target/tests/input035.c ================================================ int printf(char *fmt, ...); int fred(int a, int b) { int a; return(a); } ================================================ FILE: 64_6809_Target/tests/input036.c ================================================ int printf(char *fmt, ...); int fred(int a, char b, int c); int fred(int a, int b, char c); ================================================ FILE: 64_6809_Target/tests/input037.c ================================================ int printf(char *fmt, ...); int fred(int a, char b +, int z); ================================================ FILE: 64_6809_Target/tests/input038.c ================================================ int printf(char *fmt, ...); int fred(int a, char b, int c); int fred(int a, int b, 
char c, int g); ================================================ FILE: 64_6809_Target/tests/input039.c ================================================ int printf(char *fmt, ...); int main() { int a; } ================================================ FILE: 64_6809_Target/tests/input041.c ================================================ int printf(char *fmt, ...); void fred() { return(5); } ================================================ FILE: 64_6809_Target/tests/input042.c ================================================ int printf(char *fmt, ...); int main() { fred(5); } ================================================ FILE: 64_6809_Target/tests/input043.c ================================================ int printf(char *fmt, ...); int main() { int a; a= b[4]; } ================================================ FILE: 64_6809_Target/tests/input044.c ================================================ int printf(char *fmt, ...); int main() { int a; a= z; } ================================================ FILE: 64_6809_Target/tests/input045.c ================================================ int printf(char *fmt, ...); int main() { int a; a= &5; } ================================================ FILE: 64_6809_Target/tests/input046.c ================================================ int printf(char *fmt, ...); int main() { int a; a= *5; } ================================================ FILE: 64_6809_Target/tests/input047.c ================================================ int printf(char *fmt, ...); int main() { int a; a= ++5; } ================================================ FILE: 64_6809_Target/tests/input048.c ================================================ int printf(char *fmt, ...); int main() { int a; a= --5; } ================================================ FILE: 64_6809_Target/tests/input049.c ================================================ int printf(char *fmt, ...); int main() { int x; char y; y= x; } ================================================ FILE: 
64_6809_Target/tests/input050.c ================================================ int printf(char *fmt, ...); int main() { char *a; char *b; a= a + b; } ================================================ FILE: 64_6809_Target/tests/input051.c ================================================ int printf(char *fmt, ...); int main() { char a; a= 'fred'; } ================================================ FILE: 64_6809_Target/tests/input052.c ================================================ int printf(char *fmt, ...); int main() { int a; a= $5.00; } ================================================ FILE: 64_6809_Target/tests/input053.c ================================================ int printf(char *fmt, ...); int main() { printf("Hello world, %d\n", 23); return(0); } ================================================ FILE: 64_6809_Target/tests/input054.c ================================================ int printf(char *fmt, ...); int main() { int i; for (i=0; i < 20; i++) { printf("Hello world, %d\n", i); } return(0); } ================================================ FILE: 64_6809_Target/tests/input055.c ================================================ int printf(char *fmt, ...); int main(int argc, char **argv) { int i; char *argument; printf("Hello world\n"); for (i=0; i < argc; i++) { argument= *argv; argv= argv + 1; printf("Argument %d is %s\n", i, argument); } return(0); } ================================================ FILE: 64_6809_Target/tests/input056.c ================================================ struct foo { int x; }; struct mary var1; ================================================ FILE: 64_6809_Target/tests/input057.c ================================================ struct fred { int x; } ; struct fred { char y; } ; ================================================ FILE: 64_6809_Target/tests/input058.c ================================================ int printf(char *fmt, ...); struct fred { int x; char y; long z; }; struct fred var2; struct fred *varptr; 
int main() { long result; var2.x= 12; printf("%d\n", var2.x); var2.y= 'c'; printf("%d\n", var2.y); var2.z= 4005; printf("%ld\n", var2.z); result= var2.x + var2.y + var2.z; printf("%ld\n", result); varptr= &var2; result= varptr->x + varptr->y + varptr->z; printf("%ld\n", result); return(0); } ================================================ FILE: 64_6809_Target/tests/input059.c ================================================ int main() { int x; x= y.foo; } ================================================ FILE: 64_6809_Target/tests/input060.c ================================================ int main() { int x; x= x.foo; } ================================================ FILE: 64_6809_Target/tests/input061.c ================================================ int main() { int x; x= x->foo; } ================================================ FILE: 64_6809_Target/tests/input063.c ================================================ int printf(char *fmt, ...); enum fred { apple=1, banana, carrot, pear=10, peach, mango, papaya }; enum jane { aple=1, bnana, crrot, par=10, pech, mago, paaya }; enum fred var1; enum jane var2; enum fred var3; int main() { var1= carrot + pear + mango; printf("%d\n", var1); return(0); } ================================================ FILE: 64_6809_Target/tests/input064.c ================================================ enum fred var3; ================================================ FILE: 64_6809_Target/tests/input065.c ================================================ enum fred { x, y, z }; enum fred { a, b }; ================================================ FILE: 64_6809_Target/tests/input066.c ================================================ enum fred { x, y, z }; enum mary { a, b, z }; ================================================ FILE: 64_6809_Target/tests/input067.c ================================================ int printf(char *fmt, ...); typedef int FOO; FOO var1; struct bar { int x; int y} ; typedef struct bar BAR; BAR var2; int main() { 
var1= 5; printf("%d\n", var1); var2.x= 7; var2.y= 10; printf("%d\n", var2.x + var2.y); return(0); } ================================================ FILE: 64_6809_Target/tests/input068.c ================================================ typedef int FOO; typedef char FOO; ================================================ FILE: 64_6809_Target/tests/input069.c ================================================ typedef int FOO; FLOO y; ================================================ FILE: 64_6809_Target/tests/input070.c ================================================ #include <stdio.h> typedef int FOO; int main() { FOO x; x= 56; printf("%d\n", x); return(0); } ================================================ FILE: 64_6809_Target/tests/input071.c ================================================ #include <stdio.h> int main() { int x; x = 0; while (x < 100) { if (x == 5) { x = x + 2; continue; } printf("%d\n", x); if (x == 14) { break; } x = x + 1; } printf("Done\n"); return (0); } ================================================ FILE: 64_6809_Target/tests/input072.c ================================================ int main() { break; } ================================================ FILE: 64_6809_Target/tests/input073.c ================================================ int main() { continue; } ================================================ FILE: 64_6809_Target/tests/input074.c ================================================ #include <stdio.h> int main() { int x; int y; y= 0; for (x=0; x < 5; x++) { switch(x) { case 1: { y= 5; break; } case 2: { y= 7; break; } case 3: { y= 9; } default: { y= 100; } } printf("%d\n", y); } return(0); } ================================================ FILE: 64_6809_Target/tests/input075.c ================================================ int main() { int x; switch(x) { if (x<5); } } ================================================ FILE: 64_6809_Target/tests/input076.c ================================================ int main() { int x; switch(x) { } } 
================================================ FILE: 64_6809_Target/tests/input077.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } case 4: { x= 2; } } } ================================================ FILE: 64_6809_Target/tests/input078.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } default: { x= 3; } default: { x= 2; } } } ================================================ FILE: 64_6809_Target/tests/input079.c ================================================ int main() { int x; switch(x) { case 1: { x= 2; } case 2: { x= 2; } case 1: { x= 2; } default: { x= 2; } } } ================================================ FILE: 64_6809_Target/tests/input080.c ================================================ #include <stdio.h> int x; int y; int main() { for (x=0, y=1; x < 6; x++, y=y+2) { printf("%d %d\n", x, y); } return(0); } ================================================ FILE: 64_6809_Target/tests/input081.c ================================================ #include <stdio.h> int x; int y; int main() { x= 0; y=1; for (;x<5;) { printf("%d %d\n", x, y); x=x+1; y=y+2; } return(0); } ================================================ FILE: 64_6809_Target/tests/input082.c ================================================ #include <stdio.h> int main() { int x; x= 10; // Dangling else test if (x > 5) if (x > 15) printf("x > 15\n"); else printf("15 >= x > 5\n"); else printf("5 >= x\n"); return(0); } ================================================ FILE: 64_6809_Target/tests/input083.c ================================================ #include <stdio.h> int main() { int x; // Dangling else test. 
// We should not print anything for x<= 5 for (x=0; x < 12; x++) if (x > 5) if (x > 10) printf("10 < %2d\n", x); else printf(" 5 < %2d <= 10\n", x); return(0); } ================================================ FILE: 64_6809_Target/tests/input084.c ================================================ #include <stdio.h> int main() { int x, y; x=2; y=3; printf("%d %d\n", x, y); char a, *b; a= 'f'; b= &a; printf("%c %c\n", a, *b); return(0); } ================================================ FILE: 64_6809_Target/tests/input085.c ================================================ int main(struct foo { int x; }; , int a) { return(0); } ================================================ FILE: 64_6809_Target/tests/input086.c ================================================ int main() { int fred() { return(5); } int x; x=2; return(x); } ================================================ FILE: 64_6809_Target/tests/input087.c ================================================ struct foo { int x; int y; struct blah { int g; }; }; ================================================ FILE: 64_6809_Target/tests/input088.c ================================================ #include <stdio.h> struct foo { int x; int y; } fred, mary; struct foo james; int main() { fred.x= 5; mary.y= 6; printf("%d %d\n", fred.x, mary.y); return(0); } ================================================ FILE: 64_6809_Target/tests/input089.c ================================================ #include <stdio.h> int x= 23; char y= 'H'; char *z= "Hello world"; int main() { printf("%d %c %s\n", x, y, z); return(0); } ================================================ FILE: 64_6809_Target/tests/input090.c ================================================ #include <stdio.h> int a = 23, b = 100; char y = 'H', *z = "Hello world"; int main() { printf("%d %d %c %s\n", a, b, y, z); return (0); } ================================================ FILE: 64_6809_Target/tests/input091.c ================================================ #include <stdio.h> int fred[] = { 1, 2, 3, 4, 5 }; int 
jim[10] = { 1, 2, 3, 4, 5 }; int main() { int i; for (i=0; i < 5; i++) printf("%d\n", fred[i]); for (i=0; i < 10; i++) printf("%d\n", jim[i]); return(0); } ================================================ FILE: 64_6809_Target/tests/input092.c ================================================ char x= 3000; ================================================ FILE: 64_6809_Target/tests/input093.c ================================================ char x= fred; ================================================ FILE: 64_6809_Target/tests/input094.c ================================================ char *s= 54; ================================================ FILE: 64_6809_Target/tests/input095.c ================================================ int fred(int x=2) { return(x); } ================================================ FILE: 64_6809_Target/tests/input096.c ================================================ int fred[0]; ================================================ FILE: 64_6809_Target/tests/input098.c ================================================ int fred[3]= { 1, 2, 3, 4, 5 }; ================================================ FILE: 64_6809_Target/tests/input099.c ================================================ #include <stdio.h> // List of token strings, for debugging purposes. 
// As yet, we can't store a NULL into the list char *Tstring[] = { "EOF", "=", "||", "&&", "|", "^", "&", "==", "!=", ",", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", "default", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":", "" }; int main() { int i; char *str; i=0; while (1) { str= Tstring[i]; if (*str == 0) break; printf("%s\n", str); i++; } return(0); } ================================================ FILE: 64_6809_Target/tests/input100.c ================================================ #include <stdio.h> int main() { int x= 3, y=14; int z= 2 * x + y; char *str= "Hello world"; printf("%s %d %d\n", str, x+y, z); return(0); } ================================================ FILE: 64_6809_Target/tests/input101.c ================================================ #include <stdio.h> int main() { int x= 65535; char y; char *str; y= (char )x; printf("0x%x\n", y); str= (char *)0; printf("0x%lx\n", (long)str); return(0); } ================================================ FILE: 64_6809_Target/tests/input102.c ================================================ int main() { struct foo { int p; }; int y= (struct foo) x; } ================================================ FILE: 64_6809_Target/tests/input103.c ================================================ int main() { union foo { int p; }; int y= (union foo) x; } ================================================ FILE: 64_6809_Target/tests/input104.c ================================================ int main() { int y= (void) x; } ================================================ FILE: 64_6809_Target/tests/input105.c ================================================ int main() { int x; char *y; y= (char) x; } ================================================ FILE: 64_6809_Target/tests/input106.c 
================================================ #include <stdio.h> char *y= (char *)0; int main() { printf("0x%lx\n", (long)y); return(0); } ================================================ FILE: 64_6809_Target/tests/input107.c ================================================ #include <stdio.h> char *y[] = { "fish", "cow", NULL }; char *z= NULL; int main() { int i; char *ptr; for (i=0; i < 3; i++) { ptr= y[i]; if (ptr != (char *)0) printf("%s\n", y[i]); else printf("NULL\n"); } return(0); } ================================================ FILE: 64_6809_Target/tests/input108.c ================================================ int main() { char *str= (void *)0; return(0); } ================================================ FILE: 64_6809_Target/tests/input109.c ================================================ #include <stdio.h> int main() { int x= 17 - 1; printf("%d\n", x); return(0); } ================================================ FILE: 64_6809_Target/tests/input110.c ================================================ #include <stdio.h> int x; int y; int main() { x= 3; y= 15; y += x; printf("%d\n", y); x= 3; y= 15; y -= x; printf("%d\n", y); x= 3; y= 15; y *= x; printf("%d\n", y); x= 3; y= 15; y /= x; printf("%d\n", y); return(0); } ================================================ FILE: 64_6809_Target/tests/input111.c ================================================ #include <stdio.h> int main() { int x= 2000 + 3 + 4 * 5 + 6; printf("%d\n", x); return(0); } ================================================ FILE: 64_6809_Target/tests/input112.c ================================================ #include <stdio.h> char* y = NULL; int x= 10 + 6; int fred [ 2 + 3 ]; int main() { fred[2]= x; printf("%d\n", fred[2]); return(0); } ================================================ FILE: 64_6809_Target/tests/input113.c ================================================ #include <stdio.h> void fred(void); void fred(void) { printf("fred says hello\n"); } int main(void) { fred(); return(0); } ================================================ FILE: 
64_6809_Target/tests/input114.c ================================================ #include <stdio.h> int main() { int x= 0x4a; printf("%c\n", x); return(0); } ================================================ FILE: 64_6809_Target/tests/input116.c ================================================ #include <stdio.h> static int counter=0; static int fred(void) { return(counter++); } int main(void) { int i; for (i=0; i < 5; i++) printf("%d\n", fred()); return(0); } ================================================ FILE: 64_6809_Target/tests/input117.c ================================================ #include <stdio.h> static char *fred(void) { return("Hello"); } int main(void) { printf("%s\n", fred()); return(0); } ================================================ FILE: 64_6809_Target/tests/input118.c ================================================ int main(void) { static int x; } ================================================ FILE: 64_6809_Target/tests/input119.c ================================================ #include <stdio.h> int x; int y= 3; int main() { x= y != 3 ? 6 : 8; printf("%d\n", x); x= (y == 3) ? 6 : 8; printf("%d\n", x); return(0); } ================================================ FILE: 64_6809_Target/tests/input120.c ================================================ #include <stdio.h> int x; int y= 3; int main() { for (y= 0; y < 10; y++) { x= y > 4 ? y + 2 : y + 9; printf("%d\n", x); } return(0); } ================================================ FILE: 64_6809_Target/tests/input121.c ================================================ #include <stdio.h> int x; int y= 3; int main() { for (y= 0; y < 10; y++) { x= (y < 4) ? y + 2 : (y > 7) ? 
1000 : y + 9; printf("%d\n", x); } return(0); } ================================================ FILE: 64_6809_Target/tests/input122.c ================================================ #include <stdio.h> int x, y, z1, z2; int main() { for (x= 0; x <= 1; x++) { for (y= 0; y <= 1; y++) { z1= x || y; z2= x && y; printf("x %d, y %d, x || y %d, x && y %d\n", x, y, z1, z2); } } //z= x || y; return(0); } ================================================ FILE: 64_6809_Target/tests/input123.c ================================================ #include <stdio.h> int main() { int x; for (x=0; x < 20; x++) switch(x) { case 2: case 3: case 5: case 7: case 11: printf("%2d infant prime\n", x); break; case 13: case 17: case 19: printf("%2d teen prime\n", x); break; case 0: case 1: case 4: case 6: case 8: case 9: case 10: case 12: printf("%2d infant composite\n", x); break; default: printf("%2d teen composite\n", x); break; } return(0); } ================================================ FILE: 64_6809_Target/tests/input124.c ================================================ #include <stdio.h> int ary[5]; int main() { ary++; return(0); } ================================================ FILE: 64_6809_Target/tests/input125.c ================================================ #include <stdio.h> int ary[5]; int *ptr; int x; int main() { ary[3]= 2008; ptr= ary; // Load ary's address into ptr x= ary[3]; printf("%d\n", x); x= ptr[3]; printf("%d\n", x); // Treat ptr as an array return(0); } ================================================ FILE: 64_6809_Target/tests/input126.c ================================================ #include <stdio.h> int ary[5]; int main() { ary[3]= 2008; ptr= &ary; return(0); } ================================================ FILE: 64_6809_Target/tests/input127.c ================================================ #include <stdio.h> int ary[5]; void fred(int *ptr) { // Receive a pointer printf("%d\n", ptr[3]); } int main() { ary[3]= 2008; printf("%d\n", ary[3]); fred(ary); // Pass ary as a pointer return(0); } 
================================================ FILE: 64_6809_Target/tests/input128.c ================================================ #include <stdio.h> struct foo { int val; struct foo *next; }; struct foo head, mid, tail; int main() { struct foo *ptr; tail.val= 20; tail.next= NULL; mid.val= 15; mid.next= &tail; head.val= 10; head.next= &mid; ptr= &head; printf("%d %d\n", head.val, ptr->val); printf("%d %d\n", mid.val, ptr->next->val); printf("%d %d\n", tail.val, ptr->next->next->val); return(0); } ================================================ FILE: 64_6809_Target/tests/input129.c ================================================ #include <stdio.h> int x= 6; int main() { printf("%d\n", x++ ++); return(0); } ================================================ FILE: 64_6809_Target/tests/input130.c ================================================ #include <stdio.h> char *x= "foo"; int main() { printf("Hello " "world" "\n"); return(0); } ================================================ FILE: 64_6809_Target/tests/input131.c ================================================ #include <stdio.h> void donothing() { } int main() { int x=0; printf("Doing nothing... 
"); donothing(); printf("nothing done\n"); while (++x < 100) ; printf("x is now %d\n", x); return(0); } ================================================ FILE: 64_6809_Target/tests/input132.c ================================================ extern int fred; int fred; int mary; extern int mary; int main() { return(0); } ================================================ FILE: 64_6809_Target/tests/input133.c ================================================ #include <stdio.h> extern int fred[]; int fred[23]; char mary[100]; extern char mary[]; void main() { printf("OK\n"); } ================================================ FILE: 64_6809_Target/tests/input134.c ================================================ #include <stdio.h> char y = 'a'; char *x; int main() { x= &y; if (x && y == 'a') printf("1st match\n"); x= NULL; if (x && y == 'a') printf("2nd match\n"); x= &y; y='b'; if (x && y == 'a') printf("3rd match\n"); return(0); } ================================================ FILE: 64_6809_Target/tests/input135.c ================================================ #include <stdio.h> void fred() { int x= 5; printf("testing x\n"); if (x > 4) return; printf("x below 5\n"); } int main() { fred(); return(0); } ================================================ FILE: 64_6809_Target/tests/input136.c ================================================ #include <stdio.h> int add(int x, int y) { return(x+y); } int main() { int result; result= 3 * add(2,3) - 5 * add(4,6); printf("%d\n", result); return(0); } ================================================ FILE: 64_6809_Target/tests/input137.c ================================================ #include <stdio.h> int a=1, b=2, c=3, d=4, e=5, f=6, g=7, h=8; int main() { int x; x= ((((((a + b) + c) + d) + e) + f) + g) + h; x= a + (b + (c + (d + (e + (f + (g + h)))))); printf("x is %d\n", x); return(0); } ================================================ FILE: 64_6809_Target/tests/input138.c ================================================ #include <stdio.h> int x, y, z; int a=1; int *aptr; int main() { 
// See if generic AND works for (x=0; x <= 1; x++) for (y=0; y <= 1; y++) { z= x && y; printf("%d %d | %d\n", x, y, z); } // See if generic OR works for (x=0; x <= 1; x++) for (y=0; y <= 1; y++) { z= x || y; printf("%d %d | %d\n", x, y, z); } // Now some lazy evaluation aptr= NULL; if (aptr && *aptr == 1) printf("aptr points at 1\n"); else printf("aptr is NULL or doesn't point at 1\n"); aptr= &a; if (aptr && *aptr == 1) printf("aptr points at 1\n"); else printf("aptr is NULL or doesn't point at 1\n"); return(0); } ================================================ FILE: 64_6809_Target/tests/input139.c ================================================ #include <stdio.h> int same(int x) { return(x); } int main() { int a= 3; if (same(a) && same(a) >= same(a)) printf("same apparently\n"); return(0); } ================================================ FILE: 64_6809_Target/tests/input140.c ================================================ #include <stdio.h> int main() { int i; int ary[5]; char z; // Write below the array z= 'H'; // Fill the array for (i=0; i < 5; i++) ary[i]= i * i; // Write above the array i=14; // Print out the array for (i=0; i < 5; i++) printf("%d\n", ary[i]); // See if either side is OK printf("%d %c\n", i, z); return(0); } ================================================ FILE: 64_6809_Target/tests/input141.c ================================================ static int fred[5]; int jim; int foo(int mary[6]) { return(5); } ================================================ FILE: 64_6809_Target/tests/input142.c ================================================ static int fred[]; int jim; ================================================ FILE: 64_6809_Target/tests/input143.c ================================================ #include <stdio.h> char foo; char *a, *b, *c; int main() { a= b= c= NULL; if (a==NULL || b==NULL || c==NULL) printf("One of the three is NULL\n"); a= &foo; if (a==NULL || b==NULL || c==NULL) printf("One of the three is NULL\n"); b= &foo; if (a==NULL || b==NULL || c==NULL) 
printf("One of the three is NULL\n"); c= &foo; if (a==NULL || b==NULL || c==NULL) printf("One of the three is NULL\n"); else printf("All three are non-NULL\n"); return(0); } ================================================ FILE: 64_6809_Target/tests/input145.c ================================================ #include <stdio.h> char *str= "qwertyuiop"; int list[]= {3, 5, 7, 9, 11, 13, 15}; int *lptr; int main() { printf("%c\n", *str); str= str + 1; printf("%c\n", *str); str += 1; printf("%c\n", *str); str += 1; printf("%c\n", *str); str -= 1; printf("%c\n", *str); lptr= list; printf("%d\n", *lptr); lptr= lptr + 1; printf("%d\n", *lptr); lptr += 1; printf("%d\n", *lptr); lptr += 1; printf("%d\n", *lptr); lptr -= 1; printf("%d\n", *lptr); return(0); } ================================================ FILE: 64_6809_Target/tests/input146.c ================================================ #include <stdio.h> char *str= "qwertyuiop"; int list[]= {3, 5, 7, 9, 11, 13, 15}; int *lptr; int main() { printf("%c\n", *str); str= str + 1; printf("%c\n", *str); str += 1; printf("%c\n", *str); str += 1; printf("%c\n", *str); str -= 1; printf("%c\n", *str); str++; printf("%c\n", *str); str--; printf("%c\n", *str); ++str; printf("%c\n", *str); --str; printf("%c\n\n", *str); lptr= list; printf("%d\n", *lptr); lptr= lptr + 1; printf("%d\n", *lptr); lptr += 1; printf("%d\n", *lptr); lptr += 1; printf("%d\n", *lptr); lptr -= 1; printf("%d\n", *lptr); lptr++ ; printf("%d\n", *lptr); lptr-- ; printf("%d\n", *lptr); ++lptr ; printf("%d\n", *lptr); --lptr ; printf("%d\n", *lptr); return(0); } ================================================ FILE: 64_6809_Target/tests/input147.c ================================================ #include <stdio.h> int a; int main() { printf("%d\n", 24 % 9); printf("%d\n", 31 % 11); a= 24; a %= 9; printf("%d\n",a); a= 31; a %= 11; printf("%d\n",a); return(0); } ================================================ FILE: 64_6809_Target/tests/input148.c ================================================ 
#include <stdio.h> char *argv[]= { "unused", "-fish", "-cat", "owl" }; int argc= 4; int main() { int i; for (i = 1; i < argc; i++) { printf("i is %d\n", i); if (*argv[i] != '-') break; } while (i < argc) { printf("leftover %s\n", argv[i]); i++; } return (0); } ================================================ FILE: 64_6809_Target/tests/input149.c ================================================ #include <stdio.h> static int localOffset=0; static int newlocaloffset(int size) { localOffset += (size > 4) ? size : 4; return (-localOffset); } int main() { int i, r; for (i=1; i <= 12; i++) { r= newlocaloffset(i); printf("%d %d\n", i, r); } return(0); } ================================================ FILE: 64_6809_Target/tests/input150.c ================================================ #include <stdio.h> #include <stdlib.h> struct Svalue { char *thing; int vreg; int intval; }; struct IR { int label; int op; struct Svalue dst; struct Svalue src1; struct Svalue src2; int jmplabel; }; struct foo { int a; int b; struct Svalue *c; int d; }; struct IR *fred; struct foo jane; int main() { fred= (struct IR *)malloc(sizeof(struct IR)); fred->label= 1; fred->op= 2; fred->dst.thing= NULL; fred->dst.vreg=3; fred->dst.intval=4; fred->src1.thing= NULL; fred->src1.vreg=5; fred->src1.intval=6; fred->src2.thing= NULL; fred->src2.vreg=7; fred->src2.intval=8; fred->jmplabel= 9; printf("%d %d %d\n", fred->label, fred->op, fred->dst.vreg); printf("%d %d %d\n", fred->dst.intval, fred->src1.vreg, fred->src1.intval); printf("%d %d %d\n\n", fred->src2.vreg, fred->src2.intval, fred->jmplabel); jane.c= (struct Svalue *)malloc(sizeof(struct Svalue)); jane.a= 1; jane.b= 2; jane.d= 4; jane.c->thing= "fish"; jane.c->vreg= 3; jane.c->intval= 5; printf("%d %d %d\n", jane.a, jane.b, jane.c->vreg); printf("%d %d %s\n", jane.d, jane.c->intval, jane.c->thing); return(0); } ================================================ FILE: 64_6809_Target/tests/input151.c ================================================ #include <stdio.h> struct Location { int type; // 
One of the L_ values char *name; // A symbol's name long intval; // Offset, const value, label-id etc. int primtype; // 6809 primitive type, see P_POINTER below }; #define NUMFREEREGS 16 #define L_FREE 1 struct Location Locn[NUMFREEREGS]; int main() { int l=5; Locn[l].type = 23; printf("%d\n", Locn[l].type); return(0); } ================================================ FILE: 64_6809_Target/tests/input152.c ================================================ #include <stdio.h> int main() { int x=0; while (x<10) { switch(x) { case 2: x++; break; case 4: x=7; continue; } printf("%d\n", x); x++; } return(0); } ================================================ FILE: 64_6809_Target/tests/input153.c ================================================ #include <stdio.h> enum { C_GLOBAL = 1, // Globally visible symbol C_LOCAL, // Locally visible symbol C_PARAM, // Locally visible function parameter C_EXTERN, // External globally visible symbol C_STATIC, // Static symbol, visible in one file C_STRUCT, // A struct C_UNION, // A union C_MEMBER, // Member of a struct or union C_ENUMTYPE, // A named enumeration type C_ENUMVAL, // A named enumeration value C_TYPEDEF, // A named typedef C_STRLIT // Not a class: used to denote string literals }; void fred(int class) { char qbeprefix; int *silly; silly = &class; // Get the relevant QBE prefix for the symbol qbeprefix = ((class == C_GLOBAL) || (class == C_STATIC) || (class == C_EXTERN)) ? (char)'$' : (char)'%'; printf("class %d prefix %c\n", class, qbeprefix); } int main() { int i; for (i= C_GLOBAL; i<= C_STRLIT; i++) fred(i); return(0); } ================================================ FILE: 64_6809_Target/tests/input154.c ================================================ #include <stdio.h> int i; int j; int main() { for (i=1; i<=10; i++) { j= ((i==4) || (i==5)) ? 
17 : 23; printf("%d\n", j); } return(0); } ================================================ FILE: 64_6809_Target/tests/input155.c ================================================ #include <stdio.h> int i; int j; int main() { for (i=1; i<=10; i++) { j= ((i==4)) ? 17 : 23; printf("%d\n", j); } return(0); } ================================================ FILE: 64_6809_Target/tests/input156.c ================================================ #include <stdio.h> int i; int j; int main() { j= (5) ? 7 : 8; printf("%d\n", j); j= (0) ? 7 : 8; printf("%d\n", j); return(0); } ================================================ FILE: 64_6809_Target/tests/input157.c ================================================ #include <stdio.h> int main() { if (5) printf("true\n"); if (0) printf("true\n"); return(0); } ================================================ FILE: 64_6809_Target/tests/input159.c ================================================ #include <stdio.h> enum { P_NONE, P_VOID = 16, P_CHAR = 32, P_INT = 48, P_LONG = 64, P_STRUCT=80, P_UNION=96 }; int inttype(int type) { return (((type & 0xf) == 0) && (type >= P_CHAR && type <= P_LONG)); } int main() { printf("%d\n", inttype(P_NONE)); printf("%d\n", inttype(P_VOID)); printf("%d\n", inttype(P_CHAR)); printf("%d\n", inttype(P_INT)); printf("%d\n", inttype(P_LONG)); printf("%d\n", inttype(P_STRUCT)); printf("%d\n", inttype(P_UNION)); printf("%d\n", inttype(P_CHAR+1)); printf("%d\n", inttype(P_INT+1)); return(0); } ================================================ FILE: 64_6809_Target/tests/input160.c ================================================ #include <stdio.h> int x= '#'; int main() { switch(x) { case 'x': printf("An x\n"); break; case '#': printf("An #\n"); break; default: printf("No idea\n"); } return(0); } ================================================ FILE: 64_6809_Target/tests/input161.c ================================================ #include <stdio.h> char fred[5]; char *s; int main() { s= fred; *s = 'F'; s++; *s++ = 'r'; *s++ = 'e'; *s++ = 'd'; *s++ = 0; printf("%s\n", 
fred); return(0); } ================================================ FILE: 64_6809_Target/tests/input162.c ================================================ #include <stdio.h> #include <string.h> void keyword(char *s) { printf("search on %s: ", s); switch (*s) { case 'b': printf("Starts with b\n"); break; case 'c': if (!strcmp(s, "case")) { printf("It's case\n"); return; } printf("Starts with c\n"); break; case 'v': if (!strcmp(s, "void")) { printf("It's void\n"); return; } printf("Starts with v\n"); break; default: printf("Not found\n"); } } int main() { keyword("char"); keyword("case"); keyword("break"); keyword("horse"); keyword("void"); keyword("piano"); return(0); } ================================================ FILE: 64_6809_Target/tests/input163.c ================================================ #include <stdio.h> #include <string.h> int keyword(char *s) { switch (*s) { case 'b': return(1); case 'c': if (!strcmp(s, "case")) return(2); return(3); case 'v': if (!strcmp(s, "void")) return(4); return(5); default: return(6); } return(0); } int main() { int i; i= keyword("break"); printf("break %d\n", i); i= keyword("case"); printf("case %d\n", i); i= keyword("char"); printf("char %d\n", i); i= keyword("void"); printf("void %d\n", i); i= keyword("volatile"); printf("volatile %d\n", i); i= keyword("horse"); printf("horse %d\n", i); i= keyword("piano"); printf("piano %d\n", i); return(0); } ================================================ FILE: 64_6809_Target/tests/input164.c ================================================ #include <stdio.h> #include <string.h> char *str= "Hello there, this is a sentence. 
How about that?"; int main() { char *comma, *dot; int diff; comma= strchr(str, ','); dot= strchr(str, '.'); diff= dot - comma; printf("dot comma difference is %d\n", diff); return(0); } ================================================ FILE: 64_6809_Target/tests/input165.c ================================================ #include <stdio.h> #include <string.h> char *a= "Hello"; char *b= "Goodbye"; char *c= "Fisherman"; int main() { if (strcmp(a, b)) { printf("%s and %s are different\n", a, b); } if (strcmp(b, c)) { printf("%s and %s are different\n", b, c); } if (strcmp(a, c)) { printf("%s and %s are different\n", a, c); } if (strcmp(a, a)) { printf("%s and %s are different\n", a, a); } if (strcmp(b, b)) { printf("%s and %s are different\n", b, b); } if (strcmp(c, c)) { printf("%s and %s are different\n", c, c); } if (!strcmp(c, c)) { printf("%s and %s are the same\n", c, c); } return(0); } ================================================ FILE: 64_6809_Target/tests/input166.c ================================================ #include <stdio.h> char str[5]; char *ptr; int scanch(int *slash) { int c; *slash=0; c= *ptr; ptr++; if (c == '\\') { *slash=1; c= *ptr; ptr++; switch (c) { case 'n': return ('\n'); } } return(c); } int main() { int c; int slash; ptr= str; str[0]= 'H'; str[1]= 'i'; str[2]= '\\'; str[3]= 'n'; str[4]= 0; while (1) { c= scanch(&slash); if (c==0) break; printf("c %d slash %d\n", c, slash); } return(0); } ================================================ FILE: 64_6809_Target/tests/mktests ================================================ #!/bin/sh # Make the output files for each test # Build our compiler if needed if [ ! -f ../cwj ] then (cd ..; make install) fi for i in input*c do if [ ! -f "out.$i" -a ! -f "err.$i" ] then ../cwj -o out $i 2> "err.$i" # If the err file is empty if [ ! 
#!/bin/sh
# Run one test and show the output.
#
# Usage: onetest cpu sourcefile
#   cpu        the target passed to wcc with -m; "6809" and "qbe"
#              are the two values that this script knows how to run
#   sourcefile the C source file to compile; a trailing .c is
#              removed to form the name of the executable

# Check the arguments first, so that a bad command
# line does not trigger a build of the compiler
if [ "$#" -ne 2 ]
then echo "Usage: $0 cpu sourcefile" >&2; exit 1
fi

# Build our compiler if needed
if [ ! -f ../cparse6809 ]
then (cd ..; make install)
fi

# Remove every file with an underscore in its name
# (presumably wcc's temporary files -- TODO confirm)
rm -f *_*

# Compile the source file. All expansions are quoted
# so that names containing spaces survive intact
bin=`echo "$2" | sed 's/\.c$//'`
../wcc -v -X -o "$bin" -m "$1" "$2"

# 6809 executables are run under the emulator
if [ "$1" = "6809" ]
then emu6809 -d debug "$bin"
fi

# QBE executables are run directly
if [ "$1" = "qbe" ]
then "./$bin"
fi
exit 0
================================================ FILE: 64_6809_Target/tests/out.input010.c ================================================ 20 10 1 2 3 4 5 253 254 255 0 1 ================================================ FILE: 64_6809_Target/tests/out.input011.c ================================================ 10 20 30 1 2 3 4 5 253 254 255 0 1 2 3 1 2 3 4 5 ================================================ FILE: 64_6809_Target/tests/out.input012.c ================================================ 5 ================================================ FILE: 64_6809_Target/tests/out.input013.c ================================================ 23 56 ================================================ FILE: 64_6809_Target/tests/out.input014.c ================================================ 10 20 30 ================================================ FILE: 64_6809_Target/tests/out.input015.c ================================================ 18 18 12 12 ================================================ FILE: 64_6809_Target/tests/out.input016.c ================================================ 12 18 ================================================ FILE: 64_6809_Target/tests/out.input017.c ================================================ 19 12 ================================================ FILE: 64_6809_Target/tests/out.input018.c ================================================ 34 34 ================================================ FILE: 64_6809_Target/tests/out.input018a.c ================================================ 15 16 ================================================ FILE: 64_6809_Target/tests/out.input019.c ================================================ 30 ================================================ FILE: 64_6809_Target/tests/out.input020.c ================================================ 12 ================================================ FILE: 64_6809_Target/tests/out.input021.c ================================================ 10 Hello world 
================================================ FILE: 64_6809_Target/tests/out.input022.c ================================================ 12 12 12 13 13 13 13 13 13 35 35 35 ================================================ FILE: 64_6809_Target/tests/out.input023.c ================================================ -23 100 -2 0 1 0 13 14 Hello world ================================================ FILE: 64_6809_Target/tests/out.input024.c ================================================ 2 59 57 8 7 ================================================ FILE: 64_6809_Target/tests/out.input025.c ================================================ 10 20 30 5 15 25 ================================================ FILE: 64_6809_Target/tests/out.input026.c ================================================ 13 23 34 44 54 64 74 84 94 95 96 ================================================ FILE: 64_6809_Target/tests/out.input027.c ================================================ 1 2 3 4 5 6 7 8 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 ================================================ FILE: 64_6809_Target/tests/out.input028.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 64_6809_Target/tests/out.input029.c ================================================ 1 2 3 5 8 13 21 34 9 ================================================ FILE: 64_6809_Target/tests/out.input030.c ================================================ int printf(char *fmt, ...); int open(char *pathname, int flags); int read(int fd, char *buf, int count); int write(int fd, void *buf, int count); int close(int fd); char *buf; int main() { int zin; int cnt; buf= " "; zin = open("input030.c", 0); if (zin == -1) { return (1); } while ((cnt = read(zin, buf, 60)) > 0) { write(1, buf, cnt); } close(zin); return (0); } ================================================ FILE: 64_6809_Target/tests/out.input053.c ================================================ Hello world, 
23 ================================================ FILE: 64_6809_Target/tests/out.input054.c ================================================ Hello world, 0 Hello world, 1 Hello world, 2 Hello world, 3 Hello world, 4 Hello world, 5 Hello world, 6 Hello world, 7 Hello world, 8 Hello world, 9 Hello world, 10 Hello world, 11 Hello world, 12 Hello world, 13 Hello world, 14 Hello world, 15 Hello world, 16 Hello world, 17 Hello world, 18 Hello world, 19 ================================================ FILE: 64_6809_Target/tests/out.input055.c ================================================ Hello world Argument 0 is ./out ================================================ FILE: 64_6809_Target/tests/out.input058.c ================================================ 12 99 4005 4116 4116 ================================================ FILE: 64_6809_Target/tests/out.input063.c ================================================ 25 ================================================ FILE: 64_6809_Target/tests/out.input067.c ================================================ 5 17 ================================================ FILE: 64_6809_Target/tests/out.input070.c ================================================ 56 ================================================ FILE: 64_6809_Target/tests/out.input071.c ================================================ 0 1 2 3 4 7 8 9 10 11 12 13 14 Done ================================================ FILE: 64_6809_Target/tests/out.input074.c ================================================ 100 5 7 100 100 ================================================ FILE: 64_6809_Target/tests/out.input080.c ================================================ 0 1 1 3 2 5 3 7 4 9 5 11 ================================================ FILE: 64_6809_Target/tests/out.input081.c ================================================ 0 1 1 3 2 5 3 7 4 9 ================================================ FILE: 64_6809_Target/tests/out.input082.c 
================================================ 15 >= x > 5 ================================================ FILE: 64_6809_Target/tests/out.input083.c ================================================ 5 < 6 <= 10 5 < 7 <= 10 5 < 8 <= 10 5 < 9 <= 10 5 < 10 <= 10 10 < 11 ================================================ FILE: 64_6809_Target/tests/out.input084.c ================================================ 2 3 f f ================================================ FILE: 64_6809_Target/tests/out.input088.c ================================================ 5 6 ================================================ FILE: 64_6809_Target/tests/out.input089.c ================================================ 23 H Hello world ================================================ FILE: 64_6809_Target/tests/out.input090.c ================================================ 23 100 H Hello world ================================================ FILE: 64_6809_Target/tests/out.input091.c ================================================ 1 2 3 4 5 1 2 3 4 5 0 0 0 0 0 ================================================ FILE: 64_6809_Target/tests/out.input099.c ================================================ EOF = || && | ^ & == != , > <= >= << >> + - * / ++ -- ~ ! void char int long if else while for return struct union enum typedef extern break continue switch case default intlit strlit ; identifier { } ( ) [ ] , . 
-> : ================================================ FILE: 64_6809_Target/tests/out.input100.c ================================================ Hello world 17 20 ================================================ FILE: 64_6809_Target/tests/out.input101.c ================================================ 0xff 0x0 ================================================ FILE: 64_6809_Target/tests/out.input106.c ================================================ 0x0 ================================================ FILE: 64_6809_Target/tests/out.input107.c ================================================ fish cow NULL ================================================ FILE: 64_6809_Target/tests/out.input108.c ================================================ ================================================ FILE: 64_6809_Target/tests/out.input109.c ================================================ 16 ================================================ FILE: 64_6809_Target/tests/out.input110.c ================================================ 18 12 45 5 ================================================ FILE: 64_6809_Target/tests/out.input111.c ================================================ 2029 ================================================ FILE: 64_6809_Target/tests/out.input112.c ================================================ 16 ================================================ FILE: 64_6809_Target/tests/out.input113.c ================================================ fred says hello ================================================ FILE: 64_6809_Target/tests/out.input114.c ================================================ J ================================================ FILE: 64_6809_Target/tests/out.input116.c ================================================ 0 1 2 3 4 ================================================ FILE: 64_6809_Target/tests/out.input117.c ================================================ Hello ================================================ FILE: 
64_6809_Target/tests/out.input119.c ================================================ 8 6 ================================================ FILE: 64_6809_Target/tests/out.input120.c ================================================ 9 10 11 12 13 7 8 9 10 11 ================================================ FILE: 64_6809_Target/tests/out.input121.c ================================================ 2 3 4 5 13 14 15 16 1000 1000 ================================================ FILE: 64_6809_Target/tests/out.input122.c ================================================ x 0, y 0, x || y 0, x && y 0 x 0, y 1, x || y 1, x && y 0 x 1, y 0, x || y 1, x && y 0 x 1, y 1, x || y 1, x && y 1 ================================================ FILE: 64_6809_Target/tests/out.input123.c ================================================ 0 infant composite 1 infant composite 2 infant prime 3 infant prime 4 infant composite 5 infant prime 6 infant composite 7 infant prime 8 infant composite 9 infant composite 10 infant composite 11 infant prime 12 infant composite 13 teen prime 14 teen composite 15 teen composite 16 teen composite 17 teen prime 18 teen composite 19 teen prime ================================================ FILE: 64_6809_Target/tests/out.input125.c ================================================ 2008 2008 ================================================ FILE: 64_6809_Target/tests/out.input127.c ================================================ 2008 2008 ================================================ FILE: 64_6809_Target/tests/out.input128.c ================================================ 10 10 15 15 20 20 ================================================ FILE: 64_6809_Target/tests/out.input130.c ================================================ Hello world ================================================ FILE: 64_6809_Target/tests/out.input131.c ================================================ Doing nothing... 
nothing done x is now 100 ================================================ FILE: 64_6809_Target/tests/out.input132.c ================================================ ================================================ FILE: 64_6809_Target/tests/out.input133.c ================================================ OK ================================================ FILE: 64_6809_Target/tests/out.input134.c ================================================ 1st match ================================================ FILE: 64_6809_Target/tests/out.input135.c ================================================ testing x ================================================ FILE: 64_6809_Target/tests/out.input136.c ================================================ -35 ================================================ FILE: 64_6809_Target/tests/out.input137.c ================================================ x is 36 ================================================ FILE: 64_6809_Target/tests/out.input138.c ================================================ 0 0 | 0 0 1 | 0 1 0 | 0 1 1 | 1 0 0 | 0 0 1 | 1 1 0 | 1 1 1 | 1 aptr is NULL or doesn't point at 1 aptr points at 1 ================================================ FILE: 64_6809_Target/tests/out.input139.c ================================================ same apparently ================================================ FILE: 64_6809_Target/tests/out.input140.c ================================================ 0 1 4 9 16 5 H ================================================ FILE: 64_6809_Target/tests/out.input143.c ================================================ One of the three is NULL One of the three is NULL One of the three is NULL All three are non-NULL ================================================ FILE: 64_6809_Target/tests/out.input145.c ================================================ q w e r e 3 5 7 9 7 ================================================ FILE: 64_6809_Target/tests/out.input146.c 
================================================ q w e r e r e r e 3 5 7 9 7 9 7 9 7 ================================================ FILE: 64_6809_Target/tests/out.input147.c ================================================ 6 9 6 9 ================================================ FILE: 64_6809_Target/tests/out.input148.c ================================================ i is 1 i is 2 i is 3 leftover owl ================================================ FILE: 64_6809_Target/tests/out.input149.c ================================================ 1 -4 2 -8 3 -12 4 -16 5 -21 6 -27 7 -34 8 -42 9 -51 10 -61 11 -72 12 -84 ================================================ FILE: 64_6809_Target/tests/out.input150.c ================================================ 1 2 3 4 5 6 7 8 9 1 2 3 4 5 fish ================================================ FILE: 64_6809_Target/tests/out.input151.c ================================================ 23 ================================================ FILE: 64_6809_Target/tests/out.input152.c ================================================ 0 1 3 7 8 9 ================================================ FILE: 64_6809_Target/tests/out.input153.c ================================================ class 1 prefix $ class 2 prefix % class 3 prefix % class 4 prefix $ class 5 prefix $ class 6 prefix % class 7 prefix % class 8 prefix % class 9 prefix % class 10 prefix % class 11 prefix % class 12 prefix % ================================================ FILE: 64_6809_Target/tests/out.input154.c ================================================ 23 23 23 17 17 23 23 23 23 23 ================================================ FILE: 64_6809_Target/tests/out.input155.c ================================================ 23 23 23 17 23 23 23 23 23 23 ================================================ FILE: 64_6809_Target/tests/out.input156.c ================================================ 7 8 ================================================ FILE: 
64_6809_Target/tests/out.input157.c ================================================ true ================================================ FILE: 64_6809_Target/tests/out.input159.c ================================================ 0 0 1 1 1 0 0 0 0 ================================================ FILE: 64_6809_Target/tests/out.input160.c ================================================ An # ================================================ FILE: 64_6809_Target/tests/out.input161.c ================================================ Fred ================================================ FILE: 64_6809_Target/tests/out.input162.c ================================================ search on char: Starts with c search on case: It's case search on break: Starts with b search on horse: Not found search on void: It's void search on piano: Not found ================================================ FILE: 64_6809_Target/tests/out.input163.c ================================================ break 1 case 2 char 3 void 4 volatile 5 horse 6 piano 6 ================================================ FILE: 64_6809_Target/tests/out.input164.c ================================================ dot comma difference is 20 ================================================ FILE: 64_6809_Target/tests/out.input165.c ================================================ Hello and Goodbye are different Goodbye and Fisherman are different Hello and Fisherman are different Fisherman and Fisherman are the same ================================================ FILE: 64_6809_Target/tests/out.input166.c ================================================ c 72 slash 0 c 105 slash 0 c 10 slash 1 ================================================ FILE: 64_6809_Target/tests/runtests ================================================ #!/bin/sh # Run each test and compare # against known good output # We need a CPU name if [ "$#" -ne 1 ] then echo "Usage: $0 cpuname, e.g $0 6809"; exit 1 fi # Build our compiler if needed if [ ! 
-f ../wcc ] then (cd ..; make install) fi # Try to use each input source file for i in input*c # We can't do anything if there's no file to test against do if [ ! -f "out.$i" -a ! -f "err.$i" ] then echo "Can't run test on $i, no output file!" # Output file: compile the source, run it and # capture the output, and compare it against # the known-good output else if [ -f "out.$i" ] then # Print the test name, compile it # with our compiler echo -n $i ../wcc -o out -m $1 $i emu6809 ./out > trial.$i rm -f out # Compare this agains the correct output cmp -s "out.$i" "trial.$i" # If different, announce failure # and print out the difference if [ "$?" -eq "1" ] then echo ": failed" diff -c "out.$i" "trial.$i" echo exit 0 # No failure, so announce success else echo ": OK" fi # Error file: compile the source and # capture the error messages. Compare # against the known-bad output. Same # mechanism as before else if [ -f "err.$i" ] then echo -n $i ../wcc -o out -m $1 $i 2> "trial.$i" cmp -s "err.$i" "trial.$i" if [ "$?" 
-eq "1" ] then echo ": failed" diff -c "err.$i" "trial.$i" echo else echo ": OK" fi fi fi fi rm -f out out.s "trial.$i" done ================================================ FILE: 64_6809_Target/tree.c ================================================ #include "defs.h" #include "data.h" #include "misc.h" #include "parse.h" #include "sym.h" #include "gen.h" // AST tree functions // Copyright (c) 2019,2024 Warren Toomey, GPL3 #undef DEBUG // Used to enumerate the AST nodes static int nodeid= 1; // Build and return a generic AST node struct ASTnode *mkastnode(int op, int type, struct symtable *ctype, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue) { struct ASTnode *n; // Malloc a new ASTnode n = (struct ASTnode *) malloc(sizeof(struct ASTnode)); if (n == NULL) fatal("Unable to malloc in mkastnode()"); // Copy in the field values and return it n->nodeid= nodeid++; n->op = op; n->type = type; n->ctype = ctype; n->left = left; n->mid = mid; n->right = right; n->leftid= 0; n->midid= 0; n->rightid= 0; #ifdef DEBUG fprintf(stderr, "mkastnodeA l %d m %d r %d\n", n->leftid, n->midid, n->rightid); #endif if (left!=NULL) n->leftid= left->nodeid; if (mid!=NULL) n->midid= mid->nodeid; if (right!=NULL) n->rightid= right->nodeid; #ifdef DEBUG fprintf(stderr, "mkastnodeB l %d m %d r %d\n", n->leftid, n->midid, n->rightid); #endif n->sym = sym; if (sym != NULL) { n->name= sym->name; n->symid= sym->id; } else { n->name= NULL; n->symid= 0; } n->a_intvalue = intvalue; n->linenum = 0; n->rvalue = 0; return (n); } // Make an AST leaf node struct ASTnode *mkastleaf(int op, int type, struct symtable *ctype, struct symtable *sym, int intvalue) { return (mkastnode(op, type, ctype, NULL, NULL, NULL, sym, intvalue)); } // Make a unary AST node: only one child struct ASTnode *mkastunary(int op, int type, struct symtable *ctype, struct ASTnode *left, struct symtable *sym, int intvalue) { return (mkastnode(op, type, ctype, left, NULL, NULL, sym, 
intvalue)); } // Free the given AST node void freeASTnode(struct ASTnode *tree) { if (tree==NULL) return; if (tree->name != NULL) free(tree->name); free(tree); } // Free the contents of a tree. Possibly // because of tree optimisation, sometimes // left and right are the same sub-nodes. // Free the names if asked to do so. void freetree(struct ASTnode *tree, int freenames) { if (tree==NULL) return; if (tree->left!=NULL) freetree(tree->left, freenames); if (tree->mid!=NULL) freetree(tree->mid, freenames); if (tree->right!=NULL && tree->right!=tree->left) freetree(tree->right, freenames); if (freenames && tree->name != NULL) free(tree->name); free(tree); } #ifndef WRITESYMS // We record the id of the last function that we loaded. // and the highest index in the array below static int lastFuncid= -1; static int hiFuncid; // We also keep an array of AST node offsets that // represent the functions in the AST file long *Funcoffset; // Given an AST node id, load that AST node from the AST file. // If nextfunc is set, find the next AST node which is a function. // Allocate and return the node or NULL if it can't be found. struct ASTnode *loadASTnode(int id, int nextfunc) { long offset, idxoff; struct ASTnode *node; // Do nothing if nothing to do if (id==0 && nextfunc==0) return(NULL); #ifdef DEBUG fprintf(stderr, "loadASTnode id %d nextfunc %d\n", id, nextfunc); if (id < 0) fatal("negative id in loadASTnode()"); #endif // Determine the offset of the node. // Use the function offset array, or // use the AST index file otherwise if (nextfunc==1) { lastFuncid++; if (lastFuncid > hiFuncid) return(NULL); offset= Funcoffset[lastFuncid]; } else { idxoff= id * sizeof(long); fseek(Idxfile, idxoff, SEEK_SET); fread(&offset, sizeof(long), 1, Idxfile); } // Allocate a node node= (struct ASTnode *)malloc(sizeof(struct ASTnode)); if (node==NULL) fatal("Cannot malloc an AST node in loadASTnode"); // Read the node in from the AST file. 
Give up if EOF fseek(Infile, offset, SEEK_SET); if (fread(node, sizeof(struct ASTnode), 1, Infile)!=1) { free(node); return(NULL); } #ifdef DEBUG // Check that the node we loaded was the one we wanted if (id!=0 && id!=node->nodeid) fprintf(stderr, "Wanted AST node id %d, got %d\n", id, node->nodeid); #endif // If there is a string/identifier literal, get it if (node->name!=NULL) { fgetstr(Text, TEXTLEN + 1, Infile); node->name= strdup(Text); if (node->name==NULL) fatal("Unable to malloc string literal in deserialiseAST()"); #ifndef DETREE // If this wasn't a string literal // search for the actual symbol and link it in if (node->op != A_STRLIT) { node->sym= findSymbol(NULL, 0, node->symid); if (node->sym==NULL) fatald("Can't find symbol with id", node->symid); } #endif } // Set the pointers to NULL to trip us up! node->left= node->mid= node->right= NULL; #ifndef DETREE // If this is a function, set the global // Functionid and create an endlabel for it. // Update the lastFuncnode too. if (node->op== A_FUNCTION) { Functionid= node->sym; Functionid->st_endlabel= genlabel(); } #endif // Return the node that we found #ifdef DEBUG fprintf(stderr, "Found AST node id %d\n", node->nodeid); #endif return(node); } // Using the open AST file and the newly-created // index file, build a list of AST file offsets // for each AST node in the AST file. 
void mkASTidxfile(void) { struct ASTnode *node; long offset, idxoff; // Allocate a node and at least some Funcoffset area node= (struct ASTnode *)malloc(sizeof(struct ASTnode)); Funcoffset= (long *)malloc(sizeof(long)); if (node==NULL || Funcoffset==NULL) fatal("Cannot malloc an AST node in loadASTnode"); while (1) { // Get the current offset offset = ftell(Infile); #ifdef DEBUG if (sizeof(long)==4) fprintf(stderr, "A offset %ld sizeof ASTnode %d\n", offset, sizeof(struct ASTnode)); else fprintf(stderr, "A offset %ld sizeof ASTnode %ld\n", offset, sizeof(struct ASTnode)); #endif // Read in the next node, stop if none if (fread(node, sizeof(struct ASTnode), 1, Infile)!=1) { break; } #ifdef DEBUG fprintf(stderr, "Node %d at offset %ld\n", node->nodeid, offset); fprintf(stderr, "Node %d left %d mid %d right %d\n", node->nodeid, node->leftid, node->midid, node->rightid); #endif // If there is a string/identifier literal, get it if (node->name!=NULL) { fgetstr(Text, TEXTLEN + 1, Infile); #ifdef DEBUG fprintf(stderr, " name %s\n", Text); #endif } // Save the node's offset at its index position in the file. 
idxoff= node->nodeid * sizeof(long); fseek(Idxfile, idxoff, SEEK_SET); fwrite(&offset, sizeof(long), 1, Idxfile); // If this node is a function, increase the size // of the function index array and save the offset if (node->op==A_FUNCTION) { lastFuncid++; Funcoffset= (long *)realloc(Funcoffset, sizeof(long)* (lastFuncid+1)); Funcoffset[lastFuncid]= offset; } } // Reset before we start using the array hiFuncid= lastFuncid; lastFuncid= -1; free(node); } #endif // WRITESYMS ================================================ FILE: 64_6809_Target/tree.h ================================================ /* tree.c */ struct ASTnode *mkastnode(int op, int type, struct symtable *ctype, struct ASTnode *left, struct ASTnode *mid, struct ASTnode *right, struct symtable *sym, int intvalue); struct ASTnode *mkastleaf(int op, int type, struct symtable *ctype, struct symtable *sym, int intvalue); struct ASTnode *mkastunary(int op, int type, struct symtable *ctype, struct ASTnode *left, struct symtable *sym, int intvalue); void freeASTnode(struct ASTnode *tree); void freetree(struct ASTnode *tree, int freenames); struct ASTnode *loadASTnode(int id, int nextfunc); void mkASTidxfile(void); ================================================ FILE: 64_6809_Target/tstring.c ================================================ // List of token strings, for debugging purposes char *Tstring[] = { "EOF", "=", "+=", "-=", "*=", "/=", "%=", "?", "||", "&&", "|", "^", "&", "==", "!=", "<", ">", "<=", ">=", "<<", ">>", "+", "-", "*", "/", "%", "++", "--", "~", "!", "void", "char", "int", "long", "if", "else", "while", "for", "return", "struct", "union", "enum", "typedef", "extern", "break", "continue", "switch", "case", "default", "sizeof", "static", "intlit", "strlit", ";", "identifier", "{", "}", "(", ")", "[", "]", ",", ".", "->", ":", "...", "charlit", "filename", "linenum" }; ================================================ FILE: 64_6809_Target/types.c 
================================================ #include "defs.h" #include "data.h" #include "gen.h" #include "misc.h" #include "target.h" #include "tree.h" // Types and type handling // Copyright (c) 2019 Warren Toomey, GPL3 // Return true if a type is an int type // of any size, false otherwise int inttype(int type) { return (((type & 0xf) == 0) && (type >= P_CHAR && type <= P_LONG)); } // Return true if a type is of pointer type int ptrtype(int type) { return ((type & 0xf) != 0); } // Given a primitive type, return // the type which is a pointer to it int pointer_to(int type) { if ((type & 0xf) == 0xf) fatald("Unrecognised in pointer_to: type", type); return (type + 1); } // Given a pointer type, return // the type which it points to int value_at(int type) { if ((type & 0xf) == 0x0) fatald("Unrecognised in value_at: type", type); return (type - 1); } // Given a type and a composite type pointer, return // the size of this type in bytes int typesize(int type, struct symtable *ctype) { if (type == P_STRUCT || type == P_UNION) return (ctype->size); return (genprimsize(type)); } // Given an AST tree and a type which we want it to become, // possibly modify the tree by widening or scaling so that // it is compatible with this type. Return the original tree // if no changes occurred, a modified tree, or NULL if the // tree is not compatible with the given type. // If this will be part of a binary operation, the AST op is not zero. 
struct ASTnode *modify_type(struct ASTnode *tree, int rtype, struct symtable *rctype, int op) { int ltype; int lsize, rsize; ltype = tree->type; // For A_LOGOR and A_LOGAND, both types have to be int or pointer types if (op==A_LOGOR || op==A_LOGAND) { if (!inttype(ltype) && !ptrtype(ltype)) return(NULL); if (!inttype(ltype) && !ptrtype(rtype)) return(NULL); return (tree); } // No idea on these yet if (ltype == P_STRUCT || ltype == P_UNION) fatal("Don't know how to do this yet"); if (rtype == P_STRUCT || rtype == P_UNION) fatal("Don't know how to do this yet"); // Compare scalar int types if (inttype(ltype) && inttype(rtype)) { // Both types same, nothing to do if (ltype == rtype) return (tree); // Get the sizes for each type lsize = typesize(ltype, NULL); rsize = typesize(rtype, NULL); // The tree's type size is too big and we can't narrow if (lsize > rsize) return (NULL); // Widen to the right if (rsize > lsize) return (mkastunary(A_WIDEN, rtype, NULL, tree, NULL, 0)); } // For pointers if (ptrtype(ltype) && ptrtype(rtype)) { // We can compare them if (op >= A_EQ && op <= A_GE) return (tree); // NOTE We can do subtraction, but we should unscale // by the size of the things that the pointers point at. // For now, we only do char pointers. if (op== A_SUBTRACT && ltype== pointer_to(P_CHAR) && ltype==rtype) { tree->type= P_INT; return (tree); } // A comparison of the same type for a non-binary operation is OK, // or when either tree is of `void *` type. 
if (op == 0 && (ltype == rtype || ltype == pointer_to(P_VOID) || rtype == pointer_to(P_VOID)) ) return (tree); } // We can scale only on add and subtract operations if (op == A_ADD || op == A_SUBTRACT || op == A_ASPLUS || op == A_ASMINUS) { // Left is int type, right is pointer type and the size // of the original type is >1: scale the left if (inttype(ltype) && ptrtype(rtype)) { rsize = typesize(value_at(rtype), rctype); if (rsize > 1) return (mkastunary(A_SCALE, rtype, rctype, tree, NULL, rsize)); else // No need to scale, but we need to widen to pointer size return (mkastunary(A_WIDEN, rtype, NULL, tree, NULL, 0)); } } // If we get here, the types are not compatible return (NULL); } ================================================ FILE: 64_6809_Target/types.h ================================================ /* types.c */ int inttype(int type); int ptrtype(int type); int pointer_to(int type); int value_at(int type); int typesize(int type, struct symtable *ctype); struct ASTnode *modify_type(struct ASTnode *tree, int rtype, struct symtable *rctype, int op); ================================================ FILE: 64_6809_Target/wcc.c ================================================ #include #include #include #include #include #include #include "dirs.h" #include "wcc.h" // Compiler setup and top-level execution // Copyright (c) 2024 Warren Toomey, GPL3 // List of phases #define CPP_PHASE 0 #define TOK_PHASE 1 #define PARSE_PHASE 2 #define GEN_PHASE 3 #define QBEPEEP_PHASE 4 // Either run QBE or the peephole optimiser #define ASM_PHASE 5 #define LINK_PHASE 6 // A struct to keep a linked list of filenames struct filelist { char *name; struct filelist *next; }; #define CPU_QBE 1 #define CPU_6809 2 // Global variables int cpu = CPU_QBE; // What CPU/platform we are targetting int last_phase = LINK_PHASE; // Which is the last phase int verbose = 0; // Print out the phase details? int keep_tempfiles = 0; // Keep temporary files? 
char *outname = NULL; // Output filename, if any char *initname; // File name given to us // List of commands and object files char **phasecmd; char **cppflags; char **preobjs; char **postobjs; // And -D preprocessor words get added here #define MAXCPPEXTRA 20 char *cppextra[20]; int cppxindex=0; // Lists of temp and object files struct filelist *Tmphead, *Tmptail; struct filelist *Objhead, *Objtail; #define MAXCMDARGS 500 char *cmdarg[MAXCMDARGS]; // List of arguments to a command int cmdcount = 0; // Number of command arguments // Alter the last letter of the initial filename char *alter_suffix(char ch) { char *str = strdup(initname); char *cptr = str + strlen(str) - 1; *cptr = ch; return (str); } // Add a name to the list of temporary files void addtmpname(char *name) { struct filelist *this; this = (struct filelist *) malloc(sizeof(struct filelist)); this->name = name; this->next = NULL; if (Tmphead == NULL) Tmphead = Tmptail = this; else { Tmptail->next = this; Tmptail = this; } } // Remove temporary files and exit void Exit(int val) { struct filelist *this; if (keep_tempfiles == 0) for (this = Tmphead; this != NULL; this = this->next) unlink(this->name); exit(val); } // Add a name to the list of object files void addobjname(char *name) { struct filelist *this; this = (struct filelist *) malloc(sizeof(struct filelist)); this->name = name; this->next = NULL; if (Objhead == NULL) Objhead = Objtail = this; else { Objtail->next = this; Objtail = this; } } // Clear the list of command arguments void clear_cmdarg(void) { cmdcount = 0; } // Add an argument to the list of command arguments void add_cmdarg(char *str) { if (cmdcount == MAXCMDARGS) { fprintf(stderr, "Out of space in cmdargs\n"); Exit(1); } cmdarg[cmdcount++] = str; } // Return 1 if the string ends in '.' 
// then the given character, 0 otherwise int endswith(char *str, char ch) { int len = strlen(str); if (len < 2) return (0); if (str[len - 1] != ch) return (0); if (str[len - 2] != '.') return (0); return (1); } // Given a filename, open it for writing or Exit FILE *fopenw(char *filename) { FILE *f = fopen(filename, "w"); if (f == NULL) { fprintf(stderr, "Unable to write to file %s\n", filename); Exit(1); } return (f); } // Given a filename and a desired suffix, // return the name of a temporary file // which can be written to, or return NULL // if a temporary file cannot be made char *newtempfile(char *origname, char *suffix) { char *name; FILE *handle; // First attempt: just add the suffix to the original name name = (char *) malloc(strlen(origname) + strlen(suffix) + 1); if (name != NULL) { strcpy(name, origname); strcat(name, suffix); // Now try to open it handle = fopen(name, "w"); if (handle != NULL) { fclose(handle); addtmpname(name); return (name); } } // That filename didn't work. Try one in the TMPDIR name = (char *) malloc(strlen(TMPDIR) + strlen(suffix) + 20); if (name == NULL) return (NULL); sprintf(name, "%s/%s_XXXXXX", TMPDIR, suffix); // Now try to open it handle = fopenw(name); if (handle != NULL) { fclose(handle); addtmpname(name); return (name); } return (NULL); } // Run the command with arguments in cmdarg[]. // Replace stdin/stdout by opening in/out as required. // If the command doesn't Exit(0), stop. 
void run_command(char *in, char *out) { int i, pid, wstatus; FILE *fh; if (verbose) { fprintf(stderr, "Doing: "); for (i = 0; cmdarg[i] != NULL; i++) fprintf(stderr, "%s ", cmdarg[i]); fprintf(stderr, "\n"); if (in != NULL) fprintf(stderr, " redirecting stdin from %s\n", in); if (out != NULL) fprintf(stderr, " redirecting stdout to %s\n", out); } pid = fork(); switch (pid) { case -1: fprintf(stderr, "fork failed\n"); Exit(1); // Child process case 0: // Close stdin/stdout as required if (in != NULL) { fh = freopen(in, "r", stdin); if (fh == NULL) { fprintf(stderr, "Unable to freopen %s for reading\n", in); Exit(1); } } if (out != NULL) { fh = freopen(out, "w", stdout); if (fh == NULL) { fprintf(stderr, "Unable to freopen %s for writing\n", out); Exit(1); } } execvp(cmdarg[0], cmdarg); fprintf(stderr, "exec %s failed\n", cmdarg[0]); // The parent: wait for child to exit cleanly default: if (waitpid(pid, &wstatus, 0) == -1) { fprintf(stderr, "waitpid failed\n"); Exit(1); } // Get the child's Exit status and get the parent // to Exit(1) if the Exit status was not zero if (WIFEXITED(wstatus)) { if (WEXITSTATUS(wstatus) != 0) Exit(1); } else { fprintf(stderr, "child phase didn't exit\n"); Exit(1); } // The child phase was successful return; } } // Pre-process the file using the C pre-processor char *do_preprocess(char *name) { int i; char *tempname; // Build the command clear_cmdarg(); add_cmdarg(phasecmd[CPP_PHASE]); for (i = 0; cppflags[i] != NULL; i++) add_cmdarg(cppflags[i]); for (i = 0; i < cppxindex; i++) { add_cmdarg("-D"); add_cmdarg(cppextra[i]); } add_cmdarg(name); add_cmdarg(NULL); // If this is the last phase, use outname // as the output file, or stdout if NULL. 
if (last_phase == CPP_PHASE) { run_command(NULL, outname); Exit(0); } // Not the last phase, make a temp file tempname = newtempfile(initname, "_cpp"); run_command(NULL, tempname); return (tempname); } // Assemble the given filename char *do_assemble(char *name) { char *tempname; // If this is the last phase, use outname if // not NULL, or change the original file's suffix if (last_phase == ASM_PHASE) { if (outname == NULL) outname = alter_suffix('o'); tempname = outname; } else { // Not the last phase, make a temp filename tempname = newtempfile(initname, "_o"); } // Build and run the assembler command clear_cmdarg(); add_cmdarg(phasecmd[ASM_PHASE]); add_cmdarg("-o"); add_cmdarg(tempname); add_cmdarg(name); add_cmdarg(NULL); run_command(NULL, NULL); // Stop now if we are the last phase if (last_phase == ASM_PHASE) Exit(0); return (tempname); } // Run several compiler phases to take a // pre-processed C file to an assembly file char *do_compile(char *name) { char *tokname, *symname, *astname; char *idxname, *qbename, *asmname; // We need to run the scanner, the parser // and the code generator. Get a temp filename // for the scanner's output. tokname = newtempfile(initname, "_tok"); // Build and run the scanner command clear_cmdarg(); add_cmdarg(phasecmd[TOK_PHASE]); add_cmdarg(NULL); run_command(name, tokname); // Get temp filenames for the parser's output symname = newtempfile(initname, "_sym"); astname = newtempfile(initname, "_ast"); // Build and run the parser command clear_cmdarg(); add_cmdarg(phasecmd[PARSE_PHASE]); add_cmdarg(symname); add_cmdarg(astname); add_cmdarg(NULL); run_command(tokname, NULL); // Get some temporary filenames even // if we don't use them. idxname = newtempfile(initname, "_idx"); qbename = newtempfile(initname, "_qbe"); asmname = newtempfile(initname, "_s"); // If this phase (compile to assembly) is // the last, use outname if not NULL, // or change the original file's suffix. 
if (last_phase == GEN_PHASE) { if (outname == NULL) outname = alter_suffix('s'); asmname = outname; } // Before we run the code generator, see // if the next (QBE or peephole) phase exists. // If not, we go straight to assembly code, so // change the output file's name if (phasecmd[QBEPEEP_PHASE] == NULL) { qbename = asmname; } // Build and run the code generator command clear_cmdarg(); add_cmdarg(phasecmd[GEN_PHASE]); add_cmdarg(symname); add_cmdarg(astname); add_cmdarg(idxname); add_cmdarg(NULL); run_command(NULL, qbename); // Build and run the QBE command or the // peephole optmiser if needed if (phasecmd[QBEPEEP_PHASE] != NULL) { clear_cmdarg(); add_cmdarg(phasecmd[QBEPEEP_PHASE]); if (cpu== CPU_QBE) { add_cmdarg("-o"); add_cmdarg(asmname); add_cmdarg(qbename); } if (cpu== CPU_6809) { add_cmdarg("-o"); add_cmdarg(asmname); add_cmdarg(qbename); add_cmdarg(LIB6809DIR "/rules.6809"); } add_cmdarg(NULL); run_command(NULL, NULL); } // Stop now if we are the last phase if (last_phase == GEN_PHASE) Exit(0); return (asmname); } // Link the final executable with all // the object files void do_link(void) { int i; struct filelist *this; // Build the command clear_cmdarg(); add_cmdarg(phasecmd[LINK_PHASE]); add_cmdarg("-o"); add_cmdarg(outname); // Insert any files that must come first for (i = 0; preobjs[i] != NULL; i++) add_cmdarg(preobjs[i]); // Now add on all the object files and library names for (this = Objhead; this != NULL; this = this->next) add_cmdarg(this->name); // Insert any files that must come at the end for (i = 0; postobjs[i] != NULL; i++) add_cmdarg(postobjs[i]); add_cmdarg(NULL); run_command(NULL, NULL); } // Given a CPU/platform name, change the phase // programs and object files void set_phaseprograms(char *cpuname) { if (!strcmp(cpuname, "qbe")) { phasecmd = qbephasecmd; cppflags = qbecppflags; preobjs = qbepreobjs; postobjs = qbepostobjs; cpu= CPU_QBE; return; } if (!strcmp(cpuname, "6809")) { phasecmd = phasecmd6809; cppflags = cppflags6809; 
preobjs = preobjs6809; postobjs = postobjs6809; cpu= CPU_6809; return; } fprintf(stderr, "Unknown CPU/patform: %s\n", cpuname); Exit(1); } // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "Usage: %s [-vcESX] [-D ...] [-m CPU] [-o outfile] file [file ...]\n", prog); fprintf(stderr, " -v give verbose output of the compilation stages\n"); fprintf(stderr, " -c generate object files but don't link them\n"); fprintf(stderr, " -E pre-process the file, output on stdout\n"); fprintf(stderr, " -S generate assembly files but don't link them\n"); fprintf(stderr, " -X keep temporary files for debugging\n"); fprintf(stderr, " -D ..., set a pre-processor define\n"); fprintf(stderr, " -m CPU, set the CPU e.g. -m 6809, -m qbe\n"); fprintf(stderr, " -o outfile, produce the outfile executable file\n"); Exit(1); } // Main program: check arguments, or print a usage // if we don't have any arguments. int main(int argc, char **argv) { int i, opt; phasecmd = qbephasecmd; cppflags = qbecppflags; preobjs = qbepreobjs; postobjs = qbepostobjs; // Get the options if (argc < 2) usage(argv[0]); while ((opt = getopt(argc, argv, "vcESXo:m:D:")) != -1) { switch (opt) { case 'v': verbose = 1; break; case 'c': last_phase = ASM_PHASE; break; case 'E': last_phase = CPP_PHASE; break; case 'S': last_phase = GEN_PHASE; break; case 'X': keep_tempfiles = 1; break; case 'm': set_phaseprograms(optarg); break; case 'o': outname = optarg; break; case 'D': if (cppxindex >= MAXCPPEXTRA) { fprintf(stderr, "Too many -D arguments\n"); Exit(1); } cppextra[cppxindex]= optarg; cppxindex++; break; } } // Now process the filenames after the arguments if (optind >= argc) usage(argv[0]); for (i = optind; i < argc; i++) { initname = argv[i]; if (endswith(argv[i], 'c')) { // A C source file, do all major phases addobjname(do_assemble(do_compile(do_preprocess(argv[i])))); } else if (endswith(argv[i], 's')) { // An assembly file, just assemble addobjname(do_assemble(argv[i])); } else 
if (endswith(argv[i], 'o')) { // Add object files to the list addobjname(argv[i]); } else { fprintf(stderr, "Input file with unrecognised suffix: %s\n", argv[i]); usage(argv[0]); } } // Now link all the object files together if (outname == NULL) outname = AOUT; do_link(); Exit(0); return (0); } ================================================ FILE: 64_6809_Target/wcc.h ================================================ #define AOUT "a.out" #define TMPDIR "/tmp" ///////////////// // QBE SECTION // ///////////////// // List of phase command strings char *qbephasecmd[]= { "cpp", // C pre-processor BINDIR "/cscan", // Tokeniser BINDIR "/cparseqbe", // Parser BINDIR "/cgenqbe", // Code generator "qbe", // QBE to assembler "as", // Assembler "cc" // Linker }; // List of C preprocessor flags char *qbecppflags[]= { "-nostdinc", "-isystem", INCQBEDIR, NULL }; // List of object files that // must precede any compiled ones // e.g. crt0.o files char *qbepreobjs[]= { NULL }; // List of object files and/or // libraries that must come // after any compiled ones char *qbepostobjs[]= { NULL }; ////////////////// // 6809 SECTION // ////////////////// // List of phase command strings char *phasecmd6809[]= { "cpp", // C pre-processor BINDIR "/cscan", // Tokeniser BINDIR "/cparse6809", // Parser BINDIR "/cgen6809", // Code generator BINDIR "/cpeep", // Peephole optmiser "as6809", // Assembler "ld6809" // Linker }; // List of C preprocessor flags char *cppflags6809[]= { "-nostdinc", "-isystem", INC6809DIR, NULL }; // List of object files that // must precede any compiled ones // e.g. 
// crt0.o files
// The list is NULL-terminated: do_link() in wcc.c adds
// entries to the link command until it reaches NULL.
char *preobjs6809[]= {
  LIB6809DIR "/crt0.o",
  NULL
};

// List of object files and/or
// libraries that must come
// after any compiled ones
// (also NULL-terminated)
char *postobjs6809[]= {
  LIB6809DIR "/libc.a",
  LIB6809DIR "/lib6809.a",
  NULL
};

================================================
FILE: LICENSE
================================================
GNU GENERAL PUBLIC LICENSE
Version 3, 29 June 2007

Copyright (C) 2007 Free Software Foundation, Inc.
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.

Preamble

The GNU General Public License is a free, copyleft license for
software and other kinds of works.

The licenses for most software and other practical works are designed
to take away your freedom to share and change the works. By contrast,
the GNU General Public License is intended to guarantee your freedom to
share and change all versions of a program--to make sure it remains free
software for all its users. We, the Free Software Foundation, use the
GNU General Public License for most of our software; it applies also to
any other work released this way by its authors. You can apply it to
your programs, too.

When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
them if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs, and that you know you can do these things.

To protect your rights, we need to prevent others from denying you
these rights or asking you to surrender the rights. Therefore, you have
certain responsibilities if you distribute copies of the software, or if
you modify it: responsibilities to respect the freedom of others.
For example, if you distribute copies of such a program, whether gratis or for a fee, you must pass on to the recipients the same freedoms that you received. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. Developers that use the GNU GPL protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License giving you legal permission to copy, distribute and/or modify it. For the developers' and authors' protection, the GPL clearly explains that there is no warranty for this free software. For both users' and authors' sake, the GPL requires that modified versions be marked as changed, so that their problems will not be attributed erroneously to authors of previous versions. Some devices are designed to deny users access to install or run modified versions of the software inside them, although the manufacturer can do so. This is fundamentally incompatible with the aim of protecting users' freedom to change the software. The systematic pattern of such abuse occurs in the area of products for individuals to use, which is precisely where it is most unacceptable. Therefore, we have designed this version of the GPL to prohibit the practice for those products. If such problems arise substantially in other domains, we stand ready to extend this provision to those domains in future versions of the GPL, as needed to protect the freedom of users. Finally, every program is threatened constantly by software patents. States should not allow patents to restrict development and use of software on general-purpose computers, but in those that do, we wish to avoid the special danger that patents applied to a free program could make it effectively proprietary. To prevent this, the GPL assures that patents cannot be used to render the program non-free. The precise terms and conditions for copying, distribution and modification follow. TERMS AND CONDITIONS 0. 
Definitions. "This License" refers to version 3 of the GNU General Public License. "Copyright" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. "The Program" refers to any copyrightable work licensed under this License. Each licensee is addressed as "you". "Licensees" and "recipients" may be individuals or organizations. To "modify" a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a "modified version" of the earlier work or a work "based on" the earlier work. A "covered work" means either the unmodified Program or a work based on the Program. To "propagate" a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well. To "convey" a work means any kind of propagation that enables other parties to make or receive copies. Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying. An interactive user interface displays "Appropriate Legal Notices" to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion. 1. Source Code. The "source code" for a work means the preferred form of the work for making modifications to it. 
"Object code" means any non-source form of a work. A "Standard Interface" means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language. The "System Libraries" of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. A "Major Component", in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it. The "Corresponding Source" for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work. The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source. The Corresponding Source for a work in source code form is that same work. 2. 
Basic Permissions. All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law. You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you. Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary. 3. Protecting Users' Legal Rights From Anti-Circumvention Law. No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures. 
When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures. 4. Conveying Verbatim Copies. You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program. You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee. 5. Conveying Modified Source Versions. You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions: a) The work must carry prominent notices stating that you modified it, and giving a relevant date. b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to "keep intact all notices". c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. 
This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it. d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so. A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an "aggregate" if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate. 6. Conveying Non-Source Forms. You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways: a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange. 
b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge. c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b. d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements. e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d. 
A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work. A "User Product" is either (1) a "consumer product", which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, "normally used" refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product. "Installation Information" for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made. If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. 
But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM). The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network. Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying. 7. Additional Terms. "Additional permissions" are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions. When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission. 
Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms: a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or d) Limiting the use for publicity purposes of names of licensors or authors of the material; or e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors. All other non-permissive additional terms are considered "further restrictions" within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying. 
If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms. Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way. 8. Termination. You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11). However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation. Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice. Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10. 9. Acceptance Not Required for Having Copies. You are not required to accept this License in order to receive or run a copy of the Program. 
Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so. 10. Automatic Licensing of Downstream Recipients. Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License. An "entity transaction" is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts. You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it. 11. Patents. A "contributor" is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. 
The work thus licensed is called the contributor's "contributor version". A contributor's "essential patent claims" are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, "control" includes the right to grant patent sublicenses in a manner consistent with the requirements of this License. Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version. In the following three paragraphs, a "patent license" is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To "grant" such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party. If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. 
"Knowingly relying" means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid. If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it. A patent license is "discriminatory" if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007. Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law. 12. No Surrender of Others' Freedom. 
If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program. 13. Use with the GNU Affero General Public License. Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU Affero General Public License into a single combined work, and to convey the resulting work. The terms of this License will continue to apply to the part which is the covered work, but the special requirements of the GNU Affero General Public License, section 13, concerning interaction through a network will apply to the combination as such. 14. Revised Versions of this License. The Free Software Foundation may publish revised and/or new versions of the GNU General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU General Public License, you may choose any version ever published by the Free Software Foundation. 
If the Program specifies that a proxy can decide which future versions of the GNU General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program. Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version. 15. Disclaimer of Warranty. THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. Limitation of Liability. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 17. Interpretation of Sections 15 and 16. 
If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. <one line to give the program's name and a brief idea of what it does.> Copyright (C) <year> <name of author> This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <https://www.gnu.org/licenses/>. Also add information on how to contact you by electronic and paper mail. If the program does terminal interaction, make it output a short notice like this when it starts in an interactive mode: <program> Copyright (C) <year> <name of author> This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. 
The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, your program's commands might be different; for a GUI interface, you would use an "about box". You should also get your employer (if you work as a programmer) or school, if any, to sign a "copyright disclaimer" for the program, if necessary. For more information on this, and how to apply and follow the GNU GPL, see <https://www.gnu.org/licenses/>. The GNU General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. But first, please read <https://www.gnu.org/licenses/why-not-lgpl.html>. ================================================ FILE: Readme.md ================================================ # A Compiler Writing Journey In this GitHub repository, I'm documenting my journey to write a self-compiling compiler for a subset of the C language. I'm also writing out the details so that, if you want to follow along, there will be an explanation of what I did and why, with some references back to the theory of compilers. But not too much theory: I want this to be a practical journey. 
Here are the steps I've taken so far: + [Part 0](00_Introduction/Readme.md): Introduction to the Journey + [Part 1](01_Scanner/Readme.md): Introduction to Lexical Scanning + [Part 2](02_Parser/Readme.md): Introduction to Parsing + [Part 3](03_Precedence/Readme.md): Operator Precedence + [Part 4](04_Assembly/Readme.md): An Actual Compiler + [Part 5](05_Statements/Readme.md): Statements + [Part 6](06_Variables/Readme.md): Variables + [Part 7](07_Comparisons/Readme.md): Comparison Operators + [Part 8](08_If_Statements/Readme.md): If Statements + [Part 9](09_While_Loops/Readme.md): While Loops + [Part 10](10_For_Loops/Readme.md): For Loops + [Part 11](11_Functions_pt1/Readme.md): Functions, part 1 + [Part 12](12_Types_pt1/Readme.md): Types, part 1 + [Part 13](13_Functions_pt2/Readme.md): Functions, part 2 + [Part 14](14_ARM_Platform/Readme.md): Generating ARM Assembly Code + [Part 15](15_Pointers_pt1/Readme.md): Pointers, part 1 + [Part 16](16_Global_Vars/Readme.md): Declaring Global Variables Properly + [Part 17](17_Scaling_Offsets/Readme.md): Better Type Checking and Pointer Offsets + [Part 18](18_Lvalues_Revisited/Readme.md): Lvalues and Rvalues Revisited + [Part 19](19_Arrays_pt1/Readme.md): Arrays, part 1 + [Part 20](20_Char_Str_Literals/Readme.md): Character and String Literals + [Part 21](21_More_Operators/Readme.md): More Operators + [Part 22](22_Design_Locals/Readme.md): Design Ideas for Local Variables and Function Calls + [Part 23](23_Local_Variables/Readme.md): Local Variables + [Part 24](24_Function_Params/Readme.md): Function Parameters + [Part 25](25_Function_Arguments/Readme.md): Function Calls and Arguments + [Part 26](26_Prototypes/Readme.md): Function Prototypes + [Part 27](27_Testing_Errors/Readme.md): Regression Testing and a Nice Surprise + [Part 28](28_Runtime_Flags/Readme.md): Adding More Run-time Flags + [Part 29](29_Refactoring/Readme.md): A Bit of Refactoring + [Part 30](30_Design_Composites/Readme.md): Designing Structs, Unions and Enums + 
[Part 31](31_Struct_Declarations/Readme.md): Implementing Structs, Part 1 + [Part 32](32_Struct_Access_pt1/Readme.md): Accessing Members in a Struct + [Part 33](33_Unions/Readme.md): Implementing Unions and Member Access + [Part 34](34_Enums_and_Typedefs/Readme.md): Enums and Typedefs + [Part 35](35_Preprocessor/Readme.md): The C Pre-Processor + [Part 36](36_Break_Continue/Readme.md): `break` and `continue` + [Part 37](37_Switch/Readme.md): Switch Statements + [Part 38](38_Dangling_Else/Readme.md): Dangling Else and More + [Part 39](39_Var_Initialisation_pt1/Readme.md): Variable Initialisation, part 1 + [Part 40](40_Var_Initialisation_pt2/Readme.md): Global Variable Initialisation + [Part 41](41_Local_Var_Init/Readme.md): Local Variable Initialisation + [Part 42](42_Casting/Readme.md): Type Casting and NULL + [Part 43](43_More_Operators/Readme.md): Bugfixes and More Operators + [Part 44](44_Fold_Optimisation/Readme.md): Constant Folding + [Part 45](45_Globals_Again/Readme.md): Global Variable Declarations, revisited + [Part 46](46_Void_Functions/Readme.md): Void Function Parameters and Scanning Changes + [Part 47](47_Sizeof/Readme.md): A Subset of `sizeof` + [Part 48](48_Static/Readme.md): A Subset of `static` + [Part 49](49_Ternary/Readme.md): The Ternary Operator + [Part 50](50_Mop_up_pt1/Readme.md): Mopping Up, part 1 + [Part 51](51_Arrays_pt2/Readme.md): Arrays, part 2 + [Part 52](52_Pointers_pt2/Readme.md): Pointers, part 2 + [Part 53](53_Mop_up_pt2/Readme.md): Mopping Up, part 2 + [Part 54](54_Reg_Spills/Readme.md): Spilling Registers + [Part 55](55_Lazy_Evaluation/Readme.md): Lazy Evaluation + [Part 56](56_Local_Arrays/Readme.md): Local Arrays + [Part 57](57_Mop_up_pt3/Readme.md): Mopping Up, part 3 + [Part 58](58_Ptr_Increments/Readme.md): Fixing Pointer Increments/Decrements + [Part 59](59_WDIW_pt1/Readme.md): Why Doesn't It Work, part 1 + [Part 60](60_TripleTest/Readme.md): Passing the Triple Test + [Part 61](61_What_Next/Readme.md): What's Next? 
+ [Part 62](62_Cleanup/Readme.md): Code Cleanup + [Part 63](63_QBE/Readme.md): A New Backend using QBE + [Part 64](64_6809_Target/Readme.md): A Backend for the 6809 CPU I've stopped work on *acwj* and now I'm writing a new language called [alic](https://github.com/DoctorWkt/alic) from scratch. Check it out! ## Copyrights I have borrowed some of the code, and lots of ideas, from the [SubC](http://www.t3x.org/subc/) compiler written by Nils M Holm. His code is in the public domain. I think that my code is substantially different enough that I can apply a different license to my code. Unless otherwise noted, + all source code and scripts are (c) Warren Toomey under the GPL3 license. + all non-source code documents (e.g. English documents, image files) are (c) Warren Toomey under the Creative Commons BY-NC-SA 4.0 license.